lsst-pipe-base 30.2026.200-py3-none-any.whl → 30.2026.400-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. lsst/pipe/base/_instrument.py +10 -12
  2. lsst/pipe/base/_status.py +29 -10
  3. lsst/pipe/base/automatic_connection_constants.py +9 -1
  4. lsst/pipe/base/cli/cmd/__init__.py +16 -2
  5. lsst/pipe/base/cli/cmd/commands.py +42 -4
  6. lsst/pipe/base/connectionTypes.py +72 -160
  7. lsst/pipe/base/connections.py +3 -6
  8. lsst/pipe/base/execution_reports.py +0 -5
  9. lsst/pipe/base/log_capture.py +8 -4
  10. lsst/pipe/base/log_on_close.py +79 -0
  11. lsst/pipe/base/mp_graph_executor.py +51 -15
  12. lsst/pipe/base/pipeline.py +3 -4
  13. lsst/pipe/base/pipelineIR.py +0 -6
  14. lsst/pipe/base/pipelineTask.py +5 -7
  15. lsst/pipe/base/pipeline_graph/_edges.py +19 -7
  16. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
  17. lsst/pipe/base/quantum_graph/_common.py +7 -4
  18. lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
  19. lsst/pipe/base/quantum_graph/_predicted.py +111 -10
  20. lsst/pipe/base/quantum_graph/_provenance.py +727 -26
  21. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +26 -50
  22. lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
  23. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
  24. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +48 -234
  25. lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
  26. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +24 -18
  27. lsst/pipe/base/quantum_graph/aggregator/_writer.py +33 -350
  28. lsst/pipe/base/quantum_graph/formatter.py +171 -0
  29. lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
  30. lsst/pipe/base/quantum_graph_executor.py +116 -13
  31. lsst/pipe/base/quantum_provenance_graph.py +17 -2
  32. lsst/pipe/base/separable_pipeline_executor.py +18 -2
  33. lsst/pipe/base/single_quantum_executor.py +59 -41
  34. lsst/pipe/base/struct.py +4 -0
  35. lsst/pipe/base/version.py +1 -1
  36. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
  37. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +45 -42
  38. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
  39. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
  40. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
  41. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
  42. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
  43. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
  44. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
  45. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
lsst/pipe/base/_instrument.py CHANGED
@@ -31,7 +31,6 @@ __all__ = ("Instrument",)
 
 import contextlib
 import datetime
-import os.path
 from abc import ABCMeta, abstractmethod
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any, Self, cast, final
@@ -39,6 +38,7 @@ from typing import TYPE_CHECKING, Any, Self, cast, final
 from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
 from lsst.daf.butler.registry import DataIdError
 from lsst.pex.config import Config, RegistryField
+from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils import doImportType
 from lsst.utils.introspection import get_full_type_name
 
@@ -65,7 +65,7 @@ class Instrument(metaclass=ABCMeta):
     the base class.
     """
 
-    configPaths: Sequence[str] = ()
+    configPaths: Sequence[ResourcePathExpression] = ()
     """Paths to config files to read for specific Tasks.
 
     The paths in this list should contain files of the form `task.py`, for
@@ -109,6 +109,10 @@ class Instrument(metaclass=ABCMeta):
             If `True` (`False` is default), update existing records if they
             differ from the new ones.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.registry.ConflictingDefinitionError
@@ -127,13 +131,6 @@ class Instrument(metaclass=ABCMeta):
         the level of individual dimension entries; new detectors and filters
         should be added, but changes to any existing record should not be.
         This can generally be achieved via a block like
-
-        .. code-block:: python
-
-            with registry.transaction():
-                registry.syncDimensionData("instrument", ...)
-                registry.syncDimensionData("detector", ...)
-                self.registerFilters(registry)
         """
         raise NotImplementedError()
 
@@ -366,9 +363,10 @@ class Instrument(metaclass=ABCMeta):
             Config instance to which overrides should be applied.
         """
         for root in self.configPaths:
-            path = os.path.join(root, f"{name}.py")
-            if os.path.exists(path):
-                config.load(path)
+            resource = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
+            uri = resource.join(f"{name}.py", forceDirectory=False)
+            if uri.exists():
+                config.load(uri)
 
     @staticmethod
     def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str:
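Note on the `applyConfigOverrides` change above: entries in `configPaths` are now resolved with `lsst.resources.ResourcePath` rather than `os.path`, so config override directories may be given as remote URIs as well as local paths. A minimal sketch of the new lookup, using only the calls that appear in the diff; the directory names and the `isr` task label are illustrative, not taken from the package:

    from lsst.resources import ResourcePath

    # Illustrative configPaths entries; an Instrument subclass supplies its own.
    config_paths = ["/path/to/obs_configs", "s3://some-bucket/obs_configs"]
    task_name = "isr"

    for root in config_paths:
        # Treat each entry as a directory; relative paths become absolute (CWD-based).
        resource = ResourcePath(root, forceDirectory=True, forceAbsolute=True)
        # Look for an override file named after the task, e.g. "isr.py".
        uri = resource.join(f"{task_name}.py", forceDirectory=False)
        if uri.exists():
            print(f"would load config overrides from {uri}")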
lsst/pipe/base/_status.py CHANGED
@@ -275,15 +275,23 @@ class ExceptionInfo(pydantic.BaseModel):
 class QuantumAttemptStatus(enum.Enum):
     """Enum summarizing an attempt to run a quantum."""
 
+    ABORTED = -4
+    """The quantum failed with a hard error that prevented both logs and
+    metadata from being written.
+
+    This state is only set if information from higher-level tooling (e.g. BPS)
+    is available to distinguish it from ``UNKNOWN``.
+    """
+
     UNKNOWN = -3
     """The status of this attempt is unknown.
 
-    This usually means no logs or metadata were written, and it at least could
-    not be determined whether the quantum was blocked by an upstream failure
-    (if it was definitely blocked, `BLOCKED` is set instead).
+    This means no logs or metadata were written, and it at least could not be
+    determined whether the quantum was blocked by an upstream failure (if it
+    was definitely blocked, `BLOCKED` is set instead).
     """
 
-    LOGS_MISSING = -2
+    ABORTED_SUCCESS = -2
     """Task metadata was written for this attempt but logs were not.
 
     This is a rare condition that requires a hard failure (i.e. the kind that
@@ -292,20 +300,21 @@ class QuantumAttemptStatus(enum.Enum):
     """
 
     FAILED = -1
-    """Execution of the quantum failed.
+    """Execution of the quantum failed gracefully.
 
     This is always set if the task metadata dataset was not written but logs
     were, as is the case when a Python exception is caught and handled by the
-    execution system. It may also be set in cases where logs were not written
-    either, but other information was available (e.g. from higher-level
-    orchestration tooling) to mark it as a failure.
+    execution system.
+
+    This status guarantees that the task log dataset was produced but the
+    metadata dataset was not.
     """
 
     BLOCKED = 0
     """This quantum was not executed because an upstream quantum failed.
 
-    Upstream quanta with status `UNKNOWN` or `FAILED` are considered blockers;
-    `LOGS_MISSING` is not.
+    Upstream quanta with status `UNKNOWN`, `FAILED`, or `ABORTED` are
+    considered blockers; `ABORTED_SUCCESS` is not.
     """
 
     SUCCESSFUL = 1
@@ -319,6 +328,16 @@
     these "successes with caveats" are reported.
     """
 
+    @property
+    def has_metadata(self) -> bool:
+        """Whether the task metadata dataset was produced."""
+        return self is self.SUCCESSFUL or self is self.ABORTED_SUCCESS
+
+    @property
+    def has_log(self) -> bool:
+        """Whether the log dataset was produced."""
+        return self is self.SUCCESSFUL or self is self.FAILED
+
 
 class GetSetDictMetadataHolder(Protocol):
     """Protocol for objects that have a ``metadata`` attribute that satisfies
lsst/pipe/base/automatic_connection_constants.py CHANGED
@@ -26,7 +26,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 """Constants used to define the connections automatically added for each
-PipelineTask by the execution system.
+PipelineTask by the execution system, as well as other special dataset types.
 """
 
 from __future__ import annotations
@@ -43,6 +43,8 @@ __all__ = (
     "METADATA_OUTPUT_TEMPLATE",
     "PACKAGES_INIT_OUTPUT_NAME",
     "PACKAGES_INIT_OUTPUT_STORAGE_CLASS",
+    "PROVENANCE_DATASET_TYPE_NAME",
+    "PROVENANCE_STORAGE_CLASS",
 )
 
 
@@ -91,3 +93,9 @@ type names.
 METADATA_OUTPUT_STORAGE_CLASS: str = "TaskMetadata"
 """Name of the storage class for task metadata output datasets.
 """
+
+PROVENANCE_DATASET_TYPE_NAME: str = "run_provenance"
+"""Name of the dataset used to store per-RUN provenance."""
+
+PROVENANCE_STORAGE_CLASS: str = "ProvenanceQuantumGraph"
+"""Name of the storage class used to store provenance."""
lsst/pipe/base/cli/cmd/__init__.py CHANGED
@@ -25,6 +25,20 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
-__all__ = ["register_instrument", "transfer_from_graph", "zip_from_graph", "retrieve_artifacts_for_quanta", "aggregate_graph"]
+__all__ = [
+    "register_instrument",
+    "transfer_from_graph",
+    "zip_from_graph",
+    "retrieve_artifacts_for_quanta",
+    "aggregate_graph",
+    "ingest_graph",
+]
 
-from .commands import (register_instrument, retrieve_artifacts_for_quanta, transfer_from_graph, zip_from_graph, aggregate_graph)
+from .commands import (
+    register_instrument,
+    retrieve_artifacts_for_quanta,
+    transfer_from_graph,
+    zip_from_graph,
+    aggregate_graph,
+    ingest_graph,
+)
lsst/pipe/base/cli/cmd/commands.py CHANGED
@@ -161,7 +161,7 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
 
 @click.command(short_help="Scan for the outputs of an active or completed quantum graph.", cls=ButlerCommand)
 @click.argument("predicted_graph", required=True)
-@repo_argument(required=True, help="Path to the central butler repository.")
+@repo_argument(required=True, help="Path or alias for the butler repository.")
 @click.option(
     "-o",
     "--output",
@@ -181,9 +181,9 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
     help="Number of processes to use.",
 )
 @click.option(
-    "--complete/--incomplete",
-    "assume_complete",
-    default=_AGGREGATOR_DEFAULTS.assume_complete,
+    "--incomplete/--complete",
+    "incomplete",
+    default=_AGGREGATOR_DEFAULTS.incomplete,
     help="Whether execution has completed (and failures cannot be retried).",
 )
 @click.option(
@@ -249,6 +249,14 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
     default=_AGGREGATOR_DEFAULTS.mock_storage_classes,
     help="Enable support for storage classes created by the lsst.pipe.base.tests.mocks package.",
 )
+@click.option(
+    "--promise-ingest-graph/--no-promise-ingest-graph",
+    default=_AGGREGATOR_DEFAULTS.promise_ingest_graph,
+    help=(
+        "Promise to run 'butler ingest-graph' later, allowing aggregate-graph "
+        "to skip metadata/log/config ingestion for now."
+    ),
+)
 def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
     """Scan for quantum graph's outputs to gather provenance, ingest datasets
     into the central butler repository, and delete datasets that are no
@@ -268,3 +276,33 @@ def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
         # When this exception is raised, we'll have already logged the relevant
         # traceback from a separate worker.
         raise click.ClickException(str(err)) from None
+
+
+@click.command(
+    short_help="Ingest a provenance quantum graph into a butler, finalizing a RUN collection.",
+    cls=ButlerCommand,
+)
+@repo_argument(required=True, help="Path or alias for the butler repository.")
+@click.argument("provenance_graph", required=False)
+@transfer_option(default="move")
+@click.option("--batch-size", default=10000, help="How many datasets to process in each transaction.")
+@click.option(
+    "--output-run",
+    default=None,
+    help=(
+        "Name of the output RUN collection. Must be provided if the provenance graph is not"
+        " provided (so the graph can be found in the butler)."
+    ),
+)
+def ingest_graph(
+    *,
+    repo: str,
+    provenance_graph: str | None,
+    transfer: str | None,
+    batch_size: int,
+    output_run: str | None,
+) -> None:
+    """Ingest a provenance graph into a butler repository."""
+    from ...quantum_graph.ingest_graph import ingest_graph as ingest_graph_py
+
+    ingest_graph_py(repo, provenance_graph, transfer=transfer, batch_size=batch_size, output_run=output_run)
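Together with the new `--promise-ingest-graph` option on `aggregate-graph`, this command splits provenance handling into two steps: aggregate outputs first, then finalize the RUN collection by ingesting the provenance graph. A sketch of the equivalent Python call, using only the signature visible in the command body above; the repository and file names are placeholders:

    from lsst.pipe.base.quantum_graph.ingest_graph import ingest_graph

    # Finalize a RUN collection from a previously written provenance graph.
    ingest_graph(
        "REPO",                  # butler repository path or alias (placeholder)
        "provenance_graph.qg",   # provenance graph file (placeholder name); may be None
        transfer="move",         # same default as the CLI --transfer option
        batch_size=10000,        # datasets per transaction, matching the CLI default
        output_run=None,         # only required when the graph argument is omitted
    )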
lsst/pipe/base/connectionTypes.py CHANGED
@@ -41,35 +41,36 @@ from lsst.utils.introspection import find_outside_stacklevel
 
 @dataclasses.dataclass(frozen=True)
 class BaseConnection:
-    """Base class used for declaring `PipelineTask` connections.
-
-    Attributes
-    ----------
-    name : `str`
-        The name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum()` and
-        notify the execution system as early as possible of outputs that will
-        not be produced because the corresponding input is missing.
-    deprecated : `str`, optional
-        A description of why this connection is deprecated, including the
-        version after which it may be removed.
-
-        If not `None`, the string is appended to the docstring for this
-        connection and the corresponding config Field.
-    """
+    """Base class used for declaring `PipelineTask` connections."""
 
     name: str
+    """The name used to identify the dataset type."""
+
     storageClass: str
+    """The storage class used when (un)/persisting the dataset type."""
+
     doc: str = ""
+    """Documentation for this connection."""
+
     multiple: bool = False
+    """Indicates if this connection should expect to contain multiple objects
+    of the given dataset type.
+
+    Tasks with more than one connection with ``multiple=True`` with the same
+    dimensions may want to implement `.PipelineTaskConnections.adjustQuantum`
+    to ensure those datasets are consistent (i.e. zip-iterable) in
+    `PipelineTask.runQuantum()` and notify the execution system as early as
+    possible of outputs that will not be produced because the corresponding
+    input is missing.
+    """
+
     deprecated: str | None = dataclasses.field(default=None, kw_only=True)
+    """A description of why this connection is deprecated, including the
+    version after which it may be removed.
+
+    If not `None`, the string is appended to the docstring for this
+    connection and the corresponding config Field.
+    """
 
     _connection_type_set: ClassVar[str]
     _deprecation_context: str = ""
@@ -110,32 +111,15 @@ class BaseConnection:
 class DimensionedConnection(BaseConnection):
     """Class used for declaring PipelineTask connections that includes
     dimensions.
-
-    Attributes
-    ----------
-    name : `str`
-        The name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
-        the execution system as early as possible of outputs that will not be
-        produced because the corresponding input is missing.
-    dimensions : iterable of `str`
-        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
-        to identify the dataset type identified by the specified name.
-    isCalibration : `bool`, optional
-        `True` if this dataset type may be included in CALIBRATION-type
-        collections to associate it with a validity range, `False` (default)
-        otherwise.
     """
 
     dimensions: Iterable[str] = ()
+    """The keys of the butler data coordinates for this dataset type."""
+
     isCalibration: bool = False
+    """ `True` if this dataset type may be included in
+    `~lsst.daf.butler.CollectionType.CALIBRATION` collections to associate it
+    with a validity range, `False` (default) otherwise."""
 
     def __post_init__(self):
         super().__post_init__()
@@ -151,39 +135,6 @@ class DimensionedConnection(BaseConnection):
 class BaseInput(DimensionedConnection):
     """Class used for declaring PipelineTask input connections.
 
-    Attributes
-    ----------
-    name : `str`
-        The default name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
-        the execution system as early as possible of outputs that will not be
-        produced because the corresponding input is missing.
-    dimensions : iterable of `str`
-        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
-        to identify the dataset type identified by the specified name.
-    deferLoad : `bool`
-        Indicates that this dataset type will be loaded as a
-        `lsst.daf.butler.DeferredDatasetHandle`. PipelineTasks can use this
-        object to load the object at a later time.
-    minimum : `bool`
-        Minimum number of datasets required for this connection, per quantum.
-        This is checked in the base implementation of
-        `.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
-        the minimum is not met for `Input` connections (causing the quantum to
-        be pruned, skipped, or never created, depending on the context), and
-        `FileNotFoundError` for `PrerequisiteInput` connections (causing
-        QuantumGraph generation to fail). `PipelineTask` implementations may
-        provide custom `~.PipelineTaskConnections.adjustQuantum`
-        implementations for more fine-grained or configuration-driven
-        constraints, as long as they are compatible with this minium.
-
     Raises
     ------
     TypeError
@@ -194,7 +145,24 @@ class BaseInput(DimensionedConnection):
     """
 
     deferLoad: bool = False
+    """Whether this dataset type will be loaded as a
+    `lsst.daf.butler.DeferredDatasetHandle`. PipelineTasks can use this
+    object to load the object at a later time.
+    """
+
     minimum: int = 1
+    """Minimum number of datasets required for this connection, per quantum.
+
+    This is checked in the base implementation of
+    `.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if the
+    minimum is not met for `Input` connections (causing the quantum to be
+    pruned, skipped, or never created, depending on the context), and
+    `FileNotFoundError` for `PrerequisiteInput` connections (causing
+    QuantumGraph generation to fail). `PipelineTask` implementations may
+    provide custom `~.PipelineTaskConnections.adjustQuantum` implementations
+    for more fine-grained or configuration-driven constraints, as long as they
+    are compatible with this minimum.
+    """
 
     def __post_init__(self) -> None:
         super().__post_init__()
@@ -206,56 +174,6 @@ class BaseInput(DimensionedConnection):
 class Input(BaseInput):
     """Class used for declaring PipelineTask input connections.
 
-    Attributes
-    ----------
-    name : `str`
-        The default name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
-        the execution system as early as possible of outputs that will not be
-        produced because the corresponding input is missing.
-    dimensions : iterable of `str`
-        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
-        to identify the dataset type identified by the specified name.
-    deferLoad : `bool`
-        Indicates that this dataset type will be loaded as a
-        `lsst.daf.butler.DeferredDatasetHandle`. PipelineTasks can use this
-        object to load the object at a later time.
-    minimum : `bool`
-        Minimum number of datasets required for this connection, per quantum.
-        This is checked in the base implementation of
-        `.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
-        the minimum is not met for `Input` connections (causing the quantum to
-        be pruned, skipped, or never created, depending on the context), and
-        `FileNotFoundError` for `PrerequisiteInput` connections (causing
-        QuantumGraph generation to fail). `PipelineTask` implementations may
-        provide custom `~.PipelineTaskConnections.adjustQuantum`
-        implementations for more fine-grained or configuration-driven
-        constraints, as long as they are compatible with this minium.
-    deferGraphConstraint : `bool`, optional
-        If `True`, do not include this dataset type's existence in the initial
-        query that starts the QuantumGraph generation process. This can be
-        used to make QuantumGraph generation faster by avoiding redundant
-        datasets, and in certain cases it can (along with careful attention to
-        which tasks are included in the same QuantumGraph) be used to work
-        around the QuantumGraph generation algorithm's inflexible handling of
-        spatial overlaps. This option has no effect when the connection is not
-        an overall input of the pipeline (or subset thereof) for which a graph
-        is being created, and it never affects the ordering of quanta.
-    deferBinding : `bool`, optional
-        If `True`, the dataset will not be automatically included in
-        the pipeline graph, ``deferGraphConstraint`` is implied.
-        The custom QuantumGraphBuilder is required to bind it and add a
-        corresponding edge to the pipeline graph.
-        This option allows to have the same dataset type as both
-        input and output of a quantum.
-
     Raises
     ------
     TypeError
@@ -266,8 +184,27 @@ class Input(BaseInput):
     """
 
     deferGraphConstraint: bool = False
+    """If `True`, do not include this dataset type's existence in the initial
+    query that starts the QuantumGraph generation process.
+
+    This can be used to make QuantumGraph generation faster by avoiding
+    redundant datasets, and in certain cases it can (along with careful
+    attention to which tasks are included in the same QuantumGraph) be used to
+    work around the QuantumGraph generation algorithm's inflexible handling of
+    spatial overlaps. This option has no effect when the connection is not an
+    overall input of the pipeline (or subset thereof) for which a graph is
+    being created, and it never affects the ordering of quanta.
+    """
 
     deferBinding: bool = False
+    """If `True`, the dataset will not be automatically included in the
+    pipeline graph (``deferGraphConstraint=True`` is implied).
+
+    A custom `~.quantum_graph_builder.QuantumGraphBuilder` is required to bind
+    it and add a corresponding edge to the pipeline graph. This option allows
+    the same dataset type to be used as both an input and an output of a
+    quantum.
+    """
 
     _connection_type_set: ClassVar[str] = "inputs"
 
@@ -276,38 +213,6 @@ class Input(BaseInput):
 class PrerequisiteInput(BaseInput):
     """Class used for declaring PipelineTask prerequisite connections.
 
-    Attributes
-    ----------
-    name : `str`
-        The default name used to identify the dataset type.
-    storageClass : `str`
-        The storage class used when (un)/persisting the dataset type.
-    multiple : `bool`
-        Indicates if this connection should expect to contain multiple objects
-        of the given dataset type. Tasks with more than one connection with
-        ``multiple=True`` with the same dimensions may want to implement
-        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
-        consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
-        the execution system as early as possible of outputs that will not be
-        produced because the corresponding input is missing.
-    dimensions : iterable of `str`
-        The `lsst.daf.butler.Butler` `lsst.daf.butler.Registry` dimensions used
-        to identify the dataset type identified by the specified name.
-    minimum : `bool`
-        Minimum number of datasets required for this connection, per quantum.
-        This is checked in the base implementation of
-        `.PipelineTaskConnections.adjustQuantum`, which raises
-        `FileNotFoundError` (causing QuantumGraph generation to fail).
-        `PipelineTask` implementations may provide custom
-        `~.PipelineTaskConnections.adjustQuantum` implementations for more
-        fine-grained or configuration-driven constraints, as long as they are
-        compatible with this minium.
-    lookupFunction : `typing.Callable`, optional
-        An optional callable function that will look up PrerequisiteInputs
-        using the DatasetType, registry, quantum dataId, and input collections
-        passed to it. If no function is specified, the default temporal spatial
-        lookup will be used.
-
     Raises
     ------
     TypeError
@@ -342,6 +247,13 @@ class PrerequisiteInput(BaseInput):
     lookupFunction: (
        Callable[[DatasetType, Registry, DataCoordinate, Sequence[str]], Iterable[DatasetRef]] | None
    ) = None
+    """An optional callable function that will look up PrerequisiteInputs
+    using the DatasetType, registry, quantum dataId, and input collections
+    passed to it.
+
+    If no function is specified, the default temporal/spatial lookup will be
+    used.
+    """
 
     _connection_type_set: ClassVar[str] = "prerequisiteInputs"
 
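The docstring rework above moves the attribute descriptions onto the individual fields without changing how connections are declared. A minimal sketch of those fields in use; the dataset type names, storage classes, and dimensions are illustrative rather than taken from the package:

    from lsst.pipe.base import PipelineTaskConnections
    from lsst.pipe.base import connectionTypes as cT

    class ExampleConnections(
        PipelineTaskConnections, dimensions=("instrument", "visit", "detector")
    ):
        exposure = cT.Input(
            doc="Input exposure to process.",
            name="example_input_exposure",
            storageClass="ExposureF",
            dimensions=("instrument", "visit", "detector"),
            deferLoad=False,
            minimum=1,
        )
        camera = cT.PrerequisiteInput(
            doc="Camera geometry.",
            name="camera",
            storageClass="Camera",
            dimensions=("instrument",),
            isCalibration=True,
        )
        output = cT.Output(
            doc="Processed exposure.",
            name="example_output_exposure",
            storageClass="ExposureF",
            dimensions=("instrument", "visit", "detector"),
        )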
lsst/pipe/base/connections.py CHANGED
@@ -495,15 +495,12 @@ class DeferredDatasetRef:
     """A wrapper class for `~lsst.daf.butler.DatasetRef` that indicates that a
     `PipelineTask` should receive a `~lsst.daf.butler.DeferredDatasetHandle`
     instead of an in-memory dataset.
-
-    Attributes
-    ----------
-    datasetRef : `lsst.daf.butler.DatasetRef`
-        The `lsst.daf.butler.DatasetRef` that will be eventually used to
-        resolve a dataset.
     """
 
     datasetRef: DatasetRef
+    """The `lsst.daf.butler.DatasetRef` that will be eventually used to
+    resolve a dataset.
+    """
 
     def __getattr__(self, name: str) -> Any:
         # make sure reduce is called on DeferredDatasetRef and not on
lsst/pipe/base/execution_reports.py CHANGED
@@ -299,11 +299,6 @@ class QuantumGraphExecutionReport:
     produced DatasetTypes for each task. This report can be output as a
     dictionary or a yaml file.
 
-    Attributes
-    ----------
-    tasks : `dict`
-        A dictionary of TaskExecutionReports by task label.
-
     See Also
     --------
     TaskExecutionReport : A task report.
lsst/pipe/base/log_capture.py CHANGED
@@ -163,7 +163,9 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
+    def capture_logging(
+        self, task_node: TaskNode, /, quantum: Quantum, records: ButlerLogRecords | None = None
+    ) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -172,6 +174,9 @@ class LogCapture:
             The task definition.
         quantum : `~lsst.daf.butler.Quantum`
             Single Quantum instance.
+        records : `lsst.daf.butler.logging.ButlerLogRecords`, optional
+            Log record container to append to and save. If provided, streaming
+            mode is disabled (since we'll be saving logs in memory anyway).
 
         Notes
         -----
@@ -213,7 +218,7 @@ class LogCapture:
                 ) from exc
         # Either accumulate into ButlerLogRecords or stream JSON records to
         # file and ingest that (ingest is possible only with full butler).
-        if self.stream_json_logs and self.full_butler is not None:
+        if self.stream_json_logs and self.full_butler is not None and records is None:
            with TemporaryForIngest(self.full_butler, ref) as temporary:
                log_handler_file = FileHandler(temporary.ospath)
                log_handler_file.setFormatter(JsonLogFormatter())
@@ -236,7 +241,7 @@ class LogCapture:
                 temporary.ingest()
 
         else:
-            log_handler_memory = ButlerLogRecordHandler()
+            log_handler_memory = ButlerLogRecordHandler(records)
             logging.getLogger().addHandler(log_handler_memory)
 
             try:
@@ -255,7 +260,6 @@ class LogCapture:
                 logging.getLogger().removeHandler(log_handler_memory)
                 if ctx.store:
                     self._store_log_records(quantum, log_dataset_name, log_handler_memory)
-                log_handler_memory.records.clear()
 
         else:
             with ButlerMDC.set_mdc(mdc):
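The new `records` argument lets the caller own the log record container across multiple calls instead of having `LogCapture` create, store, and clear one per attempt (note the removed `records.clear()` above), and it suppresses the streaming-to-file path. A rough sketch of that usage, assuming `ButlerLogRecords.from_records` is the way to build an empty container and that the caller already has a `LogCapture`, task node, and quantum in hand:

    from lsst.daf.butler.logging import ButlerLogRecords

    def run_attempts_with_shared_logs(log_capture, task_node, quantum, n_attempts=3):
        """Accumulate log records from several attempts into one container."""
        records = ButlerLogRecords.from_records([])
        for _ in range(n_attempts):
            # Passing `records` disables streaming mode, per the docstring above.
            with log_capture.capture_logging(task_node, quantum, records=records):
                pass  # the quantum would be executed here
        return records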