lsst-pipe-base 30.2026.300__py3-none-any.whl → 30.2026.500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. lsst/pipe/base/_instrument.py +21 -12
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/automatic_connection_constants.py +20 -1
  5. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  6. lsst/pipe/base/cli/cmd/commands.py +149 -4
  7. lsst/pipe/base/connectionTypes.py +72 -160
  8. lsst/pipe/base/connections.py +3 -6
  9. lsst/pipe/base/execution_reports.py +0 -5
  10. lsst/pipe/base/graph/graph.py +9 -8
  11. lsst/pipe/base/log_capture.py +1 -1
  12. lsst/pipe/base/pipeline.py +5 -6
  13. lsst/pipe/base/pipelineIR.py +1 -7
  14. lsst/pipe/base/pipelineTask.py +5 -7
  15. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  16. lsst/pipe/base/pipeline_graph/_edges.py +30 -18
  17. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +10 -2
  18. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  19. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  20. lsst/pipe/base/prerequisite_helpers.py +2 -1
  21. lsst/pipe/base/quantum_graph/_common.py +3 -1
  22. lsst/pipe/base/quantum_graph/_multiblock.py +29 -13
  23. lsst/pipe/base/quantum_graph/_predicted.py +7 -0
  24. lsst/pipe/base/quantum_graph/_provenance.py +498 -56
  25. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  26. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -1
  27. lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
  28. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
  29. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +14 -6
  30. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
  31. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +14 -13
  32. lsst/pipe/base/quantum_graph/aggregator/_writer.py +2 -2
  33. lsst/pipe/base/quantum_graph/formatter.py +74 -4
  34. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  35. lsst/pipe/base/quantum_graph_builder.py +1 -8
  36. lsst/pipe/base/quantum_graph_skeleton.py +29 -27
  37. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  38. lsst/pipe/base/separable_pipeline_executor.py +6 -7
  39. lsst/pipe/base/single_quantum_executor.py +7 -7
  40. lsst/pipe/base/struct.py +4 -0
  41. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  42. lsst/pipe/base/version.py +1 -1
  43. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/METADATA +2 -1
  44. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/RECORD +52 -51
  45. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/WHEEL +1 -1
  46. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/entry_points.txt +0 -0
  47. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/COPYRIGHT +0 -0
  48. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/LICENSE +0 -0
  49. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/bsd_license.txt +0 -0
  50. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/gpl-v3.0.txt +0 -0
  51. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/top_level.txt +0 -0
  52. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/zip-safe +0 -0
@@ -35,7 +35,7 @@ from abc import ABCMeta, abstractmethod
35
35
  from collections.abc import Sequence
36
36
  from typing import TYPE_CHECKING, Any, Self, cast, final
37
37
 
38
- from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
38
+ from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter, FormatterV2
39
39
  from lsst.daf.butler.registry import DataIdError
40
40
  from lsst.pex.config import Config, RegistryField
41
41
  from lsst.resources import ResourcePath, ResourcePathExpression
@@ -109,6 +109,10 @@ class Instrument(metaclass=ABCMeta):
109
109
  If `True` (`False` is default), update existing records if they
110
110
  differ from the new ones.
111
111
 
112
+ Returns
113
+ -------
114
+ None
115
+
112
116
  Raises
113
117
  ------
114
118
  lsst.daf.butler.registry.ConflictingDefinitionError
@@ -127,13 +131,6 @@ class Instrument(metaclass=ABCMeta):
127
131
  the level of individual dimension entries; new detectors and filters
128
132
  should be added, but changes to any existing record should not be.
129
133
  This can generally be achieved via a block like
130
-
131
- .. code-block:: python
132
-
133
- with registry.transaction():
134
- registry.syncDimensionData("instrument", ...)
135
- registry.syncDimensionData("detector", ...)
136
- self.registerFilters(registry)
137
134
  """
138
135
  raise NotImplementedError()
139
136
 
@@ -314,7 +311,7 @@ class Instrument(metaclass=ABCMeta):
314
311
  return instrument_cls(collection_prefix=collection_prefix)
315
312
 
316
313
  @staticmethod
317
- def importAll(registry: Registry) -> None:
314
+ def importAll(registry: Registry) -> dict[str, type[Instrument]]:
318
315
  """Import all the instruments known to this registry.
319
316
 
320
317
  This will ensure that all metadata translators have been registered.
@@ -324,20 +321,31 @@ class Instrument(metaclass=ABCMeta):
324
321
  registry : `lsst.daf.butler.Registry`
325
322
  Butler registry to query to find the information.
326
323
 
324
+ Returns
325
+ -------
326
+ imported : `dict` [`str`, `type` [`Instrument`]]
327
+ A mapping containing all the instrument classes that were loaded
328
+ successfully, keyed by their butler names.
329
+
327
330
  Notes
328
331
  -----
329
332
  It is allowed for a particular instrument class to fail on import.
330
333
  This might simply indicate that a particular obs package has
331
334
  not been setup.
332
335
  """
336
+ imported: dict[str, type[Instrument]] = {}
333
337
  records = list(registry.queryDimensionRecords("instrument"))
334
338
  for record in records:
335
339
  cls = record.class_name
340
+ instrument_name: str = cast(str, record.name)
336
341
  with contextlib.suppress(Exception):
337
- doImportType(cls)
342
+ instr = doImportType(cls)
343
+ assert issubclass(instr, Instrument)
344
+ imported[instrument_name] = instr
345
+ return imported
338
346
 
339
347
  @abstractmethod
340
- def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
348
+ def getRawFormatter(self, dataId: DataId) -> type[Formatter | FormatterV2]:
341
349
  """Return the Formatter class that should be used to read a particular
342
350
  raw file.
343
351
 
@@ -348,7 +356,8 @@ class Instrument(metaclass=ABCMeta):
348
356
 
349
357
  Returns
350
358
  -------
351
- formatter : `lsst.daf.butler.Formatter` class
359
+ formatter : `type` \
360
+ [`lsst.daf.butler.Formatter` | `lsst.daf.butler.FormatterV2` ]
352
361
  Class to be used that reads the file into the correct
353
362
  Python object for the raw data.
354
363
  """
@@ -380,8 +380,8 @@ class QuantumContext:
380
380
  if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
381
381
  or a single `~lsst.daf.butler.DatasetRef`. If ``values.NAME`` is
382
382
  None, no output is written.
383
- dataset : `OutputQuantizedConnection` or `list`[`DatasetRef`] \
384
- or `DatasetRef`
383
+ dataset : `OutputQuantizedConnection` or `list` \
384
+ [`lsst.daf.butler.DatasetRef`] or `lsst.daf.butler.DatasetRef`
385
385
  This argument may either be an `InputQuantizedConnection` which
386
386
  describes all the inputs of a quantum, a list of
387
387
  `lsst.daf.butler.DatasetRef`, or a single
@@ -460,7 +460,7 @@ class QuantumContext:
460
460
 
461
461
  Parameters
462
462
  ----------
463
- ref : `DatasetRef`
463
+ ref : `lsst.daf.butler.DatasetRef`
464
464
  The dataset to attach provenance to. This dataset must have been
465
465
  retrieved by this quantum context.
466
466
  extra : `dict` [ `str`, `int` | `float` | `str` | `bool` ]
lsst/pipe/base/_status.py CHANGED
@@ -275,15 +275,23 @@ class ExceptionInfo(pydantic.BaseModel):
275
275
  class QuantumAttemptStatus(enum.Enum):
276
276
  """Enum summarizing an attempt to run a quantum."""
277
277
 
278
+ ABORTED = -4
279
+ """The quantum failed with a hard error that prevented both logs and
280
+ metadata from being written.
281
+
282
+ This state is only set if information from higher-level tooling (e.g. BPS)
283
+ is available to distinguish it from ``UNKNOWN``.
284
+ """
285
+
278
286
  UNKNOWN = -3
279
287
  """The status of this attempt is unknown.
280
288
 
281
- This usually means no logs or metadata were written, and it at least could
282
- not be determined whether the quantum was blocked by an upstream failure
283
- (if it was definitely blocked, `BLOCKED` is set instead).
289
+ This means no logs or metadata were written, and it at least could not be
290
+ determined whether the quantum was blocked by an upstream failure (if it
291
+ was definitely blocked, `BLOCKED` is set instead).
284
292
  """
285
293
 
286
- LOGS_MISSING = -2
294
+ ABORTED_SUCCESS = -2
287
295
  """Task metadata was written for this attempt but logs were not.
288
296
 
289
297
  This is a rare condition that requires a hard failure (i.e. the kind that
@@ -292,20 +300,21 @@ class QuantumAttemptStatus(enum.Enum):
292
300
  """
293
301
 
294
302
  FAILED = -1
295
- """Execution of the quantum failed.
303
+ """Execution of the quantum failed gracefully.
296
304
 
297
305
  This is always set if the task metadata dataset was not written but logs
298
306
  were, as is the case when a Python exception is caught and handled by the
299
- execution system. It may also be set in cases where logs were not written
300
- either, but other information was available (e.g. from higher-level
301
- orchestration tooling) to mark it as a failure.
307
+ execution system.
308
+
309
+ This status guarantees that the task log dataset was produced but the
310
+ metadata dataset was not.
302
311
  """
303
312
 
304
313
  BLOCKED = 0
305
314
  """This quantum was not executed because an upstream quantum failed.
306
315
 
307
- Upstream quanta with status `UNKNOWN` or `FAILED` are considered blockers;
308
- `LOGS_MISSING` is not.
316
+ Upstream quanta with status `UNKNOWN`, `FAILED`, or `ABORTED` are
317
+ considered blockers; `ABORTED_SUCCESS` is not.
309
318
  """
310
319
 
311
320
  SUCCESSFUL = 1
@@ -319,6 +328,30 @@ class QuantumAttemptStatus(enum.Enum):
319
328
  these "successes with caveats" are reported.
320
329
  """
321
330
 
331
+ @property
332
+ def has_metadata(self) -> bool:
333
+ """Whether the task metadata dataset was produced."""
334
+ return self is self.SUCCESSFUL or self is self.ABORTED_SUCCESS
335
+
336
+ @property
337
+ def has_log(self) -> bool:
338
+ """Whether the log dataset was produced."""
339
+ return self is self.SUCCESSFUL or self is self.FAILED
340
+
341
+ @property
342
+ def title(self) -> str:
343
+ """A version of this status' name suitable for use as a title in a plot
344
+ or table.
345
+ """
346
+ return self.name.capitalize().replace("_", " ")
347
+
348
+ @property
349
+ def is_rare(self) -> bool:
350
+ """Whether this status is rare enough that it should only be listed
351
+ when it actually occurs.
352
+ """
353
+ return self in (self.ABORTED, self.ABORTED_SUCCESS, self.UNKNOWN)
354
+
322
355
 
323
356
  class GetSetDictMetadataHolder(Protocol):
324
357
  """Protocol for objects that have a ``metadata`` attribute that satisfies
@@ -26,7 +26,7 @@
26
26
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
27
 
28
28
  """Constants used to define the connections automatically added for each
29
- PipelineTask by the execution system.
29
+ PipelineTask by the execution system, as well as other special dataset types.
30
30
  """
31
31
 
32
32
  from __future__ import annotations
@@ -43,6 +43,10 @@ __all__ = (
43
43
  "METADATA_OUTPUT_TEMPLATE",
44
44
  "PACKAGES_INIT_OUTPUT_NAME",
45
45
  "PACKAGES_INIT_OUTPUT_STORAGE_CLASS",
46
+ "PROVENANCE_DATASET_TYPE_NAME",
47
+ "PROVENANCE_STORAGE_CLASS",
48
+ "RESOURCE_USAGE_STORAGE_CLASS",
49
+ "RESOURCE_USAGE_TEMPLATE",
46
50
  )
47
51
 
48
52
 
@@ -91,3 +95,18 @@ type names.
91
95
  METADATA_OUTPUT_STORAGE_CLASS: str = "TaskMetadata"
92
96
  """Name of the storage class for task metadata output datasets.
93
97
  """
98
+
99
+ PROVENANCE_DATASET_TYPE_NAME: str = "run_provenance"
100
+ """Name of the dataset used to store per-RUN provenance."""
101
+
102
+ PROVENANCE_STORAGE_CLASS: str = "ProvenanceQuantumGraph"
103
+ """Name of the storage class used to store provenance."""
104
+
105
+ RESOURCE_USAGE_TEMPLATE: str = "{label}_resource_usage"
106
+ """String template used to form the name of the resource usage dataset type for
107
+ a task.
108
+ """
109
+
110
+ RESOURCE_USAGE_STORAGE_CLASS: str = "ArrowAstropy"
111
+ """Storage class of the resource usage dataset type for a task.
112
+ """
@@ -25,6 +25,22 @@
25
25
  # You should have received a copy of the GNU General Public License
26
26
  # along with this program. If not, see <https://www.gnu.org/licenses/>.
27
27
 
28
- __all__ = ["register_instrument", "transfer_from_graph", "zip_from_graph", "retrieve_artifacts_for_quanta", "aggregate_graph"]
28
+ __all__ = [
29
+ "register_instrument",
30
+ "transfer_from_graph",
31
+ "zip_from_graph",
32
+ "retrieve_artifacts_for_quanta",
33
+ "aggregate_graph",
34
+ "ingest_graph",
35
+ "provenance_report",
36
+ ]
29
37
 
30
- from .commands import (register_instrument, retrieve_artifacts_for_quanta, transfer_from_graph, zip_from_graph, aggregate_graph)
38
+ from .commands import (
39
+ register_instrument,
40
+ retrieve_artifacts_for_quanta,
41
+ transfer_from_graph,
42
+ zip_from_graph,
43
+ aggregate_graph,
44
+ ingest_graph,
45
+ provenance_report,
46
+ )
@@ -25,6 +25,9 @@
25
25
  # You should have received a copy of the GNU General Public License
26
26
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
27
 
28
+ import functools
29
+ import operator
30
+ from collections.abc import Iterable
28
31
  from typing import Any
29
32
 
30
33
  import click
@@ -40,6 +43,7 @@ from lsst.daf.butler.cli.opt import (
40
43
  from lsst.daf.butler.cli.utils import ButlerCommand, split_commas, unwrap
41
44
 
42
45
  from ... import script
46
+ from ..._status import QuantumAttemptStatus, QuantumSuccessCaveats
43
47
  from ...quantum_graph import aggregator
44
48
  from ..opt import instrument_argument, update_output_chain_option
45
49
 
@@ -161,7 +165,7 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
161
165
 
162
166
  @click.command(short_help="Scan for the outputs of an active or completed quantum graph.", cls=ButlerCommand)
163
167
  @click.argument("predicted_graph", required=True)
164
- @repo_argument(required=True, help="Path to the central butler repository.")
168
+ @repo_argument(required=True, help="Path or alias for the butler repository.")
165
169
  @click.option(
166
170
  "-o",
167
171
  "--output",
@@ -181,9 +185,9 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
181
185
  help="Number of processes to use.",
182
186
  )
183
187
  @click.option(
184
- "--complete/--incomplete",
185
- "assume_complete",
186
- default=_AGGREGATOR_DEFAULTS.assume_complete,
188
+ "--incomplete/--complete",
189
+ "incomplete",
190
+ default=_AGGREGATOR_DEFAULTS.incomplete,
187
191
  help="Whether execution is still incomplete (and failures may still be retried)",
188
192
  )
189
193
  @click.option(
@@ -249,6 +253,14 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
249
253
  default=_AGGREGATOR_DEFAULTS.mock_storage_classes,
250
254
  help="Enable support for storage classes created by the lsst.pipe.base.tests.mocks package.",
251
255
  )
256
+ @click.option(
257
+ "--promise-ingest-graph/--no-promise-ingest-graph",
258
+ default=_AGGREGATOR_DEFAULTS.promise_ingest_graph,
259
+ help=(
260
+ "Promise to run 'butler ingest-graph' later, allowing aggregate-graph "
261
+ "to skip metadata/log/config ingestion for now."
262
+ ),
263
+ )
252
264
  def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
253
265
  """Scan for quantum graph's outputs to gather provenance, ingest datasets
254
266
  into the central butler repository, and delete datasets that are no
@@ -268,3 +280,136 @@ def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
268
280
  # When this exception is raised, we'll have already logged the relevant
269
281
  # traceback from a separate worker.
270
282
  raise click.ClickException(str(err)) from None
283
+
284
+
285
+ @click.command(
286
+ short_help="Ingest a provenance quantum graph into a butler.",
287
+ cls=ButlerCommand,
288
+ )
289
+ @repo_argument(required=True, help="Path or alias for the butler repository.")
290
+ @click.argument("provenance_graph", required=False)
291
+ @transfer_option(default="move")
292
+ @click.option("--batch-size", default=10000, help="How many datasets to process in each transaction.")
293
+ @click.option(
294
+ "--output-run",
295
+ default=None,
296
+ help=(
297
+ "Name of the output RUN collection. Must be provided if the provenance graph is not"
298
+ " provided (so the graph can be found in the butler)."
299
+ ),
300
+ )
301
+ def ingest_graph(
302
+ *,
303
+ repo: str,
304
+ provenance_graph: str | None,
305
+ transfer: str | None,
306
+ batch_size: int,
307
+ output_run: str | None,
308
+ ) -> None:
309
+ """Ingest a provenance graph into a butler repository."""
310
+ from ...quantum_graph.ingest_graph import ingest_graph as ingest_graph_py
311
+
312
+ ingest_graph_py(repo, provenance_graph, transfer=transfer, batch_size=batch_size, output_run=output_run)
313
+
314
+
315
+ @click.command(
316
+ short_help="Print and write provenance reports.",
317
+ cls=ButlerCommand,
318
+ )
319
+ @click.argument("repo_or_qg")
320
+ @click.argument("collection", required=False, default=None)
321
+ @click.option(
322
+ "--state",
323
+ multiple=True,
324
+ type=click.Choice(QuantumAttemptStatus),
325
+ help=(
326
+ "Additional quantum state to include in the status report and data ID tables "
327
+ "(FAILED, ABORTED, and ABORTED_SUCCESS are included by default)."
328
+ ),
329
+ )
330
+ @click.option(
331
+ "--no-state",
332
+ multiple=True,
333
+ type=str,
334
+ metavar="STATE",
335
+ help="Quantum state to drop from the status report and data ID tables (same options as --state).",
336
+ )
337
+ @click.option(
338
+ "--status-report",
339
+ default=None,
340
+ metavar="URI",
341
+ help="File or URI (.json) for a detailed report (with data IDs) on quanta with certain states.",
342
+ )
343
+ @click.option(
344
+ "--quantum-table/--no-quantum-table",
345
+ default=True,
346
+ help="Whether to print summary of quantum status counts to STDOUT.",
347
+ )
348
+ @click.option(
349
+ "--exception-table/--no-exception-table",
350
+ default=True,
351
+ help="Whether to print summary of exception type counts to STDOUT.",
352
+ )
353
+ @click.option(
354
+ "--caveat",
355
+ multiple=True,
356
+ type=click.Choice(QuantumSuccessCaveats),
357
+ help=(
358
+ "Include successful quanta in the status report if they have this caveat. "
359
+ "May be passed multiple times; any matching caveat is included. "
360
+ "Passing this option implicitly adds '--state SUCCESSFUL'."
361
+ ),
362
+ )
363
+ @click.option(
364
+ "--data-id-table-dir",
365
+ default=None,
366
+ metavar="URI",
367
+ help=(
368
+ "Directory (may be a URI) for a tree of data ID tables for each "
369
+ "task label, status, and exception type combination in the status report."
370
+ ),
371
+ )
372
+ def provenance_report(
373
+ *,
374
+ repo_or_qg: str,
375
+ collection: str | None,
376
+ state: Iterable[QuantumAttemptStatus],
377
+ no_state: Iterable[str],
378
+ status_report: str | None,
379
+ quantum_table: bool = False,
380
+ exception_table: bool = False,
381
+ caveat: Iterable[QuantumSuccessCaveats],
382
+ data_id_table_dir: str | None,
383
+ ) -> None:
384
+ """Read a provenance quantum graph from a butler or file and use it to
385
+ generate reports.
386
+
387
+ REPO_OR_QG is a path or alias for the butler repository (if reading an
388
+ ingested graph, as indicated by passing COLLECTION), or the path to a
389
+ provenance quantum graph file.
390
+ """
391
+ from ...quantum_graph import ProvenanceQuantumGraph
392
+
393
+ states = set(state)
394
+ states.add(QuantumAttemptStatus.FAILED)
395
+ states.add(QuantumAttemptStatus.ABORTED)
396
+ states.add(QuantumAttemptStatus.ABORTED_SUCCESS)
397
+ for state_name in no_state:
398
+ states.discard(QuantumAttemptStatus.__members__[state_name])
399
+ with_caveats: QuantumSuccessCaveats | None = None
400
+ if caveat:
401
+ states.add(QuantumAttemptStatus.SUCCESSFUL)
402
+ with_caveats = functools.reduce(
403
+ operator.__or__,
404
+ caveat,
405
+ QuantumSuccessCaveats.NO_CAVEATS,
406
+ )
407
+ with ProvenanceQuantumGraph.from_args(repo_or_qg, collection=collection, datasets=()) as (graph, _):
408
+ graph.make_many_reports(
409
+ status_report_file=status_report,
410
+ states=states,
411
+ print_quantum_table=quantum_table,
412
+ print_exception_table=exception_table,
413
+ with_caveats=with_caveats,
414
+ data_id_table_dir=data_id_table_dir,
415
+ )