lsst-pipe-base 30.0.0rc3__py3-none-any.whl → 30.0.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. lsst/pipe/base/_instrument.py +25 -15
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/_task_metadata.py +2 -2
  5. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
  6. lsst/pipe/base/automatic_connection_constants.py +20 -1
  7. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  8. lsst/pipe/base/cli/cmd/commands.py +149 -4
  9. lsst/pipe/base/connectionTypes.py +72 -160
  10. lsst/pipe/base/connections.py +6 -9
  11. lsst/pipe/base/execution_reports.py +0 -5
  12. lsst/pipe/base/graph/graph.py +11 -10
  13. lsst/pipe/base/graph/quantumNode.py +4 -4
  14. lsst/pipe/base/graph_walker.py +8 -10
  15. lsst/pipe/base/log_capture.py +1 -1
  16. lsst/pipe/base/log_on_close.py +4 -7
  17. lsst/pipe/base/pipeline.py +5 -6
  18. lsst/pipe/base/pipelineIR.py +2 -8
  19. lsst/pipe/base/pipelineTask.py +5 -7
  20. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  21. lsst/pipe/base/pipeline_graph/_edges.py +32 -22
  22. lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
  23. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
  24. lsst/pipe/base/pipeline_graph/expressions.py +2 -2
  25. lsst/pipe/base/pipeline_graph/io.py +7 -10
  26. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  27. lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
  28. lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
  29. lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
  30. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  31. lsst/pipe/base/prerequisite_helpers.py +2 -1
  32. lsst/pipe/base/quantum_graph/_common.py +15 -17
  33. lsst/pipe/base/quantum_graph/_multiblock.py +36 -20
  34. lsst/pipe/base/quantum_graph/_predicted.py +7 -3
  35. lsst/pipe/base/quantum_graph/_provenance.py +501 -61
  36. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  37. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +187 -240
  38. lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
  39. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
  40. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +15 -7
  41. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
  42. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +19 -34
  43. lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
  44. lsst/pipe/base/quantum_graph/aggregator/_writer.py +3 -3
  45. lsst/pipe/base/quantum_graph/formatter.py +74 -4
  46. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  47. lsst/pipe/base/quantum_graph/visualization.py +5 -1
  48. lsst/pipe/base/quantum_graph_builder.py +21 -8
  49. lsst/pipe/base/quantum_graph_skeleton.py +31 -29
  50. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  51. lsst/pipe/base/separable_pipeline_executor.py +1 -1
  52. lsst/pipe/base/single_quantum_executor.py +15 -8
  53. lsst/pipe/base/struct.py +4 -0
  54. lsst/pipe/base/testUtils.py +3 -3
  55. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  56. lsst/pipe/base/version.py +1 -1
  57. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/METADATA +3 -3
  58. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/RECORD +66 -64
  59. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/WHEEL +1 -1
  60. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/entry_points.txt +0 -0
  61. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/COPYRIGHT +0 -0
  62. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/LICENSE +0 -0
  63. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/bsd_license.txt +0 -0
  64. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  65. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/top_level.txt +0 -0
  66. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/zip-safe +0 -0
lsst/pipe/base/_instrument.py CHANGED
@@ -35,7 +35,7 @@ from abc import ABCMeta, abstractmethod
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any, Self, cast, final
 
-from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
+from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter, FormatterV2
 from lsst.daf.butler.registry import DataIdError
 from lsst.pex.config import Config, RegistryField
 from lsst.resources import ResourcePath, ResourcePathExpression
@@ -68,7 +68,7 @@ class Instrument(metaclass=ABCMeta):
     configPaths: Sequence[ResourcePathExpression] = ()
     """Paths to config files to read for specific Tasks.
 
-    The paths in this list should contain files of the form `task.py`, for
+    The paths in this list should contain files of the form ``task.py``, for
     each of the Tasks that requires special configuration.
     """
 
@@ -99,7 +99,8 @@ class Instrument(metaclass=ABCMeta):
 
     @abstractmethod
     def register(self, registry: Registry, *, update: bool = False) -> None:
-        """Insert instrument, and other relevant records into `Registry`.
+        """Insert instrument, and other relevant records into a butler
+        registry.
 
         Parameters
         ----------
@@ -109,6 +110,10 @@
             If `True` (`False` is default), update existing records if they
             differ from the new ones.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.registry.ConflictingDefinitionError
@@ -127,13 +132,6 @@ class Instrument(metaclass=ABCMeta):
         the level of individual dimension entries; new detectors and filters
         should be added, but changes to any existing record should not be.
         This can generally be achieved via a block like
-
-        .. code-block:: python
-
-            with registry.transaction():
-                registry.syncDimensionData("instrument", ...)
-                registry.syncDimensionData("detector", ...)
-                self.registerFilters(registry)
         """
         raise NotImplementedError()
 
@@ -314,7 +312,7 @@ class Instrument(metaclass=ABCMeta):
         return instrument_cls(collection_prefix=collection_prefix)
 
     @staticmethod
-    def importAll(registry: Registry) -> None:
+    def importAll(registry: Registry) -> dict[str, type[Instrument]]:
         """Import all the instruments known to this registry.
 
         This will ensure that all metadata translators have been registered.
@@ -324,31 +322,43 @@
         registry : `lsst.daf.butler.Registry`
             Butler registry to query to find the information.
 
+        Returns
+        -------
+        imported : `dict` [`str`, `type` [`Instrument`]]
+            A mapping containing all the instrument classes that were loaded
+            successfully, keyed by their butler names.
+
         Notes
         -----
         It is allowed for a particular instrument class to fail on import.
         This might simply indicate that a particular obs package has
         not been setup.
         """
+        imported: dict[str, type[Instrument]] = {}
         records = list(registry.queryDimensionRecords("instrument"))
         for record in records:
             cls = record.class_name
+            instrument_name: str = cast(str, record.name)
             with contextlib.suppress(Exception):
-                doImportType(cls)
+                instr = doImportType(cls)
+                assert issubclass(instr, Instrument)
+                imported[instrument_name] = instr
+        return imported
 
     @abstractmethod
-    def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
+    def getRawFormatter(self, dataId: DataId) -> type[Formatter | FormatterV2]:
         """Return the Formatter class that should be used to read a particular
         raw file.
 
         Parameters
         ----------
-        dataId : `DataId`
+        dataId : `lsst.daf.butler.DataId`
             Dimension-based ID for the raw file or files being ingested.
 
         Returns
         -------
-        formatter : `lsst.daf.butler.Formatter` class
+        formatter : `type` \
+            [`lsst.daf.butler.Formatter` | `lsst.daf.butler.FormatterV2` ]
             Class to be used that reads the file into the correct
             Python object for the raw data.
         """
lsst/pipe/base/_quantumContext.py CHANGED
@@ -380,8 +380,8 @@ class QuantumContext:
             if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
             or a single `~lsst.daf.butler.DatasetRef`. If ``values.NAME`` is
             None, no output is written.
-        dataset : `OutputQuantizedConnection` or `list`[`DatasetRef`] \
-            or `DatasetRef`
+        dataset : `OutputQuantizedConnection` or `list` \
+            [`lsst.daf.butler.DatasetRef`] or `lsst.daf.butler.DatasetRef`
             This argument may either be an `InputQuantizedConnection` which
             describes all the inputs of a quantum, a list of
             `lsst.daf.butler.DatasetRef`, or a single
@@ -460,7 +460,7 @@ class QuantumContext:
 
         Parameters
         ----------
-        ref : `DatasetRef`
+        ref : `lsst.daf.butler.DatasetRef`
             The dataset to attach provenance to. This dataset must have been
             retrieved by this quantum context.
         extra : `dict` [ `str`, `int` | `float` | `str` | `bool` ]
lsst/pipe/base/_status.py CHANGED
@@ -275,15 +275,23 @@ class ExceptionInfo(pydantic.BaseModel):
 class QuantumAttemptStatus(enum.Enum):
     """Enum summarizing an attempt to run a quantum."""
 
+    ABORTED = -4
+    """The quantum failed with a hard error that prevented both logs and
+    metadata from being written.
+
+    This state is only set if information from higher-level tooling (e.g. BPS)
+    is available to distinguish it from ``UNKNOWN``.
+    """
+
     UNKNOWN = -3
     """The status of this attempt is unknown.
 
-    This usually means no logs or metadata were written, and it at least could
-    not be determined whether the quantum was blocked by an upstream failure
-    (if it was definitely blocked, `BLOCKED` is set instead).
+    This means no logs or metadata were written, and it at least could not be
+    determined whether the quantum was blocked by an upstream failure (if it
+    was definitely blocked, `BLOCKED` is set instead).
     """
 
-    LOGS_MISSING = -2
+    ABORTED_SUCCESS = -2
     """Task metadata was written for this attempt but logs were not.
 
     This is a rare condition that requires a hard failure (i.e. the kind that
@@ -292,20 +300,21 @@ class QuantumAttemptStatus(enum.Enum):
     """
 
     FAILED = -1
-    """Execution of the quantum failed.
+    """Execution of the quantum failed gracefully.
 
     This is always set if the task metadata dataset was not written but logs
     were, as is the case when a Python exception is caught and handled by the
-    execution system. It may also be set in cases where logs were not written
-    either, but other information was available (e.g. from higher-level
-    orchestration tooling) to mark it as a failure.
+    execution system.
+
+    This status guarantees that the task log dataset was produced but the
+    metadata dataset was not.
     """
 
     BLOCKED = 0
     """This quantum was not executed because an upstream quantum failed.
 
-    Upstream quanta with status `UNKNOWN` or `FAILED` are considered blockers;
-    `LOGS_MISSING` is not.
+    Upstream quanta with status `UNKNOWN`, `FAILED`, or `ABORTED` are
+    considered blockers; `ABORTED_SUCCESS` is not.
     """
 
     SUCCESSFUL = 1
@@ -319,6 +328,30 @@ class QuantumAttemptStatus(enum.Enum):
     these "successes with caveats" are reported.
     """
 
+    @property
+    def has_metadata(self) -> bool:
+        """Whether the task metadata dataset was produced."""
+        return self is self.SUCCESSFUL or self is self.ABORTED_SUCCESS
+
+    @property
+    def has_log(self) -> bool:
+        """Whether the log dataset was produced."""
+        return self is self.SUCCESSFUL or self is self.FAILED
+
+    @property
+    def title(self) -> str:
+        """A version of this status' name suitable for use as a title in a plot
+        or table.
+        """
+        return self.name.capitalize().replace("_", " ")
+
+    @property
+    def is_rare(self) -> bool:
+        """Whether this status is rare enough that it should only be listed
+        when it actually occurs.
+        """
+        return self in (self.ABORTED, self.ABORTED_SUCCESS, self.UNKNOWN)
+
 
 class GetSetDictMetadataHolder(Protocol):
     """Protocol for objects that have a ``metadata`` attribute that satisfies
lsst/pipe/base/_task_metadata.py CHANGED
@@ -37,7 +37,7 @@ import itertools
 import numbers
 import sys
 from collections.abc import Collection, Iterator, Mapping, Sequence
-from typing import Any, Protocol, TypeAlias, Union
+from typing import Any, Protocol
 
 from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictFloat, StrictInt, StrictStr
 
@@ -47,7 +47,7 @@ _ALLOWED_PRIMITIVE_TYPES = (str, float, int, bool)
 
 # Note that '|' syntax for unions doesn't work when we have to use a string
 # literal (and we do since it's recursive and not an annotation).
-NestedMetadataDict: TypeAlias = Mapping[str, Union[str, float, int, bool, "NestedMetadataDict"]]
+type NestedMetadataDict = Mapping[str, str | float | int | bool | "NestedMetadataDict"]
 
 
 class PropertySetLike(Protocol):
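
The `NestedMetadataDict` rewrite swaps the `TypeAlias`/`Union` spelling for a PEP 695 `type` statement, which requires Python 3.12+ and evaluates its value lazily, so the recursive reference needs no special treatment (the string quotes kept in the diff are now optional). A minimal sketch of the same pattern:

```python
from collections.abc import Mapping

# PEP 695 alias: the right-hand side is evaluated lazily, so the
# recursive reference works without a string literal or Union.
type NestedMetadataDict = Mapping[str, str | float | int | bool | NestedMetadataDict]


def count_leaves(metadata: NestedMetadataDict) -> int:
    """Count non-mapping values in an arbitrarily nested metadata dict."""
    total = 0
    for value in metadata.values():
        total += count_leaves(value) if isinstance(value, Mapping) else 1
    return total


print(count_leaves({"a": 1, "b": {"c": "x", "d": {"e": True}}}))  # -> 3
```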
lsst/pipe/base/all_dimensions_quantum_graph_builder.py CHANGED
@@ -574,7 +574,7 @@ class _DimensionGroupBranch:
 
         Parameters
         ----------
-        log : `lsst.logging.LsstLogAdapter`
+        log : `lsst.utils.logging.LsstLogAdapter`
            Logger to use for status reporting.
        log_indent : `str`, optional
            Indentation to prefix the log message. This is used when recursing
@@ -739,7 +739,7 @@ class _DimensionGroupTree:
            Query constraint specified by the user.
        data_id_tables : `~collections.abc.Iterable` [ `astropy.table.Table` ]
            Data ID tables being joined into the query.
-        log : `lsst.log.LsstLogAdapter`
+        log : `lsst.utils.logging.LsstLogAdapter`
            Logger that supports ``verbose`` output.
        """
        universe = self.all_dimensions.universe
@@ -805,7 +805,7 @@ class _DimensionGroupTree:
        ----------
        requested : `DatasetQueryConstraintVariant`
            Query constraint specified by the user.
-        log : `lsst.log.LsstLogAdapter`
+        log : `lsst.utils.logging.LsstLogAdapter`
            Logger that supports ``verbose`` output.
        """
        overall_inputs: dict[str, DatasetTypeNode] = {
@@ -834,6 +834,11 @@ class _DimensionGroupTree:
                    remainder,
                )
                self.dataset_constraint.intersection_update(inputs)
+                if not self.dataset_constraint:
+                    raise QuantumGraphBuilderError(
+                        "An explicit dataset query constraint was provided, but it does not include any "
+                        f"inputs to the pipeline subset with tasks {list(self.subgraph.tasks.keys())}."
+                    )
            case _:
                raise QuantumGraphBuilderError(
                    f"Unable to handle type {requested} given as dataset query constraint."
lsst/pipe/base/automatic_connection_constants.py CHANGED
@@ -26,7 +26,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 """Constants used to define the connections automatically added for each
-PipelineTask by the execution system.
+PipelineTask by the execution system, as well as other special dataset types.
 """
 
 from __future__ import annotations
@@ -43,6 +43,10 @@ __all__ = (
     "METADATA_OUTPUT_TEMPLATE",
     "PACKAGES_INIT_OUTPUT_NAME",
     "PACKAGES_INIT_OUTPUT_STORAGE_CLASS",
+    "PROVENANCE_DATASET_TYPE_NAME",
+    "PROVENANCE_STORAGE_CLASS",
+    "RESOURCE_USAGE_STORAGE_CLASS",
+    "RESOURCE_USAGE_TEMPLATE",
 )
 
 
@@ -91,3 +95,18 @@ type names.
 METADATA_OUTPUT_STORAGE_CLASS: str = "TaskMetadata"
 """Name of the storage class for task metadata output datasets.
 """
+
+PROVENANCE_DATASET_TYPE_NAME: str = "run_provenance"
+"""Name of the dataset used to store per-RUN provenance."""
+
+PROVENANCE_STORAGE_CLASS: str = "ProvenanceQuantumGraph"
+"""Name of the storage class used to store provenance."""
+
+RESOURCE_USAGE_TEMPLATE: str = "{label}_resource_usage"
+"""String template used to form the name of the resource usage dataset type for
+a task.
+"""
+
+RESOURCE_USAGE_STORAGE_CLASS: str = "ArrowAstropy"
+"""Storage class of the resource usage dataset type for a task.
+"""
lsst/pipe/base/cli/cmd/__init__.py CHANGED
@@ -25,6 +25,22 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
-__all__ = ["register_instrument", "transfer_from_graph", "zip_from_graph", "retrieve_artifacts_for_quanta", "aggregate_graph"]
+__all__ = [
+    "register_instrument",
+    "transfer_from_graph",
+    "zip_from_graph",
+    "retrieve_artifacts_for_quanta",
+    "aggregate_graph",
+    "ingest_graph",
+    "provenance_report",
+]
 
-from .commands import (register_instrument, retrieve_artifacts_for_quanta, transfer_from_graph, zip_from_graph, aggregate_graph)
+from .commands import (
+    register_instrument,
+    retrieve_artifacts_for_quanta,
+    transfer_from_graph,
+    zip_from_graph,
+    aggregate_graph,
+    ingest_graph,
+    provenance_report,
+)
lsst/pipe/base/cli/cmd/commands.py CHANGED
@@ -25,6 +25,9 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
+import functools
+import operator
+from collections.abc import Iterable
 from typing import Any
 
 import click
@@ -40,6 +43,7 @@ from lsst.daf.butler.cli.opt import (
 from lsst.daf.butler.cli.utils import ButlerCommand, split_commas, unwrap
 
 from ... import script
+from ..._status import QuantumAttemptStatus, QuantumSuccessCaveats
 from ...quantum_graph import aggregator
 from ..opt import instrument_argument, update_output_chain_option
 
@@ -161,7 +165,7 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
 
 @click.command(short_help="Scan for the outputs of an active or completed quantum graph.", cls=ButlerCommand)
 @click.argument("predicted_graph", required=True)
-@repo_argument(required=True, help="Path to the central butler repository.")
+@repo_argument(required=True, help="Path or alias for the butler repository.")
 @click.option(
     "-o",
     "--output",
@@ -181,9 +185,9 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
     help="Number of processes to use.",
 )
 @click.option(
-    "--complete/--incomplete",
-    "assume_complete",
-    default=_AGGREGATOR_DEFAULTS.assume_complete,
+    "--incomplete/--complete",
+    "incomplete",
+    default=_AGGREGATOR_DEFAULTS.incomplete,
    help="Whether execution has completed (and failures cannot be retried).",
 )
 @click.option(
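
The `--complete/--incomplete` flip above makes `--incomplete` the primary flag name and renames the destination parameter to `incomplete` (matching the `AggregatorConfig` field rename in `_config.py`). A standalone sketch of the click boolean-flag-pair pattern this relies on (command name and default are illustrative):

```python
import click


@click.command()
@click.option(
    "--incomplete/--complete",  # first name is primary; the pair defines both flags
    "incomplete",               # explicit destination parameter name
    default=False,              # illustrative default
    help="Whether execution may still be running (failures may be retried).",
)
def scan(incomplete: bool) -> None:
    click.echo(f"incomplete={incomplete}")


if __name__ == "__main__":
    scan()  # e.g. `python scan.py --incomplete` prints incomplete=True
```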
@@ -249,6 +253,14 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
     default=_AGGREGATOR_DEFAULTS.mock_storage_classes,
     help="Enable support for storage classes created by the lsst.pipe.base.tests.mocks package.",
 )
+@click.option(
+    "--promise-ingest-graph/--no-promise-ingest-graph",
+    default=_AGGREGATOR_DEFAULTS.promise_ingest_graph,
+    help=(
+        "Promise to run 'butler ingest-graph' later, allowing aggregate-graph "
+        "to skip metadata/log/config ingestion for now."
+    ),
+)
 def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
     """Scan for quantum graph's outputs to gather provenance, ingest datasets
     into the central butler repository, and delete datasets that are no
@@ -268,3 +280,136 @@ def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
         # When this exception is raised, we'll have already logged the relevant
         # traceback from a separate worker.
         raise click.ClickException(str(err)) from None
+
+
+@click.command(
+    short_help="Ingest a provenance quantum graph into a butler.",
+    cls=ButlerCommand,
+)
+@repo_argument(required=True, help="Path or alias for the butler repository.")
+@click.argument("provenance_graph", required=False)
+@transfer_option(default="move")
+@click.option("--batch-size", default=10000, help="How many datasets to process in each transaction.")
+@click.option(
+    "--output-run",
+    default=None,
+    help=(
+        "Name of the output RUN collection. Must be provided if the provenance graph is not"
+        " provided (so the graph can be found in the butler)."
+    ),
+)
+def ingest_graph(
+    *,
+    repo: str,
+    provenance_graph: str | None,
+    transfer: str | None,
+    batch_size: int,
+    output_run: str | None,
+) -> None:
+    """Ingest a provenance graph into a butler repository."""
+    from ...quantum_graph.ingest_graph import ingest_graph as ingest_graph_py
+
+    ingest_graph_py(repo, provenance_graph, transfer=transfer, batch_size=batch_size, output_run=output_run)
+
314
+
315
+ @click.command(
316
+ short_help="Print and write provenance reports.",
317
+ cls=ButlerCommand,
318
+ )
319
+ @click.argument("repo_or_qg")
320
+ @click.argument("collection", required=False, default=None)
321
+ @click.option(
322
+ "--state",
323
+ multiple=True,
324
+ type=click.Choice(QuantumAttemptStatus),
325
+ help=(
326
+ "Additional quantum state to include in the status report and data ID tables "
327
+ "(FAILED, ABORTED, and ABORTED_SUCCESS are included by default)."
328
+ ),
329
+ )
330
+ @click.option(
331
+ "--no-state",
332
+ multiple=True,
333
+ type=str,
334
+ metavar="STATE",
335
+ help="Quantum state to drop from in status report and data ID tables (same options as --state).",
336
+ )
337
+ @click.option(
338
+ "--status-report",
339
+ default=None,
340
+ metavar="URI",
341
+ help="File or URI (.json) for a detailed report (with data IDs) on quanta with certain states.",
342
+ )
343
+ @click.option(
344
+ "--quantum-table/--no-quantum-table",
345
+ default=True,
346
+ help="Whether to print summary of quantum status counts to STDOUT.",
347
+ )
348
+ @click.option(
349
+ "--exception-table/--no-exception-table",
350
+ default=True,
351
+ help="Whether to print summary of exception type counts STDOUT.",
352
+ )
353
+ @click.option(
354
+ "--caveat",
355
+ multiple=True,
356
+ type=click.Choice(QuantumSuccessCaveats),
357
+ help=(
358
+ "Include successful quanta in the status report if they have this caveat. "
359
+ "May be passed multiple times; any matching caveat is included. "
360
+ "Passing this option implicitly adds '--state SUCCESSFUL'."
361
+ ),
362
+ )
363
+ @click.option(
364
+ "--data-id-table-dir",
365
+ default=None,
366
+ metavar="URI",
367
+ help=(
368
+ "Directory (may be a URI) for a tree of data ID tables for each "
369
+ "task label, status, and exception type combination in the status report."
370
+ ),
371
+ )
372
+ def provenance_report(
373
+ *,
374
+ repo_or_qg: str,
375
+ collection: str | None,
376
+ state: Iterable[QuantumAttemptStatus],
377
+ no_state: Iterable[str],
378
+ status_report: str | None,
379
+ quantum_table: bool = False,
380
+ exception_table: bool = False,
381
+ caveat: Iterable[QuantumSuccessCaveats],
382
+ data_id_table_dir: str | None,
383
+ ) -> None:
384
+ """Read a provenance quantum graph from a butler or file and use it to
385
+ generate reports.
386
+
387
+ REPO_OR_QG is a path or alias for the butler repository (if reading an
388
+ ingested graph, as indicated by passing COLLECTION), or the path to a
389
+ provenance quantum graph file.
390
+ """
391
+ from ...quantum_graph import ProvenanceQuantumGraph
392
+
393
+ states = set(state)
394
+ states.add(QuantumAttemptStatus.FAILED)
395
+ states.add(QuantumAttemptStatus.ABORTED)
396
+ states.add(QuantumAttemptStatus.ABORTED_SUCCESS)
397
+ for state_name in no_state:
398
+ states.discard(QuantumAttemptStatus.__members__[state_name])
399
+ with_caveats: QuantumSuccessCaveats | None = None
400
+ if caveat:
401
+ states.add(QuantumAttemptStatus.SUCCESSFUL)
402
+ with_caveats = functools.reduce(
403
+ operator.__or__,
404
+ caveat,
405
+ QuantumSuccessCaveats.NO_CAVEATS,
406
+ )
407
+ with ProvenanceQuantumGraph.from_args(repo_or_qg, collection=collection, datasets=()) as (graph, _):
408
+ graph.make_many_reports(
409
+ status_report_file=status_report,
410
+ states=states,
411
+ print_quantum_table=quantum_table,
412
+ print_exception_table=exception_table,
413
+ with_caveats=with_caveats,
414
+ data_id_table_dir=data_id_table_dir,
415
+ )