lsst-pipe-base 30.2026.300-py3-none-any.whl → 30.2026.500-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. lsst/pipe/base/_instrument.py +21 -12
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/automatic_connection_constants.py +20 -1
  5. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  6. lsst/pipe/base/cli/cmd/commands.py +149 -4
  7. lsst/pipe/base/connectionTypes.py +72 -160
  8. lsst/pipe/base/connections.py +3 -6
  9. lsst/pipe/base/execution_reports.py +0 -5
  10. lsst/pipe/base/graph/graph.py +9 -8
  11. lsst/pipe/base/log_capture.py +1 -1
  12. lsst/pipe/base/pipeline.py +5 -6
  13. lsst/pipe/base/pipelineIR.py +1 -7
  14. lsst/pipe/base/pipelineTask.py +5 -7
  15. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  16. lsst/pipe/base/pipeline_graph/_edges.py +30 -18
  17. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +10 -2
  18. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  19. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  20. lsst/pipe/base/prerequisite_helpers.py +2 -1
  21. lsst/pipe/base/quantum_graph/_common.py +3 -1
  22. lsst/pipe/base/quantum_graph/_multiblock.py +29 -13
  23. lsst/pipe/base/quantum_graph/_predicted.py +7 -0
  24. lsst/pipe/base/quantum_graph/_provenance.py +498 -56
  25. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  26. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -1
  27. lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
  28. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
  29. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +14 -6
  30. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
  31. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +14 -13
  32. lsst/pipe/base/quantum_graph/aggregator/_writer.py +2 -2
  33. lsst/pipe/base/quantum_graph/formatter.py +74 -4
  34. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  35. lsst/pipe/base/quantum_graph_builder.py +1 -8
  36. lsst/pipe/base/quantum_graph_skeleton.py +29 -27
  37. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  38. lsst/pipe/base/separable_pipeline_executor.py +6 -7
  39. lsst/pipe/base/single_quantum_executor.py +7 -7
  40. lsst/pipe/base/struct.py +4 -0
  41. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  42. lsst/pipe/base/version.py +1 -1
  43. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/METADATA +2 -1
  44. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/RECORD +52 -51
  45. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/WHEEL +1 -1
  46. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/entry_points.txt +0 -0
  47. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/COPYRIGHT +0 -0
  48. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/LICENSE +0 -0
  49. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/bsd_license.txt +0 -0
  50. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/gpl-v3.0.txt +0 -0
  51. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/top_level.txt +0 -0
  52. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/zip-safe +0 -0
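
All hunks below appear to come from lsst/pipe/base/quantum_graph/_provenance.py (entry 24 above, +498 -56); the diff viewer dropped the file header, so it is restored here:

--- a/lsst/pipe/base/quantum_graph/_provenance.py
+++ b/lsst/pipe/base/quantum_graph/_provenance.py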
@@ -38,13 +38,14 @@ __all__ = (
     "ProvenanceQuantumGraphWriter",
     "ProvenanceQuantumInfo",
     "ProvenanceQuantumModel",
+    "ProvenanceQuantumReport",
     "ProvenanceQuantumScanData",
     "ProvenanceQuantumScanModels",
     "ProvenanceQuantumScanStatus",
+    "ProvenanceReport",
     "ProvenanceTaskMetadataModel",
 )
 
-
 import dataclasses
 import enum
 import itertools
@@ -60,9 +61,9 @@ import networkx
 import numpy as np
 import pydantic
 
-from lsst.daf.butler import DataCoordinate
+from lsst.daf.butler import Butler, DataCoordinate
 from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
-from lsst.resources import ResourcePathExpression
+from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils.iteration import ensure_iterable
 from lsst.utils.logging import LsstLogAdapter, getLogger
 from lsst.utils.packages import Packages
@@ -94,6 +95,13 @@ from ._predicted import (
     PredictedQuantumGraphComponents,
 )
 
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _T = TypeVar("_T")
 
 LoopWrapper: TypeAlias = Callable[[Iterable[_T]], Iterable[_T]]
@@ -186,6 +194,12 @@ class ProvenanceQuantumInfo(QuantumInfo):
     failure.
     """
 
+    metadata_id: uuid.UUID
+    """ID of this quantum's metadata dataset."""
+
+    log_id: uuid.UUID
+    """ID of this quantum's log dataset."""
+
 
 class ProvenanceInitQuantumInfo(TypedDict):
     """A typed dictionary that annotates the attributes of the NetworkX graph
@@ -212,6 +226,9 @@ class ProvenanceInitQuantumInfo(TypedDict):
     pipeline_node: TaskInitNode
     """Node in the pipeline graph for this task's init-only step."""
 
+    config_id: uuid.UUID
+    """ID of this task's config dataset."""
+
 
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""
@@ -543,6 +560,131 @@ class ProvenanceTaskMetadataModel(pydantic.BaseModel):
         return super().model_validate_strings(*args, **kwargs)
 
 
+class ProvenanceQuantumReport(pydantic.BaseModel):
+    """A Pydantic model used to report information about a single
+    (generally problematic) quantum.
+    """
+
+    quantum_id: uuid.UUID
+    data_id: dict[str, int | str]
+    attempts: list[ProvenanceQuantumAttemptModel]
+
+    @classmethod
+    def from_info(cls, quantum_id: uuid.UUID, quantum_info: ProvenanceQuantumInfo) -> ProvenanceQuantumReport:
+        """Construct from a provenance quantum graph node.
+
+        Parameters
+        ----------
+        quantum_id : `uuid.UUID`
+            Unique ID for the quantum.
+        quantum_info : `ProvenanceQuantumInfo`
+            Node attributes for this quantum.
+        """
+        return cls(
+            quantum_id=quantum_id,
+            data_id=dict(quantum_info["data_id"].mapping),
+            attempts=quantum_info["attempts"],
+        )
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceReport(pydantic.RootModel):
+    """A Pydantic model that groups quantum information by task label, then
+    status (as a string), and then exception type.
+    """
+
+    root: dict[TaskLabel, dict[str, dict[str | None, list[ProvenanceQuantumReport]]]] = {}
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
 class ProvenanceQuantumModel(pydantic.BaseModel):
     """Data model for the quanta in a provenance quantum graph file."""
 
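Note: the two new models above define the shape of the status report: `ProvenanceReport` nests `ProvenanceQuantumReport` entries by task label, then status name, then exception type name. A minimal sketch of building and serializing one by hand (the UUID and data ID values are hypothetical, and `attempts` is left empty for brevity):

    import uuid

    report = ProvenanceReport(root={})
    report.root["isr"] = {
        "FAILED": {
            "ValueError": [
                ProvenanceQuantumReport(
                    quantum_id=uuid.uuid4(),  # hypothetical quantum ID
                    data_id={"visit": 1234, "detector": 42},
                    attempts=[],
                )
            ]
        }
    }
    print(report.model_dump_json(indent=2))  # JSON-friendly, per the docstring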
@@ -646,6 +788,8 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             resource_usage=last_attempt.resource_usage,
             attempts=self.attempts,
         )
+        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
+        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for connection_name, dataset_ids in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
             for dataset_id in dataset_ids:
@@ -655,6 +799,30 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_ids in self.outputs.items():
             write_edge = task_node.get_output_edge(connection_name)
+            if connection_name == acc.METADATA_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_metadata,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+            if connection_name == acc.LOG_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_log,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
             for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(
                     self.quantum_id,
@@ -663,8 +831,6 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                     # There can only be one pipeline edge for an output.
                     pipeline_edges=[write_edge],
                 )
-        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
-        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for dataset_id in graph._bipartite_xgraph.predecessors(self.quantum_id):
             for upstream_quantum_id in graph._bipartite_xgraph.predecessors(dataset_id):
                 graph._quantum_only_xgraph.add_edge(upstream_quantum_id, self.quantum_id)
@@ -803,6 +969,15 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_id in self.outputs.items():
             write_edge = task_init_node.get_output_edge(connection_name)
+            graph._bipartite_xgraph.add_node(
+                dataset_id,
+                data_id=empty_data_id,
+                dataset_type_name=write_edge.dataset_type_name,
+                pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                run=graph.header.output_run,
+                produced=True,
+            )
+            graph._datasets_by_type[write_edge.dataset_type_name][empty_data_id] = dataset_id
             graph._bipartite_xgraph.add_edge(
                 self.quantum_id,
                 dataset_id,
@@ -810,6 +985,8 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 # There can only be one pipeline edge for an output.
                 pipeline_edges=[write_edge],
             )
+            if write_edge.connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.nodes[self.quantum_id]["config_id"] = dataset_id
         graph._init_quanta[self.task_label] = self.quantum_id
 
     # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
@@ -954,6 +1131,83 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
             dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
         }
 
+    @classmethod
+    @contextmanager
+    def from_args(
+        cls,
+        repo_or_filename: str,
+        /,
+        collection: str | None = None,
+        *,
+        quanta: Iterable[uuid.UUID] | None = None,
+        datasets: Iterable[uuid.UUID] | None = None,
+        writeable: bool = False,
+    ) -> Iterator[tuple[ProvenanceQuantumGraph, Butler | None]]:
+        """Construct a `ProvenanceQuantumGraph` from CLI-friendly arguments
+        for a file or butler-ingested graph dataset.
+
+        Parameters
+        ----------
+        repo_or_filename : `str`
+            Either a provenance quantum graph filename or a butler repository
+            path or alias.
+        collection : `str`, optional
+            Collection to search; presence indicates that the first argument
+            is a butler repository, not a filename.
+        quanta : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, \
+                optional
+            IDs of the quanta to load, or `None` to load all.
+        datasets : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, \
+                optional
+            IDs of the datasets to load, or `None` to load all.
+        writeable : `bool`, optional
+            Whether the butler should be constructed with write support.
+
+        Returns
+        -------
+        context : `contextlib.AbstractContextManager`
+            A context manager that yields a tuple of
+
+            - the `ProvenanceQuantumGraph`
+            - the `Butler` constructed (or `None`)
+
+            when entered.
+        """
+        exit_stack = ExitStack()
+        if collection is not None:
+            try:
+                butler = exit_stack.enter_context(
+                    Butler.from_config(repo_or_filename, collections=[collection], writeable=writeable)
+                )
+            except Exception as err:
+                err.add_note(
+                    f"Expected {repo_or_filename!r} to be a butler repository path or alias because a "
+                    f"collection ({collection}) was provided."
+                )
+                raise
+            with exit_stack:
+                graph = butler.get(
+                    acc.PROVENANCE_DATASET_TYPE_NAME, parameters={"quanta": quanta, "datasets": datasets}
+                )
+                yield graph, butler
+        else:
+            try:
+                reader = exit_stack.enter_context(ProvenanceQuantumGraphReader.open(repo_or_filename))
+            except Exception as err:
+                err.add_note(
+                    f"Expected {repo_or_filename!r} to be a provenance quantum graph filename "
+                    f"because no collection was provided."
+                )
+                raise
+            with exit_stack:
+                if quanta is None:
+                    reader.read_quanta()
+                else:
+                    reader.read_quanta(quanta)
+                if datasets is None:
+                    reader.read_datasets()
+                else:
+                    reader.read_datasets(datasets)
+                yield reader.graph, None
+
     @property
     def init_quanta(self) -> Mapping[TaskLabel, uuid.UUID]:
         """A mapping from task label to the ID of the special init quantum for
@@ -994,6 +1248,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         types in the pipeline graph are included, even if none of their
         datasets were loaded (i.e. nested mappings may be empty).
 
+        Reading a quantum also populates its log and metadata datasets.
+
         The returned object may be an internal dictionary; as the type
         annotation indicates, it should not be modified in place.
         """
@@ -1032,7 +1288,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         `ProvenanceQuantumGraphReader.read_quanta`) or datasets (via
         `ProvenanceQuantumGraphReader.read_datasets`) will load those nodes
         with full attributes and edges to adjacent nodes with no attributes.
-        Loading quanta necessary to populate edge attributes.
+        Loading quanta is necessary to populate edge attributes.
+        Reading a quantum also populates its log and metadata datasets.
 
         Node attributes are described by the
         `ProvenanceQuantumInfo`, `ProvenanceInitQuantumInfo`, and
@@ -1047,10 +1304,16 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """
         return self._bipartite_xgraph.copy(as_view=True)
 
-    def make_quantum_table(self) -> astropy.table.Table:
+    def make_quantum_table(self, drop_unused_columns: bool = True) -> astropy.table.Table:
         """Construct an `astropy.table.Table` with a tabular summary of the
         quanta.
 
+        Parameters
+        ----------
+        drop_unused_columns : `bool`, optional
+            Whether to drop columns for rare states that did not actually
+            occur in this run.
+
         Returns
         -------
         table : `astropy.table.Table`
@@ -1086,28 +1349,30 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
                 caveats = f"{code.concise()}({count})"  # type: ignore[union-attr]
             else:
                 caveats = ""
-            rows.append(
+            row: dict[str, Any] = {
+                "Task": task_label,
+                "Caveats": caveats,
+            }
+            for status in QuantumAttemptStatus:
+                row[status.title] = status_counts.get(status, 0)
+            row.update(
                 {
-                    "Task": task_label,
-                    "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
-                    "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
-                    "Caveats": caveats,
-                    "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
-                    "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
                     "TOTAL": len(quanta_for_task),
                     "EXPECTED": self.header.n_task_quanta[task_label],
                 }
             )
-        return astropy.table.Table(rows)
+            rows.append(row)
+        table = astropy.table.Table(rows)
+        if drop_unused_columns:
+            for status in QuantumAttemptStatus:
+                if status.is_rare and not table[status.title].any():
+                    del table[status.title]
+        return table
 
     def make_exception_table(self) -> astropy.table.Table:
         """Construct an `astropy.table.Table` with counts for each exception
         type raised by each task.
 
-        At present this only includes information from partial-outputs-error
-        successes, since exception information for failures is not tracked.
-        This may change in the future.
-
         Returns
         -------
         table : `astropy.table.Table`
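Note: with this change every `QuantumAttemptStatus` contributes a column (via `status.title`), and the new `drop_unused_columns` keyword then removes rare all-zero columns. A sketch, assuming `graph` is an already-loaded `ProvenanceQuantumGraph`:

    table = graph.make_quantum_table()  # rare, all-zero status columns dropped
    full = graph.make_quantum_table(drop_unused_columns=False)  # keep every column
    full.pprint_all()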
@@ -1115,13 +1380,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """
         rows = []
         for task_label, quanta_for_task in self.quanta_by_task.items():
-            counts_by_type = Counter(
-                exc_info.type_name
-                for q in quanta_for_task.values()
-                if (exc_info := self._quantum_only_xgraph.nodes[q]["exception"]) is not None
-            )
-            for type_name, count in counts_by_type.items():
-                rows.append({"Task": task_label, "Exception": type_name, "Count": count})
+            success_counts = Counter[str]()
+            failed_counts = Counter[str]()
+            for quantum_id in quanta_for_task.values():
+                quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
+                exc_info = quantum_info["exception"]
+                if exc_info is not None:
+                    if quantum_info["status"] is QuantumAttemptStatus.SUCCESSFUL:
+                        success_counts[exc_info.type_name] += 1
+                    else:
+                        failed_counts[exc_info.type_name] += 1
+            for type_name in sorted(success_counts.keys() | failed_counts.keys()):
+                rows.append(
+                    {
+                        "Task": task_label,
+                        "Exception": type_name,
+                        "Successes": success_counts.get(type_name, 0),
+                        "Failures": failed_counts.get(type_name, 0),
+                    }
+                )
         return astropy.table.Table(rows)
 
     def make_task_resource_usage_table(
@@ -1164,6 +1441,171 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         array = np.array(rows, dtype=row_dtype)
         return astropy.table.Table(array, units=QuantumResourceUsage.get_units())
 
+    def make_status_report(
+        self,
+        states: Iterable[QuantumAttemptStatus] = (
+            QuantumAttemptStatus.FAILED,
+            QuantumAttemptStatus.ABORTED,
+            QuantumAttemptStatus.ABORTED_SUCCESS,
+        ),
+        *,
+        also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
+        with_caveats: QuantumSuccessCaveats | None = QuantumSuccessCaveats.PARTIAL_OUTPUTS_ERROR,
+        data_id_table_dir: ResourcePathExpression | None = None,
+    ) -> ProvenanceReport:
+        """Make a JSON- or YAML-friendly report of all quanta with the given
+        states.
+
+        Parameters
+        ----------
+        states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            A quantum is included if it has any of these states. Defaults to
+            states that clearly represent problems.
+        also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            Additional states to consider; unioned with ``states``. This is
+            provided so users can easily request additional states while also
+            getting the defaults.
+        with_caveats : `..QuantumSuccessCaveats` or `None`, optional
+            If `..QuantumAttemptStatus.SUCCESSFUL` is in ``states``, only
+            include quanta with these caveat flags. May be set to `None`
+            to report on all successful quanta.
+        data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            If provided, a directory to write data ID tables (in ECSV format)
+            with all of the data IDs with the given states, for use with the
+            ``--data-id-tables`` argument to the quantum graph builder.
+            Subdirectories for each task and status will be created within
+            this directory, with one file for each exception type (or
+            ``UNKNOWN`` when there is no exception).
+
+        Returns
+        -------
+        report : `ProvenanceReport`
+            A Pydantic model that groups quanta by task label and exception
+            type.
+        """
+        states = set(ensure_iterable(states))
+        states.update(ensure_iterable(also))
+        result = ProvenanceReport(root={})
+        if data_id_table_dir is not None:
+            data_id_table_dir = ResourcePath(data_id_table_dir)
+        for task_label, quanta_for_task in self.quanta_by_task.items():
+            reports_for_task: dict[str, dict[str | None, list[ProvenanceQuantumReport]]] = {}
+            table_rows_for_task: dict[str, dict[str | None, list[tuple[int | str, ...]]]] = {}
+            for quantum_id in quanta_for_task.values():
+                quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
+                quantum_status = quantum_info["status"]
+                if quantum_status not in states:
+                    continue
+                if (
+                    quantum_status is QuantumAttemptStatus.SUCCESSFUL
+                    and with_caveats is not None
+                    and (quantum_info["caveats"] is None or not (quantum_info["caveats"] & with_caveats))
+                ):
+                    continue
+                key1 = quantum_status.name
+                exc_info = quantum_info["exception"]
+                key2 = exc_info.type_name if exc_info is not None else None
+                reports_for_task.setdefault(key1, {}).setdefault(key2, []).append(
+                    ProvenanceQuantumReport.from_info(quantum_id, quantum_info)
+                )
+                if data_id_table_dir:
+                    table_rows_for_task.setdefault(key1, {}).setdefault(key2, []).append(
+                        quantum_info["data_id"].required_values
+                    )
+            if reports_for_task:
+                result.root[task_label] = reports_for_task
+            if table_rows_for_task:
+                assert data_id_table_dir is not None, "table_rows_for_task should be empty"
+                for status_name, table_rows_for_status in table_rows_for_task.items():
+                    dir_for_task_and_status = data_id_table_dir.join(task_label, forceDirectory=True).join(
+                        status_name, forceDirectory=True
+                    )
+                    if dir_for_task_and_status.isLocal:
+                        dir_for_task_and_status.mkdir()
+                    for exc_name, data_id_rows in table_rows_for_status.items():
+                        table = astropy.table.Table(
+                            rows=data_id_rows,
+                            names=list(self.pipeline_graph.tasks[task_label].dimensions.required),
+                        )
+                        filename = f"{exc_name}.ecsv" if exc_name is not None else "UNKNOWN.ecsv"
+                        with dir_for_task_and_status.join(filename).open("w") as stream:
+                            table.write(stream, format="ecsv")
+        return result
+
+    def make_many_reports(
+        self,
+        states: Iterable[QuantumAttemptStatus] = (
+            QuantumAttemptStatus.FAILED,
+            QuantumAttemptStatus.ABORTED,
+            QuantumAttemptStatus.ABORTED_SUCCESS,
+        ),
+        *,
+        status_report_file: ResourcePathExpression | None = None,
+        print_quantum_table: bool = False,
+        print_exception_table: bool = False,
+        also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
+        with_caveats: QuantumSuccessCaveats | None = None,
+        data_id_table_dir: ResourcePathExpression | None = None,
+    ) -> None:
+        """Write multiple reports.
+
+        Parameters
+        ----------
+        states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            A quantum is included in the status report and data ID tables if
+            it has any of these states. Defaults to states that clearly
+            represent problems.
+        status_report_file : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            Filename for the JSON status report (see `make_status_report`).
+        print_quantum_table : `bool`, optional
+            If `True`, print a quantum summary table (counts only) to STDOUT.
+        print_exception_table : `bool`, optional
+            If `True`, print an exception-type summary table (counts only) to
+            STDOUT.
+        also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            Additional states to consider in the status report and data ID
+            tables; unioned with ``states``. This is provided so users can
+            easily request additional states while also getting the defaults.
+        with_caveats : `..QuantumSuccessCaveats` or `None`, optional
+            Only include quanta with these caveat flags in the status report
+            and data ID tables. May be set to `None` to report on all
+            successful quanta (an empty sequence reports on only quanta with
+            no caveats). If provided, `QuantumAttemptStatus.SUCCESSFUL` is
+            automatically included in ``states``.
+        data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            If provided, a directory to write data ID tables (in ECSV format)
+            with all of the data IDs with the given states, for use with the
+            ``--data-id-tables`` argument to the quantum graph builder.
+            Subdirectories for each task and status will be created within
+            this directory, with one file for each exception type (or
+            ``UNKNOWN`` when there is no exception).
+        """
+        if status_report_file is not None or data_id_table_dir is not None:
+            status_report = self.make_status_report(
+                states, also=also, with_caveats=with_caveats, data_id_table_dir=data_id_table_dir
+            )
+            if status_report_file is not None:
+                status_report_file = ResourcePath(status_report_file)
+                if status_report_file.isLocal:
+                    status_report_file.dirname().mkdir()
+                with status_report_file.open("w") as stream:
+                    stream.write(status_report.model_dump_json(indent=2))
+        if print_quantum_table:
+            quantum_table = self.make_quantum_table()
+            quantum_table.pprint_all()
+            print("")
+        if print_exception_table:
+            exception_table = self.make_exception_table()
+            exception_table.pprint_all()
+            print("")
+
 
 @dataclasses.dataclass
 class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
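Note: a sketch of driving the new reporting entry points together; the paths are hypothetical and `graph` is an already-loaded `ProvenanceQuantumGraph`:

    # One call that writes the JSON status report and the ECSV data ID tables,
    # and prints both summary tables to STDOUT.
    graph.make_many_reports(
        status_report_file="reports/status.json",
        print_quantum_table=True,
        print_exception_table=True,
        data_id_table_dir="reports/data_ids",
    )

    # Or get the Pydantic model directly, including flagged successes as well.
    report = graph.make_status_report(also=QuantumAttemptStatus.SUCCESSFUL)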
@@ -1294,19 +1736,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # also have other outstanding reference holders).
                     continue
                 node._add_to_graph(self.graph)
-            return
-        with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
-            for node_id_or_index in nodes:
-                address_row = self.address_reader.find(node_id_or_index)
-                if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
-                    # Use the old node to reduce memory usage (since it might
-                    # also have other outstanding reference holders).
-                    continue
-                node = mb_reader.read_model(
-                    address_row.addresses[address_index], model_type, self.decompressor
-                )
-                if node is not None:
-                    node._add_to_graph(self.graph)
+        else:
+            with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
+                for node_id_or_index in nodes:
+                    address_row = self.address_reader.find(node_id_or_index)
+                    if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
+                        # Use the old node to reduce memory usage (since it
+                        # might also have other outstanding reference holders).
+                        continue
+                    node = mb_reader.read_model(
+                        address_row.addresses[address_index], model_type, self.decompressor
+                    )
+                    if node is not None:
+                        node._add_to_graph(self.graph)
 
     def fetch_logs(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[ButlerLogRecords | None]]:
         """Fetch log datasets.
@@ -1588,7 +2030,7 @@ class ProvenanceQuantumGraphWriter:
         """
         predicted_quantum = self._predicted_quanta[quantum_id]
         provenance_models = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs, assume_complete=True
+            predicted_quantum, metadata, logs, incomplete=False
         )
         scan_data = provenance_models.to_scan_data(predicted_quantum, compressor=self.compressor)
         self.write_scan_data(scan_data)
@@ -1665,8 +2107,8 @@ class ProvenanceQuantumScanStatus(enum.Enum):
     enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
     to stop trying for now.
 
-    This state means a later run with `ScannerConfig.assume_complete` is
-    required.
+    This state means `ProvenanceQuantumScanModels.from_metadata_and_logs` must
+    be run again with ``incomplete=False``.
     """
 
     SUCCESSFUL = enum.auto()
@@ -1721,7 +2163,7 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         logs: ButlerLogRecords | None,
         *,
-        assume_complete: bool = True,
+        incomplete: bool = False,
     ) -> ProvenanceQuantumScanModels:
         """Construct provenance information from task metadata and logs.
 
@@ -1733,8 +2175,8 @@ class ProvenanceQuantumScanModels:
             Task metadata.
         logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
             Task logs.
-        assume_complete : `bool`, optional
-            If `False`, treat execution failures as possibly-incomplete quanta
+        incomplete : `bool`, optional
+            If `True`, treat execution failures as possibly-incomplete quanta
             and do not fully process them; instead just set the status to
             `ProvenanceQuantumScanStatus.ABANDONED` and return.
 
@@ -1752,8 +2194,8 @@ class ProvenanceQuantumScanModels:
         """
         self = ProvenanceQuantumScanModels(predicted.quantum_id)
         last_attempt = ProvenanceQuantumAttemptModel()
-        self._process_logs(predicted, logs, last_attempt, assume_complete=assume_complete)
-        self._process_metadata(predicted, metadata, last_attempt, assume_complete=assume_complete)
+        self._process_logs(predicted, logs, last_attempt, incomplete=incomplete)
+        self._process_metadata(predicted, metadata, last_attempt, incomplete=incomplete)
         if self.status is ProvenanceQuantumScanStatus.ABANDONED:
             return self
         self._reconcile_attempts(last_attempt)
@@ -1766,15 +2208,15 @@ class ProvenanceQuantumScanModels:
         logs: ButlerLogRecords | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-        assume_complete: bool,
+        incomplete: bool,
     ) -> None:
         (predicted_log_dataset,) = predicted.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
         if logs is None:
             self.output_existence[predicted_log_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             # Set the attempt's run status to FAILED, since the default is
             # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
@@ -1832,15 +2274,15 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-        assume_complete: bool,
+        incomplete: bool,
     ) -> None:
         (predicted_metadata_dataset,) = predicted.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
         if metadata is None:
             self.output_existence[predicted_metadata_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             self.status = ProvenanceQuantumScanStatus.SUCCESSFUL
             self.output_existence[predicted_metadata_dataset.dataset_id] = True
@@ -1875,7 +2317,7 @@ class ProvenanceQuantumScanModels:
                 # But we found the metadata! Either that hard error happened
                 # at a very unlucky time (in between those two writes), or
                 # something even weirder happened.
-                self.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
+                self.attempts[-1].status = QuantumAttemptStatus.ABORTED_SUCCESS
             else:
                 self.attempts[-1].status = QuantumAttemptStatus.FAILED
             if len(self.metadata.attempts) < len(self.attempts):
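
Note: the last several hunks invert and rename the scanner flag: `assume_complete=True` (the old default) becomes `incomplete=False` (the new default), so call sites flip the boolean as well as the name. A before/after sketch (the arguments other than the flag are placeholders):

    # 30.2026.300
    models = ProvenanceQuantumScanModels.from_metadata_and_logs(
        predicted, metadata, logs, assume_complete=False
    )
    # 30.2026.500 -- equivalent call
    models = ProvenanceQuantumScanModels.from_metadata_and_logs(
        predicted, metadata, logs, incomplete=True
    )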