lsst-pipe-base 30.0.0rc3__py3-none-any.whl → 30.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. lsst/pipe/base/_instrument.py +25 -15
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/_task_metadata.py +2 -2
  5. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
  6. lsst/pipe/base/automatic_connection_constants.py +20 -1
  7. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  8. lsst/pipe/base/cli/cmd/commands.py +149 -4
  9. lsst/pipe/base/connectionTypes.py +72 -160
  10. lsst/pipe/base/connections.py +6 -9
  11. lsst/pipe/base/execution_reports.py +0 -5
  12. lsst/pipe/base/graph/graph.py +11 -10
  13. lsst/pipe/base/graph/quantumNode.py +4 -4
  14. lsst/pipe/base/graph_walker.py +8 -10
  15. lsst/pipe/base/log_capture.py +1 -1
  16. lsst/pipe/base/log_on_close.py +4 -7
  17. lsst/pipe/base/pipeline.py +5 -6
  18. lsst/pipe/base/pipelineIR.py +2 -8
  19. lsst/pipe/base/pipelineTask.py +5 -7
  20. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  21. lsst/pipe/base/pipeline_graph/_edges.py +32 -22
  22. lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
  23. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
  24. lsst/pipe/base/pipeline_graph/expressions.py +2 -2
  25. lsst/pipe/base/pipeline_graph/io.py +7 -10
  26. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  27. lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
  28. lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
  29. lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
  30. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  31. lsst/pipe/base/prerequisite_helpers.py +2 -1
  32. lsst/pipe/base/quantum_graph/_common.py +15 -17
  33. lsst/pipe/base/quantum_graph/_multiblock.py +36 -20
  34. lsst/pipe/base/quantum_graph/_predicted.py +7 -3
  35. lsst/pipe/base/quantum_graph/_provenance.py +501 -61
  36. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  37. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +187 -240
  38. lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
  39. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
  40. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +15 -7
  41. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
  42. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +19 -34
  43. lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
  44. lsst/pipe/base/quantum_graph/aggregator/_writer.py +3 -3
  45. lsst/pipe/base/quantum_graph/formatter.py +74 -4
  46. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  47. lsst/pipe/base/quantum_graph/visualization.py +5 -1
  48. lsst/pipe/base/quantum_graph_builder.py +21 -8
  49. lsst/pipe/base/quantum_graph_skeleton.py +31 -29
  50. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  51. lsst/pipe/base/separable_pipeline_executor.py +1 -1
  52. lsst/pipe/base/single_quantum_executor.py +15 -8
  53. lsst/pipe/base/struct.py +4 -0
  54. lsst/pipe/base/testUtils.py +3 -3
  55. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  56. lsst/pipe/base/version.py +1 -1
  57. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/METADATA +3 -3
  58. lsst_pipe_base-30.0.1.dist-info/RECORD +129 -0
  59. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/WHEEL +1 -1
  60. lsst_pipe_base-30.0.0rc3.dist-info/RECORD +0 -127
  61. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/entry_points.txt +0 -0
  62. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
  63. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/LICENSE +0 -0
  64. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
  65. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  66. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/top_level.txt +0 -0
  67. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/_provenance.py

@@ -38,13 +38,14 @@ __all__ = (
     "ProvenanceQuantumGraphWriter",
     "ProvenanceQuantumInfo",
     "ProvenanceQuantumModel",
+    "ProvenanceQuantumReport",
     "ProvenanceQuantumScanData",
     "ProvenanceQuantumScanModels",
     "ProvenanceQuantumScanStatus",
+    "ProvenanceReport",
     "ProvenanceTaskMetadataModel",
 )
 
-
 import dataclasses
 import enum
 import itertools
@@ -53,16 +54,16 @@ import uuid
 from collections import Counter
 from collections.abc import Callable, Iterable, Iterator, Mapping
 from contextlib import ExitStack, contextmanager
-from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, TypeVar
+from typing import TYPE_CHECKING, Any, TypedDict
 
 import astropy.table
 import networkx
 import numpy as np
 import pydantic
 
-from lsst.daf.butler import DataCoordinate
+from lsst.daf.butler import Butler, DataCoordinate
 from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
-from lsst.resources import ResourcePathExpression
+from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils.iteration import ensure_iterable
 from lsst.utils.logging import LsstLogAdapter, getLogger
 from lsst.utils.packages import Packages
@@ -94,9 +95,14 @@ from ._predicted import (
     PredictedQuantumGraphComponents,
 )
 
-_T = TypeVar("_T")
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
 
-LoopWrapper: TypeAlias = Callable[[Iterable[_T]], Iterable[_T]]
+
+type LoopWrapper[T] = Callable[[Iterable[T]], Iterable[T]]
 
 _LOG = getLogger(__file__)
 
@@ -111,7 +117,7 @@ LOG_MB_NAME = "logs"
 METADATA_MB_NAME = "metadata"
 
 
-def pass_through(arg: _T) -> _T:
+def pass_through[T](arg: T) -> T:
     return arg
 
 
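The two deletions above replace `TypeVar`/`TypeAlias` declarations with Python 3.12 type-parameter syntax (PEP 695). A minimal sketch of how the new declarations behave, using only names defined in these hunks:

    from collections.abc import Callable, Iterable

    type LoopWrapper[T] = Callable[[Iterable[T]], Iterable[T]]

    def pass_through[T](arg: T) -> T:
        return arg

    # A LoopWrapper wraps an iterable and yields it back (e.g. a progress
    # bar); pass_through is the no-op default.
    wrapper: LoopWrapper[int] = pass_through
    assert list(wrapper([1, 2, 3])) == [1, 2, 3]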
@@ -186,6 +192,12 @@ class ProvenanceQuantumInfo(QuantumInfo):
     failure.
     """
 
+    metadata_id: uuid.UUID
+    """ID of this quantum's metadata dataset."""
+
+    log_id: uuid.UUID
+    """ID of this quantum's log dataset."""
+
 
 class ProvenanceInitQuantumInfo(TypedDict):
     """A typed dictionary that annotates the attributes of the NetworkX graph
@@ -212,6 +224,9 @@ class ProvenanceInitQuantumInfo(TypedDict):
     pipeline_node: TaskInitNode
     """Node in the pipeline graph for this task's init-only step."""
 
+    config_id: uuid.UUID
+    """ID of this task's config dataset."""
+
 
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""
@@ -543,6 +558,131 @@ class ProvenanceTaskMetadataModel(pydantic.BaseModel):
             return super().model_validate_strings(*args, **kwargs)
 
 
+class ProvenanceQuantumReport(pydantic.BaseModel):
+    """A Pydantic model used to report information about a single (generally
+    problematic) quantum.
+    """
+
+    quantum_id: uuid.UUID
+    data_id: dict[str, int | str]
+    attempts: list[ProvenanceQuantumAttemptModel]
+
+    @classmethod
+    def from_info(cls, quantum_id: uuid.UUID, quantum_info: ProvenanceQuantumInfo) -> ProvenanceQuantumReport:
+        """Construct from a provenance quantum graph node.
+
+        Parameters
+        ----------
+        quantum_id : `uuid.UUID`
+            Unique ID for the quantum.
+        quantum_info : `ProvenanceQuantumInfo`
+            Node attributes for this quantum.
+        """
+        return cls(
+            quantum_id=quantum_id,
+            data_id=dict(quantum_info["data_id"].mapping),
+            attempts=quantum_info["attempts"],
+        )
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceReport(pydantic.RootModel):
+    """A Pydantic model that groups quantum information by task label, then
+    status (as a string), and then exception type.
+    """
+
+    root: dict[TaskLabel, dict[str, dict[str | None, list[ProvenanceQuantumReport]]]] = {}
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
 class ProvenanceQuantumModel(pydantic.BaseModel):
     """Data model for the quanta in a provenance quantum graph file."""
 
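For orientation, a minimal sketch of how the two new report models nest; the task label, status key, and data ID values are illustrative:

    import uuid

    report = ProvenanceQuantumReport(
        quantum_id=uuid.uuid4(),
        data_id={"instrument": "HSC", "visit": 903334},  # illustrative data ID
        attempts=[],
    )
    # ProvenanceReport nests reports by task label, then status name, then
    # exception type name (None when no exception was recorded).
    full = ProvenanceReport(root={"isr": {"FAILED": {None: [report]}}})
    print(full.model_dump_json(indent=2))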
@@ -646,6 +786,8 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             resource_usage=last_attempt.resource_usage,
             attempts=self.attempts,
         )
+        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
+        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for connection_name, dataset_ids in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
             for dataset_id in dataset_ids:
@@ -655,6 +797,30 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_ids in self.outputs.items():
             write_edge = task_node.get_output_edge(connection_name)
+            if connection_name == acc.METADATA_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_metadata,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+            if connection_name == acc.LOG_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_log,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
             for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(
                     self.quantum_id,
@@ -663,8 +829,6 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                     # There can only be one pipeline edge for an output.
                     pipeline_edges=[write_edge],
                 )
-        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
-        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for dataset_id in graph._bipartite_xgraph.predecessors(self.quantum_id):
             for upstream_quantum_id in graph._bipartite_xgraph.predecessors(dataset_id):
                 graph._quantum_only_xgraph.add_edge(upstream_quantum_id, self.quantum_id)
@@ -803,6 +967,15 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_id in self.outputs.items():
             write_edge = task_init_node.get_output_edge(connection_name)
+            graph._bipartite_xgraph.add_node(
+                dataset_id,
+                data_id=empty_data_id,
+                dataset_type_name=write_edge.dataset_type_name,
+                pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                run=graph.header.output_run,
+                produced=True,
+            )
+            graph._datasets_by_type[write_edge.dataset_type_name][empty_data_id] = dataset_id
             graph._bipartite_xgraph.add_edge(
                 self.quantum_id,
                 dataset_id,
@@ -810,6 +983,8 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 # There can only be one pipeline edge for an output.
                 pipeline_edges=[write_edge],
             )
+            if write_edge.connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.nodes[self.quantum_id]["config_id"] = dataset_id
         graph._init_quanta[self.task_label] = self.quantum_id
 
     # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
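A sketch of reading the new ``config_id`` attribute back from a loaded graph; the "isr" label is illustrative, and ``bipartite_xgraph`` is assumed to be the public view of the internal graph used above:

    # The init quantum for a task carries the ID of its config dataset.
    init_quantum_id = graph.init_quanta["isr"]
    config_dataset_id = graph.bipartite_xgraph.nodes[init_quantum_id]["config_id"]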
@@ -954,6 +1129,83 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
             dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
         }
 
+    @classmethod
+    @contextmanager
+    def from_args(
+        cls,
+        repo_or_filename: str,
+        /,
+        collection: str | None = None,
+        *,
+        quanta: Iterable[uuid.UUID] | None = None,
+        datasets: Iterable[uuid.UUID] | None = None,
+        writeable: bool = False,
+    ) -> Iterator[tuple[ProvenanceQuantumGraph, Butler | None]]:
+        """Construct a `ProvenanceQuantumGraph` from CLI-friendly arguments
+        for a file or butler-ingested graph dataset.
+
+        Parameters
+        ----------
+        repo_or_filename : `str`
+            Either a provenance quantum graph filename or a butler repository
+            path or alias.
+        collection : `str`, optional
+            Collection to search; presence indicates that the first argument
+            is a butler repository, not a filename.
+        quanta : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, \
+                optional
+            IDs of the quanta to load, or `None` to load all.
+        datasets : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, \
+                optional
+            IDs of the datasets to load, or `None` to load all.
+        writeable : `bool`, optional
+            Whether the butler should be constructed with write support.
+
+        Returns
+        -------
+        context : `contextlib.AbstractContextManager`
+            A context manager that yields a tuple of
+
+            - the `ProvenanceQuantumGraph`
+            - the `Butler` constructed (or `None`)
+
+            when entered.
+        """
+        exit_stack = ExitStack()
+        if collection is not None:
+            try:
+                butler = exit_stack.enter_context(
+                    Butler.from_config(repo_or_filename, collections=[collection], writeable=writeable)
+                )
+            except Exception as err:
+                err.add_note(
+                    f"Expected {repo_or_filename!r} to be a butler repository path or alias because a "
+                    f"collection ({collection}) was provided."
+                )
+                raise
+            with exit_stack:
+                graph = butler.get(
+                    acc.PROVENANCE_DATASET_TYPE_NAME, parameters={"quanta": quanta, "datasets": datasets}
+                )
+                yield graph, butler
+        else:
+            try:
+                reader = exit_stack.enter_context(ProvenanceQuantumGraphReader.open(repo_or_filename))
+            except Exception as err:
+                err.add_note(
+                    f"Expected {repo_or_filename!r} to be a provenance quantum graph filename "
+                    "because no collection was provided."
+                )
+                raise
+            with exit_stack:
+                if quanta is None:
+                    reader.read_quanta()
+                elif quanta:
+                    reader.read_quanta(quanta)
+                if datasets is None:
+                    reader.read_datasets()
+                elif datasets:
+                    reader.read_datasets(datasets)
+                yield reader.graph, None
+
     @property
     def init_quanta(self) -> Mapping[TaskLabel, uuid.UUID]:
         """A mapping from task label to the ID of the special init quantum for
@@ -994,6 +1246,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         types in the pipeline graph are included, even if none of their
         datasets were loaded (i.e. nested mappings may be empty).
 
+        Reading a quantum also populates its log and metadata datasets.
+
         The returned object may be an internal dictionary; as the type
         annotation indicates, it should not be modified in place.
         """
@@ -1032,7 +1286,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         `ProvenanceQuantumGraphReader.read_quanta`) or datasets (via
         `ProvenanceQuantumGraphReader.read_datasets`) will load those nodes
         with full attributes and edges to adjacent nodes with no attributes.
-        Loading quanta necessary to populate edge attributes.
+        Loading quanta is necessary to populate edge attributes.
+        Reading a quantum also populates its log and metadata datasets.
 
         Node attributes are described by the
         `ProvenanceQuantumInfo`, `ProvenanceInitQuantumInfo`, and
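Per the added docstring lines, reading a quantum now also populates its log and metadata dataset nodes, and their IDs land on the quantum node itself. A sketch, assuming ``quantum_only_xgraph`` is the public view of the quantum-only graph used above:

    for task_label, quanta in graph.quanta_by_task.items():
        for data_id, quantum_id in quanta.items():
            info = graph.quantum_only_xgraph.nodes[quantum_id]
            print(task_label, data_id, info["metadata_id"], info["log_id"])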
@@ -1047,10 +1302,16 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """
         return self._bipartite_xgraph.copy(as_view=True)
 
-    def make_quantum_table(self) -> astropy.table.Table:
+    def make_quantum_table(self, drop_unused_columns: bool = True) -> astropy.table.Table:
         """Construct an `astropy.table.Table` with a tabular summary of the
         quanta.
 
+        Parameters
+        ----------
+        drop_unused_columns : `bool`, optional
+            Whether to drop columns for rare states that did not actually
+            occur in this run.
+
         Returns
         -------
         table : `astropy.table.Table`
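A short sketch of the new keyword; by default, columns for rare states that never occurred are dropped:

    table = graph.make_quantum_table()  # unused rare-status columns dropped
    full = graph.make_quantum_table(drop_unused_columns=False)  # all columns
    table.pprint_all()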
@@ -1086,28 +1347,30 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
                 caveats = f"{code.concise()}({count})"  # type: ignore[union-attr]
             else:
                 caveats = ""
-            rows.append(
+            row: dict[str, Any] = {
+                "Task": task_label,
+                "Caveats": caveats,
+            }
+            for status in QuantumAttemptStatus:
+                row[status.title] = status_counts.get(status, 0)
+            row.update(
                 {
-                    "Task": task_label,
-                    "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
-                    "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
-                    "Caveats": caveats,
-                    "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
-                    "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
                     "TOTAL": len(quanta_for_task),
                     "EXPECTED": self.header.n_task_quanta[task_label],
                 }
             )
-        return astropy.table.Table(rows)
+            rows.append(row)
+        table = astropy.table.Table(rows)
+        if drop_unused_columns:
+            for status in QuantumAttemptStatus:
+                if status.is_rare and not table[status.title].any():
+                    del table[status.title]
+        return table
 
     def make_exception_table(self) -> astropy.table.Table:
         """Construct an `astropy.table.Table` with counts for each exception
         type raised by each task.
 
-        At present this only includes information from partial-outputs-error
-        successes, since exception information for failures is not tracked.
-        This may change in the future.
-
         Returns
         -------
         table : `astropy.table.Table`
@@ -1115,13 +1378,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """
         rows = []
         for task_label, quanta_for_task in self.quanta_by_task.items():
-            counts_by_type = Counter(
-                exc_info.type_name
-                for q in quanta_for_task.values()
-                if (exc_info := self._quantum_only_xgraph.nodes[q]["exception"]) is not None
-            )
-            for type_name, count in counts_by_type.items():
-                rows.append({"Task": task_label, "Exception": type_name, "Count": count})
+            success_counts = Counter[str]()
+            failed_counts = Counter[str]()
+            for quantum_id in quanta_for_task.values():
+                quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
+                exc_info = quantum_info["exception"]
+                if exc_info is not None:
+                    if quantum_info["status"] is QuantumAttemptStatus.SUCCESSFUL:
+                        success_counts[exc_info.type_name] += 1
+                    else:
+                        failed_counts[exc_info.type_name] += 1
+            for type_name in sorted(success_counts.keys() | failed_counts.keys()):
+                rows.append(
+                    {
+                        "Task": task_label,
+                        "Exception": type_name,
+                        "Successes": success_counts.get(type_name, 0),
+                        "Failures": failed_counts.get(type_name, 0),
+                    }
+                )
         return astropy.table.Table(rows)
 
     def make_task_resource_usage_table(
@@ -1164,6 +1439,171 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         array = np.array(rows, dtype=row_dtype)
         return astropy.table.Table(array, units=QuantumResourceUsage.get_units())
 
+    def make_status_report(
+        self,
+        states: Iterable[QuantumAttemptStatus] = (
+            QuantumAttemptStatus.FAILED,
+            QuantumAttemptStatus.ABORTED,
+            QuantumAttemptStatus.ABORTED_SUCCESS,
+        ),
+        *,
+        also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
+        with_caveats: QuantumSuccessCaveats | None = QuantumSuccessCaveats.PARTIAL_OUTPUTS_ERROR,
+        data_id_table_dir: ResourcePathExpression | None = None,
+    ) -> ProvenanceReport:
+        """Make a JSON- or YAML-friendly report of all quanta with the given
+        states.
+
+        Parameters
+        ----------
+        states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            A quantum is included if it has any of these states. Defaults to
+            states that clearly represent problems.
+        also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            Additional states to consider; unioned with ``states``. This is
+            provided so users can easily request additional states while also
+            getting the defaults.
+        with_caveats : `..QuantumSuccessCaveats` or `None`, optional
+            If `..QuantumAttemptStatus.SUCCESSFUL` is in ``states``, only
+            include quanta with these caveat flags. May be set to `None`
+            to report on all successful quanta.
+        data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            If provided, a directory to write data ID tables (in ECSV format)
+            with all of the data IDs with the given states, for use with the
+            ``--data-id-tables`` argument to the quantum graph builder.
+            Subdirectories for each task and status will be created within
+            this directory, with one file for each exception type (or
+            ``UNKNOWN`` when there is no exception).
+
+        Returns
+        -------
+        report : `ProvenanceReport`
+            A Pydantic model that groups quanta by task label and exception
+            type.
+        """
+        states = set(ensure_iterable(states))
+        states.update(ensure_iterable(also))
+        result = ProvenanceReport(root={})
+        if data_id_table_dir is not None:
+            data_id_table_dir = ResourcePath(data_id_table_dir)
+        for task_label, quanta_for_task in self.quanta_by_task.items():
+            reports_for_task: dict[str, dict[str | None, list[ProvenanceQuantumReport]]] = {}
+            table_rows_for_task: dict[str, dict[str | None, list[tuple[int | str, ...]]]] = {}
+            for quantum_id in quanta_for_task.values():
+                quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
+                quantum_status = quantum_info["status"]
+                if quantum_status not in states:
+                    continue
+                if (
+                    quantum_status is QuantumAttemptStatus.SUCCESSFUL
+                    and with_caveats is not None
+                    and (quantum_info["caveats"] is None or not (quantum_info["caveats"] & with_caveats))
+                ):
+                    continue
+                key1 = quantum_status.name
+                exc_info = quantum_info["exception"]
+                key2 = exc_info.type_name if exc_info is not None else None
+                reports_for_task.setdefault(key1, {}).setdefault(key2, []).append(
+                    ProvenanceQuantumReport.from_info(quantum_id, quantum_info)
+                )
+                if data_id_table_dir:
+                    table_rows_for_task.setdefault(key1, {}).setdefault(key2, []).append(
+                        quantum_info["data_id"].required_values
+                    )
+            if reports_for_task:
+                result.root[task_label] = reports_for_task
+            if table_rows_for_task:
+                assert data_id_table_dir is not None, "table_rows_for_task should be empty"
+                for status_name, table_rows_for_status in table_rows_for_task.items():
+                    dir_for_task_and_status = data_id_table_dir.join(task_label, forceDirectory=True).join(
+                        status_name, forceDirectory=True
+                    )
+                    if dir_for_task_and_status.isLocal:
+                        dir_for_task_and_status.mkdir()
+                    for exc_name, data_id_rows in table_rows_for_status.items():
+                        table = astropy.table.Table(
+                            rows=data_id_rows,
+                            names=list(self.pipeline_graph.tasks[task_label].dimensions.required),
+                        )
+                        filename = f"{exc_name}.ecsv" if exc_name is not None else "UNKNOWN.ecsv"
+                        with dir_for_task_and_status.join(filename).open("w") as stream:
+                            table.write(stream, format="ecsv")
+        return result
+
+    def make_many_reports(
+        self,
+        states: Iterable[QuantumAttemptStatus] = (
+            QuantumAttemptStatus.FAILED,
+            QuantumAttemptStatus.ABORTED,
+            QuantumAttemptStatus.ABORTED_SUCCESS,
+        ),
+        *,
+        status_report_file: ResourcePathExpression | None = None,
+        print_quantum_table: bool = False,
+        print_exception_table: bool = False,
+        also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
+        with_caveats: QuantumSuccessCaveats | None = None,
+        data_id_table_dir: ResourcePathExpression | None = None,
+    ) -> None:
+        """Write multiple reports.
+
+        Parameters
+        ----------
+        states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            A quantum is included in the status report and data ID tables if
+            it has any of these states. Defaults to states that clearly
+            represent problems.
+        status_report_file : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            Filename for the JSON status report (see `make_status_report`).
+        print_quantum_table : `bool`, optional
+            If `True`, print a quantum summary table (counts only) to STDOUT.
+        print_exception_table : `bool`, optional
+            If `True`, print an exception-type summary table (counts only) to
+            STDOUT.
+        also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            Additional states to consider in the status report and data ID
+            tables; unioned with ``states``. This is provided so users can
+            easily request additional states while also getting the defaults.
+        with_caveats : `..QuantumSuccessCaveats` or `None`, optional
+            Only include quanta with these caveat flags in the status report
+            and data ID tables. May be set to `None` to report on all
+            successful quanta (an empty sequence reports on only quanta with
+            no caveats). If provided, `QuantumAttemptStatus.SUCCESSFUL` is
+            automatically included in ``states``.
+        data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            If provided, a directory to write data ID tables (in ECSV format)
+            with all of the data IDs with the given states, for use with the
+            ``--data-id-tables`` argument to the quantum graph builder.
+            Subdirectories for each task and status will be created within
+            this directory, with one file for each exception type (or
+            ``UNKNOWN`` when there is no exception).
+        """
+        if status_report_file is not None or data_id_table_dir is not None:
+            status_report = self.make_status_report(
+                states, also=also, with_caveats=with_caveats, data_id_table_dir=data_id_table_dir
+            )
+            if status_report_file is not None:
+                status_report_file = ResourcePath(status_report_file)
+                if status_report_file.isLocal:
+                    status_report_file.dirname().mkdir()
+                with status_report_file.open("w") as stream:
+                    stream.write(status_report.model_dump_json(indent=2))
+        if print_quantum_table:
+            quantum_table = self.make_quantum_table()
+            quantum_table.pprint_all()
+            print("")
+        if print_exception_table:
+            exception_table = self.make_exception_table()
+            exception_table.pprint_all()
+            print("")
+
 
 @dataclasses.dataclass
 class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
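A usage sketch for the two new reporting methods; the output paths are illustrative:

    # Include partial-outputs successes alongside the default problem states.
    report = graph.make_status_report(
        also=QuantumAttemptStatus.SUCCESSFUL,
        data_id_table_dir="data_id_tables",
    )
    with open("status_report.json", "w") as stream:
        stream.write(report.model_dump_json(indent=2))

    # Or write the status report and print both summary tables in one call.
    graph.make_many_reports(
        status_report_file="status_report.json",
        print_quantum_table=True,
        print_exception_table=True,
    )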
@@ -1294,19 +1734,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # also have other outstanding reference holders).
                     continue
                 node._add_to_graph(self.graph)
-            return
-        with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
-            for node_id_or_index in nodes:
-                address_row = self.address_reader.find(node_id_or_index)
-                if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
-                    # Use the old node to reduce memory usage (since it might
-                    # also have other outstanding reference holders).
-                    continue
-                node = mb_reader.read_model(
-                    address_row.addresses[address_index], model_type, self.decompressor
-                )
-                if node is not None:
-                    node._add_to_graph(self.graph)
+        else:
+            with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
+                for node_id_or_index in nodes:
+                    address_row = self.address_reader.find(node_id_or_index)
+                    if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
+                        # Use the old node to reduce memory usage (since it
+                        # might also have other outstanding reference holders).
+                        continue
+                    node = mb_reader.read_model(
+                        address_row.addresses[address_index], model_type, self.decompressor
+                    )
+                    if node is not None:
+                        node._add_to_graph(self.graph)
 
     def fetch_logs(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[ButlerLogRecords | None]]:
         """Fetch log datasets.
@@ -1588,7 +2028,7 @@ class ProvenanceQuantumGraphWriter:
         """
         predicted_quantum = self._predicted_quanta[quantum_id]
         provenance_models = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs, assume_complete=True
+            predicted_quantum, metadata, logs, incomplete=False
        )
         scan_data = provenance_models.to_scan_data(predicted_quantum, compressor=self.compressor)
         self.write_scan_data(scan_data)
@@ -1665,8 +2105,8 @@ class ProvenanceQuantumScanStatus(enum.Enum):
     enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
     to stop trying for now.
 
-    This state means a later run with `ScannerConfig.assume_complete` is
-    required.
+    This state means `ProvenanceQuantumScanModels.from_metadata_and_logs` must
+    be run again with ``incomplete=False``.
     """
 
     SUCCESSFUL = enum.auto()
@@ -1721,7 +2161,7 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         logs: ButlerLogRecords | None,
         *,
-        assume_complete: bool = True,
+        incomplete: bool = False,
     ) -> ProvenanceQuantumScanModels:
         """Construct provenance information from task metadata and logs.
 
@@ -1733,8 +2173,8 @@ class ProvenanceQuantumScanModels:
             Task metadata.
         logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
             Task logs.
-        assume_complete : `bool`, optional
-            If `False`, treat execution failures as possibly-incomplete quanta
+        incomplete : `bool`, optional
+            If `True`, treat execution failures as possibly-incomplete quanta
             and do not fully process them; instead just set the status to
             `ProvenanceQuantumScanStatus.ABANDONED` and return.
 
@@ -1752,8 +2192,8 @@ class ProvenanceQuantumScanModels:
         """
         self = ProvenanceQuantumScanModels(predicted.quantum_id)
         last_attempt = ProvenanceQuantumAttemptModel()
-        self._process_logs(predicted, logs, last_attempt, assume_complete=assume_complete)
-        self._process_metadata(predicted, metadata, last_attempt, assume_complete=assume_complete)
+        self._process_logs(predicted, logs, last_attempt, incomplete=incomplete)
+        self._process_metadata(predicted, metadata, last_attempt, incomplete=incomplete)
         if self.status is ProvenanceQuantumScanStatus.ABANDONED:
             return self
         self._reconcile_attempts(last_attempt)
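A sketch of the renamed keyword, assuming a predicted quantum with its metadata and logs in hand:

    # Old call sites passed assume_complete=False; the equivalent is now:
    models = ProvenanceQuantumScanModels.from_metadata_and_logs(
        predicted, metadata, logs, incomplete=True
    )
    if models.status is ProvenanceQuantumScanStatus.ABANDONED:
        pass  # scan again later, or rerun with incomplete=False to finalize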
@@ -1766,15 +2206,15 @@ class ProvenanceQuantumScanModels:
         logs: ButlerLogRecords | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-        assume_complete: bool,
+        incomplete: bool,
     ) -> None:
         (predicted_log_dataset,) = predicted.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
         if logs is None:
             self.output_existence[predicted_log_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             # Set the attempt's run status to FAILED, since the default is
             # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
@@ -1832,15 +2272,15 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-        assume_complete: bool,
+        incomplete: bool,
     ) -> None:
         (predicted_metadata_dataset,) = predicted.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
         if metadata is None:
             self.output_existence[predicted_metadata_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             self.status = ProvenanceQuantumScanStatus.SUCCESSFUL
             self.output_existence[predicted_metadata_dataset.dataset_id] = True
@@ -1875,7 +2315,7 @@ class ProvenanceQuantumScanModels:
                 # But we found the metadata! Either that hard error happened
                 # at a very unlucky time (in between those two writes), or
                 # something even weirder happened.
-                self.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
+                self.attempts[-1].status = QuantumAttemptStatus.ABORTED_SUCCESS
             else:
                 self.attempts[-1].status = QuantumAttemptStatus.FAILED
             if len(self.metadata.attempts) < len(self.attempts):
@@ -1985,7 +2425,7 @@ class ProvenanceQuantumScanData:
     """Serialized logs."""
 
     is_compressed: bool = False
-    """Whether the `quantum`, `metadata`, and `log` attributes are
+    """Whether the ``quantum``, ``metadata``, and ``log`` attributes are
     compressed.
     """
 