lsst-pipe-base 30.2026.300-py3-none-any.whl → 30.2026.400-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. lsst/pipe/base/_instrument.py +4 -7
  2. lsst/pipe/base/_status.py +29 -10
  3. lsst/pipe/base/automatic_connection_constants.py +9 -1
  4. lsst/pipe/base/cli/cmd/__init__.py +16 -2
  5. lsst/pipe/base/cli/cmd/commands.py +42 -4
  6. lsst/pipe/base/connectionTypes.py +72 -160
  7. lsst/pipe/base/connections.py +3 -6
  8. lsst/pipe/base/execution_reports.py +0 -5
  9. lsst/pipe/base/pipeline.py +3 -4
  10. lsst/pipe/base/pipelineIR.py +0 -6
  11. lsst/pipe/base/pipelineTask.py +5 -7
  12. lsst/pipe/base/pipeline_graph/_edges.py +19 -7
  13. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
  14. lsst/pipe/base/quantum_graph/_common.py +3 -1
  15. lsst/pipe/base/quantum_graph/_predicted.py +7 -0
  16. lsst/pipe/base/quantum_graph/_provenance.py +87 -37
  17. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -1
  18. lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
  19. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
  20. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +14 -6
  21. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
  22. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +14 -13
  23. lsst/pipe/base/quantum_graph/aggregator/_writer.py +2 -2
  24. lsst/pipe/base/quantum_graph/formatter.py +70 -0
  25. lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
  26. lsst/pipe/base/quantum_provenance_graph.py +17 -2
  27. lsst/pipe/base/separable_pipeline_executor.py +5 -6
  28. lsst/pipe/base/single_quantum_executor.py +6 -6
  29. lsst/pipe/base/struct.py +4 -0
  30. lsst/pipe/base/version.py +1 -1
  31. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
  32. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +40 -39
  33. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
  34. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
  35. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
  36. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
  37. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
  38. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
  39. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
  40. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
@@ -897,6 +897,10 @@ class PipelineGraph:
             New config objects or overrides to apply to copies of the current
             config objects, with task labels as the keywords.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         ValueError
@@ -1755,6 +1759,10 @@ class PipelineGraph:
         not considered part of the pipeline graph in other respects, but it
         does get written with other provenance datasets).
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.MissingDatasetTypeError
@@ -453,8 +453,10 @@ class BaseQuantumGraphWriter:
         cdict_data: bytes | None = None,
         zstd_level: int = 10,
     ) -> Iterator[Self]:
-        uri = ResourcePath(uri)
+        uri = ResourcePath(uri, forceDirectory=False)
         address_writer = AddressWriter()
+        if uri.isLocal:
+            os.makedirs(uri.dirname().ospath, exist_ok=True)
         cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
         compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
         with uri.open(mode="wb") as stream:
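The two additions close a gap when writing to a fresh local path: the URI is pinned to a file (not directory) interpretation, and missing parent directories are created before the stream is opened. A minimal sketch of the same sequence, assuming a local output path whose parent may not exist yet:

```python
# Sketch of the new open sequence, assuming a local path with a missing parent.
import os

from lsst.resources import ResourcePath

uri = ResourcePath("/tmp/qg/run1/graph.qg", forceDirectory=False)
if uri.isLocal:
    # Previously, opening the stream would fail if "run1" did not exist.
    os.makedirs(uri.dirname().ospath, exist_ok=True)
with uri.open(mode="wb") as stream:
    stream.write(b"\x00")  # placeholder payload
```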
@@ -110,6 +110,13 @@ if TYPE_CHECKING:
     from ..config import PipelineTaskConfig
     from ..graph import QgraphSummary, QuantumGraph
 
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _LOG = logging.getLogger(__name__)
 
 
@@ -94,6 +94,13 @@ from ._predicted import (
     PredictedQuantumGraphComponents,
 )
 
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _T = TypeVar("_T")
 
 LoopWrapper: TypeAlias = Callable[[Iterable[_T]], Iterable[_T]]
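Both files gain the same guard. The idiom in isolation: names that normally live only under `TYPE_CHECKING` are imported for real when Sphinx is the importer, so autodoc can resolve annotations inherited from base classes. A self-contained sketch (the `zipfile` import stands in for any annotation-only dependency):

```python
import sys
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by static analysis only; not executed at runtime.
    import zipfile  # noqa: F401

if "sphinx" in sys.modules:
    # Sphinx is driving the import, so make the names real for autodoc.
    import zipfile  # noqa: F401
```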
@@ -186,6 +193,12 @@ class ProvenanceQuantumInfo(QuantumInfo):
     failure.
     """
 
+    metadata_id: uuid.UUID
+    """ID of this quantum's metadata dataset."""
+
+    log_id: uuid.UUID
+    """ID of this quantum's log dataset."""
+
 
 class ProvenanceInitQuantumInfo(TypedDict):
     """A typed dictionary that annotates the attributes of the NetworkX graph
@@ -212,6 +225,9 @@ class ProvenanceInitQuantumInfo(TypedDict):
     pipeline_node: TaskInitNode
     """Node in the pipeline graph for this task's init-only step."""
 
+    config_id: uuid.UUID
+    """ID of this task's config dataset."""
+
 
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""
@@ -646,6 +662,8 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             resource_usage=last_attempt.resource_usage,
             attempts=self.attempts,
         )
+        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
+        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for connection_name, dataset_ids in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
             for dataset_id in dataset_ids:
@@ -655,6 +673,30 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_ids in self.outputs.items():
             write_edge = task_node.get_output_edge(connection_name)
+            if connection_name == acc.METADATA_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_metadata,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+            if connection_name == acc.LOG_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_log,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
             for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(
                     self.quantum_id,
@@ -663,8 +705,6 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                     # There can only be one pipeline edge for an output.
                     pipeline_edges=[write_edge],
                 )
-        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
-        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for dataset_id in graph._bipartite_xgraph.predecessors(self.quantum_id):
             for upstream_quantum_id in graph._bipartite_xgraph.predecessors(dataset_id):
                 graph._quantum_only_xgraph.add_edge(upstream_quantum_id, self.quantum_id)
@@ -803,6 +843,15 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_id in self.outputs.items():
             write_edge = task_init_node.get_output_edge(connection_name)
+            graph._bipartite_xgraph.add_node(
+                dataset_id,
+                data_id=empty_data_id,
+                dataset_type_name=write_edge.dataset_type_name,
+                pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                run=graph.header.output_run,
+                produced=True,
+            )
+            graph._datasets_by_type[write_edge.dataset_type_name][empty_data_id] = dataset_id
             graph._bipartite_xgraph.add_edge(
                 self.quantum_id,
                 dataset_id,
@@ -810,6 +859,8 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 # There can only be one pipeline edge for an output.
                 pipeline_edges=[write_edge],
             )
+            if write_edge.connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.nodes[self.quantum_id]["config_id"] = dataset_id
         graph._init_quanta[self.task_label] = self.quantum_id
 
     # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
@@ -994,6 +1045,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         types in the pipeline graph are included, even if none of their
         datasets were loaded (i.e. nested mappings may be empty).
 
+        Reading a quantum also populates its log and metadata datasets.
+
         The returned object may be an internal dictionary; as the type
         annotation indicates, it should not be modified in place.
         """
@@ -1032,7 +1085,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         `ProvenanceQuantumGraphReader.read_quanta`) or datasets (via
         `ProvenanceQuantumGraphReader.read_datasets`) will load those nodes
         with full attributes and edges to adjacent nodes with no attributes.
-        Loading quanta necessary to populate edge attributes.
+        Loading quanta is necessary to populate edge attributes.
+        Reading a quantum also populates its log and metadata datasets.
 
         Node attributes are described by the
         `ProvenanceQuantumInfo`, `ProvenanceInitQuantumInfo`, and
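In other words, once a quantum node has been read, its metadata and log dataset nodes are materialized and cross-linked via the new `metadata_id`/`log_id` attributes. A hedged access sketch, using the internal structures shown in the hunks above (the public reader setup is elided, and the underscore-prefixed attributes are internal):

```python
# Assumes `graph` is a populated ProvenanceQuantumGraph and `quantum_id`
# identifies a quantum that has already been read.
info = graph._quantum_only_xgraph.nodes[quantum_id]
metadata_node = graph._bipartite_xgraph.nodes[info["metadata_id"]]
log_node = graph._bipartite_xgraph.nodes[info["log_id"]]
# Both dataset nodes carry a "produced" flag, per the add_node calls above.
print(metadata_node["produced"], log_node["produced"])
```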
@@ -1104,10 +1158,6 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """Construct an `astropy.table.Table` with counts for each exception
         type raised by each task.
 
-        At present this only includes information from partial-outputs-error
-        successes, since exception information for failures is not tracked.
-        This may change in the future.
-
         Returns
         -------
         table : `astropy.table.Table`
@@ -1294,19 +1344,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # also have other outstanding reference holders).
                     continue
                 node._add_to_graph(self.graph)
-            return
-        with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
-            for node_id_or_index in nodes:
-                address_row = self.address_reader.find(node_id_or_index)
-                if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
-                    # Use the old node to reduce memory usage (since it might
-                    # also have other outstanding reference holders).
-                    continue
-                node = mb_reader.read_model(
-                    address_row.addresses[address_index], model_type, self.decompressor
-                )
-                if node is not None:
-                    node._add_to_graph(self.graph)
+        else:
+            with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
+                for node_id_or_index in nodes:
+                    address_row = self.address_reader.find(node_id_or_index)
+                    if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
+                        # Use the old node to reduce memory usage (since it
+                        # might also have other outstanding reference holders).
+                        continue
+                    node = mb_reader.read_model(
+                        address_row.addresses[address_index], model_type, self.decompressor
+                    )
+                    if node is not None:
+                        node._add_to_graph(self.graph)
 
     def fetch_logs(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[ButlerLogRecords | None]]:
         """Fetch log datasets.
@@ -1588,7 +1638,7 @@ class ProvenanceQuantumGraphWriter:
         """
         predicted_quantum = self._predicted_quanta[quantum_id]
         provenance_models = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs, assume_complete=True
+            predicted_quantum, metadata, logs, incomplete=False
         )
         scan_data = provenance_models.to_scan_data(predicted_quantum, compressor=self.compressor)
         self.write_scan_data(scan_data)
@@ -1665,8 +1715,8 @@ class ProvenanceQuantumScanStatus(enum.Enum):
    enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
    to stop trying for now.
 
-    This state means a later run with `ScannerConfig.assume_complete` is
-    required.
+    This state means `ProvenanceQuantumScanModels.from_metadata_and_logs` must
+    be run again with ``incomplete=False``.
    """
 
    SUCCESSFUL = enum.auto()
@@ -1721,7 +1771,7 @@ class ProvenanceQuantumScanModels:
        metadata: TaskMetadata | None,
        logs: ButlerLogRecords | None,
        *,
-        assume_complete: bool = True,
+        incomplete: bool = False,
    ) -> ProvenanceQuantumScanModels:
        """Construct provenance information from task metadata and logs.
 
@@ -1733,8 +1783,8 @@ class ProvenanceQuantumScanModels:
             Task metadata.
         logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
             Task logs.
-        assume_complete : `bool`, optional
-            If `False`, treat execution failures as possibly-incomplete quanta
+        incomplete : `bool`, optional
+            If `True`, treat execution failures as possibly-incomplete quanta
             and do not fully process them; instead just set the status to
             `ProvenanceQuantumScanStatus.ABANDONED` and return.
 
@@ -1752,8 +1802,8 @@ class ProvenanceQuantumScanModels:
         """
         self = ProvenanceQuantumScanModels(predicted.quantum_id)
         last_attempt = ProvenanceQuantumAttemptModel()
-        self._process_logs(predicted, logs, last_attempt, assume_complete=assume_complete)
-        self._process_metadata(predicted, metadata, last_attempt, assume_complete=assume_complete)
+        self._process_logs(predicted, logs, last_attempt, incomplete=incomplete)
+        self._process_metadata(predicted, metadata, last_attempt, incomplete=incomplete)
         if self.status is ProvenanceQuantumScanStatus.ABANDONED:
             return self
         self._reconcile_attempts(last_attempt)
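The renamed flag inverts the old `assume_complete` default while keeping the same semantics: `incomplete=True` marks unfinished work as `ABANDONED` for a later pass instead of declaring it `FAILED`. A hedged call sketch, with `predicted`, `metadata`, and `logs` assumed to come from a scanner as in the surrounding code:

```python
# First pass over a graph that may still be executing: defer final judgment.
models = ProvenanceQuantumScanModels.from_metadata_and_logs(
    predicted, metadata, logs, incomplete=True
)
if models.status is ProvenanceQuantumScanStatus.ABANDONED:
    # The quantum may still be retried; scan it again later with
    # incomplete=False, which turns missing logs/metadata into FAILED.
    pass
```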
@@ -1766,15 +1816,15 @@ class ProvenanceQuantumScanModels:
         logs: ButlerLogRecords | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-        assume_complete: bool,
+        incomplete: bool,
     ) -> None:
         (predicted_log_dataset,) = predicted.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
         if logs is None:
             self.output_existence[predicted_log_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             # Set the attempt's run status to FAILED, since the default is
             # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
@@ -1832,15 +1882,15 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-        assume_complete: bool,
+        incomplete: bool,
     ) -> None:
         (predicted_metadata_dataset,) = predicted.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
         if metadata is None:
             self.output_existence[predicted_metadata_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             self.status = ProvenanceQuantumScanStatus.SUCCESSFUL
             self.output_existence[predicted_metadata_dataset.dataset_id] = True
@@ -1875,7 +1925,7 @@ class ProvenanceQuantumScanModels:
                 # But we found the metadata! Either that hard error happened
                 # at a very unlucky time (in between those two writes), or
                 # something even weirder happened.
-                self.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
+                self.attempts[-1].status = QuantumAttemptStatus.ABORTED_SUCCESS
             else:
                 self.attempts[-1].status = QuantumAttemptStatus.FAILED
             if len(self.metadata.attempts) < len(self.attempts):
@@ -318,6 +318,12 @@ Report: TypeAlias = (
 )
 
 
+def _disable_resources_parallelism() -> None:
+    os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
+    os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
+    os.environ["LSST_S3_USE_THREADS"] = "False"
+
+
 class SupervisorCommunicator:
     """A helper object that lets the supervisor direct the other workers.
 
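The helper is process-local and idempotent; its entire effect is on environment variables consumed by `lsst.resources`, and both communicator `__enter__` methods below now call it. A quick check of that effect, derived directly from the function body above:

```python
import os

_disable_resources_parallelism()
assert os.environ["LSST_RESOURCES_NUM_WORKERS"] == "1"
assert os.environ["LSST_S3_USE_THREADS"] == "False"
assert "LSST_RESOURCES_EXECUTOR" not in os.environ
```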
@@ -364,7 +370,7 @@ class SupervisorCommunicator:
         # starts its shutdown.
         self._write_requests: (
             Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
-        ) = context.make_queue() if config.output_path is not None else None
+        ) = context.make_queue() if config.is_writing_provenance else None
         # All other workers use this queue to send many different kinds of
         # reports the supervisor. The supervisor waits for a _DONE sentinal
         # from each worker before it finishes its shutdown.
@@ -433,6 +439,7 @@ class SupervisorCommunicator:
         self._expect_empty_queue(self._compression_dict)
 
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.progress.__enter__()
         # We make the low-level logger in __enter__ instead of __init__ only
         # because that's the pattern used by true workers (where it matters).
@@ -581,6 +588,7 @@ class WorkerCommunicator:
         self._cancel_event = supervisor._cancel_event
 
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.log = make_worker_log(self.name, self.config)
         self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
         self._exit_stack = ExitStack().__enter__()
@@ -29,6 +29,8 @@ from __future__ import annotations
 
 __all__ = ("AggregatorConfig",)
 
+import sys
+from typing import TYPE_CHECKING, Any
 
 import pydantic
 
@@ -60,11 +62,13 @@ class AggregatorConfig(pydantic.BaseModel):
     n_processes: int = 1
     """Number of processes the scanner should use."""
 
-    assume_complete: bool = True
-    """If `True`, the aggregator can assume all quanta have run to completion
-    (including any automatic retries). If `False`, only successes can be
-    considered final, and quanta that appear to have failed or to have not been
-    executed are ignored.
+    incomplete: bool = False
+    """If `True`, do not expect the graph to have been executed to completion
+    yet, and only ingest the outputs of successful quanta.
+
+    This disables writing the provenance quantum graph, since this is likely to
+    be wasted effort that just complicates a follow-up run with
+    ``incomplete=False`` later.
     """
 
     defensive_ingest: bool = False
@@ -95,11 +99,10 @@ class AggregatorConfig(pydantic.BaseModel):
     """
 
     dry_run: bool = False
-    """If `True`, do not actually perform any deletions or central butler
-    ingests.
+    """If `True`, do not actually perform any central butler ingests.
 
-    Most log messages concerning deletions and ingests will still be emitted in
-    order to provide a better emulation of a real run.
+    Most log messages concerning ingests will still be emitted in order to
+    provide a better emulation of a real run.
     """
 
     interactive_status: bool = False
@@ -137,3 +140,69 @@ class AggregatorConfig(pydantic.BaseModel):
     """Enable support for storage classes by created by the
     lsst.pipe.base.tests.mocks package.
     """
+
+    promise_ingest_graph: bool = False
+    """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
+    will be run later to ingest metadata/log/config datasets, and will not
+    ingest them itself. This means that if `~.ingest_graph.ingest_graph` is
+    not run, those files will be abandoned in the butler storage root without
+    being present in the butler database, but it will speed up both processes.
+
+    It is *usually* safe to build a quantum graph for downstream processing
+    before or while running `~.ingest_graph.ingest_graph`, because
+    metadata/log/config datasets are rarely used as inputs. To check, use
+    ``pipetask build ... --show inputs`` to show the overall-inputs to the
+    graph and scan for these dataset types.
+    """
+
+    @property
+    def is_writing_provenance(self) -> bool:
+        """Whether the aggregator is configured to write the provenance quantum
+        graph.
+        """
+        return self.output_path is not None and not self.incomplete
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
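Taken together with the `incomplete` field above, the new property means an output path alone no longer implies provenance writing. A small sketch of the combinations (treating `output_path` as an optional string field, which is an assumption about its declaration elsewhere in the class):

```python
config = AggregatorConfig(incomplete=True, output_path="prov.qg")
assert not config.is_writing_provenance  # incomplete runs skip provenance

config = AggregatorConfig(incomplete=False, output_path="prov.qg")
assert config.is_writing_provenance

config = AggregatorConfig(output_path=None)
assert not config.is_writing_provenance  # nowhere to write it
```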
@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError
 
 from ...pipeline_graph import TaskImportMode
 from .._common import DatastoreName
-from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
 from ._communicators import IngesterCommunicator
 
 
@@ -170,7 +170,7 @@ class Ingester(AbstractContextManager):
         for ingest_request in self.comms.poll():
             self.n_producers_pending += 1
             self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
-            self.update_pending(ingest_request.datasets, ingest_request.records)
+            self.update_outputs_pending(refs=ingest_request.refs, records=ingest_request.records)
             if self.n_datasets_pending > self.comms.config.ingest_batch_size:
                 self.ingest()
         self.comms.log.info("All ingest requests received.")
@@ -266,31 +266,32 @@ class Ingester(AbstractContextManager):
         else:
             del self.records_pending[datastore_name]
 
-    def update_pending(
-        self, datasets: list[PredictedDatasetModel], records: dict[DatastoreName, DatastoreRecordData]
+    def update_outputs_pending(
+        self,
+        refs: list[DatasetRef],
+        records: dict[DatastoreName, DatastoreRecordData],
     ) -> None:
         """Add an ingest request to the pending-ingest data structures.
 
         Parameters
         ----------
-        datasets : `list` [ `PredictedDatasetModel` ]
-            Registry information about the datasets.
+        refs : `list` [ `lsst.daf.butler.DatasetRef` ]
+            Registry information about regular quantum-output datasets.
         records : `dict` [ `str`, \
             `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
             Datastore information about the datasets.
         """
-        n_given = len(datasets)
+        n_given = len(refs)
         if self.already_ingested is not None:
-            datasets = [d for d in datasets if d.dataset_id not in self.already_ingested]
-            kept = {d.dataset_id for d in datasets}
+            refs = [ref for ref in refs if ref.id not in self.already_ingested]
+            kept = {ref.id for ref in refs}
             self.n_datasets_skipped += n_given - len(kept)
             records = {
                 datastore_name: filtered_records
                 for datastore_name, original_records in records.items()
                 if (filtered_records := original_records.subset(kept)) is not None
             }
-        for dataset in datasets:
-            ref = self.predicted.make_dataset_ref(dataset)
+        for ref in refs:
             self.refs_pending[ref.datasetType.dimensions].append(ref)
         for datastore_name, datastore_records in records.items():
             if (existing_records := self.records_pending.get(datastore_name)) is not None:
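The switch from `PredictedDatasetModel` to ready-made `DatasetRef`s moves ref construction into the scanner, so the ingester's dedup step now keys on `ref.id` directly. The filtering logic in isolation, with a plain stand-in for `DatasetRef` so the sketch runs on its own:

```python
import uuid
from dataclasses import dataclass


@dataclass(frozen=True)
class FakeRef:
    """Stand-in for lsst.daf.butler.DatasetRef; only .id is needed here."""

    id: uuid.UUID


already_ingested = {uuid.UUID(int=1)}
refs = [FakeRef(uuid.UUID(int=1)), FakeRef(uuid.UUID(int=2))]
refs = [ref for ref in refs if ref.id not in already_ingested]
kept = {ref.id for ref in refs}
assert kept == {uuid.UUID(int=2)}  # the already-ingested ref was dropped
```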
@@ -223,7 +223,7 @@ class Scanner(AbstractContextManager):
         logs = self._read_log(predicted_quantum)
         metadata = self._read_metadata(predicted_quantum)
         result = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs, assume_complete=self.comms.config.assume_complete
+            predicted_quantum, metadata, logs, incomplete=self.comms.config.incomplete
         )
         if result.status is ProvenanceQuantumScanStatus.ABANDONED:
             self.comms.log.debug("Abandoning scan for failed quantum %s.", quantum_id)
@@ -233,7 +233,7 @@ class Scanner(AbstractContextManager):
             if predicted_output.dataset_id not in result.output_existence:
                 result.output_existence[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
         to_ingest = self._make_ingest_request(predicted_quantum, result)
-        if self.comms.config.output_path is not None:
+        if self.comms.config.is_writing_provenance:
             to_write = result.to_scan_data(predicted_quantum, compressor=self.compressor)
             self.comms.request_write(to_write)
         self.comms.request_ingest(to_ingest)
@@ -261,15 +261,23 @@ class Scanner(AbstractContextManager):
         predicted_outputs_by_id = {
             d.dataset_id: d for d in itertools.chain.from_iterable(predicted_quantum.outputs.values())
         }
-        to_ingest_predicted: list[PredictedDatasetModel] = []
         to_ingest_refs: list[DatasetRef] = []
+        to_ignore: set[uuid.UUID] = set()
+        if self.comms.config.promise_ingest_graph:
+            if result.status is ProvenanceQuantumScanStatus.INIT:
+                if predicted_quantum.task_label:  # i.e. not the 'packages' producer
+                    to_ignore.add(
+                        predicted_quantum.outputs[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME][0].dataset_id
+                    )
+            else:
+                to_ignore.add(predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME][0].dataset_id)
+                to_ignore.add(predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME][0].dataset_id)
         for dataset_id, was_produced in result.output_existence.items():
-            if was_produced:
+            if was_produced and dataset_id not in to_ignore:
                 predicted_output = predicted_outputs_by_id[dataset_id]
-                to_ingest_predicted.append(predicted_output)
                 to_ingest_refs.append(self.reader.components.make_dataset_ref(predicted_output))
         to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
-        return IngestRequest(result.quantum_id, to_ingest_predicted, to_ingest_records)
+        return IngestRequest(result.quantum_id, to_ingest_refs, to_ingest_records)
 
     def _read_metadata(self, predicted_quantum: PredictedQuantumDatasetsModel) -> TaskMetadata | None:
         """Attempt to read the metadata dataset for a quantum.
@@ -32,10 +32,10 @@ __all__ = ("IngestRequest", "ScanReport")
 
 import dataclasses
 import uuid
 
+from lsst.daf.butler import DatasetRef
 from lsst.daf.butler.datastore.record_data import DatastoreRecordData
 
 from .._common import DatastoreName
-from .._predicted import PredictedDatasetModel
 from .._provenance import ProvenanceQuantumScanStatus
 
 
@@ -57,11 +57,11 @@ class IngestRequest:
     producer_id: uuid.UUID
     """ID of the quantum that produced these datasets."""
 
-    datasets: list[PredictedDatasetModel]
+    refs: list[DatasetRef]
     """Registry information about the datasets."""
 
     records: dict[DatastoreName, DatastoreRecordData]
     """Datastore information about the datasets."""
 
     def __bool__(self) -> bool:
-        return bool(self.datasets or self.records)
+        return bool(self.refs or self.records)
@@ -117,6 +117,18 @@ class Supervisor:
                 self.comms.request_scan(ready_set.pop())
             for scan_return in self.comms.poll():
                 self.handle_report(scan_return)
+        if self.comms.config.incomplete:
+            quantum_or_quanta = "quanta" if self.n_abandoned != 1 else "quantum"
+            self.comms.progress.log.info(
+                "%d %s incomplete/failed abandoned; re-run with incomplete=False to finish.",
+                self.n_abandoned,
+                quantum_or_quanta,
+            )
+        self.comms.progress.log.info(
+            "Scanning complete after %0.1fs; waiting for workers to finish.",
+            self.comms.progress.elapsed_time,
+        )
+        self.comms.wait_for_workers_to_finish()
 
     def handle_report(self, scan_report: ScanReport) -> None:
         """Handle a report from a scanner.
@@ -134,7 +146,7 @@ class Supervisor:
             self.comms.log.debug("Scan complete for %s: quantum failed.", scan_report.quantum_id)
             blocked_quanta = self.walker.fail(scan_report.quantum_id)
             for blocked_quantum_id in blocked_quanta:
-                if self.comms.config.output_path is not None:
+                if self.comms.config.is_writing_provenance:
                     self.comms.request_write(
                         ProvenanceQuantumScanData(
                             blocked_quantum_id, status=ProvenanceQuantumScanStatus.BLOCKED
@@ -172,7 +184,7 @@ def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorConfig
     writer: Worker | None = None
     with SupervisorCommunicator(log, config.n_processes, ctx, config) as comms:
         comms.progress.log.verbose("Starting workers.")
-        if config.output_path is not None:
+        if config.is_writing_provenance:
             writer_comms = WriterCommunicator(comms)
             writer = ctx.make_worker(
                 target=Writer.run,
@@ -198,17 +210,6 @@ def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorConfig
         ingester.start()
         supervisor = Supervisor(predicted_path, comms)
         supervisor.loop()
-        log.info(
-            "Scanning complete after %0.1fs; waiting for workers to finish.",
-            comms.progress.elapsed_time,
-        )
-        comms.wait_for_workers_to_finish()
-        if supervisor.n_abandoned:
-            raise RuntimeError(
-                f"{supervisor.n_abandoned} {'quanta' if supervisor.n_abandoned > 1 else 'quantum'} "
-                "abandoned because they did not succeed. Re-run with assume_complete=True after all retry "
-                "attempts have been exhausted."
-            )
         for w in scanners:
             w.join()
         ingester.join()