lsst-pipe-base 29.2025.4500__py3-none-any.whl → 29.2025.4700__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. lsst/pipe/base/_status.py +156 -11
  2. lsst/pipe/base/log_capture.py +98 -7
  3. lsst/pipe/base/pipeline_graph/expressions.py +3 -3
  4. lsst/pipe/base/quantum_graph/_common.py +21 -1
  5. lsst/pipe/base/quantum_graph/_multiblock.py +14 -39
  6. lsst/pipe/base/quantum_graph/_predicted.py +90 -90
  7. lsst/pipe/base/quantum_graph/_provenance.py +345 -200
  8. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +19 -19
  9. lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
  10. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +201 -72
  11. lsst/pipe/base/quantum_graph/aggregator/_structs.py +45 -35
  12. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +15 -17
  13. lsst/pipe/base/quantum_graph/aggregator/_writer.py +57 -149
  14. lsst/pipe/base/quantum_graph_builder.py +0 -1
  15. lsst/pipe/base/quantum_provenance_graph.py +2 -44
  16. lsst/pipe/base/single_quantum_executor.py +43 -9
  17. lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
  18. lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
  19. lsst/pipe/base/version.py +1 -1
  20. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/METADATA +1 -1
  21. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/RECORD +29 -29
  22. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/WHEEL +0 -0
  23. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/entry_points.txt +0 -0
  24. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/COPYRIGHT +0 -0
  25. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/LICENSE +0 -0
  26. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/bsd_license.txt +0 -0
  27. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/gpl-v3.0.txt +0 -0
  28. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/top_level.txt +0 -0
  29. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/zip-safe +0 -0
@@ -43,7 +43,6 @@ __all__ = (
43
43
  import dataclasses
44
44
  import itertools
45
45
  import logging
46
- import operator
47
46
  import sys
48
47
  import uuid
49
48
  import warnings
@@ -89,6 +88,7 @@ from ..pipeline_graph import (
89
88
  log_config_mismatch,
90
89
  )
91
90
  from ._common import (
91
+ FORMAT_VERSION,
92
92
  BaseQuantumGraph,
93
93
  BaseQuantumGraphReader,
94
94
  BaseQuantumGraphWriter,
@@ -103,7 +103,7 @@ from ._common import (
103
103
  QuantumInfo,
104
104
  TaskLabel,
105
105
  )
106
- from ._multiblock import DEFAULT_PAGE_SIZE, MultiblockReader, MultiblockWriter
106
+ from ._multiblock import DEFAULT_PAGE_SIZE, AddressRow, MultiblockReader, MultiblockWriter
107
107
 
108
108
  if TYPE_CHECKING:
109
109
  from ..config import PipelineTaskConfig
@@ -115,7 +115,7 @@ _LOG = logging.getLogger(__name__)
115
115
  _T = TypeVar("_T", bound=pydantic.BaseModel)
116
116
 
117
117
 
118
- class PredictedThinQuantumModel(pydantic.BaseModel):
118
+ class _PredictedThinQuantumModelV0(pydantic.BaseModel):
119
119
  """Data model for a quantum data ID and internal integer ID in a predicted
120
120
  quantum graph.
121
121
  """
@@ -126,6 +126,18 @@ class PredictedThinQuantumModel(pydantic.BaseModel):
126
126
  data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
127
127
  """Full (required and implied) data coordinate values for this quantum."""
128
128
 
129
+
130
+ class PredictedThinQuantumModel(pydantic.BaseModel):
131
+ """Data model for a quantum data ID and UUID in a predicted
132
+ quantum graph.
133
+ """
134
+
135
+ quantum_id: uuid.UUID
136
+ """Universally unique ID for this quantum."""
137
+
138
+ data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
139
+ """Full (required and implied) data coordinate values for this quantum."""
140
+
129
141
  # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
130
142
  # when we inherit those docstrings in our public classes.
131
143
  if "sphinx" in sys.modules and not TYPE_CHECKING:
@@ -172,17 +184,45 @@ class PredictedThinQuantumModel(pydantic.BaseModel):
172
184
  return super().model_validate_strings(*args, **kwargs)
173
185
 
174
186
 
175
- class PredictedThinGraphModel(pydantic.BaseModel):
187
+ class _PredictedThinGraphModelV0(pydantic.BaseModel):
176
188
  """Data model for the predicted quantum graph component that maps each
177
189
  task label to the data IDs and internal integer IDs of its quanta.
178
190
  """
179
191
 
180
- quanta: dict[TaskLabel, list[PredictedThinQuantumModel]] = pydantic.Field(default_factory=dict)
192
+ quanta: dict[TaskLabel, list[_PredictedThinQuantumModelV0]] = pydantic.Field(default_factory=dict)
181
193
  """Minimal descriptions of all quanta, grouped by task label."""
182
194
 
183
195
  edges: list[tuple[QuantumIndex, QuantumIndex]] = pydantic.Field(default_factory=list)
184
196
  """Pairs of (predecessor, successor) internal integer quantum IDs."""
185
197
 
198
+ def _upgraded(self, address_rows: Mapping[uuid.UUID, AddressRow]) -> PredictedThinGraphModel:
199
+ """Convert to the v1+ model."""
200
+ uuid_by_index = {v.index: k for k, v in address_rows.items()}
201
+ return PredictedThinGraphModel(
202
+ quanta={
203
+ task_label: [
204
+ PredictedThinQuantumModel(
205
+ quantum_id=uuid_by_index[q.quantum_index], data_coordinate=q.data_coordinate
206
+ )
207
+ for q in quanta
208
+ ]
209
+ for task_label, quanta in self.quanta.items()
210
+ },
211
+ edges=[(uuid_by_index[index1], uuid_by_index[index2]) for index1, index2 in self.edges],
212
+ )
213
+
214
+
215
+ class PredictedThinGraphModel(pydantic.BaseModel):
216
+ """Data model for the predicted quantum graph component that maps each
217
+ task label to the data IDs and UUIDs of its quanta.
218
+ """
219
+
220
+ quanta: dict[TaskLabel, list[PredictedThinQuantumModel]] = pydantic.Field(default_factory=dict)
221
+ """Minimal descriptions of all quanta, grouped by task label."""
222
+
223
+ edges: list[tuple[uuid.UUID, uuid.UUID]] = pydantic.Field(default_factory=list)
224
+ """Pairs of (predecessor, successor) quantum IDs."""
225
+
186
226
  # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
187
227
  # when we inherit those docstrings in our public classes.
188
228
  if "sphinx" in sys.modules and not TYPE_CHECKING:
@@ -673,7 +713,7 @@ class PredictedQuantumGraph(BaseQuantumGraph):
673
713
  self._add_init_quanta(components.init_quanta)
674
714
  self._quantum_datasets: dict[uuid.UUID, PredictedQuantumDatasetsModel] = {}
675
715
  self._expanded_data_ids: dict[DataCoordinate, DataCoordinate] = {}
676
- self._add_thin_graph(components.thin_graph, components.quantum_indices)
716
+ self._add_thin_graph(components.thin_graph)
677
717
  for quantum_datasets in components.quantum_datasets.values():
678
718
  self._add_quantum_datasets(quantum_datasets)
679
719
  if not components.thin_graph.edges:
@@ -710,19 +750,11 @@ class PredictedQuantumGraph(BaseQuantumGraph):
710
750
  quantum_datasets.task_label,
711
751
  )
712
752
 
713
- def _add_thin_graph(
714
- self, component: PredictedThinGraphModel, indices: Mapping[uuid.UUID, QuantumIndex]
715
- ) -> None:
716
- uuid_by_index = {v: k for k, v in indices.items()}
717
- for index1, index2 in component.edges:
718
- self._quantum_only_xgraph.add_edge(uuid_by_index[index1], uuid_by_index[index2])
753
+ def _add_thin_graph(self, component: PredictedThinGraphModel) -> None:
754
+ self._quantum_only_xgraph.add_edges_from(component.edges)
719
755
  for task_label, thin_quanta_for_task in component.quanta.items():
720
756
  for thin_quantum in thin_quanta_for_task:
721
- self._add_quantum(
722
- uuid_by_index[thin_quantum.quantum_index],
723
- task_label,
724
- thin_quantum.data_coordinate,
725
- )
757
+ self._add_quantum(thin_quantum.quantum_id, task_label, thin_quantum.data_coordinate)
726
758
 
727
759
  def _add_quantum_datasets(self, quantum_datasets: PredictedQuantumDatasetsModel) -> None:
728
760
  self._quantum_datasets[quantum_datasets.quantum_id] = quantum_datasets
@@ -1496,8 +1528,6 @@ class PredictedQuantumGraphComponents:
1496
1528
  thin_graph: PredictedThinGraphModel = dataclasses.field(default_factory=PredictedThinGraphModel)
1497
1529
  """A lightweight quantum-quantum DAG with task labels and data IDs only.
1498
1530
 
1499
- This uses internal integer IDs ("indexes") for node IDs.
1500
-
1501
1531
  This does not include the special "init" quanta.
1502
1532
  """
1503
1533
 
@@ -1511,18 +1541,6 @@ class PredictedQuantumGraphComponents:
1511
1541
  This does not include special "init" quanta.
1512
1542
  """
1513
1543
 
1514
- quantum_indices: dict[uuid.UUID, QuantumIndex] = dataclasses.field(default_factory=dict)
1515
- """A mapping from external universal quantum ID to internal integer ID.
1516
-
1517
- While this `dict` does not need to be sorted, the internal integer IDs do
1518
- need to correspond exactly to ``enumerate(sorted(uuids))``.
1519
-
1520
- When used to construct a `PredictedQuantumGraph`, this must be fully
1521
- populated if `thin_graph` is. It can be empty otherwise.
1522
-
1523
- This does include special "init" quanta.
1524
- """
1525
-
1526
1544
  def make_dataset_ref(self, predicted: PredictedDatasetModel) -> DatasetRef:
1527
1545
  """Make a `lsst.daf.butler.DatasetRef` from information in the
1528
1546
  predicted quantum graph.
@@ -1555,48 +1573,35 @@ class PredictedQuantumGraphComponents:
1555
1573
  id=predicted.dataset_id,
1556
1574
  )
1557
1575
 
1558
- def set_quantum_indices(self) -> None:
1559
- """Populate the `quantum_indices` component by sorting the UUIDs in the
1560
- `init_quanta` and `quantum_datasets` components (which must both be
1561
- complete).
1562
- """
1563
- all_quantum_ids = [q.quantum_id for q in self.init_quanta.root]
1564
- all_quantum_ids.extend(self.quantum_datasets.keys())
1565
- all_quantum_ids.sort(key=operator.attrgetter("int"))
1566
- self.quantum_indices = {quantum_id: index for index, quantum_id in enumerate(all_quantum_ids)}
1567
-
1568
1576
  def set_thin_graph(self) -> None:
1569
1577
  """Populate the `thin_graph` component from the `pipeline_graph`,
1570
- `quantum_datasets` and `quantum_indices` components (which must all be
1571
- complete).
1578
+ `quantum_datasets` components (which must be complete).
1572
1579
  """
1573
1580
  bipartite_xgraph = networkx.DiGraph()
1574
1581
  self.thin_graph.quanta = {task_label: [] for task_label in self.pipeline_graph.tasks}
1575
- graph_quantum_indices = []
1582
+ graph_quantum_ids: list[uuid.UUID] = []
1576
1583
  for quantum_datasets in self.quantum_datasets.values():
1577
- quantum_index = self.quantum_indices[quantum_datasets.quantum_id]
1578
1584
  self.thin_graph.quanta[quantum_datasets.task_label].append(
1579
1585
  PredictedThinQuantumModel.model_construct(
1580
- quantum_index=quantum_index,
1586
+ quantum_id=quantum_datasets.quantum_id,
1581
1587
  data_coordinate=quantum_datasets.data_coordinate,
1582
1588
  )
1583
1589
  )
1584
1590
  for dataset in itertools.chain.from_iterable(quantum_datasets.inputs.values()):
1585
- bipartite_xgraph.add_edge(dataset.dataset_id, quantum_index)
1591
+ bipartite_xgraph.add_edge(dataset.dataset_id, quantum_datasets.quantum_id)
1586
1592
  for dataset in itertools.chain.from_iterable(quantum_datasets.outputs.values()):
1587
- bipartite_xgraph.add_edge(quantum_index, dataset.dataset_id)
1588
- graph_quantum_indices.append(quantum_index)
1593
+ bipartite_xgraph.add_edge(quantum_datasets.quantum_id, dataset.dataset_id)
1594
+ graph_quantum_ids.append(quantum_datasets.quantum_id)
1589
1595
  quantum_only_xgraph: networkx.DiGraph = networkx.bipartite.projected_graph(
1590
- bipartite_xgraph, graph_quantum_indices
1596
+ bipartite_xgraph, graph_quantum_ids
1591
1597
  )
1592
1598
  self.thin_graph.edges = list(quantum_only_xgraph.edges)
1593
1599
 
1594
1600
  def set_header_counts(self) -> None:
1595
1601
  """Populate the quantum and dataset counts in the header from the
1596
- `quantum_indices`, `thin_graph`, `init_quanta`, and `quantum_datasets`
1597
- components.
1602
+ `thin_graph`, `init_quanta`, and `quantum_datasets` components.
1598
1603
  """
1599
- self.header.n_quanta = len(self.quantum_indices) - len(self.init_quanta.root)
1604
+ self.header.n_quanta = len(self.quantum_datasets)
1600
1605
  self.header.n_task_quanta = {
1601
1606
  task_label: len(thin_quanta) for task_label, thin_quanta in self.thin_graph.quanta.items()
1602
1607
  }
@@ -1642,8 +1647,7 @@ class PredictedQuantumGraphComponents:
1642
1647
  )
1643
1648
  # Update the keys of the quantum_datasets dict.
1644
1649
  self.quantum_datasets = {qd.quantum_id: qd for qd in self.quantum_datasets.values()}
1645
- # Since the UUIDs have changed, the indices need to change, too.
1646
- self.set_quantum_indices()
1650
+ # Since the UUIDs have changed, the thin graph needs to be rewritten.
1647
1651
  self.set_thin_graph()
1648
1652
  # Update the header last, since we use it above to get the old run.
1649
1653
  self.header.output_run = output_run
@@ -1728,7 +1732,6 @@ class PredictedQuantumGraphComponents:
1728
1732
  records=dimension_data_extractor.records.values(),
1729
1733
  dimensions=result.pipeline_graph.get_all_dimensions(),
1730
1734
  )
1731
- result.set_quantum_indices()
1732
1735
  result.set_thin_graph()
1733
1736
  result.set_header_counts()
1734
1737
  return result
@@ -1764,11 +1767,15 @@ class PredictedQuantumGraphComponents:
1764
1767
  Only a complete predicted quantum graph with all components fully
1765
1768
  populated should be written.
1766
1769
  """
1767
- if self.header.n_quanta + len(self.init_quanta.root) != len(self.quantum_indices):
1770
+ if self.header.n_task_quanta != {
1771
+ task_label: len(quanta) for task_label, quanta in self.thin_graph.quanta.items()
1772
+ }:
1768
1773
  raise RuntimeError(
1769
- f"Cannot save graph after partial read of quanta: expected {self.header.n_quanta}, "
1770
- f"got {len(self.quantum_indices)}."
1774
+ "Cannot save graph after partial read of quanta: thin graph is inconsistent with header."
1771
1775
  )
1776
+ # Ensure we record the actual version we're about to write, in case
1777
+ # we're rewriting an old graph in a new format.
1778
+ self.header.version = FORMAT_VERSION
1772
1779
  uri = ResourcePath(uri)
1773
1780
  match uri.getExtension():
1774
1781
  case ".qg":
@@ -1811,11 +1818,12 @@ class PredictedQuantumGraphComponents:
1811
1818
  else:
1812
1819
  cdict_data = cdict.as_bytes()
1813
1820
  compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
1821
+ indices = {quantum_id: n for n, quantum_id in enumerate(sorted(self.quantum_datasets.keys()))}
1814
1822
  with BaseQuantumGraphWriter.open(
1815
1823
  uri,
1816
1824
  header=self.header,
1817
1825
  pipeline_graph=self.pipeline_graph,
1818
- indices=self.quantum_indices,
1826
+ indices=indices,
1819
1827
  address_filename="quanta",
1820
1828
  compressor=compressor,
1821
1829
  cdict_data=cdict_data,
@@ -1899,36 +1907,34 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
1899
1907
  """Construct a `PredictedQuantumGraph` instance from this reader."""
1900
1908
  return self.components.assemble()
1901
1909
 
1902
- def read_all(self) -> PredictedQuantumGraphReader:
1910
+ def read_all(self) -> None:
1903
1911
  """Read all components in full."""
1904
- return self.read_thin_graph().read_execution_quanta()
1912
+ self.read_thin_graph()
1913
+ self.read_execution_quanta()
1905
1914
 
1906
- def read_thin_graph(self) -> PredictedQuantumGraphReader:
1915
+ def read_thin_graph(self) -> None:
1907
1916
  """Read the thin graph.
1908
1917
 
1909
- The thin graph is a quantum-quantum DAG with internal integer IDs for
1910
- nodes and just task labels and data IDs as node attributes. It always
1911
- includes all regular quanta, and does not include init-input or
1912
- init-output information.
1918
+ The thin graph is a quantum-quantum DAG with just task labels and data
1919
+ IDs as node attributes. It always includes all regular quanta, and
1920
+ does not include init-input or init-output information.
1913
1921
  """
1914
1922
  if not self.components.thin_graph.quanta:
1915
- self.components.thin_graph = self._read_single_block("thin_graph", PredictedThinGraphModel)
1916
- if len(self.components.quantum_indices) != self.components.header.n_quanta:
1917
- self.address_reader.read_all()
1918
- self.components.quantum_indices.update(
1919
- {row.key: row.index for row in self.address_reader.rows.values()}
1920
- )
1921
- return self
1923
+ if self.header.version > 0:
1924
+ self.components.thin_graph = self._read_single_block("thin_graph", PredictedThinGraphModel)
1925
+ else:
1926
+ self.address_reader.read_all()
1927
+ thin_graph_v0 = self._read_single_block("thin_graph", _PredictedThinGraphModelV0)
1928
+ self.components.thin_graph = thin_graph_v0._upgraded(self.address_reader.rows)
1922
1929
 
1923
- def read_init_quanta(self) -> PredictedQuantumGraphReader:
1930
+ def read_init_quanta(self) -> None:
1924
1931
  """Read the list of special quanta that represent init-inputs and
1925
1932
  init-outputs.
1926
1933
  """
1927
1934
  if not self.components.init_quanta.root:
1928
1935
  self.components.init_quanta = self._read_single_block("init_quanta", PredictedInitQuantaModel)
1929
- return self
1930
1936
 
1931
- def read_dimension_data(self) -> PredictedQuantumGraphReader:
1937
+ def read_dimension_data(self) -> None:
1932
1938
  """Read all dimension records.
1933
1939
 
1934
1940
  Record data IDs will be immediately deserialized, while other fields
@@ -1948,11 +1954,8 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
1948
1954
  universe=self.components.pipeline_graph.universe,
1949
1955
  ),
1950
1956
  )
1951
- return self
1952
1957
 
1953
- def read_quantum_datasets(
1954
- self, quantum_ids: Iterable[uuid.UUID] | None = None
1955
- ) -> PredictedQuantumGraphReader:
1958
+ def read_quantum_datasets(self, quantum_ids: Iterable[uuid.UUID] | None = None) -> None:
1956
1959
  """Read information about all datasets produced and consumed by the
1957
1960
  given quantum IDs.
1958
1961
 
@@ -1975,9 +1978,7 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
1975
1978
  ):
1976
1979
  self.components.quantum_datasets.setdefault(quantum_datasets.quantum_id, quantum_datasets)
1977
1980
  self.address_reader.read_all()
1978
- for address_row in self.address_reader.rows.values():
1979
- self.components.quantum_indices[address_row.key] = address_row.index
1980
- return self
1981
+ return
1981
1982
  with MultiblockReader.open_in_zip(
1982
1983
  self.zf, "quantum_datasets", int_size=self.components.header.int_size
1983
1984
  ) as mb_reader:
@@ -1985,17 +1986,14 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
1985
1986
  if quantum_id in self.components.quantum_datasets:
1986
1987
  continue
1987
1988
  address_row = self.address_reader.find(quantum_id)
1988
- self.components.quantum_indices[address_row.key] = address_row.index
1989
1989
  quantum_datasets = mb_reader.read_model(
1990
1990
  address_row.addresses[0], PredictedQuantumDatasetsModel, self.decompressor
1991
1991
  )
1992
1992
  if quantum_datasets is not None:
1993
1993
  self.components.quantum_datasets[address_row.key] = quantum_datasets
1994
- return self
1994
+ return
1995
1995
 
1996
- def read_execution_quanta(
1997
- self, quantum_ids: Iterable[uuid.UUID] | None = None
1998
- ) -> PredictedQuantumGraphReader:
1996
+ def read_execution_quanta(self, quantum_ids: Iterable[uuid.UUID] | None = None) -> None:
1999
1997
  """Read all information needed to execute the given quanta.
2000
1998
 
2001
1999
  Parameters
@@ -2004,4 +2002,6 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
2004
2002
  Iterable of quantum IDs to load. If not provided, all quanta will
2005
2003
  be loaded. The UUIDs of special init quanta will be ignored.
2006
2004
  """
2007
- return self.read_init_quanta().read_dimension_data().read_quantum_datasets(quantum_ids)
2005
+ self.read_init_quanta()
2006
+ self.read_dimension_data()
2007
+ self.read_quantum_datasets(quantum_ids)