lsst-pipe-base: 30.0.1rc1-py3-none-any.whl → 30.2025.5200-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. lsst/pipe/base/_instrument.py +20 -31
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +10 -43
  4. lsst/pipe/base/_task_metadata.py +2 -2
  5. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +3 -8
  6. lsst/pipe/base/automatic_connection_constants.py +1 -20
  7. lsst/pipe/base/cli/cmd/__init__.py +2 -18
  8. lsst/pipe/base/cli/cmd/commands.py +4 -149
  9. lsst/pipe/base/connectionTypes.py +160 -72
  10. lsst/pipe/base/connections.py +9 -6
  11. lsst/pipe/base/execution_reports.py +5 -0
  12. lsst/pipe/base/graph/graph.py +10 -11
  13. lsst/pipe/base/graph/quantumNode.py +4 -4
  14. lsst/pipe/base/graph_walker.py +10 -8
  15. lsst/pipe/base/log_capture.py +5 -9
  16. lsst/pipe/base/mp_graph_executor.py +15 -51
  17. lsst/pipe/base/pipeline.py +6 -5
  18. lsst/pipe/base/pipelineIR.py +8 -2
  19. lsst/pipe/base/pipelineTask.py +7 -5
  20. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  21. lsst/pipe/base/pipeline_graph/_edges.py +22 -32
  22. lsst/pipe/base/pipeline_graph/_mapping_views.py +7 -4
  23. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +7 -14
  24. lsst/pipe/base/pipeline_graph/expressions.py +2 -2
  25. lsst/pipe/base/pipeline_graph/io.py +10 -7
  26. lsst/pipe/base/pipeline_graph/visualization/_dot.py +12 -13
  27. lsst/pipe/base/pipeline_graph/visualization/_layout.py +18 -16
  28. lsst/pipe/base/pipeline_graph/visualization/_merge.py +7 -4
  29. lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
  30. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +0 -7
  31. lsst/pipe/base/prerequisite_helpers.py +1 -2
  32. lsst/pipe/base/quantum_graph/_common.py +20 -19
  33. lsst/pipe/base/quantum_graph/_multiblock.py +31 -37
  34. lsst/pipe/base/quantum_graph/_predicted.py +13 -111
  35. lsst/pipe/base/quantum_graph/_provenance.py +45 -1136
  36. lsst/pipe/base/quantum_graph/aggregator/__init__.py +1 -0
  37. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +289 -204
  38. lsst/pipe/base/quantum_graph/aggregator/_config.py +9 -87
  39. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -13
  40. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +235 -49
  41. lsst/pipe/base/quantum_graph/aggregator/_structs.py +116 -6
  42. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +39 -29
  43. lsst/pipe/base/quantum_graph/aggregator/_writer.py +351 -34
  44. lsst/pipe/base/quantum_graph/visualization.py +1 -5
  45. lsst/pipe/base/quantum_graph_builder.py +8 -21
  46. lsst/pipe/base/quantum_graph_executor.py +13 -116
  47. lsst/pipe/base/quantum_graph_skeleton.py +29 -31
  48. lsst/pipe/base/quantum_provenance_graph.py +12 -29
  49. lsst/pipe/base/separable_pipeline_executor.py +3 -19
  50. lsst/pipe/base/single_quantum_executor.py +42 -67
  51. lsst/pipe/base/struct.py +0 -4
  52. lsst/pipe/base/testUtils.py +3 -3
  53. lsst/pipe/base/tests/mocks/_storage_class.py +1 -2
  54. lsst/pipe/base/version.py +1 -1
  55. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/METADATA +3 -3
  56. lsst_pipe_base-30.2025.5200.dist-info/RECORD +125 -0
  57. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/WHEEL +1 -1
  58. lsst/pipe/base/log_on_close.py +0 -76
  59. lsst/pipe/base/quantum_graph/aggregator/_workers.py +0 -303
  60. lsst/pipe/base/quantum_graph/formatter.py +0 -171
  61. lsst/pipe/base/quantum_graph/ingest_graph.py +0 -413
  62. lsst_pipe_base-30.0.1rc1.dist-info/RECORD +0 -129
  63. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/entry_points.txt +0 -0
  64. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/COPYRIGHT +0 -0
  65. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/LICENSE +0 -0
  66. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/bsd_license.txt +0 -0
  67. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/gpl-v3.0.txt +0 -0
  68. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/top_level.txt +0 -0
  69. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/zip-safe +0 -0

lsst/pipe/base/pipeline_graph/visualization/_layout.py

@@ -30,7 +30,7 @@ __all__ = ("ColumnSelector", "Layout", "LayoutRow")
 
 import dataclasses
 from collections.abc import Iterable, Iterator, Mapping, Set
-from typing import TextIO
+from typing import Generic, TextIO, TypeVar
 
 import networkx
 import networkx.algorithms.components
@@ -38,8 +38,10 @@ import networkx.algorithms.dag
 import networkx.algorithms.shortest_paths
 import networkx.algorithms.traversal
 
+_K = TypeVar("_K")
 
-class Layout[K]:
+
+class Layout(Generic[_K]):
     """A class that positions nodes and edges in text-art graph visualizations.
 
     Parameters
@@ -71,9 +73,9 @@ class Layout[K]:
         # to be close to that text when possible (or maybe it's historical, and
         # it's just a lot of work to re-invert the algorithm now that it's
         # written).
-        self._active_columns: dict[int, set[K]] = {}
+        self._active_columns: dict[int, set[_K]] = {}
         # Mapping from node key to its column.
-        self._locations: dict[K, int] = {}
+        self._locations: dict[_K, int] = {}
         # Minimum and maximum column (may go negative; will be shifted as
         # needed before actual display).
         self._x_min = 0
@@ -114,7 +116,7 @@ class Layout[K]:
         for component_xgraph, component_order in component_xgraphs_and_orders:
             self._add_connected_graph(component_xgraph, component_order)
 
-    def _add_single_node(self, node: K) -> None:
+    def _add_single_node(self, node: _K) -> None:
         """Add a single node to the layout."""
         assert node not in self._locations
         if not self._locations:
@@ -182,7 +184,7 @@ class Layout[K]:
         return x + 1
 
     def _add_connected_graph(
-        self, xgraph: networkx.DiGraph | networkx.MultiDiGraph, order: list[K] | None = None
+        self, xgraph: networkx.DiGraph | networkx.MultiDiGraph, order: list[_K] | None = None
     ) -> None:
         """Add a subgraph whose nodes are connected.
 
@@ -200,7 +202,7 @@ class Layout[K]:
         # "backbone" of our layout; we'll step through this path and add
         # recurse via calls to `_add_graph` on the nodes that we think should
         # go between the backbone nodes.
-        backbone: list[K] = networkx.algorithms.dag.dag_longest_path(xgraph, topo_order=order)
+        backbone: list[_K] = networkx.algorithms.dag.dag_longest_path(xgraph, topo_order=order)
         # Add the first backbone node and any ancestors according to the full
         # graph (it can't have ancestors in this _subgraph_ because they'd have
         # been part of the longest path themselves, but the subgraph doesn't
@@ -235,7 +237,7 @@ class Layout[K]:
         remaining.remove_nodes_from(self._locations.keys())
         self._add_graph(remaining)
 
-    def _add_blockers_of(self, node: K) -> None:
+    def _add_blockers_of(self, node: _K) -> None:
         """Add all nodes that are ancestors of the given node according to the
         full graph.
         """
@@ -249,7 +251,7 @@ class Layout[K]:
         return (self._x_max - self._x_min) // 2
 
     @property
-    def nodes(self) -> Iterable[K]:
+    def nodes(self) -> Iterable[_K]:
         """The graph nodes in the order they appear in the layout."""
         return self._locations.keys()
 
@@ -275,7 +277,7 @@ class Layout[K]:
         return (self._x_max - x) // 2
 
     def __iter__(self) -> Iterator[LayoutRow]:
-        active_edges: dict[K, set[K]] = {}
+        active_edges: dict[_K, set[_K]] = {}
         for node, node_x in self._locations.items():
             row = LayoutRow(node, self._external_location(node_x))
             for origin, destinations in active_edges.items():
@@ -293,20 +295,20 @@
 
 
 @dataclasses.dataclass
-class LayoutRow[K]:
+class LayoutRow(Generic[_K]):
     """Information about a single text-art row in a graph."""
 
-    node: K
+    node: _K
     """Key for the node in the exported NetworkX graph."""
 
     x: int
     """Column of the node's symbol and its outgoing edges."""
 
-    connecting: list[tuple[int, K]] = dataclasses.field(default_factory=list)
+    connecting: list[tuple[int, _K]] = dataclasses.field(default_factory=list)
     """The columns and node keys of edges that terminate at this row.
     """
 
-    continuing: list[tuple[int, K, frozenset[K]]] = dataclasses.field(default_factory=list)
+    continuing: list[tuple[int, _K, frozenset[_K]]] = dataclasses.field(default_factory=list)
     """The columns and node keys of edges that continue through this row.
     """
 
@@ -335,11 +337,11 @@ class ColumnSelector:
     out in that case because it's applied to all candidate columns.
     """
 
-    def __call__[K](
+    def __call__(
         self,
         connecting_x: list[int],
         node_x: int,
-        active_columns: Mapping[int, Set[K]],
+        active_columns: Mapping[int, Set[_K]],
         x_min: int,
         x_max: int,
     ) -> int:
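
Note: the hunks above (and the _merge.py and _printer.py hunks below) all apply the same backport: PEP 695 inline type parameters such as `class Layout[K]:` and `def __call__[K](...)` are replaced by a module-level `typing.TypeVar` plus `Generic`, which older interpreters and tooling can still handle. A minimal sketch of the pattern, using a made-up class that is not part of lsst.pipe.base:

    from typing import Generic, TypeVar

    _K = TypeVar("_K")


    class Registry(Generic[_K]):
        """Assign consecutive integer slots to arbitrary hashable keys."""

        def __init__(self) -> None:
            self._slots: dict[_K, int] = {}

        def add(self, key: _K) -> int:
            # Reuse the existing slot if the key was already added.
            return self._slots.setdefault(key, len(self._slots))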

lsst/pipe/base/pipeline_graph/visualization/_merge.py

@@ -38,7 +38,7 @@ import hashlib
 from collections import defaultdict
 from collections.abc import Iterable
 from functools import cached_property
-from typing import Any
+from typing import Any, TypeVar
 
 import networkx
 import networkx.algorithms.dag
@@ -49,6 +49,9 @@ from lsst.daf.butler import DimensionGroup
 from .._nodes import NodeKey, NodeType
 from ._options import NodeAttributeOptions
 
+_P = TypeVar("_P")
+_C = TypeVar("_C")
+
 
 class MergedNodeKey(frozenset[NodeKey]):
     """A key for NetworkX graph nodes that represent multiple similar tasks
@@ -222,11 +225,11 @@ class _MergeKey:
     """
 
     @classmethod
-    def from_node_state[P, C](
+    def from_node_state(
         cls,
         state: dict[str, Any],
-        parents: Iterable[P],
-        children: Iterable[C],
+        parents: Iterable[_P],
+        children: Iterable[_C],
         options: NodeAttributeOptions,
     ) -> _MergeKey:
        """Construct from a NetworkX node attribute state dictionary.

lsst/pipe/base/pipeline_graph/visualization/_printer.py

@@ -30,9 +30,9 @@ __all__ = ("Printer", "make_colorama_printer", "make_default_printer", "make_sim
 
 import sys
 from collections.abc import Callable, Sequence
-from typing import TextIO
+from typing import Generic, TextIO
 
-from ._layout import Layout, LayoutRow
+from ._layout import _K, Layout, LayoutRow
 
 _CHAR_DECOMPOSITION = {
     # This mapping provides the "logic" for how to decompose the relevant
@@ -170,7 +170,7 @@ class PrintRow:
         return "".join(self._cells)
 
 
-def _default_get_text[K](node: K, x: int, style: tuple[str, str]) -> str:
+def _default_get_text(node: _K, x: int, style: tuple[str, str]) -> str:
    """Return the default text to associate with a node.
 
    This function is the default value for the ``get_text`` argument to
@@ -179,7 +179,7 @@ def _default_get_text[K](node: K, x: int, style: tuple[str, str]) -> str:
    return str(node)
 
 
-def _default_get_symbol[K](node: K, x: int) -> str:
+def _default_get_symbol(node: _K, x: int) -> str:
    """Return the default symbol for a node.
 
    This function is the default value for the ``get_symbol`` argument to
@@ -188,7 +188,7 @@ def _default_get_symbol[K](node: K, x: int) -> str:
    return "⬤"
 
 
-def _default_get_style[K](node: K, x: int) -> tuple[str, str]:
+def _default_get_style(node: _K, x: int) -> tuple[str, str]:
    """Get the default styling suffix/prefix strings.
 
    This function is the default value for the ``get_style`` argument to
@@ -197,7 +197,7 @@ def _default_get_style[K](node: K, x: int) -> tuple[str, str]:
    return "", ""
 
 
-class Printer[K]:
+class Printer(Generic[_K]):
    """High-level tool for drawing a text-based DAG visualization.
 
    Parameters
@@ -231,9 +231,9 @@ class Printer[K]:
         *,
         pad: str = " ",
         make_blank_row: Callable[[int, str], PrintRow] = PrintRow,
-        get_text: Callable[[K, int, tuple[str, str]], str] = _default_get_text,
-        get_symbol: Callable[[K, int], str] = _default_get_symbol,
-        get_style: Callable[[K, int], tuple[str, str]] = _default_get_style,
+        get_text: Callable[[_K, int, tuple[str, str]], str] = _default_get_text,
+        get_symbol: Callable[[_K, int], str] = _default_get_symbol,
+        get_style: Callable[[_K, int], tuple[str, str]] = _default_get_style,
     ):
         self.width = layout_width * 2 + 1
         self.pad = pad
@@ -245,7 +245,7 @@ class Printer[K]:
     def print_row(
         self,
         stream: TextIO,
-        layout_row: LayoutRow[K],
+        layout_row: LayoutRow[_K],
     ) -> None:
         """Print a single row of the DAG visualization to a file-like object.
 

lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py

@@ -200,13 +200,6 @@ class QuantumGraphExecutionStatusAnnotator:
    """Annotates a networkx graph with task and dataset status information from
    a quantum graph execution summary, implementing the StatusAnnotator
    protocol to update the graph with status data.
-
-    Parameters
-    ----------
-    *args : `typing.Any`
-        Arbitrary arguments.
-    **kwargs : `typing.Any`
-        Arbitrary keyword arguments.
    """
 
    def __init__(self, *args: Any, **kwargs: Any) -> None:

lsst/pipe/base/prerequisite_helpers.py

@@ -252,8 +252,7 @@ class PrerequisiteFinder:
            Sequence of collections to search, in order.
        data_id : `lsst.daf.butler.DataCoordinate`
            Data ID for the quantum.
-        skypix_bounds : `~collections.abc.Mapping` \
-            [ `str`, `lsst.sphgeom.RangeSet` ]
+        skypix_bounds : `Mapping` [ `str`, `lsst.sphgeom.RangeSet` ]
            The spatial bounds of this quantum in various skypix dimensions.
            Keys are skypix dimension names (a superset of those in
            `dataset_skypix`) and values are sets of integer pixel ID ranges.

lsst/pipe/base/quantum_graph/_common.py

@@ -50,7 +50,9 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Self,
+    TypeAlias,
     TypedDict,
+    TypeVar,
 )
 
 import networkx
@@ -79,16 +81,18 @@ if TYPE_CHECKING:
 # These aliases make it a lot easier how the various pydantic models are
 # structured, but they're too verbose to be worth exporting to code outside the
 # quantum_graph subpackage.
-type TaskLabel = str
-type DatasetTypeName = str
-type ConnectionName = str
-type DatasetIndex = int
-type QuantumIndex = int
-type DatastoreName = str
-type DimensionElementName = str
-type DataCoordinateValues = list[DataIdValue]
+TaskLabel: TypeAlias = str
+DatasetTypeName: TypeAlias = str
+ConnectionName: TypeAlias = str
+DatasetIndex: TypeAlias = int
+QuantumIndex: TypeAlias = int
+DatastoreName: TypeAlias = str
+DimensionElementName: TypeAlias = str
+DataCoordinateValues: TypeAlias = list[DataIdValue]
 
 
+_T = TypeVar("_T", bound=pydantic.BaseModel)
+
 FORMAT_VERSION: int = 1
 """
 File format version number for new files.
@@ -444,17 +448,14 @@ class BaseQuantumGraphWriter:
         uri: ResourcePathExpression,
         header: HeaderModel,
         pipeline_graph: PipelineGraph,
+        indices: dict[uuid.UUID, int],
         *,
         address_filename: str,
+        compressor: Compressor,
         cdict_data: bytes | None = None,
-        zstd_level: int = 10,
     ) -> Iterator[Self]:
-        uri = ResourcePath(uri, forceDirectory=False)
-        address_writer = AddressWriter()
-        if uri.isLocal:
-            os.makedirs(uri.dirname().ospath, exist_ok=True)
-        cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
-        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
+        uri = ResourcePath(uri)
+        address_writer = AddressWriter(indices)
         with uri.open(mode="wb") as stream:
             with zipfile.ZipFile(stream, mode="w", compression=zipfile.ZIP_STORED) as zf:
                 self = cls(zf, compressor, address_writer, header.int_size)
@@ -593,9 +594,9 @@ class BaseQuantumGraphReader:
         )
 
     @staticmethod
-    def _read_single_block_static[T: pydantic.BaseModel](
-        name: str, model_type: type[T], zf: zipfile.ZipFile, decompressor: Decompressor
-    ) -> T:
+    def _read_single_block_static(
+        name: str, model_type: type[_T], zf: zipfile.ZipFile, decompressor: Decompressor
+    ) -> _T:
         """Read a single compressed JSON block from a 'file' in a zip archive.
 
         Parameters
@@ -618,7 +619,7 @@
         json_data = decompressor.decompress(compressed_data)
         return model_type.model_validate_json(json_data)
 
-    def _read_single_block[T: pydantic.BaseModel](self, name: str, model_type: type[T]) -> T:
+    def _read_single_block(self, name: str, model_type: type[_T]) -> _T:
         """Read a single compressed JSON block from a 'file' in a zip archive.
 
         Parameters
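
Note: two related spellings recur in this file: runtime `type X = ...` alias statements become `X: TypeAlias = ...`, and the PEP 695 bound syntax `def f[T: pydantic.BaseModel](...)` becomes a module-level `TypeVar` with `bound=`. A small, self-contained illustration of both (the function name here is a stand-in, not the package's reader):

    from typing import TypeAlias, TypeVar

    import pydantic

    TaskLabel: TypeAlias = str  # replaces ``type TaskLabel = str``

    _T = TypeVar("_T", bound=pydantic.BaseModel)


    def parse_block(data: bytes, model_type: type[_T]) -> _T:
        # The bound guarantees model_validate_json exists on model_type.
        return model_type.model_validate_json(data)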

lsst/pipe/base/quantum_graph/_multiblock.py

@@ -43,22 +43,25 @@ import dataclasses
 import logging
 import tempfile
 import uuid
-import zipfile
-from collections.abc import Iterator, Set
+from collections.abc import Iterator
 from contextlib import contextmanager
 from io import BufferedReader, BytesIO
 from operator import attrgetter
-from typing import IO, Protocol, TypeVar
+from typing import IO, TYPE_CHECKING, Protocol, TypeAlias, TypeVar
 
 import pydantic
 
+if TYPE_CHECKING:
+    import zipfile
+
+
 _LOG = logging.getLogger(__name__)
 
 
 _T = TypeVar("_T", bound=pydantic.BaseModel)
 
 
-type UUID_int = int
+UUID_int: TypeAlias = int
 
 MAX_UUID_INT: UUID_int = 2**128
 
@@ -74,7 +77,7 @@ individual quanta (especially for execution).
 
 
 class Compressor(Protocol):
-    """A protocol for objects with a ``compress`` method that takes and returns
+    """A protocol for objects with a `compress` method that takes and returns
     `bytes`.
     """
 
@@ -202,14 +205,21 @@ class AddressRow:
 class AddressWriter:
     """A helper object for writing address files for multi-block files."""
 
+    indices: dict[uuid.UUID, int] = dataclasses.field(default_factory=dict)
+    """Mapping from UUID to internal integer ID.
+
+    The internal integer ID must always correspond to the index into the
+    sorted list of all UUIDs, but this `dict` need not be sorted itself.
+    """
+
     addresses: list[dict[uuid.UUID, Address]] = dataclasses.field(default_factory=list)
     """Addresses to store with each UUID.
 
-    Every key in one of these dictionaries must have an entry in ``indices``.
+    Every key in one of these dictionaries must have an entry in `indices`.
     The converse is not true.
     """
 
-    def write(self, stream: IO[bytes], int_size: int, all_ids: Set[uuid.UUID] | None = None) -> None:
+    def write(self, stream: IO[bytes], int_size: int) -> None:
         """Write all addresses to a file-like object.
 
         Parameters
@@ -218,18 +228,19 @@ class AddressWriter:
             Binary file-like object.
         int_size : `int`
             Number of bytes to use for all integers.
-        all_ids : `~collections.abc.Set` [`uuid.UUID`], optional
-            Set of the union of all UUIDs in any dictionary from a call to
-            `get_all_ids`.
         """
-        if all_ids is None:
-            all_ids = self.get_all_ids()
+        for n, address_map in enumerate(self.addresses):
+            if not self.indices.keys() >= address_map.keys():
+                raise AssertionError(
+                    f"Logic bug in quantum graph I/O: address map {n} of {len(self.addresses)} has IDs "
+                    f"{address_map.keys() - self.indices.keys()} not in the index map."
+                )
         stream.write(int_size.to_bytes(1))
-        stream.write(len(all_ids).to_bytes(int_size))
+        stream.write(len(self.indices).to_bytes(int_size))
         stream.write(len(self.addresses).to_bytes(int_size))
         empty_address = Address()
-        for n, key in enumerate(sorted(all_ids, key=attrgetter("int"))):
-            row = AddressRow(key, n, [m.get(key, empty_address) for m in self.addresses])
+        for key in sorted(self.indices.keys(), key=attrgetter("int")):
+            row = AddressRow(key, self.indices[key], [m.get(key, empty_address) for m in self.addresses])
             _LOG.debug("Wrote address %s.", row)
             row.write(stream, int_size)
 
@@ -245,25 +256,8 @@ class AddressWriter:
         int_size : `int`
             Number of bytes to use for all integers.
         """
-        all_ids = self.get_all_ids()
-        zip_info = zipfile.ZipInfo(f"{name}.addr")
-        row_size = AddressReader.compute_row_size(int_size, len(self.addresses))
-        zip_info.file_size = AddressReader.compute_header_size(int_size) + len(all_ids) * row_size
-        with zf.open(zip_info, mode="w") as stream:
-            self.write(stream, int_size=int_size, all_ids=all_ids)
-
-    def get_all_ids(self) -> Set[uuid.UUID]:
-        """Return all IDs used by any address dictionary.
-
-        Returns
-        -------
-        all_ids : `~collections.abc.Set` [`uuid.UUID`]
-            Set of all IDs.
-        """
-        all_ids: set[uuid.UUID] = set()
-        for address_map in self.addresses:
-            all_ids.update(address_map.keys())
-        return all_ids
+        with zf.open(f"{name}.addr", mode="w") as stream:
+            self.write(stream, int_size=int_size)
 
 
 @dataclasses.dataclass
@@ -662,7 +656,7 @@ class MultiblockWriter:
         model : `pydantic.BaseModel`
             Model to convert to JSON and compress.
         compressor : `Compressor`
-            Object with a ``compress`` method that takes and returns `bytes`.
+            Object with a `compress` method that takes and returns `bytes`.
 
         Returns
         -------
@@ -759,7 +753,7 @@ class MultiblockReader:
         model_type : `type` [ `pydantic.BaseModel` ]
             Pydantic model to validate JSON with.
         decompressor : `Decompressor`
-            Object with a ``decompress`` method that takes and returns `bytes`.
+            Object with a `decompress` method that takes and returns `bytes`.
         int_size : `int`
             Number of bytes to use for all integers.
         page_size : `int`
@@ -809,7 +803,7 @@ class MultiblockReader:
         model_type : `type` [ `pydantic.BaseModel` ]
            Pydantic model to validate JSON with.
         decompressor : `Decompressor`
-            Object with a ``decompress`` method that takes and returns `bytes`.
+            Object with a `decompress` method that takes and returns `bytes`.
 
         Returns
        -------
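
Note: the `AddressWriter` changes replace the old on-the-fly union of UUIDs (`get_all_ids`) with an `indices` mapping supplied by the caller, where each UUID's integer ID is its position in the sorted list of all UUIDs, and `write` now checks that every per-block address map only uses keys present in that mapping. A hedged sketch of that invariant (illustrative only, not the package's code):

    import uuid

    quantum_ids = [uuid.uuid4() for _ in range(4)]

    # Integer ID == position of the UUID in the sorted list of all UUIDs.
    indices = {key: n for n, key in enumerate(sorted(quantum_ids, key=lambda u: u.int))}

    # Every per-block address map may only reference known UUIDs.
    address_map = {quantum_ids[0]: 128, quantum_ids[2]: 4096}
    assert indices.keys() >= address_map.keys()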

lsst/pipe/base/quantum_graph/_predicted.py

@@ -49,7 +49,7 @@ import warnings
 from collections import defaultdict
 from collections.abc import Iterable, Iterator, Mapping, Sequence
 from contextlib import AbstractContextManager, contextmanager
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any, TypeVar, cast
 
 import networkx
 import networkx.algorithms.bipartite
@@ -66,7 +66,6 @@ from lsst.daf.butler import (
     DimensionDataExtractor,
     DimensionGroup,
     DimensionRecordSetDeserializer,
-    DimensionUniverse,
     LimitedButler,
     Quantum,
     QuantumBackedButler,
@@ -110,14 +109,10 @@ if TYPE_CHECKING:
     from ..config import PipelineTaskConfig
     from ..graph import QgraphSummary, QuantumGraph
 
-    # Sphinx needs imports for type annotations of base class members.
-    if "sphinx" in sys.modules:
-        import zipfile  # noqa: F401
-
-        from ._multiblock import AddressReader, Decompressor  # noqa: F401
+_LOG = logging.getLogger(__name__)
 
 
-_LOG = logging.getLogger(__name__)
+_T = TypeVar("_T", bound=pydantic.BaseModel)
 
 
 class _PredictedThinQuantumModelV0(pydantic.BaseModel):
@@ -882,49 +877,6 @@ class PredictedQuantumGraph(BaseQuantumGraph):
             page_size=page_size,
         ).assemble()
 
-    @classmethod
-    def make_empty(
-        cls,
-        universe: DimensionUniverse,
-        *,
-        output_run: str,
-        inputs: Iterable[str] = (),
-        output: str | None = None,
-        add_packages: bool = True,
-    ) -> PredictedQuantumGraph:
-        """Make an empty quantum graph with no tasks.
-
-        Parameters
-        ----------
-        universe : `lsst.daf.butler.DimensionUniverse`
-            Definitions for all butler dimensions.
-        output_run : `str`
-            Output run collection.
-        inputs : `~collections.abc.Iterable` [`str`], optional
-            Iterable of input collection names.
-        output : `str` or `None`, optional
-            Output chained collection.
-        add_packages : `bool`, optional
-            Whether to add the special init quantum that writes the 'packages'
-            dataset. The default (`True`) is consistent with
-            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
-            are no regular quanta generated.
-
-        Returns
-        -------
-        quantum_graph : `PredictedQuantumGraph`
-            An empty quantum graph.
-        """
-        return cls(
-            PredictedQuantumGraphComponents.make_empty(
-                universe,
-                output_run=output_run,
-                inputs=inputs,
-                output=output,
-                add_packages=add_packages,
-            )
-        )
-
     @property
     def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
         """A nested mapping of all quanta, keyed first by task name and then by
@@ -1589,63 +1541,6 @@ class PredictedQuantumGraphComponents:
     This does not include special "init" quanta.
     """
 
-    @classmethod
-    def make_empty(
-        cls,
-        universe: DimensionUniverse,
-        *,
-        output_run: str,
-        inputs: Iterable[str] = (),
-        output: str | None = None,
-        add_packages: bool = True,
-    ) -> PredictedQuantumGraphComponents:
-        """Make components for an empty quantum graph with no tasks.
-
-        Parameters
-        ----------
-        universe : `lsst.daf.butler.DimensionUniverse`
-            Definitions for all butler dimensions.
-        output_run : `str`
-            Output run collection.
-        inputs : `~collections.abc.Iterable` [`str`], optional
-            Iterable of input collection names.
-        output : `str` or `None`, optional
-            Output chained collection.
-        add_packages : `bool`, optional
-            Whether to add the special init quantum that writes the 'packages'
-            dataset. The default (`True`) is consistent with
-            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
-            are no regular quanta generated.
-
-        Returns
-        -------
-        components : `PredictedQuantumGraphComponents`
-            Components that can be used to build or write an empty quantum
-            graph.
-        """
-        components = cls(pipeline_graph=PipelineGraph(universe=universe))
-        components.header.inputs = list(inputs)
-        components.header.output_run = output_run
-        components.header.output = output
-        if add_packages:
-            components.init_quanta.root = [
-                PredictedQuantumDatasetsModel.model_construct(
-                    quantum_id=generate_uuidv7(),
-                    task_label="",
-                    outputs={
-                        acc.PACKAGES_INIT_OUTPUT_NAME: [
-                            PredictedDatasetModel(
-                                dataset_id=generate_uuidv7(),
-                                dataset_type_name=acc.PACKAGES_INIT_OUTPUT_NAME,
-                                data_coordinate=[],
-                                run=output_run,
-                            )
-                        ]
-                    },
-                )
-            ]
-        return components
-
     def make_dataset_ref(self, predicted: PredictedDatasetModel) -> DatasetRef:
         """Make a `lsst.daf.butler.DatasetRef` from information in the
         predicted quantum graph.
@@ -1898,6 +1793,7 @@ class PredictedQuantumGraphComponents:
                 f"Unsupported extension {ext!r} for quantum graph; "
                 "expected '.qg' (or '.qgraph' to force the old format)."
             )
+        cdict: zstandard.ZstdCompressionDict | None = None
         cdict_data: bytes | None = None
         quantum_datasets_json: dict[uuid.UUID, bytes] = {}
         if len(self.quantum_datasets) < zstd_dict_n_inputs:
@@ -1911,20 +1807,26 @@ class PredictedQuantumGraphComponents:
                for quantum_model in itertools.islice(self.quantum_datasets.values(), zstd_dict_n_inputs)
            }
            try:
-                cdict_data = zstandard.train_dictionary(
+                cdict = zstandard.train_dictionary(
                    zstd_dict_size,
                    list(quantum_datasets_json.values()),
                    level=zstd_level,
-                ).as_bytes()
+                )
            except zstandard.ZstdError as err:
                warnings.warn(f"Not using a compression dictionary: {err}.")
+                cdict = None
+            else:
+                cdict_data = cdict.as_bytes()
+        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
+        indices = {quantum_id: n for n, quantum_id in enumerate(sorted(self.quantum_datasets.keys()))}
        with BaseQuantumGraphWriter.open(
            uri,
            header=self.header,
            pipeline_graph=self.pipeline_graph,
+            indices=indices,
            address_filename="quanta",
+            compressor=compressor,
            cdict_data=cdict_data,
-            zstd_level=zstd_level,
        ) as writer:
            writer.write_single_model("thin_graph", self.thin_graph)
            if self.dimension_data is None:
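
Note: in the new write path the caller trains the optional zstd dictionary, builds the `ZstdCompressor`, and computes the quantum-ID `indices` before handing all three to `BaseQuantumGraphWriter.open`, instead of the writer deriving them itself. An illustrative use of the same zstandard calls, with made-up sample data and sizes:

    import zstandard

    samples = [b'{"quantum": %d}' % n for n in range(200)]

    try:
        cdict = zstandard.train_dictionary(4096, samples, level=10)
        cdict_data = cdict.as_bytes()
    except zstandard.ZstdError:
        cdict, cdict_data = None, None  # fall back to dictionary-less compression

    compressor = zstandard.ZstdCompressor(level=10, dict_data=cdict)
    compressed = compressor.compress(samples[0])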