lsst-pipe-base: 30.0.1rc1-py3-none-any.whl → 30.2025.5200-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. lsst/pipe/base/_instrument.py +20 -31
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +10 -43
  4. lsst/pipe/base/_task_metadata.py +2 -2
  5. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +3 -8
  6. lsst/pipe/base/automatic_connection_constants.py +1 -20
  7. lsst/pipe/base/cli/cmd/__init__.py +2 -18
  8. lsst/pipe/base/cli/cmd/commands.py +4 -149
  9. lsst/pipe/base/connectionTypes.py +160 -72
  10. lsst/pipe/base/connections.py +9 -6
  11. lsst/pipe/base/execution_reports.py +5 -0
  12. lsst/pipe/base/graph/graph.py +10 -11
  13. lsst/pipe/base/graph/quantumNode.py +4 -4
  14. lsst/pipe/base/graph_walker.py +10 -8
  15. lsst/pipe/base/log_capture.py +5 -9
  16. lsst/pipe/base/mp_graph_executor.py +15 -51
  17. lsst/pipe/base/pipeline.py +6 -5
  18. lsst/pipe/base/pipelineIR.py +8 -2
  19. lsst/pipe/base/pipelineTask.py +7 -5
  20. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  21. lsst/pipe/base/pipeline_graph/_edges.py +22 -32
  22. lsst/pipe/base/pipeline_graph/_mapping_views.py +7 -4
  23. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +7 -14
  24. lsst/pipe/base/pipeline_graph/expressions.py +2 -2
  25. lsst/pipe/base/pipeline_graph/io.py +10 -7
  26. lsst/pipe/base/pipeline_graph/visualization/_dot.py +12 -13
  27. lsst/pipe/base/pipeline_graph/visualization/_layout.py +18 -16
  28. lsst/pipe/base/pipeline_graph/visualization/_merge.py +7 -4
  29. lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
  30. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +0 -7
  31. lsst/pipe/base/prerequisite_helpers.py +1 -2
  32. lsst/pipe/base/quantum_graph/_common.py +20 -19
  33. lsst/pipe/base/quantum_graph/_multiblock.py +31 -37
  34. lsst/pipe/base/quantum_graph/_predicted.py +13 -111
  35. lsst/pipe/base/quantum_graph/_provenance.py +45 -1136
  36. lsst/pipe/base/quantum_graph/aggregator/__init__.py +1 -0
  37. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +289 -204
  38. lsst/pipe/base/quantum_graph/aggregator/_config.py +9 -87
  39. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -13
  40. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +235 -49
  41. lsst/pipe/base/quantum_graph/aggregator/_structs.py +116 -6
  42. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +39 -29
  43. lsst/pipe/base/quantum_graph/aggregator/_writer.py +351 -34
  44. lsst/pipe/base/quantum_graph/visualization.py +1 -5
  45. lsst/pipe/base/quantum_graph_builder.py +8 -21
  46. lsst/pipe/base/quantum_graph_executor.py +13 -116
  47. lsst/pipe/base/quantum_graph_skeleton.py +29 -31
  48. lsst/pipe/base/quantum_provenance_graph.py +12 -29
  49. lsst/pipe/base/separable_pipeline_executor.py +3 -19
  50. lsst/pipe/base/single_quantum_executor.py +42 -67
  51. lsst/pipe/base/struct.py +0 -4
  52. lsst/pipe/base/testUtils.py +3 -3
  53. lsst/pipe/base/tests/mocks/_storage_class.py +1 -2
  54. lsst/pipe/base/version.py +1 -1
  55. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/METADATA +3 -3
  56. lsst_pipe_base-30.2025.5200.dist-info/RECORD +125 -0
  57. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/WHEEL +1 -1
  58. lsst/pipe/base/log_on_close.py +0 -76
  59. lsst/pipe/base/quantum_graph/aggregator/_workers.py +0 -303
  60. lsst/pipe/base/quantum_graph/formatter.py +0 -171
  61. lsst/pipe/base/quantum_graph/ingest_graph.py +0 -413
  62. lsst_pipe_base-30.0.1rc1.dist-info/RECORD +0 -129
  63. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/entry_points.txt +0 -0
  64. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/COPYRIGHT +0 -0
  65. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/LICENSE +0 -0
  66. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/bsd_license.txt +0 -0
  67. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/gpl-v3.0.txt +0 -0
  68. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/top_level.txt +0 -0
  69. {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/zip-safe +0 -0

lsst/pipe/base/pipeline_graph/visualization/_layout.py

@@ -30,7 +30,7 @@ __all__ = ("ColumnSelector", "Layout", "LayoutRow")
 
 import dataclasses
 from collections.abc import Iterable, Iterator, Mapping, Set
-from typing import TextIO
+from typing import Generic, TextIO, TypeVar
 
 import networkx
 import networkx.algorithms.components
@@ -38,8 +38,10 @@ import networkx.algorithms.dag
 import networkx.algorithms.shortest_paths
 import networkx.algorithms.traversal
 
+_K = TypeVar("_K")
 
-class Layout[K]:
+
+class Layout(Generic[_K]):
     """A class that positions nodes and edges in text-art graph visualizations.
 
     Parameters
@@ -71,9 +73,9 @@ class Layout[K]:
         # to be close to that text when possible (or maybe it's historical, and
         # it's just a lot of work to re-invert the algorithm now that it's
         # written).
-        self._active_columns: dict[int, set[K]] = {}
+        self._active_columns: dict[int, set[_K]] = {}
         # Mapping from node key to its column.
-        self._locations: dict[K, int] = {}
+        self._locations: dict[_K, int] = {}
         # Minimum and maximum column (may go negative; will be shifted as
         # needed before actual display).
         self._x_min = 0
@@ -114,7 +116,7 @@ class Layout[K]:
         for component_xgraph, component_order in component_xgraphs_and_orders:
             self._add_connected_graph(component_xgraph, component_order)
 
-    def _add_single_node(self, node: K) -> None:
+    def _add_single_node(self, node: _K) -> None:
         """Add a single node to the layout."""
         assert node not in self._locations
         if not self._locations:
@@ -182,7 +184,7 @@ class Layout[K]:
         return x + 1
 
     def _add_connected_graph(
-        self, xgraph: networkx.DiGraph | networkx.MultiDiGraph, order: list[K] | None = None
+        self, xgraph: networkx.DiGraph | networkx.MultiDiGraph, order: list[_K] | None = None
     ) -> None:
         """Add a subgraph whose nodes are connected.
 
@@ -200,7 +202,7 @@ class Layout[K]:
         # "backbone" of our layout; we'll step through this path and add
         # recurse via calls to `_add_graph` on the nodes that we think should
         # go between the backbone nodes.
-        backbone: list[K] = networkx.algorithms.dag.dag_longest_path(xgraph, topo_order=order)
+        backbone: list[_K] = networkx.algorithms.dag.dag_longest_path(xgraph, topo_order=order)
         # Add the first backbone node and any ancestors according to the full
         # graph (it can't have ancestors in this _subgraph_ because they'd have
         # been part of the longest path themselves, but the subgraph doesn't
@@ -235,7 +237,7 @@ class Layout[K]:
         remaining.remove_nodes_from(self._locations.keys())
         self._add_graph(remaining)
 
-    def _add_blockers_of(self, node: K) -> None:
+    def _add_blockers_of(self, node: _K) -> None:
         """Add all nodes that are ancestors of the given node according to the
         full graph.
         """
@@ -249,7 +251,7 @@ class Layout[K]:
         return (self._x_max - self._x_min) // 2
 
     @property
-    def nodes(self) -> Iterable[K]:
+    def nodes(self) -> Iterable[_K]:
         """The graph nodes in the order they appear in the layout."""
         return self._locations.keys()
 
@@ -275,7 +277,7 @@ class Layout[K]:
         return (self._x_max - x) // 2
 
     def __iter__(self) -> Iterator[LayoutRow]:
-        active_edges: dict[K, set[K]] = {}
+        active_edges: dict[_K, set[_K]] = {}
         for node, node_x in self._locations.items():
             row = LayoutRow(node, self._external_location(node_x))
             for origin, destinations in active_edges.items():
@@ -293,20 +295,20 @@
 
 
 @dataclasses.dataclass
-class LayoutRow[K]:
+class LayoutRow(Generic[_K]):
     """Information about a single text-art row in a graph."""
 
-    node: K
+    node: _K
     """Key for the node in the exported NetworkX graph."""
 
     x: int
     """Column of the node's symbol and its outgoing edges."""
 
-    connecting: list[tuple[int, K]] = dataclasses.field(default_factory=list)
+    connecting: list[tuple[int, _K]] = dataclasses.field(default_factory=list)
     """The columns and node keys of edges that terminate at this row.
     """
 
-    continuing: list[tuple[int, K, frozenset[K]]] = dataclasses.field(default_factory=list)
+    continuing: list[tuple[int, _K, frozenset[_K]]] = dataclasses.field(default_factory=list)
     """The columns and node keys of edges that continue through this row.
     """
 
@@ -335,11 +337,11 @@ class ColumnSelector:
     out in that case because it's applied to all candidate columns.
     """
 
-    def __call__[K](
+    def __call__(
         self,
         connecting_x: list[int],
         node_x: int,
-        active_columns: Mapping[int, Set[K]],
+        active_columns: Mapping[int, Set[_K]],
         x_min: int,
         x_max: int,
     ) -> int:
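
Note: the hunks above (and the _merge.py and _printer.py hunks below) all apply the same backport: PEP 695 inline type parameters such as `class Layout[K]:` and `def __call__[K](...)` are replaced by a module-level `typing.TypeVar` plus `Generic`, which older interpreters and tooling can still handle. A minimal sketch of the pattern, using a made-up class that is not part of lsst.pipe.base:

    from typing import Generic, TypeVar

    _K = TypeVar("_K")


    class Registry(Generic[_K]):
        """Assign consecutive integer slots to arbitrary hashable keys."""

        def __init__(self) -> None:
            self._slots: dict[_K, int] = {}

        def add(self, key: _K) -> int:
            # Reuse the existing slot if the key was already added.
            return self._slots.setdefault(key, len(self._slots))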

lsst/pipe/base/pipeline_graph/visualization/_merge.py

@@ -38,7 +38,7 @@ import hashlib
 from collections import defaultdict
 from collections.abc import Iterable
 from functools import cached_property
-from typing import Any
+from typing import Any, TypeVar
 
 import networkx
 import networkx.algorithms.dag
@@ -49,6 +49,9 @@ from lsst.daf.butler import DimensionGroup
 from .._nodes import NodeKey, NodeType
 from ._options import NodeAttributeOptions
 
+_P = TypeVar("_P")
+_C = TypeVar("_C")
+
 
 class MergedNodeKey(frozenset[NodeKey]):
     """A key for NetworkX graph nodes that represent multiple similar tasks
@@ -222,11 +225,11 @@ class _MergeKey:
     """
 
     @classmethod
-    def from_node_state[P, C](
+    def from_node_state(
         cls,
         state: dict[str, Any],
-        parents: Iterable[P],
-        children: Iterable[C],
+        parents: Iterable[_P],
+        children: Iterable[_C],
         options: NodeAttributeOptions,
     ) -> _MergeKey:
        """Construct from a NetworkX node attribute state dictionary.

lsst/pipe/base/pipeline_graph/visualization/_printer.py

@@ -30,9 +30,9 @@ __all__ = ("Printer", "make_colorama_printer", "make_default_printer", "make_sim
 
 import sys
 from collections.abc import Callable, Sequence
-from typing import TextIO
+from typing import Generic, TextIO
 
-from ._layout import Layout, LayoutRow
+from ._layout import _K, Layout, LayoutRow
 
 _CHAR_DECOMPOSITION = {
     # This mapping provides the "logic" for how to decompose the relevant
@@ -170,7 +170,7 @@ class PrintRow:
         return "".join(self._cells)
 
 
-def _default_get_text[K](node: K, x: int, style: tuple[str, str]) -> str:
+def _default_get_text(node: _K, x: int, style: tuple[str, str]) -> str:
    """Return the default text to associate with a node.
 
    This function is the default value for the ``get_text`` argument to
@@ -179,7 +179,7 @@ def _default_get_text[K](node: K, x: int, style: tuple[str, str]) -> str:
    return str(node)
 
 
-def _default_get_symbol[K](node: K, x: int) -> str:
+def _default_get_symbol(node: _K, x: int) -> str:
    """Return the default symbol for a node.
 
    This function is the default value for the ``get_symbol`` argument to
@@ -188,7 +188,7 @@ def _default_get_symbol[K](node: K, x: int) -> str:
    return "⬤"
 
 
-def _default_get_style[K](node: K, x: int) -> tuple[str, str]:
+def _default_get_style(node: _K, x: int) -> tuple[str, str]:
    """Get the default styling suffix/prefix strings.
 
    This function is the default value for the ``get_style`` argument to
@@ -197,7 +197,7 @@ def _default_get_style[K](node: K, x: int) -> tuple[str, str]:
    return "", ""
 
 
-class Printer[K]:
+class Printer(Generic[_K]):
    """High-level tool for drawing a text-based DAG visualization.
 
    Parameters
@@ -231,9 +231,9 @@ class Printer[K]:
         *,
         pad: str = " ",
         make_blank_row: Callable[[int, str], PrintRow] = PrintRow,
-        get_text: Callable[[K, int, tuple[str, str]], str] = _default_get_text,
-        get_symbol: Callable[[K, int], str] = _default_get_symbol,
-        get_style: Callable[[K, int], tuple[str, str]] = _default_get_style,
+        get_text: Callable[[_K, int, tuple[str, str]], str] = _default_get_text,
+        get_symbol: Callable[[_K, int], str] = _default_get_symbol,
+        get_style: Callable[[_K, int], tuple[str, str]] = _default_get_style,
     ):
         self.width = layout_width * 2 + 1
         self.pad = pad
@@ -245,7 +245,7 @@ class Printer[K]:
     def print_row(
         self,
         stream: TextIO,
-        layout_row: LayoutRow[K],
+        layout_row: LayoutRow[_K],
     ) -> None:
         """Print a single row of the DAG visualization to a file-like object.
 

lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py

@@ -200,13 +200,6 @@ class QuantumGraphExecutionStatusAnnotator:
    """Annotates a networkx graph with task and dataset status information from
    a quantum graph execution summary, implementing the StatusAnnotator
    protocol to update the graph with status data.
-
-    Parameters
-    ----------
-    *args : `typing.Any`
-        Arbitrary arguments.
-    **kwargs : `typing.Any`
-        Arbitrary keyword arguments.
    """
 
    def __init__(self, *args: Any, **kwargs: Any) -> None:

lsst/pipe/base/prerequisite_helpers.py

@@ -252,8 +252,7 @@ class PrerequisiteFinder:
            Sequence of collections to search, in order.
        data_id : `lsst.daf.butler.DataCoordinate`
            Data ID for the quantum.
-        skypix_bounds : `~collections.abc.Mapping` \
-            [ `str`, `lsst.sphgeom.RangeSet` ]
+        skypix_bounds : `Mapping` [ `str`, `lsst.sphgeom.RangeSet` ]
            The spatial bounds of this quantum in various skypix dimensions.
            Keys are skypix dimension names (a superset of those in
            `dataset_skypix`) and values are sets of integer pixel ID ranges.

lsst/pipe/base/quantum_graph/_common.py

@@ -50,7 +50,9 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Self,
+    TypeAlias,
     TypedDict,
+    TypeVar,
 )
 
 import networkx
@@ -79,16 +81,18 @@ if TYPE_CHECKING:
 # These aliases make it a lot easier how the various pydantic models are
 # structured, but they're too verbose to be worth exporting to code outside the
 # quantum_graph subpackage.
-type TaskLabel = str
-type DatasetTypeName = str
-type ConnectionName = str
-type DatasetIndex = int
-type QuantumIndex = int
-type DatastoreName = str
-type DimensionElementName = str
-type DataCoordinateValues = list[DataIdValue]
+TaskLabel: TypeAlias = str
+DatasetTypeName: TypeAlias = str
+ConnectionName: TypeAlias = str
+DatasetIndex: TypeAlias = int
+QuantumIndex: TypeAlias = int
+DatastoreName: TypeAlias = str
+DimensionElementName: TypeAlias = str
+DataCoordinateValues: TypeAlias = list[DataIdValue]
 
 
+_T = TypeVar("_T", bound=pydantic.BaseModel)
+
 FORMAT_VERSION: int = 1
 """
 File format version number for new files.
@@ -444,17 +448,14 @@ class BaseQuantumGraphWriter:
         uri: ResourcePathExpression,
         header: HeaderModel,
         pipeline_graph: PipelineGraph,
+        indices: dict[uuid.UUID, int],
         *,
         address_filename: str,
+        compressor: Compressor,
         cdict_data: bytes | None = None,
-        zstd_level: int = 10,
     ) -> Iterator[Self]:
-        uri = ResourcePath(uri, forceDirectory=False)
-        address_writer = AddressWriter()
-        if uri.isLocal:
-            os.makedirs(uri.dirname().ospath, exist_ok=True)
-        cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
-        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
+        uri = ResourcePath(uri)
+        address_writer = AddressWriter(indices)
         with uri.open(mode="wb") as stream:
             with zipfile.ZipFile(stream, mode="w", compression=zipfile.ZIP_STORED) as zf:
                 self = cls(zf, compressor, address_writer, header.int_size)
@@ -593,9 +594,9 @@ class BaseQuantumGraphReader:
         )
 
     @staticmethod
-    def _read_single_block_static[T: pydantic.BaseModel](
-        name: str, model_type: type[T], zf: zipfile.ZipFile, decompressor: Decompressor
-    ) -> T:
+    def _read_single_block_static(
+        name: str, model_type: type[_T], zf: zipfile.ZipFile, decompressor: Decompressor
+    ) -> _T:
         """Read a single compressed JSON block from a 'file' in a zip archive.
 
         Parameters
@@ -618,7 +619,7 @@
         json_data = decompressor.decompress(compressed_data)
         return model_type.model_validate_json(json_data)
 
-    def _read_single_block[T: pydantic.BaseModel](self, name: str, model_type: type[T]) -> T:
+    def _read_single_block(self, name: str, model_type: type[_T]) -> _T:
         """Read a single compressed JSON block from a 'file' in a zip archive.
 
         Parameters
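
Note: two related spellings recur in this file: runtime `type X = ...` alias statements become `X: TypeAlias = ...`, and the PEP 695 bound syntax `def f[T: pydantic.BaseModel](...)` becomes a module-level `TypeVar` with `bound=`. A small, self-contained illustration of both (the function name here is a stand-in, not the package's reader):

    from typing import TypeAlias, TypeVar

    import pydantic

    TaskLabel: TypeAlias = str  # replaces ``type TaskLabel = str``

    _T = TypeVar("_T", bound=pydantic.BaseModel)


    def parse_block(data: bytes, model_type: type[_T]) -> _T:
        # The bound guarantees model_validate_json exists on model_type.
        return model_type.model_validate_json(data)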

lsst/pipe/base/quantum_graph/_multiblock.py

@@ -43,22 +43,25 @@ import dataclasses
 import logging
 import tempfile
 import uuid
-import zipfile
-from collections.abc import Iterator, Set
+from collections.abc import Iterator
 from contextlib import contextmanager
 from io import BufferedReader, BytesIO
 from operator import attrgetter
-from typing import IO, Protocol, TypeVar
+from typing import IO, TYPE_CHECKING, Protocol, TypeAlias, TypeVar
 
 import pydantic
 
+if TYPE_CHECKING:
+    import zipfile
+
+
 _LOG = logging.getLogger(__name__)
 
 
 _T = TypeVar("_T", bound=pydantic.BaseModel)
 
 
-type UUID_int = int
+UUID_int: TypeAlias = int
 
 MAX_UUID_INT: UUID_int = 2**128
 
@@ -74,7 +77,7 @@ individual quanta (especially for execution).
 
 
 class Compressor(Protocol):
-    """A protocol for objects with a ``compress`` method that takes and returns
+    """A protocol for objects with a `compress` method that takes and returns
     `bytes`.
     """
 
@@ -202,14 +205,21 @@ class AddressRow:
 class AddressWriter:
     """A helper object for writing address files for multi-block files."""
 
+    indices: dict[uuid.UUID, int] = dataclasses.field(default_factory=dict)
+    """Mapping from UUID to internal integer ID.
+
+    The internal integer ID must always correspond to the index into the
+    sorted list of all UUIDs, but this `dict` need not be sorted itself.
+    """
+
     addresses: list[dict[uuid.UUID, Address]] = dataclasses.field(default_factory=list)
     """Addresses to store with each UUID.
 
-    Every key in one of these dictionaries must have an entry in ``indices``.
+    Every key in one of these dictionaries must have an entry in `indices`.
     The converse is not true.
     """
 
-    def write(self, stream: IO[bytes], int_size: int, all_ids: Set[uuid.UUID] | None = None) -> None:
+    def write(self, stream: IO[bytes], int_size: int) -> None:
         """Write all addresses to a file-like object.
 
         Parameters
@@ -218,18 +228,19 @@ class AddressWriter:
             Binary file-like object.
         int_size : `int`
             Number of bytes to use for all integers.
-        all_ids : `~collections.abc.Set` [`uuid.UUID`], optional
-            Set of the union of all UUIDs in any dictionary from a call to
-            `get_all_ids`.
         """
-        if all_ids is None:
-            all_ids = self.get_all_ids()
+        for n, address_map in enumerate(self.addresses):
+            if not self.indices.keys() >= address_map.keys():
+                raise AssertionError(
+                    f"Logic bug in quantum graph I/O: address map {n} of {len(self.addresses)} has IDs "
+                    f"{address_map.keys() - self.indices.keys()} not in the index map."
+                )
         stream.write(int_size.to_bytes(1))
-        stream.write(len(all_ids).to_bytes(int_size))
+        stream.write(len(self.indices).to_bytes(int_size))
         stream.write(len(self.addresses).to_bytes(int_size))
         empty_address = Address()
-        for n, key in enumerate(sorted(all_ids, key=attrgetter("int"))):
-            row = AddressRow(key, n, [m.get(key, empty_address) for m in self.addresses])
+        for key in sorted(self.indices.keys(), key=attrgetter("int")):
+            row = AddressRow(key, self.indices[key], [m.get(key, empty_address) for m in self.addresses])
             _LOG.debug("Wrote address %s.", row)
             row.write(stream, int_size)
 
@@ -245,25 +256,8 @@ class AddressWriter:
         int_size : `int`
             Number of bytes to use for all integers.
         """
-        all_ids = self.get_all_ids()
-        zip_info = zipfile.ZipInfo(f"{name}.addr")
-        row_size = AddressReader.compute_row_size(int_size, len(self.addresses))
-        zip_info.file_size = AddressReader.compute_header_size(int_size) + len(all_ids) * row_size
-        with zf.open(zip_info, mode="w") as stream:
-            self.write(stream, int_size=int_size, all_ids=all_ids)
-
-    def get_all_ids(self) -> Set[uuid.UUID]:
-        """Return all IDs used by any address dictionary.
-
-        Returns
-        -------
-        all_ids : `~collections.abc.Set` [`uuid.UUID`]
-            Set of all IDs.
-        """
-        all_ids: set[uuid.UUID] = set()
-        for address_map in self.addresses:
-            all_ids.update(address_map.keys())
-        return all_ids
+        with zf.open(f"{name}.addr", mode="w") as stream:
+            self.write(stream, int_size=int_size)
 
 
 @dataclasses.dataclass
@@ -662,7 +656,7 @@ class MultiblockWriter:
         model : `pydantic.BaseModel`
             Model to convert to JSON and compress.
         compressor : `Compressor`
-            Object with a ``compress`` method that takes and returns `bytes`.
+            Object with a `compress` method that takes and returns `bytes`.
 
         Returns
         -------
@@ -759,7 +753,7 @@ class MultiblockReader:
         model_type : `type` [ `pydantic.BaseModel` ]
             Pydantic model to validate JSON with.
         decompressor : `Decompressor`
-            Object with a ``decompress`` method that takes and returns `bytes`.
+            Object with a `decompress` method that takes and returns `bytes`.
         int_size : `int`
             Number of bytes to use for all integers.
         page_size : `int`
@@ -809,7 +803,7 @@ class MultiblockReader:
         model_type : `type` [ `pydantic.BaseModel` ]
            Pydantic model to validate JSON with.
         decompressor : `Decompressor`
-            Object with a ``decompress`` method that takes and returns `bytes`.
+            Object with a `decompress` method that takes and returns `bytes`.
 
         Returns
        -------
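
Note: the `AddressWriter` changes replace the old on-the-fly union of UUIDs (`get_all_ids`) with an `indices` mapping supplied by the caller, where each UUID's integer ID is its position in the sorted list of all UUIDs, and `write` now checks that every per-block address map only uses keys present in that mapping. A hedged sketch of that invariant (illustrative only, not the package's code):

    import uuid

    quantum_ids = [uuid.uuid4() for _ in range(4)]

    # Integer ID == position of the UUID in the sorted list of all UUIDs.
    indices = {key: n for n, key in enumerate(sorted(quantum_ids, key=lambda u: u.int))}

    # Every per-block address map may only reference known UUIDs.
    address_map = {quantum_ids[0]: 128, quantum_ids[2]: 4096}
    assert indices.keys() >= address_map.keys()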

lsst/pipe/base/quantum_graph/_predicted.py

@@ -49,7 +49,7 @@ import warnings
 from collections import defaultdict
 from collections.abc import Iterable, Iterator, Mapping, Sequence
 from contextlib import AbstractContextManager, contextmanager
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any, TypeVar, cast
 
 import networkx
 import networkx.algorithms.bipartite
@@ -66,7 +66,6 @@ from lsst.daf.butler import (
     DimensionDataExtractor,
     DimensionGroup,
     DimensionRecordSetDeserializer,
-    DimensionUniverse,
     LimitedButler,
     Quantum,
     QuantumBackedButler,
@@ -110,14 +109,10 @@ if TYPE_CHECKING:
     from ..config import PipelineTaskConfig
     from ..graph import QgraphSummary, QuantumGraph
 
-    # Sphinx needs imports for type annotations of base class members.
-    if "sphinx" in sys.modules:
-        import zipfile  # noqa: F401
-
-        from ._multiblock import AddressReader, Decompressor  # noqa: F401
+_LOG = logging.getLogger(__name__)
 
 
-_LOG = logging.getLogger(__name__)
+_T = TypeVar("_T", bound=pydantic.BaseModel)
 
 
 class _PredictedThinQuantumModelV0(pydantic.BaseModel):
@@ -882,49 +877,6 @@ class PredictedQuantumGraph(BaseQuantumGraph):
             page_size=page_size,
         ).assemble()
 
-    @classmethod
-    def make_empty(
-        cls,
-        universe: DimensionUniverse,
-        *,
-        output_run: str,
-        inputs: Iterable[str] = (),
-        output: str | None = None,
-        add_packages: bool = True,
-    ) -> PredictedQuantumGraph:
-        """Make an empty quantum graph with no tasks.
-
-        Parameters
-        ----------
-        universe : `lsst.daf.butler.DimensionUniverse`
-            Definitions for all butler dimensions.
-        output_run : `str`
-            Output run collection.
-        inputs : `~collections.abc.Iterable` [`str`], optional
-            Iterable of input collection names.
-        output : `str` or `None`, optional
-            Output chained collection.
-        add_packages : `bool`, optional
-            Whether to add the special init quantum that writes the 'packages'
-            dataset. The default (`True`) is consistent with
-            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
-            are no regular quanta generated.
-
-        Returns
-        -------
-        quantum_graph : `PredictedQuantumGraph`
-            An empty quantum graph.
-        """
-        return cls(
-            PredictedQuantumGraphComponents.make_empty(
-                universe,
-                output_run=output_run,
-                inputs=inputs,
-                output=output,
-                add_packages=add_packages,
-            )
-        )
-
     @property
     def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
         """A nested mapping of all quanta, keyed first by task name and then by
@@ -1589,63 +1541,6 @@ class PredictedQuantumGraphComponents:
     This does not include special "init" quanta.
     """
 
-    @classmethod
-    def make_empty(
-        cls,
-        universe: DimensionUniverse,
-        *,
-        output_run: str,
-        inputs: Iterable[str] = (),
-        output: str | None = None,
-        add_packages: bool = True,
-    ) -> PredictedQuantumGraphComponents:
-        """Make components for an empty quantum graph with no tasks.
-
-        Parameters
-        ----------
-        universe : `lsst.daf.butler.DimensionUniverse`
-            Definitions for all butler dimensions.
-        output_run : `str`
-            Output run collection.
-        inputs : `~collections.abc.Iterable` [`str`], optional
-            Iterable of input collection names.
-        output : `str` or `None`, optional
-            Output chained collection.
-        add_packages : `bool`, optional
-            Whether to add the special init quantum that writes the 'packages'
-            dataset. The default (`True`) is consistent with
-            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
-            are no regular quanta generated.
-
-        Returns
-        -------
-        components : `PredictedQuantumGraphComponents`
-            Components that can be used to build or write an empty quantum
-            graph.
-        """
-        components = cls(pipeline_graph=PipelineGraph(universe=universe))
-        components.header.inputs = list(inputs)
-        components.header.output_run = output_run
-        components.header.output = output
-        if add_packages:
-            components.init_quanta.root = [
-                PredictedQuantumDatasetsModel.model_construct(
-                    quantum_id=generate_uuidv7(),
-                    task_label="",
-                    outputs={
-                        acc.PACKAGES_INIT_OUTPUT_NAME: [
-                            PredictedDatasetModel(
-                                dataset_id=generate_uuidv7(),
-                                dataset_type_name=acc.PACKAGES_INIT_OUTPUT_NAME,
-                                data_coordinate=[],
-                                run=output_run,
-                            )
-                        ]
-                    },
-                )
-            ]
-        return components
-
     def make_dataset_ref(self, predicted: PredictedDatasetModel) -> DatasetRef:
         """Make a `lsst.daf.butler.DatasetRef` from information in the
         predicted quantum graph.
@@ -1898,6 +1793,7 @@ class PredictedQuantumGraphComponents:
                 f"Unsupported extension {ext!r} for quantum graph; "
                 "expected '.qg' (or '.qgraph' to force the old format)."
             )
+        cdict: zstandard.ZstdCompressionDict | None = None
         cdict_data: bytes | None = None
         quantum_datasets_json: dict[uuid.UUID, bytes] = {}
         if len(self.quantum_datasets) < zstd_dict_n_inputs:
@@ -1911,20 +1807,26 @@ class PredictedQuantumGraphComponents:
                for quantum_model in itertools.islice(self.quantum_datasets.values(), zstd_dict_n_inputs)
            }
            try:
-                cdict_data = zstandard.train_dictionary(
+                cdict = zstandard.train_dictionary(
                    zstd_dict_size,
                    list(quantum_datasets_json.values()),
                    level=zstd_level,
-                ).as_bytes()
+                )
            except zstandard.ZstdError as err:
                warnings.warn(f"Not using a compression dictionary: {err}.")
+                cdict = None
+            else:
+                cdict_data = cdict.as_bytes()
+        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
+        indices = {quantum_id: n for n, quantum_id in enumerate(sorted(self.quantum_datasets.keys()))}
        with BaseQuantumGraphWriter.open(
            uri,
            header=self.header,
            pipeline_graph=self.pipeline_graph,
+            indices=indices,
            address_filename="quanta",
+            compressor=compressor,
            cdict_data=cdict_data,
-            zstd_level=zstd_level,
        ) as writer:
            writer.write_single_model("thin_graph", self.thin_graph)
            if self.dimension_data is None:
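
Note: in the new write path the caller trains the optional zstd dictionary, builds the `ZstdCompressor`, and computes the quantum-ID `indices` before handing all three to `BaseQuantumGraphWriter.open`, instead of the writer deriving them itself. An illustrative use of the same zstandard calls, with made-up sample data and sizes:

    import zstandard

    samples = [b'{"quantum": %d}' % n for n in range(200)]

    try:
        cdict = zstandard.train_dictionary(4096, samples, level=10)
        cdict_data = cdict.as_bytes()
    except zstandard.ZstdError:
        cdict, cdict_data = None, None  # fall back to dictionary-less compression

    compressor = zstandard.ZstdCompressor(level=10, dict_data=cdict)
    compressed = compressor.compress(samples[0])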