lsst-pipe-base 30.0.0rc2__py3-none-any.whl → 30.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +31 -20
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +43 -10
- lsst/pipe/base/_task_metadata.py +2 -2
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
- lsst/pipe/base/automatic_connection_constants.py +20 -1
- lsst/pipe/base/cli/cmd/__init__.py +18 -2
- lsst/pipe/base/cli/cmd/commands.py +149 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +6 -9
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/graph/graph.py +11 -10
- lsst/pipe/base/graph/quantumNode.py +4 -4
- lsst/pipe/base/graph_walker.py +8 -10
- lsst/pipe/base/log_capture.py +40 -80
- lsst/pipe/base/log_on_close.py +76 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/pipeline.py +5 -6
- lsst/pipe/base/pipelineIR.py +2 -8
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +32 -22
- lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
- lsst/pipe/base/pipeline_graph/expressions.py +2 -2
- lsst/pipe/base/pipeline_graph/io.py +7 -10
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
- lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
- lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
- lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
- lsst/pipe/base/prerequisite_helpers.py +2 -1
- lsst/pipe/base/quantum_graph/_common.py +19 -20
- lsst/pipe/base/quantum_graph/_multiblock.py +37 -31
- lsst/pipe/base/quantum_graph/_predicted.py +113 -15
- lsst/pipe/base/quantum_graph/_provenance.py +1136 -45
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +204 -289
- lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -235
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +29 -39
- lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +34 -351
- lsst/pipe/base/quantum_graph/formatter.py +171 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
- lsst/pipe/base/quantum_graph/visualization.py +5 -1
- lsst/pipe/base/quantum_graph_builder.py +33 -9
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_graph_skeleton.py +31 -35
- lsst/pipe/base/quantum_provenance_graph.py +29 -12
- lsst/pipe/base/separable_pipeline_executor.py +19 -3
- lsst/pipe/base/single_quantum_executor.py +67 -42
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/testUtils.py +3 -3
- lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/METADATA +3 -3
- lsst_pipe_base-30.0.1.dist-info/RECORD +129 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/WHEEL +1 -1
- lsst_pipe_base-30.0.0rc2.dist-info/RECORD +0 -125
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1.dist-info}/zip-safe +0 -0
@@ -33,7 +33,7 @@ import itertools
 import json
 import logging
 from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
-from typing import TYPE_CHECKING, Any, BinaryIO, Literal,
+from typing import TYPE_CHECKING, Any, BinaryIO, Literal, cast
 
 import networkx
 import networkx.algorithms.bipartite
@@ -79,9 +79,6 @@ if TYPE_CHECKING:
     from ..pipeline import TaskDef
     from ..pipelineTask import PipelineTask
 
-
-_G = TypeVar("_G", bound=networkx.DiGraph | networkx.MultiDiGraph)
-
 _LOG = logging.getLogger("lsst.pipe.base.pipeline_graph")
 
 
@@ -897,6 +894,10 @@ class PipelineGraph:
             New config objects or overrides to apply to copies of the current
             config objects, with task labels as the keywords.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         ValueError
@@ -1632,7 +1633,7 @@ class PipelineGraph:
 
         Returns
         -------
-        subgraphs :
+        subgraphs : `~collections.abc.Iterable` [ `PipelineGraph` ]
             An iterable over component subgraphs that could be run
             independently (they have only overall inputs in common). May be a
             lazy iterator.
@@ -1755,6 +1756,10 @@ class PipelineGraph:
            not considered part of the pipeline graph in other respects, but it
            does get written with other provenance datasets).
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.MissingDatasetTypeError
@@ -2179,7 +2184,9 @@ class PipelineGraph:
         ]
         return networkx.algorithms.bipartite.projected_graph(networkx.DiGraph(bipartite_xgraph), task_keys)
 
-    def _transform_xgraph_state
+    def _transform_xgraph_state[G: networkx.DiGraph | networkx.MultiDiGraph](
+        self, xgraph: G, skip_edges: bool
+    ) -> G:
         """Transform networkx graph attributes in-place from the internal
         "instance" attributes to the documented exported attributes.
 
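Several of the changes above replace module-level `TypeVar` declarations with PEP 695 inline type parameters (Python 3.12+). A minimal sketch of that pattern, using an illustrative function rather than the package's actual method:

```python
import networkx


# Before: a module-level TypeVar bound to the allowed graph types.
#     _G = TypeVar("_G", bound=networkx.DiGraph | networkx.MultiDiGraph)
#     def relabel(xgraph: _G) -> _G: ...
#
# After: the type parameter is declared inline on the function (PEP 695).
def relabel[G: networkx.DiGraph | networkx.MultiDiGraph](xgraph: G) -> G:
    """Return a copy of ``xgraph`` with stringified node labels."""
    return networkx.relabel_nodes(xgraph, {n: str(n) for n in xgraph.nodes})
```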
@@ -2228,7 +2235,7 @@ class PipelineGraph:
 
         Parameters
         ----------
-        updates :
+        updates : `~collections.abc.Mapping` [ `str`, `TaskNode` ]
             New task nodes with task label keys. All keys must be task labels
             that are already present in the graph.
         check_edges_unchanged : `bool`, optional
@@ -43,7 +43,7 @@ __all__ = (
 
 import dataclasses
 import functools
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
 from lsst.daf.butler.queries.expressions.parser.ply import lex, yacc
 
@@ -268,4 +268,4 @@ def parse(expression: str) -> Node:
     return _ParserYacc().parse(expression)
 
 
-Node
+type Node = IdentifierNode | DirectionNode | NotNode | UnionNode | IntersectionNode
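The old `TypeAlias` assignments become PEP 695 `type` statements. A minimal sketch of the same construct, with two stand-in node classes rather than the parser's real AST types:

```python
from dataclasses import dataclass


@dataclass
class IdentifierNode:
    name: str


@dataclass
class NotNode:
    operand: "Node"


# Before (pre-3.12):  Node: TypeAlias = IdentifierNode | NotNode
# After: a lazily evaluated `type` statement (PEP 695).
type Node = IdentifierNode | NotNode
```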
@@ -33,11 +33,10 @@ __all__ = (
     "SerializedTaskInitNode",
     "SerializedTaskNode",
     "SerializedTaskSubset",
-    "expect_not_none",
 )
 
 from collections.abc import Mapping
-from typing import Any
+from typing import Any
 
 import networkx
 import pydantic
@@ -53,14 +52,12 @@ from ._pipeline_graph import PipelineGraph
 from ._task_subsets import StepDefinitions, TaskSubset
 from ._tasks import TaskImportMode, TaskInitNode, TaskNode
 
-_U = TypeVar("_U")
-
 _IO_VERSION_INFO = (0, 0, 1)
 """Version tuple embedded in saved PipelineGraphs.
 """
 
 
-def
+def _expect_not_none[U](value: U | None, msg: str) -> U:
     """Check that a value is not `None` and return it.
 
     Parameters
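The public `expect_not_none` helper is replaced by a private `_expect_not_none` with an inline type parameter. Its body is not shown in this diff, so the sketch below is an assumption about what a helper with this signature typically does:

```python
def _expect_not_none[U](value: U | None, msg: str) -> U:
    """Check that a value is not `None` and return it."""
    if value is None:
        # Assumed error type; the diff does not show which exception is raised.
        raise ValueError(msg)
    return value


# Typical call site, mirroring the deserialization code in the hunks below:
# config_str = _expect_not_none(self.config_str, "No serialized config file ...")
```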
@@ -418,7 +415,7 @@ class SerializedTaskNode(pydantic.BaseModel):
         init = self.init.deserialize(
             init_key,
             task_class_name=self.task_class,
-            config_str=
+            config_str=_expect_not_none(
                 self.config_str, f"No serialized config file for task with label {key.name!r}."
             ),
             dataset_type_keys=dataset_type_keys,
@@ -547,16 +544,16 @@ class SerializedDatasetTypeNode(pydantic.BaseModel):
         if self.dimensions is not None:
             dataset_type = DatasetType(
                 key.name,
-
+                _expect_not_none(
                     self.dimensions,
                     f"Serialized dataset type {key.name!r} has no dimensions.",
                 ),
-                storageClass=
+                storageClass=_expect_not_none(
                     self.storage_class,
                     f"Serialized dataset type {key.name!r} has no storage class.",
                 ),
                 isCalibration=self.is_calibration,
-                universe=
+                universe=_expect_not_none(
                     universe,
                     f"Serialized dataset type {key.name!r} has dimensions, "
                     "but no dimension universe was stored.",
@@ -747,7 +744,7 @@ class SerializedPipelineGraph(pydantic.BaseModel):
         if self.dimensions is not None:
             universe = DimensionUniverse(
                 config=DimensionConfig(
-
+                    _expect_not_none(
                         self.dimensions,
                         "Serialized pipeline graph has not been resolved; "
                         "load it is a MutablePipelineGraph instead.
@@ -66,7 +66,7 @@ def show_dot(
     ----------
     pipeline_graph : `PipelineGraph`
         Pipeline graph to show.
-    stream : `TextIO`, optional
+    stream : `io.TextIO`, optional
         Stream to write the DOT representation to.
     label_edge_connections : `bool`, optional
         If `True`, label edges with their connection names.
@@ -167,21 +167,22 @@ def _render_dataset_type_node(
 
     Parameters
     ----------
-    node_key : NodeKey
-        The key for the node
-    node_data : Mapping[str
-        The data associated with the node
-    options : NodeAttributeOptions
-        Options for rendering the node
-    stream : TextIO
-        The stream to write the node to
+    node_key : `NodeKey`
+        The key for the node.
+    node_data : `~collections.abc.Mapping` [`str`, `typing.Any`]
+        The data associated with the node.
+    options : `NodeAttributeOptions`
+        Options for rendering the node.
+    stream : `io.TextIO`
+        The stream to write the node to.
+    overflow_ref : `int`, optional
 
     Returns
     -------
     overflow_ref : int
-        The reference number for the next overflow node
+        The reference number for the next overflow node.
     overflow_ids : str | None
-        The ID of the overflow node, if any
+        The ID of the overflow node, if any.
     """
     labels, label_extras, common_prefix = _format_label(str(node_key), _LABEL_MAX_LINES_SOFT)
     if len(labels) + len(label_extras) <= _LABEL_MAX_LINES_HARD:
@@ -271,7 +272,7 @@ def _render_edge(from_node_id: str, to_node_id: str, stream: TextIO, **kwargs: A
         The unique ID of the node the edge is going to
     stream : TextIO
         The stream to write the edge to
-    kwargs : Any
+    **kwargs : Any
         Additional keyword arguments to pass to the edge
     """
     if kwargs:
@@ -30,7 +30,7 @@ __all__ = ("ColumnSelector", "Layout", "LayoutRow")
 
 import dataclasses
 from collections.abc import Iterable, Iterator, Mapping, Set
-from typing import
+from typing import TextIO
 
 import networkx
 import networkx.algorithms.components
@@ -38,10 +38,8 @@ import networkx.algorithms.dag
 import networkx.algorithms.shortest_paths
 import networkx.algorithms.traversal
 
-_K = TypeVar("_K")
 
-
-class Layout(Generic[_K]):
+class Layout[K]:
     """A class that positions nodes and edges in text-art graph visualizations.
 
     Parameters
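The `Generic[_K]` base class is replaced by a PEP 695 class-level type parameter. A minimal, illustrative sketch of that syntax (not the real `Layout` internals):

```python
class Layout[K]:
    """Assign hashable node keys of type ``K`` to text-art columns."""

    def __init__(self) -> None:
        # Column assigned to each node key.
        self._locations: dict[K, int] = {}

    def add(self, node: K, column: int) -> None:
        self._locations[node] = column

    def nodes(self) -> list[K]:
        """Node keys in the order they were added."""
        return list(self._locations)


layout = Layout[str]()
layout.add("isr", 0)
layout.add("calibrate", 1)
```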
@@ -73,9 +71,9 @@ class Layout(Generic[_K]):
         # to be close to that text when possible (or maybe it's historical, and
         # it's just a lot of work to re-invert the algorithm now that it's
         # written).
-        self._active_columns: dict[int, set[
+        self._active_columns: dict[int, set[K]] = {}
         # Mapping from node key to its column.
-        self._locations: dict[
+        self._locations: dict[K, int] = {}
         # Minimum and maximum column (may go negative; will be shifted as
         # needed before actual display).
         self._x_min = 0
@@ -116,7 +114,7 @@ class Layout(Generic[_K]):
         for component_xgraph, component_order in component_xgraphs_and_orders:
             self._add_connected_graph(component_xgraph, component_order)
 
-    def _add_single_node(self, node:
+    def _add_single_node(self, node: K) -> None:
         """Add a single node to the layout."""
         assert node not in self._locations
         if not self._locations:
@@ -184,7 +182,7 @@ class Layout(Generic[_K]):
         return x + 1
 
     def _add_connected_graph(
-        self, xgraph: networkx.DiGraph | networkx.MultiDiGraph, order: list[
+        self, xgraph: networkx.DiGraph | networkx.MultiDiGraph, order: list[K] | None = None
     ) -> None:
         """Add a subgraph whose nodes are connected.
 
@@ -202,7 +200,7 @@ class Layout(Generic[_K]):
         # "backbone" of our layout; we'll step through this path and add
         # recurse via calls to `_add_graph` on the nodes that we think should
         # go between the backbone nodes.
-        backbone: list[
+        backbone: list[K] = networkx.algorithms.dag.dag_longest_path(xgraph, topo_order=order)
         # Add the first backbone node and any ancestors according to the full
         # graph (it can't have ancestors in this _subgraph_ because they'd have
         # been part of the longest path themselves, but the subgraph doesn't
@@ -237,7 +235,7 @@ class Layout(Generic[_K]):
         remaining.remove_nodes_from(self._locations.keys())
         self._add_graph(remaining)
 
-    def _add_blockers_of(self, node:
+    def _add_blockers_of(self, node: K) -> None:
         """Add all nodes that are ancestors of the given node according to the
         full graph.
         """
@@ -251,7 +249,7 @@ class Layout(Generic[_K]):
         return (self._x_max - self._x_min) // 2
 
     @property
-    def nodes(self) -> Iterable[
+    def nodes(self) -> Iterable[K]:
         """The graph nodes in the order they appear in the layout."""
         return self._locations.keys()
 
@@ -277,7 +275,7 @@ class Layout(Generic[_K]):
         return (self._x_max - x) // 2
 
     def __iter__(self) -> Iterator[LayoutRow]:
-        active_edges: dict[
+        active_edges: dict[K, set[K]] = {}
         for node, node_x in self._locations.items():
             row = LayoutRow(node, self._external_location(node_x))
             for origin, destinations in active_edges.items():
@@ -295,20 +293,20 @@ class Layout(Generic[_K]):
 
 
 @dataclasses.dataclass
-class LayoutRow
+class LayoutRow[K]:
     """Information about a single text-art row in a graph."""
 
-    node:
+    node: K
     """Key for the node in the exported NetworkX graph."""
 
     x: int
     """Column of the node's symbol and its outgoing edges."""
 
-    connecting: list[tuple[int,
+    connecting: list[tuple[int, K]] = dataclasses.field(default_factory=list)
     """The columns and node keys of edges that terminate at this row.
     """
 
-    continuing: list[tuple[int,
+    continuing: list[tuple[int, K, frozenset[K]]] = dataclasses.field(default_factory=list)
     """The columns and node keys of edges that continue through this row.
     """
 
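`LayoutRow` becomes a generic dataclass. A minimal sketch of that combination, with illustrative field names and values:

```python
import dataclasses


@dataclasses.dataclass
class Row[K]:
    node: K
    x: int
    # Mutable defaults must go through dataclasses.field(default_factory=...).
    connecting: list[tuple[int, K]] = dataclasses.field(default_factory=list)


row = Row(node="visit/903342", x=0)
row.connecting.append((2, "detector/42"))
```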
@@ -337,11 +335,11 @@ class ColumnSelector:
         out in that case because it's applied to all candidate columns.
         """
 
-    def __call__(
+    def __call__[K](
         self,
         connecting_x: list[int],
         node_x: int,
-        active_columns: Mapping[int, Set[
+        active_columns: Mapping[int, Set[K]],
         x_min: int,
         x_max: int,
     ) -> int:
@@ -38,7 +38,7 @@ import hashlib
 from collections import defaultdict
 from collections.abc import Iterable
 from functools import cached_property
-from typing import Any
+from typing import Any
 
 import networkx
 import networkx.algorithms.dag
@@ -49,9 +49,6 @@ from lsst.daf.butler import DimensionGroup
 from .._nodes import NodeKey, NodeType
 from ._options import NodeAttributeOptions
 
-_P = TypeVar("_P")
-_C = TypeVar("_C")
-
 
 class MergedNodeKey(frozenset[NodeKey]):
     """A key for NetworkX graph nodes that represent multiple similar tasks
@@ -225,11 +222,11 @@ class _MergeKey:
     """
 
     @classmethod
-    def from_node_state(
+    def from_node_state[P, C](
         cls,
         state: dict[str, Any],
-        parents: Iterable[
-        children: Iterable[
+        parents: Iterable[P],
+        children: Iterable[C],
         options: NodeAttributeOptions,
     ) -> _MergeKey:
         """Construct from a NetworkX node attribute state dictionary.
@@ -30,9 +30,9 @@ __all__ = ("Printer", "make_colorama_printer", "make_default_printer", "make_sim
 
 import sys
 from collections.abc import Callable, Sequence
-from typing import
+from typing import TextIO
 
-from ._layout import
+from ._layout import Layout, LayoutRow
 
 _CHAR_DECOMPOSITION = {
     # This mapping provides the "logic" for how to decompose the relevant
@@ -170,7 +170,7 @@ class PrintRow:
         return "".join(self._cells)
 
 
-def _default_get_text(node:
+def _default_get_text[K](node: K, x: int, style: tuple[str, str]) -> str:
     """Return the default text to associate with a node.
 
     This function is the default value for the ``get_text`` argument to
@@ -179,7 +179,7 @@ def _default_get_text(node: _K, x: int, style: tuple[str, str]) -> str:
     return str(node)
 
 
-def _default_get_symbol(node:
+def _default_get_symbol[K](node: K, x: int) -> str:
     """Return the default symbol for a node.
 
     This function is the default value for the ``get_symbol`` argument to
@@ -188,7 +188,7 @@ def _default_get_symbol(node: _K, x: int) -> str:
     return "⬤"
 
 
-def _default_get_style(node:
+def _default_get_style[K](node: K, x: int) -> tuple[str, str]:
     """Get the default styling suffix/prefix strings.
 
     This function is the default value for the ``get_style`` argument to
@@ -197,7 +197,7 @@ def _default_get_style(node: _K, x: int) -> tuple[str, str]:
     return "", ""
 
 
-class Printer
+class Printer[K]:
     """High-level tool for drawing a text-based DAG visualization.
 
     Parameters
@@ -231,9 +231,9 @@ class Printer(Generic[_K]):
         *,
         pad: str = " ",
         make_blank_row: Callable[[int, str], PrintRow] = PrintRow,
-        get_text: Callable[[
-        get_symbol: Callable[[
-        get_style: Callable[[
+        get_text: Callable[[K, int, tuple[str, str]], str] = _default_get_text,
+        get_symbol: Callable[[K, int], str] = _default_get_symbol,
+        get_style: Callable[[K, int], tuple[str, str]] = _default_get_style,
     ):
         self.width = layout_width * 2 + 1
         self.pad = pad
@@ -245,7 +245,7 @@ class Printer(Generic[_K]):
     def print_row(
         self,
         stream: TextIO,
-        layout_row: LayoutRow[
+        layout_row: LayoutRow[K],
     ) -> None:
         """Print a single row of the DAG visualization to a file-like object.
 
@@ -200,6 +200,13 @@ class QuantumGraphExecutionStatusAnnotator:
     """Annotates a networkx graph with task and dataset status information from
     a quantum graph execution summary, implementing the StatusAnnotator
     protocol to update the graph with status data.
+
+    Parameters
+    ----------
+    *args : `typing.Any`
+        Arbitrary arguments.
+    **kwargs : `typing.Any`
+        Arbitrary keyword arguments.
     """
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
@@ -252,7 +252,8 @@ class PrerequisiteFinder:
             Sequence of collections to search, in order.
         data_id : `lsst.daf.butler.DataCoordinate`
             Data ID for the quantum.
-        skypix_bounds :
+        skypix_bounds : `~collections.abc.Mapping` \
+                [ `str`, `lsst.sphgeom.RangeSet` ]
             The spatial bounds of this quantum in various skypix dimensions.
             Keys are skypix dimension names (a superset of those in
             `dataset_skypix`) and values are sets of integer pixel ID ranges.
@@ -50,9 +50,7 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Self,
-    TypeAlias,
     TypedDict,
-    TypeVar,
 )
 
 import networkx
@@ -81,18 +79,16 @@ if TYPE_CHECKING:
 # These aliases make it a lot easier how the various pydantic models are
 # structured, but they're too verbose to be worth exporting to code outside the
 # quantum_graph subpackage.
-TaskLabel
-DatasetTypeName
-ConnectionName
-DatasetIndex
-QuantumIndex
-DatastoreName
-DimensionElementName
-DataCoordinateValues
+type TaskLabel = str
+type DatasetTypeName = str
+type ConnectionName = str
+type DatasetIndex = int
+type QuantumIndex = int
+type DatastoreName = str
+type DimensionElementName = str
+type DataCoordinateValues = list[DataIdValue]
 
 
-_T = TypeVar("_T", bound=pydantic.BaseModel)
-
 FORMAT_VERSION: int = 1
 """
 File format version number for new files.
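Because PEP 695 `type` aliases are evaluated lazily, their right-hand sides can name types that are only imported under `if TYPE_CHECKING:` (as `DataIdValue` appears to be here) without failing at import time. A minimal sketch of that behaviour, with a hypothetical module path standing in for the real import:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Checker-only import; never executed at runtime (hypothetical module path).
    from some_heavy_package import DataIdValue

# The right-hand side is not evaluated at definition time, so this does not
# raise NameError on import; only introspecting DataCoordinateValues.__value__
# at runtime would need DataIdValue to exist.
type DataCoordinateValues = list[DataIdValue]
```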
@@ -448,14 +444,17 @@ class BaseQuantumGraphWriter:
         uri: ResourcePathExpression,
         header: HeaderModel,
         pipeline_graph: PipelineGraph,
-        indices: dict[uuid.UUID, int],
         *,
         address_filename: str,
-        compressor: Compressor,
         cdict_data: bytes | None = None,
+        zstd_level: int = 10,
     ) -> Iterator[Self]:
-        uri = ResourcePath(uri)
-        address_writer = AddressWriter(
+        uri = ResourcePath(uri, forceDirectory=False)
+        address_writer = AddressWriter()
+        if uri.isLocal:
+            os.makedirs(uri.dirname().ospath, exist_ok=True)
+        cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
+        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
         with uri.open(mode="wb") as stream:
             with zipfile.ZipFile(stream, mode="w", compression=zipfile.ZIP_STORED) as zf:
                 self = cls(zf, compressor, address_writer, header.int_size)
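The writer now builds its own zstandard compressor from an optional pre-trained dictionary and a compression level, instead of accepting a ready-made `Compressor`. A minimal sketch of that construction (values illustrative):

```python
import zstandard

cdict_data: bytes | None = None  # optional serialized compression dictionary
zstd_level = 10

cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)

payload = compressor.compress(b'{"quanta": []}')
# A matching decompressor must be constructed with the same dictionary (if any).
restored = zstandard.ZstdDecompressor(dict_data=cdict).decompress(payload)
assert restored == b'{"quanta": []}'
```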
@@ -594,9 +593,9 @@ class BaseQuantumGraphReader:
         )
 
     @staticmethod
-    def _read_single_block_static(
-        name: str, model_type: type[
-    ) ->
+    def _read_single_block_static[T: pydantic.BaseModel](
+        name: str, model_type: type[T], zf: zipfile.ZipFile, decompressor: Decompressor
+    ) -> T:
         """Read a single compressed JSON block from a 'file' in a zip archive.
 
         Parameters
@@ -619,7 +618,7 @@ class BaseQuantumGraphReader:
         json_data = decompressor.decompress(compressed_data)
         return model_type.model_validate_json(json_data)
 
-    def _read_single_block(self, name: str, model_type: type[
+    def _read_single_block[T: pydantic.BaseModel](self, name: str, model_type: type[T]) -> T:
         """Read a single compressed JSON block from a 'file' in a zip archive.
 
         Parameters
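These reader methods now use a bounded inline type parameter (`T` bound to `pydantic.BaseModel`), so the model class passed in determines the return type. A self-contained sketch of the same pattern; the `Header` model and `read_block` function are illustrative, not part of the package:

```python
import pydantic
import zstandard


class Header(pydantic.BaseModel):
    version: int
    n_quanta: int


def read_block[T: pydantic.BaseModel](data: bytes, model_type: type[T]) -> T:
    json_data = zstandard.ZstdDecompressor().decompress(data)
    return model_type.model_validate_json(json_data)


blob = zstandard.ZstdCompressor().compress(Header(version=1, n_quanta=3).model_dump_json().encode())
header = read_block(blob, Header)  # statically typed as Header
```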
@@ -43,25 +43,22 @@ import dataclasses
 import logging
 import tempfile
 import uuid
-
+import zipfile
+from collections.abc import Iterator, Set
 from contextlib import contextmanager
 from io import BufferedReader, BytesIO
 from operator import attrgetter
-from typing import IO,
+from typing import IO, Protocol, TypeVar
 
 import pydantic
 
-if TYPE_CHECKING:
-    import zipfile
-
-
 _LOG = logging.getLogger(__name__)
 
 
 _T = TypeVar("_T", bound=pydantic.BaseModel)
 
 
-UUID_int
+type UUID_int = int
 
 MAX_UUID_INT: UUID_int = 2**128
 
@@ -77,7 +74,7 @@ individual quanta (especially for execution).
 
 
 class Compressor(Protocol):
-    """A protocol for objects with a
+    """A protocol for objects with a ``compress`` method that takes and returns
     `bytes`.
     """
 
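`Compressor` (and, by the same convention, its `Decompressor` counterpart) is a `typing.Protocol`, so any object with a matching method satisfies it structurally. A minimal sketch showing why a `zstandard` compressor can be passed wherever the protocol is expected:

```python
from typing import Protocol

import zstandard


class Compressor(Protocol):
    """A protocol for objects with a ``compress`` method that takes and returns
    `bytes`.
    """

    def compress(self, data: bytes, /) -> bytes: ...


def write_block(compressor: Compressor, payload: bytes) -> bytes:
    # Works for any object that structurally provides compress(bytes) -> bytes.
    return compressor.compress(payload)


compressed = write_block(zstandard.ZstdCompressor(level=10), b"example payload")
```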
@@ -205,21 +202,14 @@ class AddressRow:
 class AddressWriter:
     """A helper object for writing address files for multi-block files."""
 
-    indices: dict[uuid.UUID, int] = dataclasses.field(default_factory=dict)
-    """Mapping from UUID to internal integer ID.
-
-    The internal integer ID must always correspond to the index into the
-    sorted list of all UUIDs, but this `dict` need not be sorted itself.
-    """
-
     addresses: list[dict[uuid.UUID, Address]] = dataclasses.field(default_factory=list)
     """Addresses to store with each UUID.
 
-    Every key in one of these dictionaries must have an entry in
+    Every key in one of these dictionaries must have an entry in ``indices``.
     The converse is not true.
     """
 
-    def write(self, stream: IO[bytes], int_size: int) -> None:
+    def write(self, stream: IO[bytes], int_size: int, all_ids: Set[uuid.UUID] | None = None) -> None:
         """Write all addresses to a file-like object.
 
         Parameters
@@ -228,19 +218,18 @@ class AddressWriter:
             Binary file-like object.
         int_size : `int`
             Number of bytes to use for all integers.
+        all_ids : `~collections.abc.Set` [`uuid.UUID`], optional
+            Set of the union of all UUIDs in any dictionary from a call to
+            `get_all_ids`.
         """
-
-
-                raise AssertionError(
-                    f"Logic bug in quantum graph I/O: address map {n} of {len(self.addresses)} has IDs "
-                    f"{address_map.keys() - self.indices.keys()} not in the index map."
-                )
+        if all_ids is None:
+            all_ids = self.get_all_ids()
         stream.write(int_size.to_bytes(1))
-        stream.write(len(
+        stream.write(len(all_ids).to_bytes(int_size))
         stream.write(len(self.addresses).to_bytes(int_size))
         empty_address = Address()
-        for key in sorted(
-        row = AddressRow(key,
+        for n, key in enumerate(sorted(all_ids, key=attrgetter("int"))):
+            row = AddressRow(key, n, [m.get(key, empty_address) for m in self.addresses])
         _LOG.debug("Wrote address %s.", row)
         row.write(stream, int_size)
 
|
|
|
256
245
|
int_size : `int`
|
|
257
246
|
Number of bytes to use for all integers.
|
|
258
247
|
"""
|
|
259
|
-
|
|
260
|
-
|
|
248
|
+
all_ids = self.get_all_ids()
|
|
249
|
+
zip_info = zipfile.ZipInfo(f"{name}.addr")
|
|
250
|
+
row_size = AddressReader.compute_row_size(int_size, len(self.addresses))
|
|
251
|
+
zip_info.file_size = AddressReader.compute_header_size(int_size) + len(all_ids) * row_size
|
|
252
|
+
with zf.open(zip_info, mode="w") as stream:
|
|
253
|
+
self.write(stream, int_size=int_size, all_ids=all_ids)
|
|
254
|
+
|
|
255
|
+
def get_all_ids(self) -> Set[uuid.UUID]:
|
|
256
|
+
"""Return all IDs used by any address dictionary.
|
|
257
|
+
|
|
258
|
+
Returns
|
|
259
|
+
-------
|
|
260
|
+
all_ids : `~collections.abc.Set` [`uuid.UUID`]
|
|
261
|
+
Set of all IDs.
|
|
262
|
+
"""
|
|
263
|
+
all_ids: set[uuid.UUID] = set()
|
|
264
|
+
for address_map in self.addresses:
|
|
265
|
+
all_ids.update(address_map.keys())
|
|
266
|
+
return all_ids
|
|
261
267
|
|
|
262
268
|
|
|
263
269
|
@dataclasses.dataclass
|
|
@@ -656,7 +662,7 @@ class MultiblockWriter:
|
|
|
656
662
|
model : `pydantic.BaseModel`
|
|
657
663
|
Model to convert to JSON and compress.
|
|
658
664
|
compressor : `Compressor`
|
|
659
|
-
Object with a
|
|
665
|
+
Object with a ``compress`` method that takes and returns `bytes`.
|
|
660
666
|
|
|
661
667
|
Returns
|
|
662
668
|
-------
|
|
@@ -753,7 +759,7 @@ class MultiblockReader:
|
|
|
753
759
|
model_type : `type` [ `pydantic.BaseModel` ]
|
|
754
760
|
Pydantic model to validate JSON with.
|
|
755
761
|
decompressor : `Decompressor`
|
|
756
|
-
Object with a
|
|
762
|
+
Object with a ``decompress`` method that takes and returns `bytes`.
|
|
757
763
|
int_size : `int`
|
|
758
764
|
Number of bytes to use for all integers.
|
|
759
765
|
page_size : `int`
|
|
@@ -803,7 +809,7 @@ class MultiblockReader:
|
|
|
803
809
|
model_type : `type` [ `pydantic.BaseModel` ]
|
|
804
810
|
Pydantic model to validate JSON with.
|
|
805
811
|
decompressor : `Decompressor`
|
|
806
|
-
Object with a
|
|
812
|
+
Object with a ``decompress`` method that takes and returns `bytes`.
|
|
807
813
|
|
|
808
814
|
Returns
|
|
809
815
|
-------
|