PyPI - lsst-pipe-base - Versions diffs - 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl - Mend

lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

lsst/pipe/base/__init__.py CHANGED Viewed

@@ -11,7 +11,6 @@ from ._status import *
 from ._task_metadata import *
 from .config import *
 from .connections import *
-from .executionButlerBuilder import *
 from .graph import *
 from .pipeline import *

lsst/pipe/base/_datasetQueryConstraints.py CHANGED Viewed

@@ -26,7 +26,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """Symbols defined in this package should be imported from
-`all_dimensions_quantum_graph_builder` instead; it only appears in the docs
+`.all_dimensions_quantum_graph_builder` instead; it only appears in the docs
 due to limitations in Sphinx.
 """

lsst/pipe/base/all_dimensions_quantum_graph_builder.py CHANGED Viewed

@@ -44,7 +44,6 @@ import astropy.table
 from lsst.daf.butler import (
     Butler,
     DataCoordinate,
-    DimensionDataAttacher,
     DimensionElement,
     DimensionGroup,
     DimensionRecordSet,
@@ -57,7 +56,7 @@ from lsst.utils.timer import timeMethod
 from ._datasetQueryConstraints import DatasetQueryConstraintVariant
 from .quantum_graph_builder import QuantumGraphBuilder, QuantumGraphBuilderError
-from .quantum_graph_skeleton import DatasetKey, Key, PrerequisiteDatasetKey, QuantumGraphSkeleton, QuantumKey
+from .quantum_graph_skeleton import DatasetKey, PrerequisiteDatasetKey, QuantumGraphSkeleton, QuantumKey
 if TYPE_CHECKING:
     from .pipeline_graph import DatasetTypeNode, PipelineGraph, TaskNode
@@ -65,13 +64,14 @@ if TYPE_CHECKING:
 @final
 class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
-    """An implementation of `QuantumGraphBuilder` that uses a single large
-    query for data IDs covering all dimensions in the pipeline.
+    """An implementation of `.quantum_graph_builder.QuantumGraphBuilder` that
+    uses a single large query for data IDs covering all dimensions in the
+    pipeline.
     Parameters
     ----------
     pipeline_graph : `.pipeline_graph.PipelineGraph`
-        Pipeline to build a `QuantumGraph` from, as a graph.  Will be resolved
+        Pipeline to build a `.QuantumGraph` from, as a graph.  Will be resolved
         in-place with the given butler (any existing resolution is ignored).
     butler : `lsst.daf.butler.Butler`
         Client for the data repository.  Should be read-only.
@@ -92,7 +92,8 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
         are constrained by the ``where`` argument or pipeline data ID will be
         filled in automatically.
     **kwargs
-        Additional keyword arguments forwarded to `QuantumGraphBuilder`.
+        Additional keyword arguments forwarded to
+        `.quantum_graph_builder.QuantumGraphBuilder`.
     Notes
     -----
@@ -141,13 +142,14 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
         self._query_for_data_ids(tree)
         dimension_records = self._fetch_most_dimension_records(tree)
         tree.generate_data_ids(self.log)
-        skeleton = self._make_subgraph_skeleton(tree)
+        skeleton: QuantumGraphSkeleton = self._make_subgraph_skeleton(tree)
         if not skeleton.has_any_quanta:
             # QG is going to be empty; exit early not just for efficiency, but
             # also so downstream code doesn't have to guard against this case.
             return skeleton
         self._find_followup_datasets(tree, skeleton)
-        self._attach_dimension_records(skeleton, dimension_records)
+        all_data_id_dimensions = subgraph.get_all_dimensions()
+        skeleton.attach_dimension_records(self.butler, all_data_id_dimensions, dimension_records)
         return skeleton
     def _query_for_data_ids(self, tree: _DimensionGroupTree) -> None:
@@ -484,44 +486,6 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
                     result.append(record_set)
         return result
-    @timeMethod
-    def _attach_dimension_records(
-        self, skeleton: QuantumGraphSkeleton, dimension_records: Iterable[DimensionRecordSet]
-    ) -> None:
-        """Attach dimension records to most data IDs in the in-progress graph,
-        and return a data structure that records the rest.
-        Parameters
-        ----------
-        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
-            In-progress quantum graph to modify in place.
-        dimension_records : `~collections.abc.Iterable` [ \
-                `lsst.daf.butler.DimensionRecordSet` ]
-            Iterable of sets of dimension records.
-        """
-        # Group all nodes by data ID (and dimensions of data ID).
-        data_ids_to_expand: defaultdict[DimensionGroup, defaultdict[DataCoordinate, list[Key]]] = defaultdict(
-            lambda: defaultdict(list)
-        )
-        data_id: DataCoordinate | None
-        for node_key in skeleton:
-            if data_id := skeleton[node_key].get("data_id"):
-                data_ids_to_expand[data_id.dimensions][data_id].append(node_key)
-        attacher = DimensionDataAttacher(
-            records=dimension_records,
-            dimensions=DimensionGroup.union(*data_ids_to_expand.keys(), universe=self.universe),
-        )
-        for dimensions, data_ids in data_ids_to_expand.items():
-            with self.butler.query() as query:
-                # Butler query will be used as-needed to get dimension records
-                # (from prerequisites) we didn't fetch in advance.  These are
-                # cached in the attacher so we don't look them up multiple
-                # times.
-                expanded_data_ids = attacher.attach(dimensions, data_ids.keys(), query=query)
-            for expanded_data_id, node_keys in zip(expanded_data_ids, data_ids.values()):
-                for node_key in node_keys:
-                    skeleton.set_data_id(node_key, expanded_data_id)
 @dataclasses.dataclass(eq=False, repr=False, slots=True)
 class _DimensionGroupTwig:

lsst/pipe/base/caching_limited_butler.py CHANGED Viewed

@@ -84,7 +84,6 @@ class CachingLimitedButler(LimitedButler):
         no_copy_on_cache: Set[str] = frozenset(),
     ):
         self._wrapped = wrapped
-        self._datastore = self._wrapped._datastore
         self.storageClasses = self._wrapped.storageClasses
         self._cache_on_put = cache_on_put
         self._cache_on_get = cache_on_get
@@ -148,9 +147,6 @@ class CachingLimitedButler(LimitedButler):
         # note that this does not use the cache at all
         return self._wrapped.getDeferred(ref, parameters=parameters, storageClass=storageClass)
-    def stored(self, ref: DatasetRef) -> bool:
-        return self.stored_many([ref])[ref]  # TODO: remove this once DM-43086 is done.
     def stored_many(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
         result = {}
         unknown_refs = []
@@ -205,3 +201,11 @@ class CachingLimitedButler(LimitedButler):
     @property
     def dimensions(self) -> DimensionUniverse:
         return self._wrapped.dimensions
+    @property
+    def _datastore(self) -> Any:
+        return self._wrapped._datastore
+    @_datastore.setter  # demanded by MyPy since we declare it to be an instance attribute in LimitedButler.
+    def _datastore(self, value: Any) -> None:
+        self._wrapped._datastore = value

lsst/pipe/base/connectionTypes.py CHANGED Viewed

@@ -26,7 +26,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """Module defining connection types to be used within a
-`PipelineTaskConnections` class.
+`.PipelineTaskConnections` class.
 """
 __all__ = ["BaseConnection", "InitInput", "InitOutput", "Input", "Output", "PrerequisiteInput"]
@@ -53,7 +53,7 @@ class BaseConnection:
         Indicates if this connection should expect to contain multiple objects
         of the given dataset type.  Tasks with more than one connection with
         ``multiple=True`` with the same dimensions may want to implement
-        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
+        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
         consistent (i.e. zip-iterable) in `PipelineTask.runQuantum()` and
         notify the execution system as early as possible of outputs that will
         not be produced because the corresponding input is missing.
@@ -121,7 +121,7 @@ class DimensionedConnection(BaseConnection):
         Indicates if this connection should expect to contain multiple objects
         of the given dataset type.  Tasks with more than one connection with
         ``multiple=True`` with the same dimensions may want to implement
-        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
+        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
         consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
         the execution system as early as possible of outputs that will not be
         produced because the corresponding input is missing.
@@ -161,7 +161,7 @@ class BaseInput(DimensionedConnection):
         Indicates if this connection should expect to contain multiple objects
         of the given dataset type.  Tasks with more than one connection with
         ``multiple=True`` with the same dimensions may want to implement
-        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
+        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
         consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
         the execution system as early as possible of outputs that will not be
         produced because the corresponding input is missing.
@@ -175,14 +175,14 @@ class BaseInput(DimensionedConnection):
     minimum : `bool`
         Minimum number of datasets required for this connection, per quantum.
         This is checked in the base implementation of
-        `PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
+        `.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
         the minimum is not met for `Input` connections (causing the quantum to
         be pruned, skipped, or never created, depending on the context), and
         `FileNotFoundError` for `PrerequisiteInput` connections (causing
         QuantumGraph generation to fail).  `PipelineTask` implementations may
-        provide custom `~PipelineTaskConnections.adjustQuantum` implementations
-        for more fine-grained or configuration-driven constraints, as long as
-        they are compatible with this minium.
+        provide custom `~.PipelineTaskConnections.adjustQuantum`
+        implementations for more fine-grained or configuration-driven
+        constraints, as long as they are compatible with this minimum.
     Raises
     ------
@@ -216,7 +216,7 @@ class Input(BaseInput):
         Indicates if this connection should expect to contain multiple objects
         of the given dataset type.  Tasks with more than one connection with
         ``multiple=True`` with the same dimensions may want to implement
-        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
+        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
         consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
         the execution system as early as possible of outputs that will not be
         produced because the corresponding input is missing.
@@ -230,14 +230,14 @@ class Input(BaseInput):
     minimum : `bool`
         Minimum number of datasets required for this connection, per quantum.
         This is checked in the base implementation of
-        `PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
+        `.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
         the minimum is not met for `Input` connections (causing the quantum to
         be pruned, skipped, or never created, depending on the context), and
         `FileNotFoundError` for `PrerequisiteInput` connections (causing
         QuantumGraph generation to fail).  `PipelineTask` implementations may
-        provide custom `~PipelineTaskConnections.adjustQuantum` implementations
-        for more fine-grained or configuration-driven constraints, as long as
-        they are compatible with this minium.
+        provide custom `~.PipelineTaskConnections.adjustQuantum`
+        implementations for more fine-grained or configuration-driven
+        constraints, as long as they are compatible with this minimum.
     deferGraphConstraint : `bool`, optional
         If `True`, do not include this dataset type's existence in the initial
         query that starts the QuantumGraph generation process.  This can be
@@ -286,7 +286,7 @@ class PrerequisiteInput(BaseInput):
         Indicates if this connection should expect to contain multiple objects
         of the given dataset type.  Tasks with more than one connection with
         ``multiple=True`` with the same dimensions may want to implement
-        `PipelineTaskConnections.adjustQuantum` to ensure those datasets are
+        `.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
         consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
         the execution system as early as possible of outputs that will not be
         produced because the corresponding input is missing.
@@ -296,12 +296,12 @@ class PrerequisiteInput(BaseInput):
     minimum : `bool`
         Minimum number of datasets required for this connection, per quantum.
         This is checked in the base implementation of
-        `PipelineTaskConnections.adjustQuantum`, which raises
+        `.PipelineTaskConnections.adjustQuantum`, which raises
         `FileNotFoundError` (causing QuantumGraph generation to fail).
-        `PipelineTask` implementations may
-        provide custom `~PipelineTaskConnections.adjustQuantum` implementations
-        for more fine-grained or configuration-driven constraints, as long as
-        they are compatible with this minium.
+        `PipelineTask` implementations may provide custom
+        `~.PipelineTaskConnections.adjustQuantum` implementations for more
+        fine-grained or configuration-driven constraints, as long as they are
+        compatible with this minimum.
     lookupFunction : `typing.Callable`, optional
         An optional callable function that will look up PrerequisiteInputs
         using the DatasetType, registry, quantum dataId, and input collections

lsst/pipe/base/connections.py CHANGED Viewed

@@ -1063,8 +1063,8 @@ def iterConnections(
 class AdjustQuantumHelper:
     """Helper class for calling `PipelineTaskConnections.adjustQuantum`.
-    This class holds `input` and `output` mappings in the form used by
-    `Quantum` and execution harness code, i.e. with
+    This class holds `inputs` and `outputs` mappings in the form used by
+    `lsst.daf.butler.Quantum` and execution harness code, i.e. with
     `~lsst.daf.butler.DatasetType` keys, translating them to and from the
     connection-oriented mappings used inside `PipelineTaskConnections`.
     """

lsst/pipe/base/exec_fixup_data_id.py ADDED Viewed

@@ -0,0 +1,131 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively.  If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+__all__ = ["ExecutionGraphFixup"]
+import contextlib
+import itertools
+from collections import defaultdict
+from collections.abc import Sequence
+from typing import Any
+import networkx as nx
+from .execution_graph_fixup import ExecutionGraphFixup
+from .graph import QuantumGraph, QuantumNode
+class ExecFixupDataId(ExecutionGraphFixup):
+    """Implementation of ExecutionGraphFixup for ordering of tasks based
+    on DataId values.
+    This class is a trivial implementation mostly useful as an example,
+    though it can be used to make actual fixup instances by defining
+    a method that instantiates it, e.g.::
+        # lsst/ap/verify/ci_fixup.py
+        from lsst.pipe.base.exec_fixup_data_id import ExecFixupDataId
+        def assoc_fixup():
+            return ExecFixupDataId(
+                taskLabel="ap_assoc", dimensions=("visit", "detector")
+            )
+    and then executing pipetask::
+        pipetask run --graph-fixup=lsst.ap.verify.ci_fixup.assoc_fixup ...
+    This will add new dependencies between quanta executed by the task with
+    label "ap_assoc". Quanta with higher visit number will depend on quanta
+    with lower visit number and their execution will wait until lower visit
+    number finishes.
+    Parameters
+    ----------
+    taskLabel : `str`
+        The label of the task for which to add dependencies.
+    dimensions : `str` or sequence [`str`]
+        One or more dimension names, quanta execution will be ordered
+        according to values of these dimensions.
+    reverse : `bool`, optional
+        If `False` (default) then quanta with higher values of dimensions
+        will be executed after quanta with lower values, otherwise the order
+        is reversed.
+    """
+    def __init__(self, taskLabel: str, dimensions: str | Sequence[str], reverse: bool = False):
+        self.taskLabel = taskLabel
+        self.dimensions = dimensions
+        self.reverse = reverse
+        if isinstance(self.dimensions, str):
+            self.dimensions = (self.dimensions,)
+        else:
+            self.dimensions = tuple(self.dimensions)
+    def _key(self, qnode: QuantumNode) -> tuple[Any, ...]:
+        """Produce comparison key for quantum data.
+        Parameters
+        ----------
+        qnode : `QuantumNode`
+            An individual node in a `~lsst.pipe.base.QuantumGraph`
+        Returns
+        -------
+        key : `tuple`
+        """
+        dataId = qnode.quantum.dataId
+        assert dataId is not None, "Quantum DataId cannot be None"
+        key = tuple(dataId[dim] for dim in self.dimensions)
+        return key
+    def fixupQuanta(self, graph: QuantumGraph) -> QuantumGraph:
+        taskDef = graph.findTaskDefByLabel(self.taskLabel)
+        if taskDef is None:
+            raise ValueError(f"Cannot find task with label {self.taskLabel}")
+        quanta = list(graph.getNodesForTask(taskDef))
+        keyQuanta = defaultdict(list)
+        for q in quanta:
+            key = self._key(q)
+            keyQuanta[key].append(q)
+        keys = sorted(keyQuanta.keys(), reverse=self.reverse)
+        networkGraph = graph.graph
+        for prev_key, key in itertools.pairwise(keys):
+            for prev_node in keyQuanta[prev_key]:
+                for node in keyQuanta[key]:
+                    # remove any existing edges between the two nodes, but
+                    # don't fail if there are not any. Both directions need to
+                    # be tried because in a directed graph, order matters
+                    for edge in ((node, prev_node), (prev_node, node)):
+                        with contextlib.suppress(nx.NetworkXException):
+                            networkGraph.remove_edge(*edge)
+                    networkGraph.add_edge(prev_node, node)
+        return graph

lsst/pipe/base/execution_graph_fixup.py ADDED Viewed

@@ -0,0 +1,69 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively.  If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+__all__ = ["ExecutionGraphFixup"]
+from abc import ABC, abstractmethod
+from .graph import QuantumGraph
+class ExecutionGraphFixup(ABC):
+    """Interface for classes which update quantum graphs before execution.
+    Primary goal of this class is to modify quanta dependencies which may not
+    be possible to reflect in a quantum graph using standard tools. One known
+    use case for that is to guarantee particular execution order of visits in
+    CI jobs for cases when outcome depends on the processing order of visits
+    (e.g. AP association pipeline).
+    Instances of this class receive pre-ordered sequence of quanta
+    (`.QuantumGraph` instances) and they are allowed to modify quanta data in
+    place, for example update ``dependencies`` field to add additional
+    dependencies. Returned list of quanta will be re-ordered once again by the
+    graph executor to reflect new dependencies.
+    """
+    @abstractmethod
+    def fixupQuanta(self, graph: QuantumGraph) -> QuantumGraph:
+        """Update quanta in a graph.
+        Potentially anything in the graph could be changed if it does not
+        break executor assumptions. If modifications result in a dependency
+        cycle the executor will raise an exception.
+        Parameters
+        ----------
+        graph : `.QuantumGraph`
+            Quantum Graph that will be executed by the executor.
+        Returns
+        -------
+        graph : `.QuantumGraph`
+            Modified graph.
+        """
+        raise NotImplementedError

lsst/pipe/base/graph/graphSummary.py CHANGED Viewed

@@ -39,10 +39,10 @@ class QgraphTaskSummary(pydantic.BaseModel):
     numQuanta: int = 0
     """Number of Quanta for this PipelineTask in this QuantumGraph."""
-    numInputs: dict[str, int] = Counter()
+    numInputs: dict[str, int] = pydantic.Field(default_factory=Counter)
     """Total number of inputs per dataset type name for this PipelineTask."""
-    numOutputs: dict[str, int] = Counter()
+    numOutputs: dict[str, int] = pydantic.Field(default_factory=Counter)
     """Total number of outputs per dataset type name for this PipelineTask."""
     # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
@@ -79,7 +79,7 @@ class QgraphTaskSummary(pydantic.BaseModel):
 class QgraphSummary(pydantic.BaseModel):
     """Report for the QuantumGraph creation or reading."""
-    graphID: BuildId
+    graphID: BuildId | None = None
     """QuantumGraph ID."""
     cmdLine: str | None = None
@@ -97,7 +97,7 @@ class QgraphSummary(pydantic.BaseModel):
     outputRun: str | None = None
     """Output run collection."""
-    qgraphTaskSummaries: dict[str, QgraphTaskSummary] = {}
+    qgraphTaskSummaries: dict[str, QgraphTaskSummary] = pydantic.Field(default_factory=dict)
     """Quanta information summarized per PipelineTask."""
     # Work around the fact that Sphinx chokes on Pydantic docstring formatting,

lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl

lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl