PyPI - lsst-pipe-base - Versions diffs - 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl - Mend

lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

lsst/pipe/base/pipeline_graph/_pipeline_graph.py CHANGED Viewed

@@ -150,8 +150,10 @@ class PipelineGraph:
         self._description = value
     @property
-    def universe(self) -> DimensionUniverse | None:
+    def universe(self) -> DimensionUniverse:
         """Definitions for all butler dimensions."""
+        if self._universe is None:
+            raise UnresolvedGraphError("Pipeline graph is not resolved.")
         return self._universe
     @property
@@ -159,7 +161,7 @@ class PipelineGraph:
         """Data ID that represents a constraint on all quanta generated from
         this pipeline.
-        This is may not be available unless `universe` is not `None`.
+        This is may not be available unless the graph is resolved.
         """
         return DataCoordinate.standardize(self._raw_data_id, universe=self.universe)
@@ -305,7 +307,7 @@ class PipelineGraph:
                 for k, v in self._task_subsets.items()
             },
             description=self._description,
-            universe=self.universe,
+            universe=self._universe,
             data_id=self._raw_data_id,
             step_definitions=step_definitions,
         )
@@ -774,7 +776,7 @@ class PipelineGraph:
             key=NodeKey(NodeType.TASK, label),
             init_key=NodeKey(NodeType.TASK_INIT, label),
             data=_TaskNodeImportedData.configure(label, task_class, config, connections),
-            universe=self.universe,
+            universe=self._universe,
         )
         self.add_task_nodes([task_node])
         return task_node
@@ -1571,9 +1573,9 @@ class PipelineGraph:
         Returns
         -------
-        groups : `dict` [ `DimensionGroup`, `tuple` ]
-            A dictionary of groups keyed by `DimensionGroup`, in which each
-            value is a tuple of:
+        groups : `dict` [ `~lsst.daf.butler.DimensionGroup`, `tuple` ]
+            A dictionary of groups keyed by `~lsst.daf.butler.DimensionGroup`,
+            in which each value is a tuple of:
             - a `dict` of `TaskNode` instances, keyed by task label
             - a `dict` of `DatasetTypeNode` instances, keyed by
@@ -1605,6 +1607,25 @@ class PipelineGraph:
                 group[1][dataset_type_node.name] = dataset_type_node
         return result
+    def get_all_dimensions(self, prerequisites: bool = True) -> DimensionGroup:
+        """Return all dimensions used in this graph's tasks and dataset types.
+        Parameters
+        ----------
+        prerequisites : `bool`, optional
+            If `False`, do not include the dimensions that are only used by
+            prerequisite input dataset types.
+        Returns
+        -------
+        dimensions : `~lsst.daf.butler.DimensionGroup`.
+            All dimensions in this pipeline.
+        """
+        return DimensionGroup.union(
+            *self.group_by_dimensions(prerequisites=prerequisites).keys(),
+            universe=self.universe,
+        )
     def split_independent(self) -> Iterable[PipelineGraph]:
         """Iterate over independent subgraphs that together comprise this
         pipeline graph.
@@ -1668,11 +1689,13 @@ class PipelineGraph:
         not considered part of the pipeline graph in other respects, but it
         does get written with other provenance datasets.
         """
-        if self.universe is None:
+        if self._universe is None:
             raise UnresolvedGraphError(
                 "PipelineGraph must be resolved in order to get the packages dataset type."
             )
-        return DatasetType(PACKAGES_INIT_OUTPUT_NAME, self.universe.empty, PACKAGES_INIT_OUTPUT_STORAGE_CLASS)
+        return DatasetType(
+            PACKAGES_INIT_OUTPUT_NAME, self._universe.empty, PACKAGES_INIT_OUTPUT_STORAGE_CLASS
+        )
     def register_dataset_types(self, butler: Butler, include_packages: bool = True) -> None:
         """Register all dataset types in a data repository.
@@ -1767,6 +1790,7 @@ class PipelineGraph:
         self,
         get_init_input: Callable[[DatasetType], Any] | None = None,
         init_outputs: list[tuple[Any, DatasetType]] | None = None,
+        labels: Iterable[str] | None = None,
     ) -> list[PipelineTask]:
         """Instantiate all tasks in the pipeline.
@@ -1785,6 +1809,9 @@ class PipelineGraph:
             correspond to the storage class of the output connection, which
             may not be the same as the storage class on the graph's dataset
             type node.
+        labels : `~collections.abc.Iterable` [ `str` ], optional
+            The labels of tasks to instantiate.  If not provided, all tasks in
+            the graph will be instantiated.
         Returns
         -------
@@ -1793,10 +1820,13 @@ class PipelineGraph:
         """
         if not self.is_fully_resolved:
             raise UnresolvedGraphError("Pipeline graph must be fully resolved before instantiating tasks.")
-        empty_data_id = DataCoordinate.make_empty(cast(DimensionUniverse, self.universe))
+        empty_data_id = DataCoordinate.make_empty(self.universe)
+        labels = set(labels) if labels is not None else self.tasks.keys()
         handles: dict[str, InMemoryDatasetHandle] = {}
         tasks: list[PipelineTask] = []
         for task_node in self.tasks.values():
+            if task_node.label not in labels:
+                continue
             task_init_inputs: dict[str, Any] = {}
             for read_edge in task_node.init.inputs.values():
                 if (handle := handles.get(read_edge.dataset_type_name)) is not None:

lsst/pipe/base/pipeline_graph/_tasks.py CHANGED Viewed

@@ -360,6 +360,57 @@ class TaskInitNode:
         yield from self.outputs.values()
         yield self.config_output
+    def get_input_edge(self, connection_name: str) -> ReadEdge:
+        """Look up an input edge by connection name.
+        Parameters
+        ----------
+        connection_name : `str`
+            Name of the connection.
+        Returns
+        -------
+        edge : `ReadEdge`
+            Input edge.
+        """
+        return self.inputs[connection_name]
+    def get_output_edge(self, connection_name: str) -> WriteEdge:
+        """Look up an output edge by connection name.
+        Parameters
+        ----------
+        connection_name : `str`
+            Name of the connection.
+        Returns
+        -------
+        edge : `WriteEdge`
+            Output edge.
+        """
+        if connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
+            return self.config_output
+        return self.outputs[connection_name]
+    def get_edge(self, connection_name: str) -> Edge:
+        """Look up an edge by connection name.
+        Parameters
+        ----------
+        connection_name : `str`
+            Name of the connection.
+        Returns
+        -------
+        edge : `Edge`
+            Edge.
+        """
+        try:
+            return self.get_input_edge(connection_name)
+        except KeyError:
+            pass
+        return self.get_output_edge(connection_name)
     def diff_edges(self, other: TaskInitNode) -> list[str]:
         """Compare the edges of this task initialization node to those from the
         same task label in a different pipeline.
@@ -742,6 +793,61 @@ class TaskNode:
         if self.log_output is not None:
             yield self.log_output
+    def get_input_edge(self, connection_name: str) -> ReadEdge:
+        """Look up an input edge by connection name.
+        Parameters
+        ----------
+        connection_name : `str`
+            Name of the connection.
+        Returns
+        -------
+        edge : `ReadEdge`
+            Input edge.
+        """
+        return self.inputs[connection_name]
+    def get_output_edge(self, connection_name: str) -> WriteEdge:
+        """Look up an output edge by connection name.
+        Parameters
+        ----------
+        connection_name : `str`
+            Name of the connection.
+        Returns
+        -------
+        edge : `WriteEdge`
+            Output edge.
+        """
+        if connection_name == acc.METADATA_OUTPUT_CONNECTION_NAME:
+            return self.metadata_output
+        if connection_name == acc.LOG_OUTPUT_CONNECTION_NAME:
+            if self.log_output is None:
+                raise KeyError(connection_name)
+            return self.log_output
+        return self.outputs[connection_name]
+    def get_edge(self, connection_name: str) -> Edge:
+        """Look up an edge by connection name.
+        Parameters
+        ----------
+        connection_name : `str`
+            Name of the connection.
+        Returns
+        -------
+        edge : `Edge`
+            Edge.
+        """
+        try:
+            return self.get_input_edge(connection_name)
+        except KeyError:
+            pass
+        return self.get_output_edge(connection_name)
     def diff_edges(self, other: TaskNode) -> list[str]:
         """Compare the edges of this task node to those from the same task
         label in a different pipeline.

lsst/pipe/base/pipeline_graph/io.py CHANGED Viewed

@@ -713,7 +713,7 @@ class SerializedPipelineGraph(pydantic.BaseModel):
             },
             step_labels=list(target.steps),
             steps_verified=target.steps.verified,
-            dimensions=target.universe.dimensionConfig.toDict() if target.universe is not None else None,
+            dimensions=target._universe.dimensionConfig.toDict() if target._universe is not None else None,
             data_id=target._raw_data_id,
         )
         if target._sorted_keys:

lsst/pipe/base/quantum_graph_builder.py CHANGED Viewed

@@ -55,6 +55,7 @@ from lsst.daf.butler import (
     NamedKeyMapping,
     Quantum,
 )
+from lsst.daf.butler.datastore.record_data import DatastoreRecordData
 from lsst.daf.butler.registry import MissingCollectionError, MissingDatasetTypeError
 from lsst.utils.logging import LsstLogAdapter, getLogger
 from lsst.utils.timer import timeMethod
@@ -103,13 +104,13 @@ class InitInputMissingError(QuantumGraphBuilderError):
 class QuantumGraphBuilder(ABC):
-    """An abstract base class for building `QuantumGraph` objects from a
+    """An abstract base class for building `.QuantumGraph` objects from a
     pipeline.
     Parameters
     ----------
     pipeline_graph : `.pipeline_graph.PipelineGraph`
-        Pipeline to build a `QuantumGraph` from, as a graph.  Will be resolved
+        Pipeline to build a `.QuantumGraph` from, as a graph.  Will be resolved
         in-place with the given butler (any existing resolution is ignored).
     butler : `lsst.daf.butler.Butler`
         Client for the data repository.  Should be read-only.
@@ -139,7 +140,7 @@ class QuantumGraphBuilder(ABC):
     The `build` method splits the pipeline graph into independent subgraphs,
     then calls the abstract method `process_subgraph` on each, to allow
     concrete implementations to populate the rough graph structure (the
-    `~quantum_graph_skeleton.QuantumGraphSkeleton` class), including searching
+    `~.quantum_graph_skeleton.QuantumGraphSkeleton` class), including searching
     for existing datasets.  The `build` method then:
     - assembles `lsst.daf.butler.Quantum` instances from all data IDs in the
@@ -321,7 +322,7 @@ class QuantumGraphBuilder(ABC):
         Returns
         -------
-        quantum_graph : `QuantumGraph`
+        quantum_graph : `.QuantumGraph`
             DAG describing processing to be performed.
         Notes
@@ -373,7 +374,7 @@ class QuantumGraphBuilder(ABC):
     @abstractmethod
     def process_subgraph(self, subgraph: PipelineGraph) -> QuantumGraphSkeleton:
         """Build the rough structure for an independent subset of the
-        `QuantumGraph` and query for relevant existing datasets.
+        `.QuantumGraph` and query for relevant existing datasets.
         Parameters
         ----------
@@ -384,39 +385,38 @@ class QuantumGraphBuilder(ABC):
         Returns
         -------
-        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
             Class representing an initial quantum graph. See
-            `quantum_graph_skeleton.QuantumGraphSkeleton` docs for details.
+            `.quantum_graph_skeleton.QuantumGraphSkeleton` docs for details.
             After this is returned, the object may be modified in-place in
             unspecified ways.
         Notes
         -----
-        The `quantum_graph_skeleton.QuantumGraphSkeleton` should associate
-        `DatasetRef` objects with nodes for existing datasets.  In
-        particular:
+        The `.quantum_graph_skeleton.QuantumGraphSkeleton` should associate
+        `lsst.daf.butler.DatasetRef` objects with nodes for existing datasets.
+        In particular:
-        - `quantum_graph_skeleton.QuantumGraphSkeleton.set_dataset_ref` must be
-          used to associate existing datasets with all overall-input dataset
+        - `.quantum_graph_skeleton.QuantumGraphSkeleton.set_dataset_ref` must
+          be used to associate existing datasets with all overall-input dataset
           nodes in the skeleton by querying `input_collections`.  This includes
           all standard input nodes and any prerequisite nodes added by the
           method (prerequisite nodes may also be left out entirely, as the base
           class can add them later, albeit possibly less efficiently).
-        - `quantum_graph_skeleton.QuantumGraphSkeleton.set_output_for_skip`
+        - `.quantum_graph_skeleton.QuantumGraphSkeleton.set_output_for_skip`
           must be used to associate existing datasets with output dataset nodes
           by querying `skip_existing_in`.
-        - `quantum_graph_skeleton.QuantumGraphSkeleton.add_output_in_the_way`
+        - `.quantum_graph_skeleton.QuantumGraphSkeleton.add_output_in_the_way`
           must be used to associated existing outputs with output dataset nodes
-          by querying `output_run` if `output_run_exists` is `True`.
-          Note that the presence of such datasets is not automatically an
-          error, even if `clobber` is `False`, as these may be quanta that will
-          be skipped.
+          by querying `output_run` if `output_run_exists` is `True`. Note that
+          the presence of such datasets is not automatically an error, even if
+          `clobber` is `False`, as these may be quanta that will be skipped.
-        `DatasetRef` objects for existing datasets with empty data IDs in all
-        of the above categories may be found in the `empty_dimensions_datasets`
-        attribute, as these are queried for prior to this call by the base
-        class, but associating them with graph nodes is still this method's
-        responsibility.
+        `lsst.daf.butler.DatasetRef` objects for existing datasets with empty
+        data IDs in all of the above categories may be found in the
+        `empty_dimensions_datasets` attribute, as these are queried for prior
+        to this call by the base class, but associating them with graph nodes
+        is still this method's responsibility.
         Dataset types should never be components and should always use the
         "common" storage class definition in `pipeline_graph.DatasetTypeNode`
@@ -435,16 +435,17 @@ class QuantumGraphBuilder(ABC):
         ----------
         task_node : `pipeline_graph.TaskNode`
             Node for this task in the pipeline graph.
-        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
             Preliminary quantum graph, to be modified in-place.
         Notes
         -----
         This method modifies ``skeleton`` in-place in several ways:
-        - It associates a `DatasetRef` with all output datasets and drops input
-          dataset nodes that do not have a `DatasetRef` already.  This ensures
-          producing and consuming tasks start from the same `DatasetRef`.
+        - It associates a `lsst.daf.butler.DatasetRef` with all output datasets
+          and drops input dataset nodes that do not have a
+          `lsst.daf.butler.DatasetRef` already.  This ensures producing and
+          consuming tasks start from the same `lsst.daf.butler.DatasetRef`.
         - It adds "inputs", "outputs", and "init_inputs" attributes to the
           quantum nodes, holding the same `NamedValueMapping` objects needed to
           construct an actual `Quantum` instances.
@@ -596,7 +597,7 @@ class QuantumGraphBuilder(ABC):
             Node for this task in the pipeline graph.
         quantum_key : `QuantumKey`
             Identifier for this quantum in the graph.
-        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
             Preliminary quantum graph, to be modified in-place.
         Returns
@@ -611,9 +612,10 @@ class QuantumGraphBuilder(ABC):
         `skip_existing_in` collections, the quantum will be skipped. This
         causes the quantum node to be removed from the graph.  Dataset nodes
         that were previously the outputs of this quantum will be associated
-        with `DatasetRef` objects that were found in ``skip_existing_in``, or
-        will be removed if there is no such dataset there.  Any output dataset
-        in `output_run` will be removed from the "output in the way" category.
+        with `lsst.daf.butler.DatasetRef` objects that were found in
+        ``skip_existing_in``, or will be removed if there is no such dataset
+        there.  Any output dataset in `output_run` will be removed from the
+        "output in the way" category.
         """
         metadata_dataset_key = DatasetKey(
             task_node.metadata_output.parent_dataset_type_name, quantum_key.data_id_values
@@ -659,7 +661,7 @@ class QuantumGraphBuilder(ABC):
         ----------
         quantum_key : `QuantumKey`
             Identifier for this quantum in the graph.
-        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
             Preliminary quantum graph, to be modified in-place.
         task_prerequisite_info : `~prerequisite_helpers.PrerequisiteInfo`
             Information about the prerequisite inputs to this task.
@@ -679,7 +681,7 @@ class QuantumGraphBuilder(ABC):
         the original there).  If `clobber` is `False`, `RuntimeError` is
         raised.  If there is no output already present, a new one with a random
         UUID is generated.  In all cases the dataset node in the skeleton is
-        associated with a `DatasetRef`.
+        associated with a `lsst.daf.butler.DatasetRef`.
         """
         dataset_key: DatasetKey | PrerequisiteDatasetKey
         for dataset_key in skeleton.iter_outputs_of(quantum_key):
@@ -743,7 +745,7 @@ class QuantumGraphBuilder(ABC):
             Node for this task in the pipeline graph.
         quantum_key : `QuantumKey`
             Identifier for this quantum in the graph.
-        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
             Preliminary quantum graph, to be modified in-place.
         Returns
@@ -787,7 +789,7 @@ class QuantumGraphBuilder(ABC):
             Node for this task in the pipeline graph.
         quantum_key : `QuantumKey`
             Identifier for this quantum in the graph.
-        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
             Preliminary quantum graph, to be modified in-place.
         skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder`
             An object that accumulates the appropriate spatial bounds for a
@@ -806,8 +808,8 @@ class QuantumGraphBuilder(ABC):
         Notes
         -----
         This method trims input dataset nodes that are not already associated
-        with a `DatasetRef`, and queries for prerequisite input nodes that do
-        not exist.
+        with a `lsst.daf.butler.DatasetRef`, and queries for prerequisite input
+        nodes that do not exist.
         """
         inputs_by_type: dict[str, set[DatasetRef]] = {}
         dataset_key: DatasetKey | PrerequisiteDatasetKey
@@ -987,7 +989,7 @@ class QuantumGraphBuilder(ABC):
         Parameters
         ----------
-        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
             Preliminary quantum graph to update in place.
         Notes
@@ -999,31 +1001,47 @@ class QuantumGraphBuilder(ABC):
         """
         overall_inputs = skeleton.extract_overall_inputs()
         exported_records = self.butler._datastore.export_records(overall_inputs.values())
-        for quantum_key in skeleton.iter_all_quanta():
-            quantum_records = {}
-            input_ids = {
+        for task_label in self._pipeline_graph.tasks:
+            if not skeleton.has_task(task_label):
+                continue
+            task_init_key = skeleton.get_task_init_node(task_label)
+            init_input_ids = {
                 ref.id
-                for dataset_key in skeleton.iter_inputs_of(quantum_key)
+                for dataset_key in skeleton.iter_inputs_of(task_init_key)
                 if (ref := overall_inputs.get(dataset_key)) is not None
             }
-            if input_ids:
+            init_records = {}
+            if init_input_ids:
                 for datastore_name, records in exported_records.items():
-                    matching_records = records.subset(input_ids)
+                    matching_records = records.subset(init_input_ids)
                     if matching_records is not None:
-                        quantum_records[datastore_name] = matching_records
-            skeleton[quantum_key]["datastore_records"] = quantum_records
+                        init_records[datastore_name] = matching_records
+            skeleton[task_init_key]["datastore_records"] = init_records
+            for quantum_key in skeleton.get_quanta(task_label):
+                quantum_records = {}
+                input_ids = {
+                    ref.id
+                    for dataset_key in skeleton.iter_inputs_of(quantum_key)
+                    if (ref := overall_inputs.get(dataset_key)) is not None
+                }
+                if input_ids:
+                    for datastore_name, records in exported_records.items():
+                        matching_records = records.subset(input_ids)
+                        if matching_records is not None:
+                            quantum_records[datastore_name] = matching_records
+                skeleton[quantum_key]["datastore_records"] = quantum_records
     @final
     @timeMethod
     def _construct_quantum_graph(
         self, skeleton: QuantumGraphSkeleton, metadata: Mapping[str, Any]
     ) -> QuantumGraph:
-        """Construct a `QuantumGraph` object from the contents of a
-        fully-processed `quantum_graph_skeleton.QuantumGraphSkeleton`.
+        """Construct a `.QuantumGraph` object from the contents of a
+        fully-processed `.quantum_graph_skeleton.QuantumGraphSkeleton`.
         Parameters
         ----------
-        skeleton : `quantum_graph_skeleton.QuantumGraphSkeleton`
+        skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
             Preliminary quantum graph.  Must have "init_inputs", "inputs", and
             "outputs" attributes on all quantum nodes, as added by
             `_resolve_task_quanta`, as well as a "datastore_records" attribute
@@ -1033,7 +1051,7 @@ class QuantumGraphBuilder(ABC):
         Returns
         -------
-        quantum_graph : `QuantumGraph`
+        quantum_graph : `.QuantumGraph`
             DAG describing processing to be performed.
         """
         quanta: dict[TaskDef, set[Quantum]] = {}
@@ -1044,20 +1062,29 @@ class QuantumGraphBuilder(ABC):
                 continue
             task_node = self._pipeline_graph.tasks[task_def.label]
             task_init_key = skeleton.get_task_init_node(task_def.label)
-            init_inputs[task_def] = skeleton[task_init_key]["inputs"].values()
-            init_outputs[task_def] = skeleton[task_init_key]["outputs"].values()
+            task_init_state = skeleton[task_init_key]
+            init_datastore_records: dict[str, DatastoreRecordData] = task_init_state.get(
+                "datastore_records", {}
+            )
+            init_inputs[task_def] = task_init_state["inputs"].values()
+            init_outputs[task_def] = task_init_state["outputs"].values()
             quanta_for_task: set[Quantum] = set()
             for quantum_key in skeleton.get_quanta(task_node.label):
-                node_state = skeleton[quantum_key]
+                quantum_state = skeleton[quantum_key]
+                quantum_datastore_records: dict[str, DatastoreRecordData] = quantum_state.get(
+                    "datastore_records", {}
+                )
                 quanta_for_task.add(
                     Quantum(
                         taskName=task_node.task_class_name,
                         taskClass=task_node.task_class,
-                        dataId=node_state["data_id"],
-                        initInputs=node_state["init_inputs"],
-                        inputs=node_state["inputs"],
-                        outputs=node_state["outputs"],
-                        datastore_records=node_state.get("datastore_records"),
+                        dataId=quantum_state["data_id"],
+                        initInputs=quantum_state["init_inputs"],
+                        inputs=quantum_state["inputs"],
+                        outputs=quantum_state["outputs"],
+                        datastore_records=DatastoreRecordData.merge_mappings(
+                            quantum_datastore_records, init_datastore_records
+                        ),
                     )
                 )
             quanta[task_def] = quanta_for_task

lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl

lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl