lsst-pipe-base 29.2025.1400__py3-none-any.whl → 29.2025.1600__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +17 -0
  2. lsst/pipe/base/graph/_loadHelpers.py +4 -0
  3. lsst/pipe/base/graph/graph.py +2 -2
  4. lsst/pipe/base/pipeline.py +1 -1
  5. lsst/pipe/base/pipelineIR.py +10 -1
  6. lsst/pipe/base/pipeline_graph/__main__.py +1 -0
  7. lsst/pipe/base/pipeline_graph/_exceptions.py +7 -0
  8. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +360 -11
  9. lsst/pipe/base/pipeline_graph/expressions.py +271 -0
  10. lsst/pipe/base/pipeline_graph/visualization/__init__.py +1 -0
  11. lsst/pipe/base/pipeline_graph/visualization/_formatting.py +300 -5
  12. lsst/pipe/base/pipeline_graph/visualization/_mermaid.py +17 -25
  13. lsst/pipe/base/pipeline_graph/visualization/_options.py +11 -3
  14. lsst/pipe/base/pipeline_graph/visualization/_show.py +23 -3
  15. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +250 -0
  16. lsst/pipe/base/quantum_provenance_graph.py +28 -0
  17. lsst/pipe/base/version.py +1 -1
  18. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/METADATA +2 -1
  19. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/RECORD +27 -25
  20. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/WHEEL +0 -0
  21. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/entry_points.txt +0 -0
  22. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/licenses/COPYRIGHT +0 -0
  23. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/licenses/LICENSE +0 -0
  24. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/licenses/bsd_license.txt +0 -0
  25. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/licenses/gpl-v3.0.txt +0 -0
  26. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/top_level.txt +0 -0
  27. {lsst_pipe_base-29.2025.1400.dist-info → lsst_pipe_base-29.2025.1600.dist-info}/zip-safe +0 -0
@@ -39,6 +39,8 @@ from collections import defaultdict
 from collections.abc import Iterable, Mapping
 from typing import TYPE_CHECKING, Any, TypeAlias, final
 
+import astropy.table
+
 from lsst.daf.butler import (
     Butler,
     DataCoordinate,
@@ -85,6 +87,11 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
         (sometimes catastrophically bad) query plan.
     bind : `~collections.abc.Mapping`, optional
         Variable substitutions for the ``where`` expression.
+    data_id_tables : `~collections.abc.Iterable` [ `astropy.table.Table` ],\
+            optional
+        Tables of data IDs to join in as constraints. Missing dimensions that
+        are constrained by the ``where`` argument or pipeline data ID will be
+        filled in automatically.
     **kwargs
         Additional keyword arguments forwarded to `QuantumGraphBuilder`.
 
@@ -113,6 +120,7 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
         where: str = "",
         dataset_query_constraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL,
         bind: Mapping[str, Any] | None = None,
+        data_id_tables: Iterable[astropy.table.Table] = (),
         **kwargs: Any,
     ):
         super().__init__(pipeline_graph, butler, **kwargs)
@@ -120,6 +128,7 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
         self.where = where
         self.dataset_query_constraint = dataset_query_constraint
         self.bind = bind
+        self.data_id_tables = list(data_id_tables)
 
     @timeMethod
     def process_subgraph(self, subgraph: PipelineGraph) -> QuantumGraphSkeleton:
@@ -194,6 +203,14 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
                 f"{self.where!r}, bind={self.bind!r})"
             )
             query = query.where(tree.subgraph.data_id, self.where, bind=self.bind)
+            # It's important for tables to be joined in last, so data IDs from
+            # pipeline and where can be used to fill in missing columns.
+            for table in self.data_id_tables:
+                # If this is from ctrl_mpexec's pipetask, it'll have added
+                # a filename to the metadata for us.
+                table_name = table.meta.get("filename", "unknown")
+                query_cmd.append(f"    query = query.join_data_coordinate_table(<{table_name}>)")
+                query = query.join_data_coordinate_table(table)
             self.log.verbose("Querying for data IDs via: %s", "\n".join(query_cmd))
             # Allow duplicates from common skypix overlaps to make some queries
             # run faster.
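
A minimal sketch of how the new ``data_id_tables`` argument might be used, assuming ``pipeline_graph`` and ``butler`` objects already exist; the column names, values, and filename here are hypothetical:

    import astropy.table

    from lsst.pipe.base.all_dimensions_quantum_graph_builder import (
        AllDimensionsQuantumGraphBuilder,
    )

    # Constrain the quantum graph to an explicit set of data IDs; dimensions
    # not present as columns can be filled in by the ``where`` expression or
    # the pipeline data ID.
    data_ids = astropy.table.Table(
        {"visit": [903334, 903336], "detector": [22, 22]},
        meta={"filename": "my_data_ids.ecsv"},  # only used in log messages
    )
    builder = AllDimensionsQuantumGraphBuilder(
        pipeline_graph,
        butler,
        where="instrument = 'HSC'",
        data_id_tables=[data_ids],
    )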
@@ -65,6 +65,7 @@ class LoadHelper(AbstractContextManager["LoadHelper"]):
     to upgrade them to the latest format before they can be used in
     production.
     """
+    fullRead: bool = False
 
     def __post_init__(self) -> None:
         self._resourceHandle: ResourceHandleProtocol | None = None
@@ -261,6 +262,9 @@ class LoadHelper(AbstractContextManager["LoadHelper"]):
     def __enter__(self) -> LoadHelper:
         if isinstance(self.uri, BinaryIO | BytesIO | BufferedRandom):
             self._resourceHandle = self.uri
+        elif self.fullRead:
+            local = self._exitStack.enter_context(self.uri.as_local())
+            self._resourceHandle = self._exitStack.enter_context(local.open("rb"))
         else:
             self._resourceHandle = self._exitStack.enter_context(self.uri.open("rb"))
         self._initialize()
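
The new branch relies on a standard `lsst.resources` idiom: `ResourcePath.as_local` materializes a remote resource as a temporary local file (a no-op for local files), so a full read becomes one download plus ordinary buffered reads rather than many small remote reads. A self-contained sketch of that idiom, with a hypothetical S3 URI:

    from lsst.resources import ResourcePath

    uri = ResourcePath("s3://some-bucket/graph.qgraph")
    # as_local() yields a local ResourcePath and cleans up any temporary
    # copy when the context exits.
    with uri.as_local() as local:
        with local.open("rb") as handle:
            magic = handle.read(6)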
@@ -963,7 +963,7 @@ class QuantumGraph:
         """
         uri = ResourcePath(uri)
         if uri.getExtension() in {".qgraph"}:
-            with LoadHelper(uri, minimumVersion) as loader:
+            with LoadHelper(uri, minimumVersion, fullRead=(nodes is None)) as loader:
                 qgraph = loader.load(universe, nodes, graphID)
         else:
             raise ValueError(f"Only know how to handle files saved as `.qgraph`, not {uri}")
@@ -1230,7 +1230,7 @@ class QuantumGraph:
             being loaded or if the supplied uri does not point at a valid
             `QuantumGraph` save file.
         """
-        with LoadHelper(file, minimumVersion) as loader:
+        with LoadHelper(file, minimumVersion, fullRead=(nodes is None)) as loader:
            qgraph = loader.load(universe, nodes, graphID)
        if not isinstance(qgraph, QuantumGraph):
            raise TypeError(f"QuantumGraph file contains unexpected object type: {type(qgraph)}")
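
Both load paths now set `fullRead` based on whether specific nodes were requested: loading everything (`nodes=None`) fetches the file wholesale, while loading a subset keeps the incremental-read behavior. A hedged usage sketch, assuming a `DimensionUniverse` in ``universe`` and a sequence of node IDs in ``node_ids``:

    from lsst.pipe.base import QuantumGraph

    # Whole graph wanted: the file is localized in one go (fullRead=True).
    qgraph = QuantumGraph.loadUri("s3://some-bucket/run.qgraph", universe)

    # Specific nodes wanted: incremental reads are still used.
    partial = QuantumGraph.loadUri("s3://some-bucket/run.qgraph", universe, nodes=node_ids)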
@@ -427,7 +427,7 @@ class Pipeline:
         if "," in label_subset:
             if ".." in label_subset:
                 raise ValueError(
-                    "Can only specify a list of labels or a rangewhen loading a Pipline not both"
+                    "Can only specify a list of labels or a range when loading a Pipeline, not both."
                 )
             args = {"labels": set(label_subset.split(","))}
         # labels supplied as a range
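
For context, this error guards the URI-fragment syntax for loading part of a pipeline: a fragment can name either a comma-separated list of labels or a ``..`` range, but not both. A sketch with a hypothetical file and task labels:

    from lsst.pipe.base import Pipeline

    p1 = Pipeline.from_uri("pipeline.yaml#isr,characterizeImage")  # list
    p2 = Pipeline.from_uri("pipeline.yaml#isr..calibrate")         # range
    # "pipeline.yaml#isr,characterizeImage..calibrate" raises ValueError.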
@@ -980,10 +980,19 @@ class PipelineIR:
             if extraTaskLabels := (labeled_subset.subset - pipeline.tasks.keys()):
                 match subsetCtrl:
                     case PipelineSubsetCtrl.DROP:
-                        pipeline.labeled_subsets.pop(label)
+                        del pipeline.labeled_subsets[label]
                     case PipelineSubsetCtrl.EDIT:
                         for extra in extraTaskLabels:
                             labeled_subset.subset.discard(extra)
+            elif subsetCtrl is PipelineSubsetCtrl.DROP and not labeled_subset.subset:
+                # When mode is DROP, also drop any subsets that were already
+                # empty. This ensures we drop steps that were emptied-out by
+                # (earlier) imports with exclude in EDIT mode. Note that we
+                # don't want to drop those steps when they're first excluded
+                # down to nothing, because the pipeline might be about to add
+                # new tasks back into them, and then we'd want to preserve the
+                # step definitions.
+                del pipeline.labeled_subsets[label]
 
         # remove any steps that correspond to removed subsets
         new_steps = []
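
A standalone sketch of the pruning rule with plain dict/set stand-ins for the IR classes (not the real API): subsets referencing unknown tasks are dropped or edited depending on the mode, and in DROP mode subsets that are already empty are now dropped as well:

    tasks = {"isr", "calibrate"}
    labeled_subsets = {
        "step1": {"isr", "calibrate"},
        "step2": {"removedTask"},  # references a task that no longer exists
        "step3": set(),            # emptied out by an earlier EDIT-mode import
    }

    def prune(mode: str) -> None:
        for label, subset in list(labeled_subsets.items()):
            if extra := (subset - tasks):
                if mode == "DROP":
                    del labeled_subsets[label]
                else:  # EDIT: keep the subset, discard the unknown labels
                    subset.difference_update(extra)
            elif mode == "DROP" and not subset:
                del labeled_subsets[label]

    prune("DROP")
    assert labeled_subsets == {"step1": {"isr", "calibrate"}}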
@@ -334,6 +334,7 @@ class DisplayArguments:
             dimensions=args.dimensions,
             task_classes=args.task_classes,
             storage_classes=args.storage_classes,
+            status=None,
         ),
         merge_input_trees=args.merge_input_trees,
         merge_output_trees=args.merge_output_trees,
@@ -31,6 +31,7 @@ __all__ = (
     "DuplicateOutputError",
     "EdgesChangedError",
     "IncompatibleDatasetTypeError",
+    "InvalidExpressionError",
     "InvalidStepsError",
     "PipelineDataCycleError",
     "PipelineGraphError",
@@ -102,5 +103,11 @@ class PipelineGraphExceptionSafetyError(PipelineGraphError):
     """
 
 
+class InvalidExpressionError(PipelineGraphError):
+    """Exception raised when a pipeline subset expression could not be parsed
+    or applied.
+    """
+
+
 class InvalidStepsError(PipelineGraphError):
     """Exception raised when the step definitions are invalid."""
@@ -55,11 +55,13 @@ from lsst.utils.packages import Packages
 
 from .._dataset_handle import InMemoryDatasetHandle
 from ..automatic_connection_constants import PACKAGES_INIT_OUTPUT_NAME, PACKAGES_INIT_OUTPUT_STORAGE_CLASS
+from . import expressions
 from ._dataset_types import DatasetTypeNode
 from ._edges import Edge, ReadEdge, WriteEdge
 from ._exceptions import (
     DuplicateOutputError,
     EdgesChangedError,
+    InvalidExpressionError,
     InvalidStepsError,
     PipelineDataCycleError,
     PipelineGraphError,
@@ -1149,16 +1151,7 @@ class PipelineGraph:
         See `TaskNode` and `TaskInitNode` for the descriptive node and
         attributes added.
         """
-        bipartite_xgraph = self._make_bipartite_xgraph_internal(init)
-        task_keys = [
-            key
-            for key, bipartite in bipartite_xgraph.nodes(data="bipartite")
-            if bipartite == NodeType.TASK.bipartite
-        ]
-        return self._transform_xgraph_state(
-            networkx.algorithms.bipartite.projected_graph(networkx.DiGraph(bipartite_xgraph), task_keys),
-            skip_edges=True,
-        )
+        return self._transform_xgraph_state(self._make_task_xgraph_internal(init), skip_edges=True)
 
     def make_dataset_type_xgraph(self, init: bool = False) -> networkx.DiGraph:
         """Return a networkx representation of just the dataset types in the
@@ -1197,6 +1190,62 @@ class PipelineGraph:
             skip_edges=True,
         )
 
+    ###########################################################################
+    #
+    # Expression-based Selection Interface.
+    #
+    ###########################################################################
+
+    def select_tasks(self, expression: str) -> set[str]:
+        """Return the tasks that match an expression.
+
+        Parameters
+        ----------
+        expression : `str`
+            String expression to evaluate. See
+            :ref:`pipeline-graph-subset-expressions`.
+
+        Returns
+        -------
+        task_labels : `set` [ `str` ]
+            Set of matching task labels.
+        """
+        task_xgraph = self._make_task_xgraph_internal(init=False)
+        expr_tree = expressions.parse(expression)
+        matching_task_keys = self._select_expression(expr_tree, task_xgraph)
+        return {key.name for key in matching_task_keys}
+
+    def select(self, expression: str) -> PipelineGraph:
+        """Return a new pipeline graph with the tasks that match an expression.
+
+        Parameters
+        ----------
+        expression : `str`
+            String expression to evaluate. See
+            :ref:`pipeline-graph-subset-expressions`.
+
+        Returns
+        -------
+        new_graph : `PipelineGraph`
+            New pipeline graph with just the matching tasks.
+
+        Notes
+        -----
+        All resolved dataset type nodes will be preserved.
+
+        If `has_been_sorted`, the new graph will be sorted as well.
+
+        Task subsets will not be included in the returned graph.
+        """
+        selected_tasks = self.select_tasks(expression)
+        new_pipeline_graph = PipelineGraph(universe=self._universe, data_id=self._raw_data_id)
+        new_pipeline_graph.add_task_nodes(
+            [self.tasks[task_label] for task_label in selected_tasks], parent=self
+        )
+        if self.has_been_sorted:
+            new_pipeline_graph.sort()
+        return new_pipeline_graph
+
     ###########################################################################
     #
     # Serialization Interface.
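
A hedged sketch of the new selection interface, assuming `pipeline_graph` is a `PipelineGraph` and the labels are hypothetical. Bare identifiers match a task label, dataset type name, or task subset; `T:`, `D:`, and `S:` prefixes disambiguate, and the full grammar (including the `<`/`<=`/`>`/`>=` ancestor and descendant searches) is documented under pipeline-graph-subset-expressions:

    # Just the matching task labels.
    labels = pipeline_graph.select_tasks("T:calibrate")

    # A new PipelineGraph with only the matching tasks; resolved dataset
    # type nodes are preserved, task subsets are not.
    smaller = pipeline_graph.select("S:step1")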
@@ -1575,6 +1624,8 @@ class PipelineGraph:
             element in the iterable.
 
         If `has_been_sorted`, all subgraphs will be sorted as well.
+
+        Task subsets will not be included in the returned graphs.
         """
         # Having an overall input in common isn't enough to make subgraphs
         # dependent on each other, so we want to look for connected component
@@ -1595,7 +1646,7 @@ class PipelineGraph:
                 yield self
                 return
             else:
-                component_subgraph = PipelineGraph(universe=self._universe)
+                component_subgraph = PipelineGraph(universe=self._universe, data_id=self._raw_data_id)
                 component_subgraph.add_task_nodes(
                     [self._xgraph.nodes[key]["instance"] for key in component_task_keys], parent=self
                 )
@@ -2053,6 +2104,26 @@ class PipelineGraph:
         """
         return self._xgraph.edge_subgraph([edge.key for edge in self.iter_edges(init)])
 
+    def _make_task_xgraph_internal(self, init: bool) -> networkx.DiGraph:
+        """Make an init-only or runtime-only internal task subgraph.
+
+        See `make_task_xgraph` for parameters and return values.
+
+        Notes
+        -----
+        This method returns a view of the `PipelineGraph` object's internal
+        backing graph, and hence should only be called in methods that copy the
+        result either explicitly or by running a copying algorithm before
+        returning it to the user.
+        """
+        bipartite_xgraph = self._make_bipartite_xgraph_internal(init=init)
+        task_keys = [
+            key
+            for key, bipartite in bipartite_xgraph.nodes(data="bipartite")
+            if bipartite == NodeType.TASK.bipartite
+        ]
+        return networkx.algorithms.bipartite.projected_graph(networkx.DiGraph(bipartite_xgraph), task_keys)
+
     def _transform_xgraph_state(self, xgraph: _G, skip_edges: bool) -> _G:
         """Transform networkx graph attributes in-place from the internal
         "instance" attributes to the documented exported attributes.
@@ -2342,6 +2413,284 @@ class PipelineGraph:
                 f"{step_label!r}."
             )
 
+    def _select_expression(self, expr_tree: expressions.Node, task_xgraph: networkx.DiGraph) -> set[NodeKey]:
+        """Select tasks from a pipeline based on a string expression.
+
+        This is the primary implementation method for `select` and
+        `select_tasks`.
+
+        Parameters
+        ----------
+        expr_tree : `expressions.Node`
+            Expression [sub]tree to process (recursively).
+        task_xgraph : `networkx.DiGraph`
+            NetworkX graph of all tasks (runtime nodes only) in the pipeline.
+
+        Returns
+        -------
+        selected : `set` [ `NodeKey` ]
+            Set of `NodeKey` objects for matching tasks (only; no dataset type
+            or task-init nodes).
+        """
+        match expr_tree:
+            case expressions.IdentifierNode(qualifier=qualifier, label=label):
+                match self._select_identifier(qualifier, label):
+                    case NodeKey(node_type=NodeType.TASK) as task_key:
+                        return {task_key}
+                    case NodeKey(node_type=NodeType.DATASET_TYPE) as dataset_type_key:
+                        # Since a dataset type can have only one producer, this
+                        # yields 0- (for overall inputs) or 1-element sets.
+                        for producer_key, _ in self._xgraph.in_edges(dataset_type_key):
+                            if producer_key.node_type is NodeType.TASK_INIT:
+                                raise InvalidExpressionError(
+                                    f"Init-output dataset type {label!r} cannot be used directly in an "
+                                    "expression."
+                                )
+                            return {producer_key}
+                        return set()
+                    case TaskSubset() as task_subset:
+                        return {NodeKey(NodeType.TASK, label) for label in task_subset}
+                    case _:  # pragma: no cover
+                        raise AssertionError("Identifier type inconsistent with grammar.")
+            case expressions.DirectionNode(operator=operator, start=start):
+                match self._select_identifier(start.qualifier, start.label):
+                    case NodeKey(node_type=NodeType.TASK) as task_key:
+                        if operator.startswith("<"):
+                            return self._select_task_ancestors(
+                                task_key, task_xgraph, inclusive=operator.endswith("=")
+                            )
+                        else:
+                            assert operator.startswith(">"), "Guaranteed by grammar."
+                            return self._select_task_descendants(
+                                task_key, task_xgraph, inclusive=operator.endswith("=")
+                            )
+                    case NodeKey(node_type=NodeType.DATASET_TYPE) as dataset_type_key:
+                        if operator.startswith("<"):
+                            return self._select_dataset_type_ancestors(
+                                dataset_type_key, task_xgraph, inclusive=operator.endswith("=")
+                            )
+                        else:
+                            assert operator.startswith(">"), "Guaranteed by grammar."
+                            return self._select_dataset_type_descendants(
+                                dataset_type_key, task_xgraph, inclusive=operator.endswith("=")
+                            )
+                    case TaskSubset():
+                        raise InvalidExpressionError(
+                            f"Task subset identifier {start!r} cannot be used as the start of an "
+                            "ancestor/descendant search."
+                        )
+                    case _:  # pragma: no cover
+                        raise AssertionError("Unexpected parsed identifier result type.")
+            case expressions.NotNode(operand=operand):
+                operand_result = self._select_expression(operand, task_xgraph)
+                return set(task_xgraph.nodes.keys() - operand_result)
+            case expressions.UnionNode(lhs=lhs, rhs=rhs):
+                lhs_result = self._select_expression(lhs, task_xgraph)
+                rhs_result = self._select_expression(rhs, task_xgraph)
+                return lhs_result.union(rhs_result)
+            case expressions.IntersectionNode(lhs=lhs, rhs=rhs):
+                lhs_result = self._select_expression(lhs, task_xgraph)
+                rhs_result = self._select_expression(rhs, task_xgraph)
+                return lhs_result.intersection(rhs_result)
+            case _:  # pragma: no cover
+                raise AssertionError("Expression parse node inconsistent with grammar.")
+
+    def _select_task_ancestors(
+        self, start: NodeKey, task_xgraph: networkx.DiGraph, inclusive: bool
+    ) -> set[NodeKey]:
+        """Return all task-node ancestors of the given task node, as defined by
+        the `select` expression language.
+
+        Parameters
+        ----------
+        start : `NodeKey`
+            A runtime task node key.
+        task_xgraph : `networkx.DiGraph`
+            NetworkX graph of all tasks (runtime nodes only) in the pipeline.
+        inclusive : `bool`
+            Whether to include the ``start`` node in the results.
+
+        Returns
+        -------
+        selected : `set` [ `NodeKey` ]
+            Set of `NodeKey` objects for matching tasks (only; no dataset type
+            or task-init nodes).
+        """
+        result = set(networkx.dag.ancestors(task_xgraph, start))
+        if inclusive:
+            result.add(start)
+        return result
+
+    def _select_task_descendants(
+        self, start: NodeKey, task_xgraph: networkx.DiGraph, inclusive: bool
+    ) -> set[NodeKey]:
+        """Return all task-node descendants of the given task node, as defined
+        by the `select` expression language.
+
+        Parameters
+        ----------
+        start : `NodeKey`
+            A runtime task node key.
+        task_xgraph : `networkx.DiGraph`
+            NetworkX graph of all tasks (runtime nodes only) in the pipeline.
+        inclusive : `bool`
+            Whether to include the ``start`` node in the results.
+
+        Returns
+        -------
+        selected : `set` [ `NodeKey` ]
+            Set of `NodeKey` objects for matching tasks (only; no dataset type
+            or task-init nodes).
+        """
+        result = set(networkx.dag.descendants(task_xgraph, start))
+        if inclusive:
+            result.add(start)
+        return result
+
+    def _select_dataset_type_ancestors(
+        self, start: NodeKey, task_xgraph: networkx.DiGraph, inclusive: bool
+    ) -> set[NodeKey]:
+        """Return all task-node ancestors of the given dataset type node, as
+        defined by the `select` expression language.
+
+        Parameters
+        ----------
+        start : `NodeKey`
+            A dataset type node key. May not be an init-output.
+        task_xgraph : `networkx.DiGraph`
+            NetworkX graph of all tasks (runtime nodes only) in the pipeline.
+        inclusive : `bool`
+            Whether to include the producer of the ``start`` node in the
+            results.
+
+        Returns
+        -------
+        selected : `set` [ `NodeKey` ]
+            Set of `NodeKey` objects for matching tasks (only; no dataset type
+            or task-init nodes).
+        """
+        result: set[NodeKey] = set()
+        for producer_key, _ in self._xgraph.in_edges(start):
+            if producer_key.node_type is NodeType.TASK_INIT:
+                raise InvalidExpressionError(
+                    f"Init-output dataset type {start.name!r} cannot be used as the "
+                    "starting point for an ancestor ('<' or '<=') search."
+                )
+            result.update(networkx.dag.ancestors(task_xgraph, producer_key))
+            if inclusive:
+                result.add(producer_key)
+        return result
+
+    def _select_dataset_type_descendants(
+        self, start: NodeKey, task_xgraph: networkx.DiGraph, inclusive: bool
+    ) -> set[NodeKey]:
+        """Return all task-node descendants of the given dataset type node, as
+        defined by the `select` expression language.
+
+        Parameters
+        ----------
+        start : `NodeKey`
+            A dataset type node key. May not be an init-output if
+            ``inclusive=True``.
+        task_xgraph : `networkx.DiGraph`
+            NetworkX graph of all tasks (runtime nodes only) in the pipeline.
+        inclusive : `bool`
+            Whether to include the producer of the ``start`` node in the
+            results.
+
+        Returns
+        -------
+        selected : `set` [ `NodeKey` ]
+            Set of `NodeKey` objects for matching tasks (only; no dataset type
+            or task-init nodes).
+        """
+        result: set[NodeKey] = set()
+        if inclusive:
+            for producer_key, _ in self._xgraph.in_edges(start):
+                if producer_key.node_type is NodeType.TASK_INIT:
+                    raise InvalidExpressionError(
+                        f"Init-output dataset type {start.name!r} cannot be used as the "
+                        "starting point for an inclusive descendant ('>=') search."
+                    )
+                result.add(producer_key)
+        # We also include tasks that consume a dataset type as an init-input,
+        # since that can affect their runtime behavior.
+        consumer_keys: set[NodeKey] = {
+            (
+                consumer_key
+                if consumer_key.node_type is NodeType.TASK
+                else NodeKey(NodeType.TASK, consumer_key.name)
+            )
+            for _, consumer_key in self._xgraph.out_edges(start)
+        }
+        for consumer_key in consumer_keys:
+            result.add(consumer_key)
+            result.update(networkx.dag.descendants(task_xgraph, consumer_key))
+        return result
+
+    def _select_identifier(
+        self, qualifier: Literal["T", "D", "S"] | None, label: str
+    ) -> NodeKey | TaskSubset:
+        """Return the node key or task subset that corresponds to a `select`
+        expression identifier.
+
+        Parameters
+        ----------
+        qualifier : `str` or `None`
+            Task, dataset type, or task subset qualifier included in the
+            identifier, if any.
+        label : `str`
+            Task label, dataset type name, or task subset label.
+
+        Returns
+        -------
+        key_or_subset : `NodeKey` or `TaskSubset`
+            A `NodeKey` for a task or dataset type, or a `TaskSubset` for a
+            task subset.
+        """
+        match qualifier:
+            case None:
+                task_key = NodeKey(NodeType.TASK, label)
+                dataset_type_key = NodeKey(NodeType.DATASET_TYPE, label)
+                if task_key in self._xgraph.nodes:
+                    if dataset_type_key in self._xgraph.nodes:
+                        raise InvalidExpressionError(
+                            f"{label!r} is both a task label and a dataset type name; "
+                            "prefix with 'T:' or 'D:' (respectively) to specify which."
+                        )
+                    assert label not in self._task_subsets, "Should be prohibited at construction."
+                    return task_key
+                elif dataset_type_key in self._xgraph.nodes:
+                    if label in self._task_subsets:
+                        raise InvalidExpressionError(
+                            f"{label!r} is both a subset label and a dataset type name; "
+                            "prefix with 'S:' or 'D:' (respectively) to specify which."
+                        )
+                    return dataset_type_key
+                elif label in self._task_subsets:
+                    return self._task_subsets[label]
+                else:
+                    raise InvalidExpressionError(
+                        f"{label!r} is not a task label, task subset label, or dataset type name."
+                    )
+            case "T":
+                task_key = NodeKey(NodeType.TASK, label)
+                if task_key not in self._xgraph.nodes:
+                    raise InvalidExpressionError(f"Task with label {label!r} does not exist.")
+                return task_key
+            case "D":
+                dataset_type_key = NodeKey(NodeType.DATASET_TYPE, label)
+                if dataset_type_key not in self._xgraph.nodes:
+                    raise InvalidExpressionError(f"Dataset type with name {label!r} does not exist.")
+                return dataset_type_key
+            case "S":
+                try:
+                    return self._task_subsets[label]
+                except KeyError:
+                    raise InvalidExpressionError(f"Task subset with label {label!r} does not exist.")
+            case _:  # pragma: no cover
+                raise AssertionError("Unexpected identifier qualifier in expression.")
+
     _xgraph: networkx.MultiDiGraph
     _sorted_keys: Sequence[NodeKey] | None
     _task_subsets: dict[str, TaskSubset]
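
The ancestor/descendant helpers above are thin wrappers around networkx's DAG reachability functions, with ``inclusive`` deciding whether the start node itself is kept. A self-contained illustration with hypothetical task labels:

    import networkx

    g = networkx.DiGraph([("isr", "calibrate"), ("calibrate", "coadd")])

    # Exclusive ancestor search ('<'): everything upstream of the start.
    assert networkx.ancestors(g, "coadd") == {"isr", "calibrate"}

    # Inclusive ('<='): add the start node back in.
    assert networkx.ancestors(g, "coadd") | {"coadd"} == {"isr", "calibrate", "coadd"}

    # Inclusive descendant search ('>='): the start plus everything downstream.
    assert networkx.descendants(g, "isr") | {"isr"} == {"isr", "calibrate", "coadd"}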