lsst-pipe-base 29.2025.3900-py3-none-any.whl → 29.2025.4100-py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in the public registry. It is provided for informational purposes only.
- lsst/pipe/base/_task_metadata.py +15 -0
- lsst/pipe/base/dot_tools.py +14 -152
- lsst/pipe/base/exec_fixup_data_id.py +17 -44
- lsst/pipe/base/execution_graph_fixup.py +49 -18
- lsst/pipe/base/graph/_versionDeserializers.py +6 -5
- lsst/pipe/base/graph/graph.py +30 -10
- lsst/pipe/base/graph/graphSummary.py +30 -0
- lsst/pipe/base/graph_walker.py +119 -0
- lsst/pipe/base/log_capture.py +5 -2
- lsst/pipe/base/mermaid_tools.py +11 -64
- lsst/pipe/base/mp_graph_executor.py +298 -236
- lsst/pipe/base/pipeline_graph/io.py +1 -1
- lsst/pipe/base/quantum_graph/__init__.py +32 -0
- lsst/pipe/base/quantum_graph/_common.py +632 -0
- lsst/pipe/base/quantum_graph/_multiblock.py +808 -0
- lsst/pipe/base/quantum_graph/_predicted.py +1950 -0
- lsst/pipe/base/quantum_graph/visualization.py +302 -0
- lsst/pipe/base/quantum_graph_builder.py +292 -34
- lsst/pipe/base/quantum_graph_executor.py +2 -1
- lsst/pipe/base/quantum_provenance_graph.py +16 -7
- lsst/pipe/base/quantum_reports.py +45 -0
- lsst/pipe/base/separable_pipeline_executor.py +126 -15
- lsst/pipe/base/simple_pipeline_executor.py +44 -43
- lsst/pipe/base/single_quantum_executor.py +1 -40
- lsst/pipe/base/tests/mocks/__init__.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +16 -1
- lsst/pipe/base/tests/mocks/{_in_memory_repo.py → _repo.py} +324 -45
- lsst/pipe/base/tests/mocks/_storage_class.py +51 -0
- lsst/pipe/base/tests/simpleQGraph.py +11 -5
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/METADATA +2 -1
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/RECORD +40 -34
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/zip-safe +0 -0
lsst/pipe/base/separable_pipeline_executor.py

```diff
@@ -47,6 +47,7 @@ from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuild
 from .graph import QuantumGraph
 from .mp_graph_executor import MPGraphExecutor
 from .pipeline import Pipeline
+from .quantum_graph import PredictedQuantumGraph
 from .quantum_graph_builder import QuantumGraphBuilder
 from .quantum_graph_executor import QuantumGraphExecutor
 from .single_quantum_executor import SingleQuantumExecutor
@@ -120,7 +121,7 @@ class SeparablePipelineExecutor:

     def pre_execute_qgraph(
         self,
-        graph: QuantumGraph,
+        graph: QuantumGraph | PredictedQuantumGraph,
         register_dataset_types: bool = False,
         save_init_outputs: bool = True,
         save_versions: bool = True,
@@ -133,7 +134,7 @@ class SeparablePipelineExecutor:

         Parameters
         ----------
-        graph : `.QuantumGraph`
+        graph : `.QuantumGraph` or `.quantum_graph.PredictedQuantumGraph`
             The quantum graph defining the pipeline and datasets to
             be initialized.
         register_dataset_types : `bool`, optional
@@ -169,6 +170,55 @@ class SeparablePipelineExecutor:
         """
         return Pipeline.from_uri(pipeline_uri)

+    def make_quantum_graph_builder(
+        self,
+        pipeline: Pipeline,
+        where: str = "",
+        *,
+        builder_class: type[QuantumGraphBuilder] = AllDimensionsQuantumGraphBuilder,
+        **kwargs: Any,
+    ) -> QuantumGraphBuilder:
+        """Initialize a quantum graph builder from a pipeline and input
+        datasets.
+
+        Parameters
+        ----------
+        pipeline : `.Pipeline`
+            The pipeline for which to generate a quantum graph.
+        where : `str`, optional
+            A data ID query that constrains the quanta generated. Must not be
+            provided if a custom ``builder_class`` is given and that class does
+            not accept ``where`` as a construction argument.
+        builder_class : `type` [ \
+                `.quantum_graph_builder.QuantumGraphBuilder` ], optional
+            Quantum graph builder implementation. Ignored if ``builder`` is
+            provided.
+        **kwargs
+            Additional keyword arguments are forwarded to ``builder_class``
+            when a quantum graph builder instance is constructed. All
+            arguments accepted by the
+            `~.quantum_graph_builder.QuantumGraphBuilder` base
+            class are provided automatically (from explicit arguments to this
+            method and executor attributes) and do not need to be included
+            as keyword arguments.
+
+        Returns
+        -------
+        builder : `.quantum_graph_builder.QuantumGraphBuilder`
+            A quantum graph builder.
+        """
+        if where:
+            # Only pass 'where' if it's actually provided, since some
+            # QuantumGraphBuilder subclasses may not accept it.
+            kwargs["where"] = where
+        return builder_class(
+            pipeline.to_graph(),
+            self._butler,
+            skip_existing_in=self._skip_existing_in,
+            clobber=self._clobber_output,
+            **kwargs,
+        )
+
     def make_quantum_graph(
         self,
         pipeline: Pipeline,
@@ -180,6 +230,10 @@ class SeparablePipelineExecutor:
     ) -> QuantumGraph:
         """Build a quantum graph from a pipeline and input datasets.

+        This returns an instance of the old `.QuantumGraph` class. Use
+        `build_quantum_graph` to construct a
+        `.quantum_graph.PredictedQuantumGraph`.
+
         Parameters
         ----------
         pipeline : `.Pipeline`
@@ -225,17 +279,7 @@ class SeparablePipelineExecutor:
             "user": getpass.getuser(),
             "time": str(datetime.datetime.now()),
         }
-
-        # Only pass 'where' if it's actually provided, since some
-        # QuantumGraphBuilder subclasses may not accept it.
-        kwargs["where"] = where
-        qg_builder = builder_class(
-            pipeline.to_graph(),
-            self._butler,
-            skip_existing_in=self._skip_existing_in,
-            clobber=self._clobber_output,
-            **kwargs,
-        )
+        qg_builder = self.make_quantum_graph_builder(pipeline, where, builder_class=builder_class, **kwargs)
         graph = qg_builder.build(metadata=metadata, attach_datastore_records=attach_datastore_records)
         _LOG.info(
             "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
@@ -245,9 +289,76 @@ class SeparablePipelineExecutor:
         )
         return graph

+    def build_quantum_graph(
+        self,
+        pipeline: Pipeline,
+        where: str = "",
+        *,
+        builder_class: type[QuantumGraphBuilder] = AllDimensionsQuantumGraphBuilder,
+        attach_datastore_records: bool = False,
+        **kwargs: Any,
+    ) -> PredictedQuantumGraph:
+        """Build a quantum graph from a pipeline and input datasets.
+
+        This returns an instance of the new
+        `.quantum_graph.PredictedQuantumGraph` class. Use `make_quantum_graph`
+        to construct a `.QuantumGraph`.
+
+        Parameters
+        ----------
+        pipeline : `.Pipeline`
+            The pipeline for which to generate a quantum graph.
+        where : `str`, optional
+            A data ID query that constrains the quanta generated. Must not be
+            provided if a custom ``builder_class`` is given and that class does
+            not accept ``where`` as a construction argument.
+        builder_class : `type` [ \
+                `.quantum_graph_builder.QuantumGraphBuilder` ], optional
+            Quantum graph builder implementation. Ignored if ``builder`` is
+            provided.
+        attach_datastore_records : `bool`, optional
+            Whether to attach datastore records. These are currently used only
+            by `lsst.daf.butler.QuantumBackedButler`, which is not used by
+            `SeparablePipelineExecutor` for execution.
+        **kwargs
+            Additional keyword arguments are forwarded to ``builder_class``
+            when a quantum graph builder instance is constructed. All
+            arguments accepted by the
+            `~.quantum_graph_builder.QuantumGraphBuilder` base
+            class are provided automatically (from explicit arguments to this
+            method and executor attributes) and do not need to be included
+            as keyword arguments.
+
+        Returns
+        -------
+        graph : `.QuantumGraph`
+            The quantum graph for ``.Pipeline`` as run on the datasets
+            identified by ``where``.
+
+        Notes
+        -----
+        This method does no special handling of empty quantum graphs. If
+        needed, clients can use `len` to test if the returned graph is empty.
+        """
+        metadata = {
+            "skip_existing_in": self._skip_existing_in,
+            "skip_existing": bool(self._skip_existing_in),
+            "data_query": where,
+        }
+        qg_builder = self.make_quantum_graph_builder(pipeline, where, builder_class=builder_class, **kwargs)
+        graph = qg_builder.finish(
+            metadata=metadata, attach_datastore_records=attach_datastore_records
+        ).assemble()
+        _LOG.info(
+            "PredictedQuantumGraph contains %d quanta for %d tasks.",
+            len(graph),
+            len(graph.quanta_by_task),
+        )
+        return graph
+
     def run_pipeline(
         self,
-        graph: QuantumGraph,
+        graph: QuantumGraph | PredictedQuantumGraph,
         fail_fast: bool = False,
         graph_executor: QuantumGraphExecutor | None = None,
         num_proc: int = 1,
@@ -259,7 +370,7 @@ class SeparablePipelineExecutor:

         Parameters
         ----------
-        graph : `.QuantumGraph`
+        graph : `.QuantumGraph` or `.quantum_graph.PredictedQuantumGraph`
             The pipeline and datasets to execute.
         fail_fast : `bool`, optional
             If `True`, abort all execution if any task fails when
```
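Taken together, these hunks split graph construction into two steps: `make_quantum_graph_builder` creates and configures a `QuantumGraphBuilder`, and the new `build_quantum_graph` runs it and assembles a `PredictedQuantumGraph`, while `pre_execute_qgraph` and `run_pipeline` now accept either graph type. The sketch below is illustrative only: the repository path, output run, pipeline file, and data query are hypothetical, and the executor is assumed to be constructed as in earlier releases (butler as the first argument).

```python
from lsst.daf.butler import Butler
from lsst.pipe.base.separable_pipeline_executor import SeparablePipelineExecutor

# Hypothetical repository, output run, and pipeline file.
butler = Butler.from_config("/repo", writeable=True, run="u/example/demo")
executor = SeparablePipelineExecutor(butler)
pipeline = executor.make_pipeline("my_pipeline.yaml")

# New in this release: build a PredictedQuantumGraph directly.
predicted = executor.build_quantum_graph(pipeline, where="instrument = 'HSC'")

# Old-style QuantumGraph and new PredictedQuantumGraph are both accepted here.
executor.pre_execute_qgraph(predicted, register_dataset_types=True)
executor.run_pipeline(predicted)
```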
lsst/pipe/base/simple_pipeline_executor.py

```diff
@@ -29,20 +29,15 @@ from __future__ import annotations

 __all__ = ("SimplePipelineExecutor",)

-import datetime
-import getpass
-import itertools
 import os
 from collections.abc import Iterable, Iterator, Mapping
-from typing import Any
+from typing import Any

 from lsst.daf.butler import (
     Butler,
     CollectionType,
     DataCoordinate,
     DatasetRef,
-    DimensionDataExtractor,
-    DimensionGroup,
     Quantum,
 )
 from lsst.pex.config import Config
@@ -54,6 +49,7 @@ from .graph import QuantumGraph
 from .pipeline import Pipeline
 from .pipeline_graph import PipelineGraph
 from .pipelineTask import PipelineTask
+from .quantum_graph import PredictedQuantumGraph
 from .single_quantum_executor import SingleQuantumExecutor
 from .taskFactory import TaskFactory

@@ -95,12 +91,19 @@ class SimplePipelineExecutor:

     def __init__(
         self,
-        quantum_graph: QuantumGraph,
+        quantum_graph: QuantumGraph | PredictedQuantumGraph,
         butler: Butler,
         resources: ExecutionResources | None = None,
         raise_on_partial_outputs: bool = True,
     ):
-
+        from .graph import QuantumGraph
+
+        self._quantum_graph: QuantumGraph | None = None
+        if isinstance(quantum_graph, QuantumGraph):
+            self._quantum_graph = quantum_graph
+            self.predicted = PredictedQuantumGraph.from_old_quantum_graph(self._quantum_graph)
+        else:
+            self.predicted = quantum_graph
         self.butler = butler
         self.resources = resources
         self.raise_on_partial_outputs = raise_on_partial_outputs
@@ -442,25 +445,29 @@ class SimplePipelineExecutor:
             pipeline_graph, butler, where=where, bind=bind, output_run=output_run
         )
         metadata = {
-            "input": list(butler.collections.defaults),
-            "output": output,
-            "output_run": output_run,
             "skip_existing_in": [],
             "skip_existing": False,
             "data_query": where,
-            "user": getpass.getuser(),
-            "time": str(datetime.datetime.now()),
         }
-
-
-
+        predicted = quantum_graph_builder.finish(
+            output=output,
+            metadata=metadata,
+            attach_datastore_records=attach_datastore_records,
+        ).assemble()
         return cls(
-
+            predicted,
             butler=butler,
             resources=resources,
             raise_on_partial_outputs=raise_on_partial_outputs,
         )

+    @property
+    def quantum_graph(self) -> QuantumGraph:
+        """The quantum graph run by this executor."""
+        if self._quantum_graph is None:
+            self._quantum_graph = self.predicted.to_old_quantum_graph()
+        return self._quantum_graph
+
     def use_local_butler(
         self, root: str, register_dataset_types: bool = True, transfer_dimensions: bool = True
     ) -> Butler:
@@ -503,9 +510,9 @@ class SimplePipelineExecutor:
         Butler.makeRepo(root)
         out_butler = Butler.from_config(root, writeable=True)

-        output_run = self.
+        output_run = self.predicted.header.output_run
         out_butler.collections.register(output_run, CollectionType.RUN)
-        output = self.
+        output = self.predicted.header.output
         inputs: str | None = None
         if output is not None:
             inputs = f"{output}/inputs"
@@ -525,12 +532,12 @@ class SimplePipelineExecutor:
         # into a TAGGED collection.
         refs: set[DatasetRef] = set()
         to_tag_by_type: dict[str, dict[DataCoordinate, DatasetRef | None]] = {}
-        pipeline_graph = self.
+        pipeline_graph = self.predicted.pipeline_graph
         for name, dataset_type_node in pipeline_graph.iter_overall_inputs():
             assert dataset_type_node is not None, "PipelineGraph should be resolved."
             to_tag_for_type = to_tag_by_type.setdefault(name, {})
             for task_node in pipeline_graph.consumers_of(name):
-                for quantum in self.
+                for quantum in self.predicted.build_execution_quanta(task_label=task_node.label).values():
                     for ref in quantum.inputs[name]:
                         ref = dataset_type_node.generalize_ref(ref)
                         refs.add(ref)
@@ -563,7 +570,7 @@ class SimplePipelineExecutor:
         return self.butler

     def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
-        """Run all the quanta in the
+        """Run all the quanta in the quantum graph in topological order.

         Use this method to run all quanta in the graph. Use
         `as_generator` to get a generator to run the quanta one at
@@ -594,7 +601,7 @@ class SimplePipelineExecutor:
     def as_generator(
         self, register_dataset_types: bool = False, save_versions: bool = True
     ) -> Iterator[Quantum]:
-        """Yield quanta in the
+        """Yield quanta in the quantum graph in topological order.

         These quanta will be run as the returned generator is iterated
         over. Use this method to run the quanta one at a time.
@@ -623,11 +630,11 @@ class SimplePipelineExecutor:
         guarantees are made about the order in which quanta are processed.
         """
         if register_dataset_types:
-            self.
-            self.
-            self.
+            self.predicted.pipeline_graph.register_dataset_types(self.butler)
+            self.predicted.write_configs(self.butler, compare_existing=False)
+            self.predicted.write_init_outputs(self.butler, skip_existing=False)
         if save_versions:
-            self.
+            self.predicted.write_packages(self.butler, compare_existing=False)
         task_factory = TaskFactory()
         single_quantum_executor = SingleQuantumExecutor(
             butler=self.butler,
@@ -635,14 +642,20 @@ class SimplePipelineExecutor:
             resources=self.resources,
             raise_on_partial_outputs=self.raise_on_partial_outputs,
         )
+        self.predicted.build_execution_quanta()
+        nodes_map = self.predicted.quantum_only_xgraph.nodes
         # Important that this returns a generator expression rather than being
         # a generator itself; that is what makes the init stuff above happen
         # immediately instead of when the first quanta is executed, which might
         # be useful for callers who want to check the state of the repo in
         # between.
         return (
-            single_quantum_executor.execute(
-
+            single_quantum_executor.execute(
+                nodes_map[quantum_id]["pipeline_node"],
+                nodes_map[quantum_id]["quantum"],
+                quantum_id,
+            )[0]
+            for quantum_id in self.predicted
         )

     def _transfer_qg_dimension_records(self, out_butler: Butler) -> None:
@@ -653,20 +666,8 @@ class SimplePipelineExecutor:
         out_butler : `lsst.daf.butler.Butler`
             Butler to transfer records to.
         """
-
-
-            *pipeline_graph.group_by_dimensions(prerequisites=True).keys(),
-            universe=self.butler.dimensions,
-        )
-        dimension_data_extractor = DimensionDataExtractor.from_dimension_group(all_dimensions)
-        for task_node in pipeline_graph.tasks.values():
-            task_quanta = self.quantum_graph.get_task_quanta(task_node.label)
-            for quantum in task_quanta.values():
-                dimension_data_extractor.update([cast(DataCoordinate, quantum.dataId)])
-                for refs in itertools.chain(quantum.inputs.values(), quantum.outputs.values()):
-                    dimension_data_extractor.update(ref.dataId for ref in refs)
-        for element_name in all_dimensions.elements:
-            record_set = dimension_data_extractor.records.get(element_name)
+        assert self.predicted.dimension_data is not None, "Dimension data must be present for execution."
+        for record_set in self.predicted.dimension_data.records.values():
             if record_set and record_set.element.has_own_table:
                 out_butler.registry.insertDimensionData(
                     record_set.element,
```
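`SimplePipelineExecutor` now normalizes whatever graph it is given into a `PredictedQuantumGraph`, stored as `self.predicted` (old-style graphs are converted with `PredictedQuantumGraph.from_old_quantum_graph`), and the legacy `QuantumGraph` view is exposed through the new lazy `quantum_graph` property. A short sketch of how this might be used; the repository path and graph file are hypothetical, and the butler is assumed to already have the graph's collections configured.

```python
from lsst.daf.butler import Butler
from lsst.pipe.base import QuantumGraph
from lsst.pipe.base.simple_pipeline_executor import SimplePipelineExecutor

butler = Butler.from_config("/repo", writeable=True)  # hypothetical repository

# Old-format graphs are still accepted; __init__ converts them internally
# with PredictedQuantumGraph.from_old_quantum_graph().
old_graph = QuantumGraph.loadUri("pipeline.qgraph")
executor = SimplePipelineExecutor(old_graph, butler)
executor.run(register_dataset_types=True)

# When the executor was built from a PredictedQuantumGraph instead, the legacy
# view is only constructed the first time this property is read.
legacy_view = executor.quantum_graph
```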
lsst/pipe/base/single_quantum_executor.py

```diff
@@ -33,7 +33,7 @@ import uuid
 from collections import defaultdict
 from collections.abc import Callable, Mapping
 from itertools import chain
-from typing import Any
+from typing import Any

 from lsst.daf.butler import (
     Butler,
@@ -46,7 +46,6 @@ from lsst.daf.butler import (
 )
 from lsst.utils.timer import logInfo

-from ._instrument import Instrument
 from ._quantumContext import ExecutionResources, QuantumContext
 from ._status import AnnotatedPartialOutputsError, InvalidQuantumError, NoWorkFound, QuantumSuccessCaveats
 from .connections import AdjustQuantumHelper
@@ -238,9 +237,6 @@ class SingleQuantumExecutor(QuantumExecutor):
         except ImportError:
             _LOG.warning("No 'debug' module found.")

-        # initialize global state
-        self._init_globals(quantum)
-
         # Ensure that we are executing a frozen config
         task_node.config.freeze()
         logInfo(None, "init", metadata=quantumMetadata)  # type: ignore[arg-type]
@@ -569,41 +565,6 @@ class SingleQuantumExecutor(QuantumExecutor):
             ) from exc
         limited_butler.put(metadata, ref)

-    def _init_globals(self, quantum: Quantum) -> None:
-        """Initialize global state needed for task execution.
-
-        Parameters
-        ----------
-        quantum : `~lsst.daf.butler.Quantum`
-            Single Quantum instance.
-
-        Notes
-        -----
-        There is an issue with initializing filters singleton which is done
-        by instrument, to avoid requiring tasks to do it in runQuantum()
-        we do it here when any dataId has an instrument dimension. Also for
-        now we only allow single instrument, verify that all instrument
-        names in all dataIds are identical.
-
-        This will need revision when filter singleton disappears.
-        """
-        # can only work for full butler
-        if self._butler is None:
-            return
-        oneInstrument = None
-        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
-            for datasetRef in datasetRefs:
-                dataId = datasetRef.dataId
-                instrument = cast(str, dataId.get("instrument"))
-                if instrument is not None:
-                    if oneInstrument is not None:
-                        assert (  # type: ignore
-                            instrument == oneInstrument
-                        ), "Currently require that only one instrument is used per graph"
-                    else:
-                        oneInstrument = instrument
-                        Instrument.fromName(instrument, self._butler.registry)
-
     def _should_assume_exists(self, quantum: Quantum, ref: DatasetRef) -> bool | None:
         """Report whether the given dataset can be assumed to exist because
         some previous check reported that it did.
```
lsst/pipe/base/tests/mocks/_pipeline_task.py

```diff
@@ -56,6 +56,7 @@ from lsst.utils.iteration import ensure_iterable

 from ... import connectionTypes as cT
 from ..._status import AlgorithmError, AnnotatedPartialOutputsError
+from ...automatic_connection_constants import METADATA_OUTPUT_CONNECTION_NAME, METADATA_OUTPUT_STORAGE_CLASS
 from ...config import PipelineTaskConfig
 from ...connections import InputQuantizedConnection, OutputQuantizedConnection, PipelineTaskConnections
 from ...pipeline_graph import PipelineGraph
@@ -202,6 +203,13 @@ class BaseTestPipelineTaskConfig(PipelineTaskConfig, pipelineConnections=BaseTes
         doc="Time to sleep (seconds) before mock execution reading inputs or failing.",
     )

+    int_value = Field[int](
+        "Arbitrary integer value to write into mock output datasets", dtype=int, optional=True, default=None
+    )
+    str_value = Field[str](
+        "Arbitrary string value to write into mock output datasets", dtype=str, optional=True, default=None
+    )
+
     def data_id_match(self) -> DataIdMatch | None:
         if not self.fail_condition:
             return None
@@ -294,6 +302,8 @@ class BaseTestPipelineTask(PipelineTask):
             run=None,  # task also has no way to get this
             quantum=mock_dataset_quantum,
             output_connection_name=connection_name,
+            int_value=self.config.int_value,
+            str_value=self.config.str_value,
         )
         setattr(self, connection_name, output_dataset)

@@ -373,6 +383,8 @@ class BaseTestPipelineTask(PipelineTask):
                 run=ref.run,
                 quantum=mock_dataset_quantum,
                 output_connection_name=name,
+                int_value=self.config.int_value,
+                str_value=self.config.str_value,
             )
             butlerQC.put(output, ref)

@@ -476,7 +488,10 @@ class MockPipelineTaskConnections(BaseTestPipelineTaskConnections, dimensions=()
                 raise ValueError(
                     f"Unmocked dataset type {connection.name!r} cannot be used as an init-output."
                 )
-            elif
+            elif (
+                connection.name.endswith(METADATA_OUTPUT_CONNECTION_NAME)
+                and connection.storageClass == METADATA_OUTPUT_STORAGE_CLASS
+            ):
                 # Task metadata does not use a mock storage class, because it's
                 # written by the system, but it does end up with the _mock_*
                 # prefix because the task label does.
```