lsst-pipe-base 29.2025.4800__py3-none-any.whl → 30.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. lsst/pipe/base/_instrument.py +6 -5
  2. lsst/pipe/base/caching_limited_butler.py +3 -0
  3. lsst/pipe/base/log_capture.py +39 -79
  4. lsst/pipe/base/log_on_close.py +79 -0
  5. lsst/pipe/base/mp_graph_executor.py +51 -15
  6. lsst/pipe/base/quantum_graph/_common.py +4 -3
  7. lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
  8. lsst/pipe/base/quantum_graph/_predicted.py +106 -12
  9. lsst/pipe/base/quantum_graph/_provenance.py +657 -6
  10. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +18 -50
  11. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +14 -3
  12. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -232
  13. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -113
  14. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +10 -5
  15. lsst/pipe/base/quantum_graph/aggregator/_writer.py +31 -348
  16. lsst/pipe/base/quantum_graph/formatter.py +101 -0
  17. lsst/pipe/base/quantum_graph_builder.py +12 -1
  18. lsst/pipe/base/quantum_graph_executor.py +116 -13
  19. lsst/pipe/base/quantum_graph_skeleton.py +1 -7
  20. lsst/pipe/base/script/register_instrument.py +4 -4
  21. lsst/pipe/base/script/retrieve_artifacts_for_quanta.py +5 -6
  22. lsst/pipe/base/script/transfer_from_graph.py +42 -42
  23. lsst/pipe/base/script/zip_from_graph.py +7 -8
  24. lsst/pipe/base/separable_pipeline_executor.py +18 -2
  25. lsst/pipe/base/simple_pipeline_executor.py +4 -3
  26. lsst/pipe/base/single_quantum_executor.py +70 -34
  27. lsst/pipe/base/tests/mocks/_repo.py +44 -16
  28. lsst/pipe/base/tests/simpleQGraph.py +43 -35
  29. lsst/pipe/base/version.py +1 -1
  30. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/METADATA +1 -1
  31. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/RECORD +39 -37
  32. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/WHEEL +1 -1
  33. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/entry_points.txt +0 -0
  34. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/COPYRIGHT +0 -0
  35. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/LICENSE +0 -0
  36. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/bsd_license.txt +0 -0
  37. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/gpl-v3.0.txt +0 -0
  38. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/top_level.txt +0 -0
  39. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/formatter.py

@@ -0,0 +1,101 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ __all__ = ("ProvenanceFormatter",)
+
+ import uuid
+ from typing import Any, ClassVar
+
+ import pydantic
+
+ from lsst.daf.butler import FormatterV2
+ from lsst.resources import ResourcePath
+ from lsst.utils.logging import getLogger
+
+ from ..pipeline_graph import TaskImportMode
+ from ._provenance import ProvenanceQuantumGraphReader
+
+ _LOG = getLogger(__file__)
+
+
+ class _ProvenanceFormatterParameters(pydantic.BaseModel):
+     """A Pydantic model for validating and applying defaults to the
+     read parameters of `ProvenanceFormatter`.
+     """
+
+     import_mode: TaskImportMode = TaskImportMode.DO_NOT_IMPORT
+     quanta: list[uuid.UUID] | None = None
+     datasets: list[uuid.UUID] | None = None
+     read_init_quanta: bool = True
+
+     @pydantic.field_validator("quanta", mode="before")
+     @classmethod
+     def quanta_to_list(cls, v: Any) -> list[uuid.UUID]:
+         return list(v)
+
+     @pydantic.field_validator("datasets", mode="before")
+     @classmethod
+     def datasets_to_list(cls, v: Any) -> list[uuid.UUID]:
+         return list(v)
+
+     @property
+     def nodes(self) -> list[uuid.UUID]:
+         if self.quanta is not None:
+             if self.datasets is not None:
+                 return self.quanta + self.datasets
+             else:
+                 return self.quanta
+         elif self.datasets is not None:
+             return self.datasets
+         raise ValueError("'datasets' and/or 'quanta' parameters are required for this component")
+
+
+ class ProvenanceFormatter(FormatterV2):
+     """Butler interface for reading `ProvenanceQuantumGraph` objects."""
+
+     default_extension: ClassVar[str] = ".qg"
+     can_read_from_uri: ClassVar[bool] = True
+
+     def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
+         parameters = _ProvenanceFormatterParameters.model_validate(self.file_descriptor.parameters or {})
+         with ProvenanceQuantumGraphReader.open(uri, import_mode=parameters.import_mode) as reader:
+             match component:
+                 case None:
+                     if parameters.read_init_quanta:
+                         reader.read_init_quanta()
+                     reader.read_quanta(parameters.quanta)
+                     reader.read_datasets(parameters.datasets)
+                     return reader.graph
+                 case "metadata":
+                     return reader.fetch_metadata(parameters.nodes)
+                 case "logs":
+                     return reader.fetch_logs(parameters.nodes)
+                 case "packages":
+                     return reader.fetch_packages()
+         raise AssertionError(f"Unexpected component {component!r}.")
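
Taken together, the parameters model and the `match` on `component` imply a Butler read path along these lines. This is a minimal sketch, not part of the diff: it assumes a repository whose provenance-graph dataset type uses this formatter, an already-resolved `DatasetRef` named `provenance_ref`, and a `list[uuid.UUID]` of quantum IDs `quantum_ids` obtained elsewhere; `DatasetRef.makeComponentRef` is the usual daf_butler way to address a component.

from lsst.daf.butler import Butler

with Butler.from_config("/repo/example") as butler:  # repo path is illustrative
    # Full read: init quanta plus any requested quanta/datasets.
    graph = butler.get(provenance_ref, parameters={"read_init_quanta": True})

    # Component reads ("metadata", "logs", "packages") go through the same
    # formatter; "metadata" and "logs" need 'quanta' and/or 'datasets' so
    # that _ProvenanceFormatterParameters.nodes is non-empty.
    logs = butler.get(
        provenance_ref.makeComponentRef("logs"),
        parameters={"quanta": quantum_ids},
    )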
lsst/pipe/base/quantum_graph_builder.py

@@ -1095,11 +1095,13 @@ class QuantumGraphBuilder(ABC):
              to `lsst.daf.butler.DatastoreRecordData`, as used by
              `lsst.daf.butler.Quantum`.
          """
+         self.log.info("Fetching and attaching datastore records for all overall inputs.")
          overall_inputs = skeleton.extract_overall_inputs()
          exported_records = self.butler._datastore.export_records(overall_inputs.values())
          for task_label in self._pipeline_graph.tasks:
              if not skeleton.has_task(task_label):
                  continue
+             self.log.verbose("Fetching and attaching datastore records for task %s.", task_label)
              task_init_key = skeleton.get_task_init_node(task_label)
              init_input_ids = {
                  ref.id
@@ -1152,12 +1154,14 @@ class QuantumGraphBuilder(ABC):
          """
          from .graph import QuantumGraph

+         self.log.info("Transforming graph skeleton into a QuantumGraph instance.")
          quanta: dict[TaskDef, set[Quantum]] = {}
          init_inputs: dict[TaskDef, Iterable[DatasetRef]] = {}
          init_outputs: dict[TaskDef, Iterable[DatasetRef]] = {}
          for task_def in self._pipeline_graph._iter_task_defs():
              if not skeleton.has_task(task_def.label):
                  continue
+             self.log.verbose("Transforming graph skeleton nodes for task %s.", task_def.label)
              task_node = self._pipeline_graph.tasks[task_def.label]
              task_init_key = skeleton.get_task_init_node(task_def.label)
              task_init_state = skeleton[task_init_key]
@@ -1198,7 +1202,8 @@ class QuantumGraphBuilder(ABC):
              ref = skeleton.get_dataset_ref(dataset_key)
              assert ref is not None, "Global init input refs should be resolved already."
              global_init_outputs.append(ref)
-         return QuantumGraph(
+         self.log.verbose("Invoking QuantumGraph class constructor.")
+         result = QuantumGraph(
              quanta,
              metadata=all_metadata,
              universe=self.universe,
@@ -1207,6 +1212,8 @@ class QuantumGraphBuilder(ABC):
              globalInitOutputs=global_init_outputs,
              registryDatasetTypes=registry_dataset_types,
          )
+         self.log.info("Graph build complete.")
+         return result

      @final
      @timeMethod
@@ -1243,6 +1250,7 @@ class QuantumGraphBuilder(ABC):
              PredictedQuantumGraphComponents,
          )

+         self.log.info("Transforming graph skeleton into PredictedQuantumGraph components.")
          components = PredictedQuantumGraphComponents(pipeline_graph=self._pipeline_graph)
          components.header.inputs = list(self.input_collections)
          components.header.output_run = self.output_run
@@ -1270,6 +1278,7 @@ class QuantumGraphBuilder(ABC):
          for task_node in self._pipeline_graph.tasks.values():
              if not skeleton.has_task(task_node.label):
                  continue
+             self.log.verbose("Transforming graph skeleton nodes for task %s.", task_node.label)
              task_init_key = TaskInitKey(task_node.label)
              init_quantum_datasets = PredictedQuantumDatasetsModel.model_construct(
                  quantum_id=generate_uuidv7(),
@@ -1315,8 +1324,10 @@ class QuantumGraphBuilder(ABC):
                  },
              )
              components.quantum_datasets[quantum_datasets.quantum_id] = quantum_datasets
+         self.log.verbose("Building the thin summary graph.")
          components.set_thin_graph()
          components.set_header_counts()
+         self.log.info("Graph build complete.")
          return components

      @staticmethod
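
The new progress messages are split between `info` and the extra VERBOSE level that `lsst.utils.logging` inserts between INFO and DEBUG, so the per-task chatter stays hidden at default verbosity. A minimal sketch of that behavior outside the builder (the logger name is illustrative):

from lsst.utils.logging import VERBOSE, getLogger

log = getLogger("lsst.pipe.base.quantumGraphBuilder")
log.info("Graph build complete.")  # visible at the default INFO level
log.verbose("Transforming graph skeleton nodes for task %s.", "isr")  # hidden by default
log.setLevel(VERBOSE)  # opt in to the per-task messages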
lsst/pipe/base/quantum_graph_executor.py

@@ -27,23 +27,113 @@

  from __future__ import annotations

- __all__ = ["QuantumExecutor", "QuantumGraphExecutor"]
+ __all__ = ["QuantumExecutionResult", "QuantumExecutor", "QuantumGraphExecutor"]

  from abc import ABC, abstractmethod
- from typing import TYPE_CHECKING
+ from typing import TYPE_CHECKING, Self
+
+ from lsst.daf.butler import Quantum

  from .quantum_reports import QuantumReport, Report

  if TYPE_CHECKING:
      import uuid

-     from lsst.daf.butler import Quantum
+     from lsst.daf.butler.logging import ButlerLogRecords

+     from ._task_metadata import TaskMetadata
      from .graph import QuantumGraph
      from .pipeline_graph import TaskNode
      from .quantum_graph import PredictedQuantumGraph


+ class QuantumExecutionResult(tuple[Quantum, QuantumReport | None]):
+     """A result struct that captures information about a single quantum's
+     execution.
+
+     Parameters
+     ----------
+     quantum : `lsst.daf.butler.Quantum`
+         Quantum that was executed.
+     report : `.quantum_reports.QuantumReport`
+         Report with basic information about the execution.
+     task_metadata : `TaskMetadata`, optional
+         Metadata saved by the task and executor during execution.
+     skipped_existing : `bool`, optional
+         If `True`, this quantum was not executed because it appeared to have
+         already been executed successfully.
+     adjusted_no_work : `bool`, optional
+         If `True`, this quantum was not executed because the
+         `PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
+
+     Notes
+     -----
+     For backwards compatibility, this class is a two-element tuple that allows
+     the ``quantum`` and ``report`` attributes to be unpacked. Additional
+     regular attributes may be added by executors (but the tuple must remain
+     only two elements to enable the current unpacking interface).
+     """
+
+     def __new__(
+         cls,
+         quantum: Quantum,
+         report: QuantumReport | None,
+         *,
+         task_metadata: TaskMetadata | None = None,
+         skipped_existing: bool | None = None,
+         adjusted_no_work: bool | None = None,
+     ) -> Self:
+         return super().__new__(cls, (quantum, report))
+
+     # We need to define both __init__ and __new__ because tuple inheritance
+     # requires __new__ and numpydoc requires __init__.
+
+     def __init__(
+         self,
+         quantum: Quantum,
+         report: QuantumReport | None,
+         *,
+         task_metadata: TaskMetadata | None = None,
+         skipped_existing: bool | None = None,
+         adjusted_no_work: bool | None = None,
+     ):
+         self._task_metadata = task_metadata
+         self._skipped_existing = skipped_existing
+         self._adjusted_no_work = adjusted_no_work
+
+     @property
+     def quantum(self) -> Quantum:
+         """The quantum actually executed."""
+         return self[0]
+
+     @property
+     def report(self) -> QuantumReport | None:
+         """Structure describing the status of the execution of a quantum.
+
+         This is `None` if the implementation does not support this feature.
+         """
+         return self[1]
+
+     @property
+     def task_metadata(self) -> TaskMetadata | None:
+         """Metadata saved by the task and executor during execution."""
+         return self._task_metadata
+
+     @property
+     def skipped_existing(self) -> bool | None:
+         """If `True`, this quantum was not executed because it appeared to have
+         already been executed successfully.
+         """
+         return self._skipped_existing
+
+     @property
+     def adjusted_no_work(self) -> bool | None:
+         """If `True`, this quantum was not executed because the
+         `PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
+         """
+         return self._adjusted_no_work
+
+
  class QuantumExecutor(ABC):
      """Class which abstracts execution of a single Quantum.

@@ -55,8 +145,14 @@ class QuantumExecutor(ABC):

      @abstractmethod
      def execute(
-         self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
-     ) -> tuple[Quantum, QuantumReport | None]:
+         self,
+         task_node: TaskNode,
+         /,
+         quantum: Quantum,
+         quantum_id: uuid.UUID | None = None,
+         *,
+         log_records: ButlerLogRecords | None = None,
+     ) -> QuantumExecutionResult:
          """Execute single quantum.

          Parameters
@@ -67,15 +163,18 @@
              Quantum for this execution.
          quantum_id : `uuid.UUID` or `None`, optional
              The ID of the quantum to be executed.
+         log_records : `lsst.daf.butler.ButlerLogRecords`, optional
+             Container that should be used to store logs in memory before
+             writing them to the butler. This disables streaming log (since
+             we'd have to store them in memory anyway), but it permits the
+             caller to prepend logs to be stored in the butler and allows task
+             logs to be inspected by the caller after execution is complete.

          Returns
          -------
-         quantum : `~lsst.daf.butler.Quantum`
-             The quantum actually executed.
-         report : `~.quantum_reports.QuantumReport`
-             Structure describing the status of the execution of a quantum.
-             `None` is returned if implementation does not support this
-             feature.
+         result : `QuantumExecutionResult`
+             Result struct. May also be unpacked as a 2-tuple (see type
+             documentation).

          Notes
          -----
@@ -93,7 +192,9 @@ class QuantumGraphExecutor(ABC):
      """

      @abstractmethod
-     def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
+     def execute(
+         self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
+     ) -> None:
          """Execute whole graph.

          Implementation of this method depends on particular execution model
@@ -103,8 +204,10 @@ class QuantumGraphExecutor(ABC):

          Parameters
          ----------
-         graph : `.QuantumGraph`
+         graph : `.QuantumGraph` or `.quantum_graph.PredictedQuantumGraph`
              Execution graph.
+         provenance_graph_file : `str`, optional
+             A filename to write provenance to.
          """
          raise NotImplementedError()

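Because `QuantumExecutionResult` is itself a two-element tuple, pre-30.0.0 call sites that unpack the old return value keep working, while new call sites can read the extra attributes. A minimal sketch, assuming a concrete `executor` plus `task_node`, `quantum`, and `quantum_id` are already in hand:

result = executor.execute(task_node, quantum, quantum_id=quantum_id)

# Old interface: plain 2-tuple unpacking still works.
executed_quantum, report = result

# New interface: optional attributes populated by the executor.
if result.skipped_existing:
    print(f"quantum {quantum_id} skipped: outputs already exist")
elif result.adjusted_no_work:
    print(f"quantum {quantum_id} skipped: adjustQuanta raised NoWorkFound")
elif result.task_metadata is not None:
    print("task metadata was captured during execution")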
lsst/pipe/base/quantum_graph_skeleton.py

@@ -383,12 +383,6 @@ class QuantumGraphSkeleton:
              The dataset ref of the prerequisite.
          **attrs : `~typing.Any`
              Additional attributes for the node.
-
-         Notes
-         -----
-         This automatically sets the 'existing_input' ref attribute (see
-         `set_existing_input_ref`), since prerequisites are always overall
-         inputs.
          """
          key = PrerequisiteDatasetKey(ref.datasetType.name, ref.id.bytes)
          self._xgraph.add_node(key, data_id=ref.dataId, ref=ref, **attrs)
@@ -606,7 +600,7 @@ class QuantumGraphSkeleton:

      def set_output_in_the_way(self, ref: DatasetRef) -> None:
          """Associate a dataset node with a `DatasetRef` that represents an
-         existing output in the output RUN collectoin.
+         existing output in the output RUN collection.

          Parameters
          ----------
lsst/pipe/base/script/register_instrument.py

@@ -53,7 +53,7 @@ def register_instrument(repo: str, instrument: list[str], update: bool = False)
          Raised iff the instrument is not a subclass of
          `lsst.pipe.base.Instrument`.
      """
-     butler = Butler.from_config(repo, writeable=True)
-     for string in instrument:
-         instrument_instance = Instrument.from_string(string, butler.registry)
-         instrument_instance.register(butler.registry, update=update)
+     with Butler.from_config(repo, writeable=True) as butler:
+         for string in instrument:
+             instrument_instance = Instrument.from_string(string, butler.registry)
+             instrument_instance.register(butler.registry, update=update)
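
This is one instance of a pattern repeated across the script changes below: `Butler` and `QuantumBackedButler` instances are now used as context managers so that registry connections and datastore resources are released deterministically, even when an exception escapes the block. The shape of the pattern in isolation (repo path is illustrative):

from lsst.daf.butler import Butler

with Butler.from_config("/repo/main", writeable=True) as butler:
    ...  # registry/datastore work happens here
# On exit the butler's resources are closed, including on error, rather
# than being left to garbage collection.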
lsst/pipe/base/script/retrieve_artifacts_for_quanta.py

@@ -93,16 +93,15 @@ def retrieve_artifacts_for_quanta(
      dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

      # Make QBB, its config is the same as output Butler.
-     qbb = QuantumBackedButler.from_predicted(
+     with QuantumBackedButler.from_predicted(
          config=repo,
          predicted_inputs=[ref.id for ref in refs],
          predicted_outputs=[],
          dimensions=qgraph.universe,
          datastore_records=datastore_records,
          dataset_types=dataset_types,
-     )
-
-     paths = qbb.retrieve_artifacts(
-         refs, dest, transfer=transfer, overwrite=clobber, preserve_path=preserve_path
-     )
+     ) as qbb:
+         paths = qbb.retrieve_artifacts(
+             refs, dest, transfer=transfer, overwrite=clobber, preserve_path=preserve_path
+         )
      return paths
lsst/pipe/base/script/transfer_from_graph.py

@@ -85,52 +85,52 @@ def transfer_from_graph(
      # Get data repository dataset type definitions from the QuantumGraph.
      dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

-     # Make QBB, its config is the same as output Butler.
-     qbb = QuantumBackedButler.from_predicted(
-         config=dest,
-         predicted_inputs=[ref.id for ref in output_refs],
-         predicted_outputs=[],
-         dimensions=qgraph.universe,
-         datastore_records={},
-         dataset_types=dataset_types,
-     )
-
      # Filter the refs based on requested dataset types.
      filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
      _LOG.verbose("After filtering by dataset_type, number of datasets to transfer: %d", len(filtered_refs))

-     dest_butler = Butler.from_config(dest, writeable=True)
-
-     # For faster restarts, filter out those the destination already knows.
-     filtered_refs = filter_by_existence(dest_butler, filtered_refs)
-
-     # Transfer in chunks
-     chunk_size = 50_000
-     n_chunks = math.ceil(len(filtered_refs) / chunk_size)
-     chunk_num = 0
-     count = 0
-     for chunk in chunk_iterable(filtered_refs, chunk_size=chunk_size):
-         chunk_num += 1
-         if n_chunks > 1:
-             _LOG.verbose("Transferring %d datasets in chunk %d/%d", len(chunk), chunk_num, n_chunks)
-         transferred = dest_butler.transfer_from(
-             qbb,
-             chunk,
-             transfer="auto",
-             register_dataset_types=register_dataset_types,
-             transfer_dimensions=transfer_dimensions,
-             dry_run=dry_run,
-         )
-         count += len(transferred)
-
-     # If asked to do so, update output chain definition.
-     if update_output_chain and (metadata := qgraph.metadata) is not None:
-         # These are defined in CmdLineFwk.
-         output_run = metadata.get("output_run")
-         output = metadata.get("output")
-         input = metadata.get("input")
-         if output_run is not None and output is not None:
-             _update_chain(dest_butler, output, output_run, input)
+     # Make QBB, its config is the same as output Butler.
+     with (
+         QuantumBackedButler.from_predicted(
+             config=dest,
+             predicted_inputs=[ref.id for ref in output_refs],
+             predicted_outputs=[],
+             dimensions=qgraph.universe,
+             datastore_records={},
+             dataset_types=dataset_types,
+         ) as qbb,
+         Butler.from_config(dest, writeable=True) as dest_butler,
+     ):
+         # For faster restarts, filter out those the destination already knows.
+         filtered_refs = filter_by_existence(dest_butler, filtered_refs)
+
+         # Transfer in chunks
+         chunk_size = 50_000
+         n_chunks = math.ceil(len(filtered_refs) / chunk_size)
+         chunk_num = 0
+         count = 0
+         for chunk in chunk_iterable(filtered_refs, chunk_size=chunk_size):
+             chunk_num += 1
+             if n_chunks > 1:
+                 _LOG.verbose("Transferring %d datasets in chunk %d/%d", len(chunk), chunk_num, n_chunks)
+             transferred = dest_butler.transfer_from(
+                 qbb,
+                 chunk,
+                 transfer="auto",
+                 register_dataset_types=register_dataset_types,
+                 transfer_dimensions=transfer_dimensions,
+                 dry_run=dry_run,
+             )
+             count += len(transferred)
+
+         # If asked to do so, update output chain definition.
+         if update_output_chain and (metadata := qgraph.metadata) is not None:
+             # These are defined in CmdLineFwk.
+             output_run = metadata.get("output_run")
+             output = metadata.get("output")
+             input = metadata.get("input")
+             if output_run is not None and output is not None:
+                 _update_chain(dest_butler, output, output_run, input)

      return count

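The chunk accounting above is easy to misread at a glance; here it is in isolation with small numbers. A minimal sketch using `chunk_iterable` from `lsst.utils.iteration`, the helper the script calls:

import math

from lsst.utils.iteration import chunk_iterable

refs = list(range(7))  # stand-in for filtered_refs
chunk_size = 3
n_chunks = math.ceil(len(refs) / chunk_size)  # ceil(7 / 3) == 3
for chunk_num, chunk in enumerate(chunk_iterable(refs, chunk_size=chunk_size), start=1):
    print(f"chunk {chunk_num}/{n_chunks}: {len(chunk)} refs")  # 3, 3, then 1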
lsst/pipe/base/script/zip_from_graph.py

@@ -72,19 +72,18 @@ def zip_from_graph(
      # Get data repository dataset type definitions from the QuantumGraph.
      dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

+     # Filter the refs based on requested dataset types.
+     filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
+
      # Make QBB, its config is the same as output Butler.
-     qbb = QuantumBackedButler.from_predicted(
+     with QuantumBackedButler.from_predicted(
          config=repo,
          predicted_inputs=[ref.id for ref in output_refs],
          predicted_outputs=[],
          dimensions=qgraph.universe,
          datastore_records={},
          dataset_types=dataset_types,
-     )
-
-     # Filter the refs based on requested dataset types.
-     filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
-
-     _LOG.info("Retrieving artifacts for %d datasets and storing in Zip file.", len(filtered_refs))
-     zip = qbb.retrieve_artifacts_zip(filtered_refs, dest)
+     ) as qbb:
+         _LOG.info("Retrieving artifacts for %d datasets and storing in Zip file.", len(filtered_refs))
+         zip = qbb.retrieve_artifacts_zip(filtered_refs, dest)
      return zip
lsst/pipe/base/separable_pipeline_executor.py

@@ -40,7 +40,8 @@ from collections.abc import Iterable
  from typing import Any

  import lsst.resources
- from lsst.daf.butler import Butler
+ from lsst.daf.butler import Butler, DatasetRef
+ from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest

  from ._quantumContext import ExecutionResources
  from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
@@ -362,6 +363,8 @@ class SeparablePipelineExecutor:
          fail_fast: bool = False,
          graph_executor: QuantumGraphExecutor | None = None,
          num_proc: int = 1,
+         *,
+         provenance_dataset_ref: DatasetRef | None = None,
      ) -> None:
          """Run a pipeline in the form of a prepared quantum graph.

@@ -384,6 +387,14 @@ class SeparablePipelineExecutor:
              The number of processes that can be used to run the pipeline. The
              default value ensures that no subprocess is created. Only used with
              the default graph executor.
+         provenance_dataset_ref : `lsst.daf.butler.DatasetRef`, optional
+             Dataset that should be used to save provenance. Provenance is only
+             supported when running in a single process (at least for the
+             default quantum executor), and should not be used with
+             ``skip_existing_in=[output_run]`` when retrying a previous
+             execution attempt. The caller is responsible for registering the
+             dataset type and for ensuring that the dimensions of this dataset
+             do not lead to uniqueness conflicts.
          """
          if not graph_executor:
              quantum_executor = SingleQuantumExecutor(
@@ -404,4 +415,9 @@ class SeparablePipelineExecutor:
          # forked processes.
          self._butler.registry.resetConnectionPool()

-         graph_executor.execute(graph)
+         if provenance_dataset_ref is not None:
+             with TemporaryForIngest(self._butler, provenance_dataset_ref) as temporary:
+                 graph_executor.execute(graph, provenance_graph_file=temporary.ospath)
+                 temporary.ingest()
+         else:
+             graph_executor.execute(graph)
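
From the caller's side, the new keyword threads through to `TemporaryForIngest` as shown above. A minimal sketch, assuming the method being modified here is `SeparablePipelineExecutor.run_pipeline`, that `executor` and `graph` already exist, and that the caller has registered the dataset type and resolved `provenance_ref` (per the docstring's requirements):

# Provenance is only supported single-process for the default quantum
# executor, so keep num_proc=1 when requesting it.
executor.run_pipeline(
    graph,
    num_proc=1,
    provenance_dataset_ref=provenance_ref,
)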
lsst/pipe/base/simple_pipeline_executor.py

@@ -40,6 +40,7 @@ from lsst.daf.butler import (
      DatasetRef,
      Quantum,
  )
+ from lsst.daf.butler.registry import RegistryDefaults
  from lsst.pex.config import Config

  from ._instrument import Instrument
@@ -152,9 +153,9 @@ class SimplePipelineExecutor:
          collections = [output_run]
          collections.extend(inputs)
          butler.registry.setCollectionChain(output, collections)
-         # Remake butler to let it infer default data IDs from collections, now
-         # that those collections exist.
-         return Butler.from_config(butler=butler, collections=[output], run=output_run)
+         # Override the registry defaults. No need to clone.
+         butler.registry.defaults = RegistryDefaults(collections=[output], run=output_run)
+         return butler

      @classmethod
      def from_pipeline_filename(
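
The replaced comment explains the motivation: the old code rebuilt the whole `Butler` just to pick up new default collections, while the new code assigns fresh `RegistryDefaults` to the live registry. A minimal sketch of the same move outside the executor (collection names are illustrative):

from lsst.daf.butler.registry import RegistryDefaults

# Re-point the default input collections and output run on an existing
# butler instead of constructing a second one.
butler.registry.defaults = RegistryDefaults(collections=["u/me/output"], run="u/me/output/run1")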