lsst-pipe-base 30.2026.200-py3-none-any.whl → 30.2026.400-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +10 -12
- lsst/pipe/base/_status.py +29 -10
- lsst/pipe/base/automatic_connection_constants.py +9 -1
- lsst/pipe/base/cli/cmd/__init__.py +16 -2
- lsst/pipe/base/cli/cmd/commands.py +42 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +3 -6
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/log_capture.py +8 -4
- lsst/pipe/base/log_on_close.py +79 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/pipeline.py +3 -4
- lsst/pipe/base/pipelineIR.py +0 -6
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_edges.py +19 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
- lsst/pipe/base/quantum_graph/_common.py +7 -4
- lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
- lsst/pipe/base/quantum_graph/_predicted.py +111 -10
- lsst/pipe/base/quantum_graph/_provenance.py +727 -26
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +26 -50
- lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +48 -234
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +24 -18
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +33 -350
- lsst/pipe/base/quantum_graph/formatter.py +171 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_provenance_graph.py +17 -2
- lsst/pipe/base/separable_pipeline_executor.py +18 -2
- lsst/pipe/base/single_quantum_executor.py +59 -41
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +45 -42
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
lsst/pipe/base/log_on_close.py
ADDED

@@ -0,0 +1,79 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("LogOnClose",)
+
+from collections.abc import Callable, Iterator
+from contextlib import AbstractContextManager, contextmanager
+from typing import TypeVar
+
+from lsst.utils.logging import VERBOSE
+
+_T = TypeVar("_T")
+
+
+class LogOnClose:
+    """A factory for context manager wrappers that emit a log message when
+    they are closed.
+
+    Parameters
+    ----------
+    log_func : `~collections.abc.Callable` [ `int`, `str` ]
+        Callable that takes an integer log level and a string message and emits
+        a log message. Note that placeholder formatting is not supported.
+    """
+
+    def __init__(self, log_func: Callable[[int, str], None]):
+        self.log_func = log_func
+
+    def wrap(
+        self,
+        cm: AbstractContextManager[_T],
+        msg: str,
+        level: int = VERBOSE,
+    ) -> AbstractContextManager[_T]:
+        """Wrap a context manager to log when it is exited.
+
+        Parameters
+        ----------
+        cm : `contextlib.AbstractContextManager`
+            Context manager to wrap.
+        msg : `str`
+            Log message.
+        level : `int`, optional
+            Log level.
+        """
+
+        @contextmanager
+        def wrapper() -> Iterator[_T]:
+            with cm as result:
+                yield result
+            self.log_func(level, msg)
+
+        return wrapper()
lsst/pipe/base/mp_graph_executor.py
CHANGED

@@ -39,20 +39,24 @@ import sys
 import threading
 import time
 import uuid
+from contextlib import ExitStack
 from typing import Literal, cast

 import networkx

 from lsst.daf.butler import DataCoordinate, Quantum
 from lsst.daf.butler.cli.cliLog import CliLog
+from lsst.daf.butler.logging import ButlerLogRecords
 from lsst.utils.threads import disable_implicit_threading

 from ._status import InvalidQuantumError, RepeatableQuantumError
+from ._task_metadata import TaskMetadata
 from .execution_graph_fixup import ExecutionGraphFixup
 from .graph import QuantumGraph
 from .graph_walker import GraphWalker
+from .log_on_close import LogOnClose
 from .pipeline_graph import TaskNode
-from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo
+from .quantum_graph import PredictedQuantumGraph, PredictedQuantumInfo, ProvenanceQuantumGraphWriter
 from .quantum_graph_executor import QuantumExecutor, QuantumGraphExecutor
 from .quantum_reports import ExecutionStatus, QuantumReport, Report

@@ -515,7 +519,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             start_method = "spawn"
         self._start_method = start_method

-    def execute(
+    def execute(
+        self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
+    ) -> None:
         # Docstring inherited from QuantumGraphExecutor.execute
         old_graph: QuantumGraph | None = None
         if isinstance(graph, QuantumGraph):

@@ -525,14 +531,31 @@ class MPGraphExecutor(QuantumGraphExecutor):
         new_graph = graph
         xgraph = self._make_xgraph(new_graph, old_graph)
         self._report = Report(qgraphSummary=new_graph._make_summary())
-
-
-
-
-
-
-
-
+        with ExitStack() as exit_stack:
+            provenance_writer: ProvenanceQuantumGraphWriter | None = None
+            if provenance_graph_file is not None:
+                if provenance_graph_file is not None and self._num_proc > 1:
+                    raise NotImplementedError(
+                        "Provenance writing is not implemented for multiprocess execution."
+                    )
+                provenance_writer = ProvenanceQuantumGraphWriter(
+                    provenance_graph_file,
+                    exit_stack=exit_stack,
+                    log_on_close=LogOnClose(_LOG.log),
+                    predicted=new_graph,
+                )
+            try:
+                if self._num_proc > 1:
+                    self._execute_quanta_mp(xgraph, self._report)
+                else:
+                    self._execute_quanta_in_process(xgraph, self._report, provenance_writer)
+            except Exception as exc:
+                self._report.set_exception(exc)
+                raise
+            if provenance_writer is not None:
+                provenance_writer.write_overall_inputs()
+                provenance_writer.write_packages()
+                provenance_writer.write_init_outputs(assume_existence=True)

     def _make_xgraph(
         self, new_graph: PredictedQuantumGraph, old_graph: QuantumGraph | None

@@ -576,7 +599,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             raise MPGraphExecutorError("Updated execution graph has dependency cycle.")
         return xgraph

-    def _execute_quanta_in_process(
+    def _execute_quanta_in_process(
+        self, xgraph: networkx.DiGraph, report: Report, provenance_writer: ProvenanceQuantumGraphWriter | None
+    ) -> None:
         """Execute all Quanta in current process.

         Parameters

@@ -589,6 +614,9 @@ class MPGraphExecutor(QuantumGraphExecutor):
             `.quantum_graph.PredictedQuantumGraph.quantum_only_xgraph`.
         report : `Report`
             Object for reporting execution status.
+        provenance_writer : `.quantum_graph.ProvenanceQuantumGraphWriter` or \
+            `None`
+            Object for recording provenance.
         """

         def tiebreaker_sort_key(quantum_id: uuid.UUID) -> tuple:

@@ -606,16 +634,19 @@ class MPGraphExecutor(QuantumGraphExecutor):

             _LOG.debug("Executing %s (%s@%s)", quantum_id, task_node.label, data_id)
             fail_exit_code: int | None = None
+            task_metadata: TaskMetadata | None = None
+            task_logs = ButlerLogRecords([])
             try:
                 # For some exception types we want to exit immediately with
                 # exception-specific exit code, but we still want to start
                 # debugger before exiting if debugging is enabled.
                 try:
-
-                        task_node, quantum, quantum_id=quantum_id
+                    execution_result = self._quantum_executor.execute(
+                        task_node, quantum, quantum_id=quantum_id, log_records=task_logs
                     )
-                    if
-                    report.quantaReports.append(
+                    if execution_result.report:
+                        report.quantaReports.append(execution_result.report)
+                    task_metadata = execution_result.task_metadata
                     success_count += 1
                     walker.finish(quantum_id)
                 except RepeatableQuantumError as exc:

@@ -701,6 +732,11 @@ class MPGraphExecutor(QuantumGraphExecutor):
                 )
                 failed_count += 1

+            if provenance_writer is not None:
+                provenance_writer.write_quantum_provenance(
+                    quantum_id, metadata=task_metadata, logs=task_logs
+                )
+
         _LOG.info(
             "Executed %d quanta successfully, %d failed and %d remain out of total %d quanta.",
             success_count,
lsst/pipe/base/pipeline.py
CHANGED

@@ -54,13 +54,12 @@ from lsst.utils.introspection import get_full_type_name

 from . import automatic_connection_constants as acc
 from . import pipeline_graph, pipelineIR
-from ._instrument import Instrument as
+from ._instrument import Instrument as Instrument
 from .config import PipelineTaskConfig
 from .connections import PipelineTaskConnections
 from .pipelineTask import PipelineTask

 if TYPE_CHECKING:  # Imports needed only for type annotations; may be circular.
-    from lsst.obs.base import Instrument
     from lsst.pex.config import Config

 # ----------------------------------

@@ -702,7 +701,7 @@ class Pipeline:
         """
         instrument_class_name = self._pipelineIR.instrument
         if instrument_class_name is not None:
-            instrument_class = cast(
+            instrument_class = cast(Instrument, doImportType(instrument_class_name))
             if instrument_class is not None:
                 return DataCoordinate.standardize(instrument=instrument_class.getName(), universe=universe)
         return DataCoordinate.make_empty(universe)

@@ -893,7 +892,7 @@ class Pipeline:
             raise NameError(f"Label {label} does not appear in this pipeline")
         taskClass: type[PipelineTask] = doImportType(taskIR.klass)
         config = taskClass.ConfigClass()
-        instrument:
+        instrument: Instrument | None = None
         if (instrumentName := self._pipelineIR.instrument) is not None:
             instrument_cls: type = doImportType(instrumentName)
             instrument = instrument_cls()
lsst/pipe/base/pipelineIR.py
CHANGED

@@ -220,12 +220,6 @@ class LabeledSubset:
 class ParametersIR:
     """Intermediate representation of parameters that are global to a pipeline.

-    Attributes
-    ----------
-    mapping : `dict` [`str`, `str`]
-        A mutable mapping of identifiers as keys, and shared configuration
-        as values.
-
     Notes
     -----
     These parameters are specified under a top level key named ``parameters``
lsst/pipe/base/pipelineTask.py
CHANGED

@@ -55,7 +55,7 @@ class PipelineTask(Task):
     resulting data is also stored in a data butler.

     PipelineTask inherits from a `~lsst.pipe.base.Task` and uses the same
-    configuration mechanism based on
+    configuration mechanism based on `lsst.pex.config`. `PipelineTask`
     classes also have a `PipelineTaskConnections` class associated with their
     config which defines all of the IO a `PipelineTask` will need to do.
     PipelineTask sub-class typically implements `run()` method which receives

@@ -75,12 +75,6 @@ class PipelineTask(Task):
     PipelineTask base class constructor, but may support other signatures as
     well.

-    Attributes
-    ----------
-    canMultiprocess : bool, True by default (class attribute)
-        This class attribute is checked by execution framework, sub-classes
-        can set it to ``False`` in case task does not support multiprocessing.
-
     Parameters
     ----------
     config : `~lsst.pex.config.Config`, optional

@@ -102,7 +96,11 @@ class PipelineTask(Task):
     """

     ConfigClass: ClassVar[type[PipelineTaskConfig]]
+
     canMultiprocess: ClassVar[bool] = True
+    """Whether this task can be run by an executor that uses subprocesses for
+    parallelism.
+    """

     def __init__(
         self,
lsst/pipe/base/pipeline_graph/_edges.py
CHANGED

@@ -659,13 +659,25 @@ class ReadEdge(Edge):
                 # compatible), since neither connection should take
                 # precedence.
                 if dataset_type != current:
-
-
-
-
-                        "
-
-
+                    if visualization_only and dataset_type.dimensions == current.dimensions:
+                        # Make a visualization-only ambiguous storage class
+                        # "name".
+                        all_storage_classes = set(current.storageClass_name.split("/"))
+                        all_storage_classes.update(dataset_type.storageClass_name.split("/"))
+                        current = DatasetType(
+                            current.name,
+                            current.dimensions,
+                            "/".join(sorted(all_storage_classes)),
+                        )
+                    else:
+                        raise MissingDatasetTypeError(
+                            f"Definitions differ for input dataset type "
+                            f"{self.parent_dataset_type_name!r}; task {self.task_label!r} has "
+                            f"{dataset_type}, but the definition from {report_current_origin()} is "
+                            f"{current}. If the storage classes are compatible but different, "
+                            "registering the dataset type in the data repository in advance will avoid "
+                            "this error."
+                        )
                 elif not visualization_only and not dataset_type.is_compatible_with(current):
                     raise IncompatibleDatasetTypeError(
                         f"Incompatible definition for input dataset type {self.parent_dataset_type_name!r}; "
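
In the visualization-only branch above, conflicting storage-class declarations are folded into a single "/"-joined, sorted pseudo-name instead of raising. The string manipulation in isolation (the storage-class names are illustrative):

    # One side may already hold a merged pseudo-name from an earlier conflict.
    current_name = "ArrowAstropy/DataFrame"
    incoming_name = "ArrowTable"

    all_storage_classes = set(current_name.split("/"))
    all_storage_classes.update(incoming_name.split("/"))

    merged = "/".join(sorted(all_storage_classes))
    print(merged)  # ArrowAstropy/ArrowTable/DataFrame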
lsst/pipe/base/pipeline_graph/_pipeline_graph.py
CHANGED

@@ -897,6 +897,10 @@ class PipelineGraph:
             New config objects or overrides to apply to copies of the current
             config objects, with task labels as the keywords.

+        Returns
+        -------
+        None
+
         Raises
         ------
         ValueError

@@ -1755,6 +1759,10 @@ class PipelineGraph:
         not considered part of the pipeline graph in other respects, but it
         does get written with other provenance datasets).

+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.MissingDatasetTypeError
lsst/pipe/base/quantum_graph/_common.py
CHANGED

@@ -448,14 +448,17 @@ class BaseQuantumGraphWriter:
         uri: ResourcePathExpression,
         header: HeaderModel,
         pipeline_graph: PipelineGraph,
-        indices: dict[uuid.UUID, int],
         *,
         address_filename: str,
-        compressor: Compressor,
         cdict_data: bytes | None = None,
+        zstd_level: int = 10,
     ) -> Iterator[Self]:
-        uri = ResourcePath(uri)
-        address_writer = AddressWriter(
+        uri = ResourcePath(uri, forceDirectory=False)
+        address_writer = AddressWriter()
+        if uri.isLocal:
+            os.makedirs(uri.dirname().ospath, exist_ok=True)
+        cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
+        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
         with uri.open(mode="wb") as stream:
             with zipfile.ZipFile(stream, mode="w", compression=zipfile.ZIP_STORED) as zf:
                 self = cls(zf, compressor, address_writer, header.int_size)
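
The writer compresses each member with zstandard itself and stores it in a `ZIP_STORED` archive, so the zip layer adds no second round of compression. A self-contained sketch of that layout (member name and payload are illustrative):

    import io
    import zipfile

    import zstandard

    cdict_data: bytes | None = None  # optional pre-trained dictionary bytes
    cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
    compressor = zstandard.ZstdCompressor(level=10, dict_data=cdict)

    buffer = io.BytesIO()
    # ZIP_STORED: members are written verbatim; zstd is the only compression.
    with zipfile.ZipFile(buffer, mode="w", compression=zipfile.ZIP_STORED) as zf:
        zf.writestr("thin_graph", compressor.compress(b'{"nodes": []}'))

    print(f"wrote {len(buffer.getvalue())} bytes")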
lsst/pipe/base/quantum_graph/_multiblock.py
CHANGED

@@ -205,13 +205,6 @@ class AddressRow:
 class AddressWriter:
     """A helper object for writing address files for multi-block files."""

-    indices: dict[uuid.UUID, int] = dataclasses.field(default_factory=dict)
-    """Mapping from UUID to internal integer ID.
-
-    The internal integer ID must always correspond to the index into the
-    sorted list of all UUIDs, but this `dict` need not be sorted itself.
-    """
-
     addresses: list[dict[uuid.UUID, Address]] = dataclasses.field(default_factory=list)
     """Addresses to store with each UUID.

@@ -229,18 +222,15 @@ class AddressWriter:
         int_size : `int`
             Number of bytes to use for all integers.
         """
-
-
-
-                    f"Logic bug in quantum graph I/O: address map {n} of {len(self.addresses)} has IDs "
-                    f"{address_map.keys() - self.indices.keys()} not in the index map."
-                )
+        indices: set[uuid.UUID] = set()
+        for address_map in self.addresses:
+            indices.update(address_map.keys())
         stream.write(int_size.to_bytes(1))
-        stream.write(len(
+        stream.write(len(indices).to_bytes(int_size))
         stream.write(len(self.addresses).to_bytes(int_size))
         empty_address = Address()
-        for key in sorted(
-            row = AddressRow(key,
+        for n, key in enumerate(sorted(indices, key=attrgetter("int"))):
+            row = AddressRow(key, n, [m.get(key, empty_address) for m in self.addresses])
             _LOG.debug("Wrote address %s.", row)
             row.write(stream, int_size)
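
With the explicit `indices` mapping gone, each row's integer ID is now implicit: the union of UUID keys is sorted by `UUID.int` and enumerated. A minimal sketch of the resulting fixed-width table; the UUID-plus-index-plus-(offset, size) row body is an illustrative stand-in for the real `AddressRow` encoding:

    import io
    import uuid
    from operator import attrgetter

    int_size = 8  # bytes per integer, recorded in the table header
    # One address map per block kind; (offset, size) values are illustrative.
    addresses = [{uuid.uuid4(): (0, 100)}, {uuid.uuid4(): (100, 250)}]

    indices: set[uuid.UUID] = set()
    for address_map in addresses:
        indices.update(address_map.keys())

    stream = io.BytesIO()
    stream.write(int_size.to_bytes(1, "big"))               # header: integer width
    stream.write(len(indices).to_bytes(int_size, "big"))    # header: row count
    stream.write(len(addresses).to_bytes(int_size, "big"))  # header: maps per row
    empty = (0, 0)
    # Sorting by UUID.int defines each row's implicit integer ID n.
    for n, key in enumerate(sorted(indices, key=attrgetter("int"))):
        stream.write(key.bytes)
        stream.write(n.to_bytes(int_size, "big"))
        for offset, size in (m.get(key, empty) for m in addresses):
            stream.write(offset.to_bytes(int_size, "big"))
            stream.write(size.to_bytes(int_size, "big"))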
lsst/pipe/base/quantum_graph/_predicted.py
CHANGED

@@ -66,6 +66,7 @@ from lsst.daf.butler import (
     DimensionDataExtractor,
     DimensionGroup,
     DimensionRecordSetDeserializer,
+    DimensionUniverse,
     LimitedButler,
     Quantum,
     QuantumBackedButler,

@@ -109,6 +110,13 @@ if TYPE_CHECKING:
     from ..config import PipelineTaskConfig
     from ..graph import QgraphSummary, QuantumGraph

+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _LOG = logging.getLogger(__name__)

@@ -877,6 +885,49 @@ class PredictedQuantumGraph(BaseQuantumGraph):
             page_size=page_size,
         ).assemble()

+    @classmethod
+    def make_empty(
+        cls,
+        universe: DimensionUniverse,
+        *,
+        output_run: str,
+        inputs: Iterable[str] = (),
+        output: str | None = None,
+        add_packages: bool = True,
+    ) -> PredictedQuantumGraph:
+        """Make an empty quantum graph with no tasks.
+
+        Parameters
+        ----------
+        universe : `lsst.daf.butler.DimensionUniverse`
+            Definitions for all butler dimensions.
+        output_run : `str`
+            Output run collection.
+        inputs : `~collections.abc.Iterable` [`str`], optional
+            Iterable of input collection names.
+        output : `str` or `None`, optional
+            Output chained collection.
+        add_packages : `bool`, optional
+            Whether to add the special init quantum that writes the 'packages'
+            dataset. The default (`True`) is consistent with
+            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
+            are no regular quanta generated.
+
+        Returns
+        -------
+        quantum_graph : `PredictedQuantumGraph`
+            An empty quantum graph.
+        """
+        return cls(
+            PredictedQuantumGraphComponents.make_empty(
+                universe,
+                output_run=output_run,
+                inputs=inputs,
+                output=output,
+                add_packages=add_packages,
+            )
+        )
+
     @property
     def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
         """A nested mapping of all quanta, keyed first by task name and then by
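
A hedged usage sketch for the new classmethod, assuming an existing butler repository; the repository path and collection names are purely illustrative:

    from lsst.daf.butler import Butler
    from lsst.pipe.base.quantum_graph import PredictedQuantumGraph

    butler = Butler("/repo")  # illustrative repo path
    qg = PredictedQuantumGraph.make_empty(
        butler.dimensions,  # the repository's DimensionUniverse
        output_run="u/someone/empty-run",
        inputs=["some/input-collection"],
        output="u/someone/output-chain",
    )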
@@ -1541,6 +1592,63 @@ class PredictedQuantumGraphComponents:
     This does not include special "init" quanta.
     """

+    @classmethod
+    def make_empty(
+        cls,
+        universe: DimensionUniverse,
+        *,
+        output_run: str,
+        inputs: Iterable[str] = (),
+        output: str | None = None,
+        add_packages: bool = True,
+    ) -> PredictedQuantumGraphComponents:
+        """Make components for an empty quantum graph with no tasks.
+
+        Parameters
+        ----------
+        universe : `lsst.daf.butler.DimensionUniverse`
+            Definitions for all butler dimensions.
+        output_run : `str`
+            Output run collection.
+        inputs : `~collections.abc.Iterable` [`str`], optional
+            Iterable of input collection names.
+        output : `str` or `None`, optional
+            Output chained collection.
+        add_packages : `bool`, optional
+            Whether to add the special init quantum that writes the 'packages'
+            dataset. The default (`True`) is consistent with
+            `~..quantum_graph_builder.QuantumGraphBuilder` behavior when there
+            are no regular quanta generated.
+
+        Returns
+        -------
+        components : `PredictedQuantumGraphComponents`
+            Components that can be used to build or write an empty quantum
+            graph.
+        """
+        components = cls(pipeline_graph=PipelineGraph(universe=universe))
+        components.header.inputs = list(inputs)
+        components.header.output_run = output_run
+        components.header.output = output
+        if add_packages:
+            components.init_quanta.root = [
+                PredictedQuantumDatasetsModel.model_construct(
+                    quantum_id=generate_uuidv7(),
+                    task_label="",
+                    outputs={
+                        acc.PACKAGES_INIT_OUTPUT_NAME: [
+                            PredictedDatasetModel(
+                                dataset_id=generate_uuidv7(),
+                                dataset_type_name=acc.PACKAGES_INIT_OUTPUT_NAME,
+                                data_coordinate=[],
+                                run=output_run,
+                            )
+                        ]
+                    },
+                )
+            ]
+        return components
+
     def make_dataset_ref(self, predicted: PredictedDatasetModel) -> DatasetRef:
         """Make a `lsst.daf.butler.DatasetRef` from information in the
         predicted quantum graph.
@@ -1793,7 +1901,6 @@ class PredictedQuantumGraphComponents:
                 f"Unsupported extension {ext!r} for quantum graph; "
                 "expected '.qg' (or '.qgraph' to force the old format)."
             )
-        cdict: zstandard.ZstdCompressionDict | None = None
         cdict_data: bytes | None = None
         quantum_datasets_json: dict[uuid.UUID, bytes] = {}
         if len(self.quantum_datasets) < zstd_dict_n_inputs:

@@ -1807,26 +1914,20 @@ class PredictedQuantumGraphComponents:
                 for quantum_model in itertools.islice(self.quantum_datasets.values(), zstd_dict_n_inputs)
             }
             try:
-
+                cdict_data = zstandard.train_dictionary(
                     zstd_dict_size,
                     list(quantum_datasets_json.values()),
                     level=zstd_level,
-                )
+                ).as_bytes()
             except zstandard.ZstdError as err:
                 warnings.warn(f"Not using a compression dictionary: {err}.")
-                cdict = None
-            else:
-                cdict_data = cdict.as_bytes()
-        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
-        indices = {quantum_id: n for n, quantum_id in enumerate(sorted(self.quantum_datasets.keys()))}
         with BaseQuantumGraphWriter.open(
             uri,
             header=self.header,
             pipeline_graph=self.pipeline_graph,
-            indices=indices,
             address_filename="quanta",
-            compressor=compressor,
             cdict_data=cdict_data,
+            zstd_level=zstd_level,
         ) as writer:
             writer.write_single_model("thin_graph", self.thin_graph)
             if self.dimension_data is None: