lsst-pipe-base 29.2025.4400-py3-none-any.whl → 29.2025.4600-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +156 -11
- lsst/pipe/base/log_capture.py +98 -7
- lsst/pipe/base/pipelineIR.py +36 -3
- lsst/pipe/base/pipeline_graph/expressions.py +3 -3
- lsst/pipe/base/quantum_graph/_common.py +6 -0
- lsst/pipe/base/quantum_graph/_predicted.py +13 -17
- lsst/pipe/base/quantum_graph/_provenance.py +322 -106
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -9
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +154 -53
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +27 -34
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +8 -7
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +5 -8
- lsst/pipe/base/quantum_provenance_graph.py +2 -44
- lsst/pipe/base/single_quantum_executor.py +43 -9
- lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/RECORD +28 -28
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/pipe/base/_status.py
CHANGED
@@ -27,28 +27,37 @@
 
 from __future__ import annotations
 
+__all__ = (
+    "AlgorithmError",
+    "AnnotatedPartialOutputsError",
+    "ExceptionInfo",
+    "InvalidQuantumError",
+    "NoWorkFound",
+    "QuantumAttemptStatus",
+    "QuantumSuccessCaveats",
+    "RepeatableQuantumError",
+    "UnprocessableDataError",
+    "UpstreamFailureNoWorkFound",
+)
+
 import abc
 import enum
 import logging
+import sys
 from typing import TYPE_CHECKING, Any, ClassVar, Protocol
 
+import pydantic
+
 from lsst.utils import introspection
+from lsst.utils.logging import LsstLogAdapter, getLogger
 
 from ._task_metadata import GetSetDictMetadata, NestedMetadataDict
 
 if TYPE_CHECKING:
-    from
+    from ._task_metadata import TaskMetadata
 
-__all__ = (
-    "AlgorithmError",
-    "AnnotatedPartialOutputsError",
-    "InvalidQuantumError",
-    "NoWorkFound",
-    "QuantumSuccessCaveats",
-    "RepeatableQuantumError",
-    "UnprocessableDataError",
-    "UpstreamFailureNoWorkFound",
-)
+
+_LOG = getLogger(__name__)
 
 
 class QuantumSuccessCaveats(enum.Flag):
@@ -175,6 +184,142 @@ class QuantumSuccessCaveats(enum.Flag):
     }
 
 
+class ExceptionInfo(pydantic.BaseModel):
+    """Information about an exception that was raised."""
+
+    type_name: str
+    """Fully-qualified Python type name for the exception raised."""
+
+    message: str
+    """String message included in the exception."""
+
+    metadata: dict[str, float | int | str | bool | None]
+    """Additional metadata included in the exception."""
+
+    @classmethod
+    def _from_metadata(cls, md: TaskMetadata) -> ExceptionInfo:
+        """Construct from task metadata.
+
+        Parameters
+        ----------
+        md : `TaskMetadata`
+            Metadata about the error, as written by
+            `AnnotatedPartialOutputsError`.
+
+        Returns
+        -------
+        info : `ExceptionInfo`
+            Information about the exception.
+        """
+        result = cls(type_name=md["type"], message=md["message"], metadata={})
+        if "metadata" in md:
+            raw_err_metadata = md["metadata"].to_dict()
+            for k, v in raw_err_metadata.items():
+                # Guard against error metadata we wouldn't be able to serialize
+                # later via Pydantic; don't want one weird value bringing down
+                # our ability to report on an entire run.
+                if isinstance(v, float | int | str | bool):
+                    result.metadata[k] = v
+                else:
+                    _LOG.debug(
+                        "Not propagating nested or JSON-incompatible exception metadata key %s=%r.", k, v
+                    )
+        return result
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class QuantumAttemptStatus(enum.Enum):
+    """Enum summarizing an attempt to run a quantum."""
+
+    UNKNOWN = -3
+    """The status of this attempt is unknown.
+
+    This usually means no logs or metadata were written, and it could not
+    even be determined whether the quantum was blocked by an upstream failure
+    (if it was definitely blocked, `BLOCKED` is set instead).
+    """
+
+    LOGS_MISSING = -2
+    """Task metadata was written for this attempt but logs were not.
+
+    This is a rare condition that requires a hard failure (i.e. the kind that
+    can prevent a ``finally`` block from running or I/O from being durable) at
+    a very precise time.
+    """
+
+    FAILED = -1
+    """Execution of the quantum failed.
+
+    This is always set if the task metadata dataset was not written but logs
+    were, as is the case when a Python exception is caught and handled by the
+    execution system.  It may also be set in cases where logs were not written
+    either, but other information was available (e.g. from higher-level
+    orchestration tooling) to mark it as a failure.
+    """
+
+    BLOCKED = 0
+    """This quantum was not executed because an upstream quantum failed.
+
+    Upstream quanta with status `UNKNOWN` or `FAILED` are considered blockers;
+    `LOGS_MISSING` is not.
+    """
+
+    SUCCESSFUL = 1
+    """This quantum was successfully executed.
+
+    Quanta may be considered successful even if they do not write any outputs
+    or shortcut early by raising `NoWorkFound` or one of its variants.  They
+    may even be considered successful if they raise
+    `AnnotatedPartialOutputsError`, when the executor is configured to treat
+    that exception as a non-failure.  See `QuantumSuccessCaveats` for details
+    on how these "successes with caveats" are reported.
+    """
+
+
 class GetSetDictMetadataHolder(Protocol):
    """Protocol for objects that have a ``metadata`` attribute that satisfies
    `GetSetDictMetadata`.
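
The new `ExceptionInfo` model and `QuantumAttemptStatus` enum carry the bulk of this change to `_status.py`. A minimal, runnable sketch of how the two fit together; the classes below are standalone stand-ins mirroring only the fields shown in the diff, not imports from the package:

    import enum

    import pydantic


    class ExceptionInfo(pydantic.BaseModel):
        """Stand-in with the same fields as the new model."""

        type_name: str
        message: str
        metadata: dict[str, float | int | str | bool | None]


    class QuantumAttemptStatus(enum.Enum):
        """Stand-in mirroring the new enum's values."""

        UNKNOWN = -3
        LOGS_MISSING = -2
        FAILED = -1
        BLOCKED = 0
        SUCCESSFUL = 1


    # Only scalar values survive the guard in ExceptionInfo._from_metadata;
    # nested mappings are dropped rather than breaking serialization later.
    raw = {"n_sources": 42, "threshold": 5.0, "nested": {"a": 1}}
    info = ExceptionInfo(
        type_name="lsst.pipe.base.NoWorkFound",
        message="no sources above threshold",
        metadata={k: v for k, v in raw.items() if isinstance(v, float | int | str | bool)},
    )
    print(info.model_dump_json())  # "nested" is absent from the output
    assert QuantumAttemptStatus(-1) is QuantumAttemptStatus.FAILED

Note the sign convention: negative values are abnormal outcomes, zero is blocked, and positive is success.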
lsst/pipe/base/log_capture.py
CHANGED
@@ -29,28 +29,105 @@ from __future__ import annotations
 
 __all__ = ["LogCapture"]
 
+import dataclasses
 import logging
 import os
 import shutil
 import tempfile
+import uuid
 from collections.abc import Iterator
 from contextlib import contextmanager, suppress
 from logging import FileHandler
 
-
-from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
+import pydantic
 
-from .
+from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
+from lsst.daf.butler.logging import (
+    ButlerLogRecord,
+    ButlerLogRecordHandler,
+    ButlerLogRecords,
+    ButlerMDC,
+    JsonLogFormatter,
+)
+
+from ._status import ExceptionInfo, InvalidQuantumError
+from ._task_metadata import TaskMetadata
 from .automatic_connection_constants import METADATA_OUTPUT_TEMPLATE
 from .pipeline_graph import TaskNode
 
 _LOG = logging.getLogger(__name__)
 
 
-class
-    """
+class _ExecutionLogRecordsExtra(pydantic.BaseModel):
+    """Extra information about a quantum's execution stored with logs.
+
+    This middleware-private model includes information that is not directly
+    available via any public interface, as it is used exclusively for
+    provenance extraction and then made available through the provenance
+    quantum graph.
+    """
+
+    exception: ExceptionInfo | None = None
+    """Exception information for this quantum, if it failed."""
+
+    metadata: TaskMetadata | None = None
+    """Metadata for this quantum, if it failed.
+
+    Metadata datasets are written if and only if a quantum succeeds, but we
+    still want to capture metadata from failed attempts, so we store it in the
+    log dataset.  This field is always `None` when the quantum succeeds,
+    because in that case the metadata is already stored separately.
+    """
+
+    previous_process_quanta: list[uuid.UUID] = pydantic.Field(default_factory=list)
+    """The IDs of other quanta previously executed in the same process as this
+    one.
+    """
+
+    logs: list[ButlerLogRecord] = pydantic.Field(default_factory=list)
+    """Logs for this attempt.
+
+    This is always empty for the most recent attempt, because that stores logs
+    in the main section of the butler log records.
+    """
+
+    previous_attempts: list[_ExecutionLogRecordsExtra] = pydantic.Field(default_factory=list)
+    """Information about previous attempts to run this task within the same
+    `~lsst.daf.butler.CollectionType.RUN` collection.
+
+    This is always empty for any attempt other than the most recent one,
+    as all previous attempts are flattened into one list.
+    """
+
+    def attach_previous_attempt(self, log_records: ButlerLogRecords) -> None:
+        """Attach logs from a previous attempt to this struct.
+
+        Parameters
+        ----------
+        log_records : `ButlerLogRecords`
+            Logs from a past attempt to run a quantum.
+        """
+        previous = self.model_validate(log_records.extra)
+        previous.logs.extend(log_records)
+        self.previous_attempts.extend(previous.previous_attempts)
+        self.previous_attempts.append(previous)
+        previous.previous_attempts.clear()
+
+
+@dataclasses.dataclass
+class _LogCaptureContext:
+    """Controls for log capture returned by the `LogCapture.capture_logging`
+    context manager.
+    """
 
     store: bool = True
+    """Whether to store logs at all."""
+
+    extra: _ExecutionLogRecordsExtra = dataclasses.field(default_factory=_ExecutionLogRecordsExtra)
+    """Extra information about the quantum's execution to store for provenance
+    extraction.
+    """
 
 
 class LogCapture:
@@ -88,7 +165,7 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[
+    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -121,7 +198,7 @@ class LogCapture:
         metadata_ref = quantum.outputs[METADATA_OUTPUT_TEMPLATE.format(label=task_node.label)][0]
         mdc["RUN"] = metadata_ref.run
 
-        ctx =
+        ctx = _LogCaptureContext()
         log_dataset_name = (
             task_node.log_output.dataset_type_name if task_node.log_output is not None else None
         )
@@ -154,6 +231,12 @@ class LogCapture:
             # Ensure that the logs are stored in butler.
             logging.getLogger().removeHandler(log_handler_file)
             log_handler_file.close()
+            if ctx.extra:
+                with open(log_file, "a") as log_stream:
+                    ButlerLogRecords.write_streaming_extra(
+                        log_stream,
+                        ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                    )
             if ctx.store:
                 self._ingest_log_records(quantum, log_dataset_name, log_file)
             shutil.rmtree(tmpdir, ignore_errors=True)
@@ -165,7 +248,15 @@ class LogCapture:
         try:
             with ButlerMDC.set_mdc(mdc):
                 yield ctx
+        except:
+            raise
+        else:
+            # If the quantum succeeded, we don't need to save the metadata
+            # in the logs, because it will have been saved in the metadata
+            # dataset.
+            ctx.extra.metadata = None
        finally:
+            log_handler_memory.records.extra = ctx.extra.model_dump()
            # Ensure that the logs are stored in butler.
            logging.getLogger().removeHandler(log_handler_memory)
            if ctx.store:
lsst/pipe/base/pipelineIR.py
CHANGED
@@ -45,7 +45,7 @@ import warnings
 from collections import Counter
 from collections.abc import Generator, Hashable, Iterable, MutableMapping
 from dataclasses import dataclass, field
-from typing import Any, Literal
+from typing import Any, Literal, cast
 
 import yaml
 
@@ -461,6 +461,8 @@ class ImportIR:
     """list of tasks that should be excluded when inheriting this pipeline.
     Either the include or exclude attributes may be specified, but not both.
     """
+    rename: dict[str, str] = field(default_factory=dict)
+    """dict of tasks to rename, keyed by old label, with the new label as the value."""
     importContracts: bool = True
     """Boolean attribute to dictate if contracts should be inherited with the
     pipeline or not.
@@ -497,18 +499,49 @@ class ImportIR:
                 "An include list and an exclude list cannot both be specified"
                 " when declaring a pipeline import."
             )
+        if rename_keys := self.rename.keys():
+            rename_values_set = set(self.rename.values())
+            if len(rename_values_set) != len(rename_keys):
+                raise ValueError(f"rename {rename_keys=} must not have duplicates")
+            if rename_values_set.intersection(rename_keys):
+                raise ValueError(
+                    f"rename keys={rename_keys} must not intersect with values={self.rename.values()}"
+                )
+
         tmp_pipeline = PipelineIR.from_uri(os.path.expandvars(self.location))
         if self.instrument is not _Tags.KeepInstrument:
             tmp_pipeline.instrument = self.instrument
 
         included_labels = set()
+        renamed_tasks = {}
         for label in tmp_pipeline.tasks:
+            is_included = self.include and label in self.include
             if (
-                (self.include and label in self.include)
+                is_included
                 or (self.exclude and label not in self.exclude)
                 or (self.include is None and self.exclude is None)
             ):
-                included_labels.add(label)
+                if (label_new := self.rename.get(label)) is not None:
+                    renamed_tasks[label] = label_new
+                    if is_included:
+                        self.include = [
+                            label_new if (x == label) else x for x in cast(list[str], self.include)
+                        ]
+                else:
+                    label_new = label
+                included_labels.add(label_new)
+
+        rename_errors = []
+        for label, label_new in renamed_tasks.items():
+            if label_new in tmp_pipeline.tasks:
+                rename_errors.append(f"Can't rename {label=} to existing {label_new=}")
+            else:
+                task = tmp_pipeline.tasks.pop(label)
+                task.label = label_new
+                tmp_pipeline.tasks[label_new] = task
+
+        if rename_errors:
+            raise ValueError("; ".join(rename_errors))
 
         # Handle labeled subsets being specified in the include or exclude
         # list, adding or removing labels.
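
The new `rename` field enforces three rules before any relabeling happens. Restated as a runnable sketch on plain dicts (the task labels here are hypothetical):

    # Mirrors ImportIR's validation plus the collision check applied after
    # filtering the imported pipeline.
    rename = {"isr": "isrFixed", "calibrate": "calibrateFixed"}
    imported_labels = {"isr", "calibrate", "characterizeImage"}

    # 1. New labels must be unique across the mapping.
    assert len(set(rename.values())) == len(rename)
    # 2. No label may appear as both an old name and a new name.
    assert not set(rename.values()) & rename.keys()
    # 3. A new label must not collide with a label already in the imported
    #    pipeline; violations are collected and reported together.
    for old, new in rename.items():
        assert new not in imported_labels, f"Can't rename {old!r} to existing {new!r}"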
lsst/pipe/base/pipeline_graph/expressions.py
CHANGED
@@ -45,13 +45,13 @@ import dataclasses
 import functools
 from typing import TYPE_CHECKING, Any, Literal, TypeAlias
 
-from lsst.daf.butler.
+from lsst.daf.butler.queries.expressions.parser.ply import lex, yacc
 
 from ._exceptions import InvalidExpressionError
 
 if TYPE_CHECKING:
-    from lsst.daf.butler.
-    from lsst.daf.butler.
+    from lsst.daf.butler.queries.expressions.parser.parserLex import LexToken
+    from lsst.daf.butler.queries.expressions.parser.parserYacc import YaccProduction
 
 
 class _ParserLex:
lsst/pipe/base/quantum_graph/_common.py
CHANGED
@@ -60,6 +60,7 @@ import pydantic
 import zstandard
 
 from lsst.daf.butler import DataCoordinate, DataIdValue
+from lsst.daf.butler._rubin import generate_uuidv7
 from lsst.resources import ResourcePath, ResourcePathExpression
 
 from ..pipeline_graph import DatasetTypeNode, Edge, PipelineGraph, TaskImportMode, TaskNode
@@ -157,6 +158,11 @@ class HeaderModel(pydantic.BaseModel):
     quantum graph file).
     """
 
+    provenance_dataset_id: uuid.UUID = pydantic.Field(default_factory=generate_uuidv7)
+    """The dataset ID for the provenance quantum graph when it is ingested
+    into a butler repository.
+    """
+
     @classmethod
     def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> HeaderModel:
         """Extract a header from an old `QuantumGraph` instance.
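
`generate_uuidv7` is Rubin-internal (`lsst.daf.butler._rubin`), but the pattern itself is ordinary pydantic: a `default_factory` fixes the provenance graph's eventual butler dataset ID at the moment the header is built. A sketch with a stand-in factory (`fake_uuid7` is hypothetical; the real function returns time-ordered UUIDv7 values):

    import uuid

    import pydantic


    def fake_uuid7() -> uuid.UUID:
        """Stand-in for lsst.daf.butler._rubin.generate_uuidv7."""
        return uuid.uuid4()  # the real factory returns a time-ordered UUIDv7


    class Header(pydantic.BaseModel):
        # Each header gets a fresh ID at construction time, so the ID the
        # provenance graph will have after ingest is known up front.
        provenance_dataset_id: uuid.UUID = pydantic.Field(default_factory=fake_uuid7)


    a, b = Header(), Header()
    assert a.provenance_dataset_id != b.provenance_dataset_id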
lsst/pipe/base/quantum_graph/_predicted.py
CHANGED
@@ -1899,11 +1899,12 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
         """Construct a `PredictedQuantumGraph` instance from this reader."""
         return self.components.assemble()
 
-    def read_all(self) -> PredictedQuantumGraphReader:
+    def read_all(self) -> None:
         """Read all components in full."""
-
+        self.read_thin_graph()
+        self.read_execution_quanta()
 
-    def read_thin_graph(self) -> PredictedQuantumGraphReader:
+    def read_thin_graph(self) -> None:
         """Read the thin graph.
 
         The thin graph is a quantum-quantum DAG with internal integer IDs for
@@ -1918,17 +1919,15 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
         self.components.quantum_indices.update(
             {row.key: row.index for row in self.address_reader.rows.values()}
         )
-        return self
 
-    def read_init_quanta(self) -> PredictedQuantumGraphReader:
+    def read_init_quanta(self) -> None:
         """Read the list of special quanta that represent init-inputs and
         init-outputs.
         """
         if not self.components.init_quanta.root:
             self.components.init_quanta = self._read_single_block("init_quanta", PredictedInitQuantaModel)
-        return self
 
-    def read_dimension_data(self) -> PredictedQuantumGraphReader:
+    def read_dimension_data(self) -> None:
         """Read all dimension records.
 
         Record data IDs will be immediately deserialized, while other fields
@@ -1948,11 +1947,8 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
                 universe=self.components.pipeline_graph.universe,
             ),
         )
-        return self
 
-    def read_quantum_datasets(
-        self, quantum_ids: Iterable[uuid.UUID] | None = None
-    ) -> PredictedQuantumGraphReader:
+    def read_quantum_datasets(self, quantum_ids: Iterable[uuid.UUID] | None = None) -> None:
         """Read information about all datasets produced and consumed by the
         given quantum IDs.
 
@@ -1977,7 +1973,7 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
             self.address_reader.read_all()
             for address_row in self.address_reader.rows.values():
                 self.components.quantum_indices[address_row.key] = address_row.index
-            return self
+            return
         with MultiblockReader.open_in_zip(
             self.zf, "quantum_datasets", int_size=self.components.header.int_size
         ) as mb_reader:
@@ -1991,11 +1987,9 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
                 )
                 if quantum_datasets is not None:
                     self.components.quantum_datasets[address_row.key] = quantum_datasets
-        return self
+        return
 
-    def read_execution_quanta(
-        self, quantum_ids: Iterable[uuid.UUID] | None = None
-    ) -> PredictedQuantumGraphReader:
+    def read_execution_quanta(self, quantum_ids: Iterable[uuid.UUID] | None = None) -> None:
         """Read all information needed to execute the given quanta.
 
         Parameters
@@ -2004,4 +1998,6 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
             Iterable of quantum IDs to load. If not provided, all quanta will
             be loaded. The UUIDs of special init quanta will be ignored.
         """
-
+        self.read_init_quanta()
+        self.read_dimension_data()
+        self.read_quantum_datasets(quantum_ids)
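
The net effect of this hunk is an API shift: the reader's `read_*` methods previously returned `self`, allowing fluent chaining, and now return `None`. A sketch of a call site after the change; the import path is assumed and `load_everything` is a hypothetical helper:

    from lsst.pipe.base.quantum_graph import PredictedQuantumGraphReader  # assumed export


    def load_everything(reader: PredictedQuantumGraphReader) -> None:
        # Before: reader.read_thin_graph().read_execution_quanta() (each
        # method returned ``self``).  After: each call returns None, so the
        # reads become separate statements.
        reader.read_thin_graph()
        # Per the new body, read_execution_quanta() also triggers
        # read_init_quanta(), read_dimension_data(), and
        # read_quantum_datasets().
        reader.read_execution_quanta()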