lsst-pipe-base 29.2025.4500__py3-none-any.whl → 29.2025.4700__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +156 -11
- lsst/pipe/base/log_capture.py +98 -7
- lsst/pipe/base/pipeline_graph/expressions.py +3 -3
- lsst/pipe/base/quantum_graph/_common.py +21 -1
- lsst/pipe/base/quantum_graph/_multiblock.py +14 -39
- lsst/pipe/base/quantum_graph/_predicted.py +90 -90
- lsst/pipe/base/quantum_graph/_provenance.py +345 -200
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +19 -19
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +201 -72
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +45 -35
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +15 -17
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +57 -149
- lsst/pipe/base/quantum_graph_builder.py +0 -1
- lsst/pipe/base/quantum_provenance_graph.py +2 -44
- lsst/pipe/base/single_quantum_executor.py +43 -9
- lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/RECORD +29 -29
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/zip-safe +0 -0
lsst/pipe/base/_status.py
CHANGED
@@ -27,28 +27,37 @@
 
 from __future__ import annotations
 
+__all__ = (
+    "AlgorithmError",
+    "AnnotatedPartialOutputsError",
+    "ExceptionInfo",
+    "InvalidQuantumError",
+    "NoWorkFound",
+    "QuantumAttemptStatus",
+    "QuantumSuccessCaveats",
+    "RepeatableQuantumError",
+    "UnprocessableDataError",
+    "UpstreamFailureNoWorkFound",
+)
+
 import abc
 import enum
 import logging
+import sys
 from typing import TYPE_CHECKING, Any, ClassVar, Protocol
 
+import pydantic
+
 from lsst.utils import introspection
+from lsst.utils.logging import LsstLogAdapter, getLogger
 
 from ._task_metadata import GetSetDictMetadata, NestedMetadataDict
 
 if TYPE_CHECKING:
-    from
+    from ._task_metadata import TaskMetadata
 
-
-
-    "AnnotatedPartialOutputsError",
-    "InvalidQuantumError",
-    "NoWorkFound",
-    "QuantumSuccessCaveats",
-    "RepeatableQuantumError",
-    "UnprocessableDataError",
-    "UpstreamFailureNoWorkFound",
-)
+
+_LOG = getLogger(__name__)
 
 
 class QuantumSuccessCaveats(enum.Flag):
@@ -175,6 +184,142 @@ class QuantumSuccessCaveats(enum.Flag):
     }
 
 
+class ExceptionInfo(pydantic.BaseModel):
+    """Information about an exception that was raised."""
+
+    type_name: str
+    """Fully-qualified Python type name for the exception raised."""
+
+    message: str
+    """String message included in the exception."""
+
+    metadata: dict[str, float | int | str | bool | None]
+    """Additional metadata included in the exception."""
+
+    @classmethod
+    def _from_metadata(cls, md: TaskMetadata) -> ExceptionInfo:
+        """Construct from task metadata.
+
+        Parameters
+        ----------
+        md : `TaskMetadata`
+            Metadata about the error, as written by
+            `AnnotatedPartialOutputsError`.
+
+        Returns
+        -------
+        info : `ExceptionInfo`
+            Information about the exception.
+        """
+        result = cls(type_name=md["type"], message=md["message"], metadata={})
+        if "metadata" in md:
+            raw_err_metadata = md["metadata"].to_dict()
+            for k, v in raw_err_metadata.items():
+                # Guard against error metadata we wouldn't be able to serialize
+                # later via Pydantic; don't want one weird value bringing down
+                # our ability to report on an entire run.
+                if isinstance(v, float | int | str | bool):
+                    result.metadata[k] = v
+                else:
+                    _LOG.debug(
+                        "Not propagating nested or JSON-incompatible exception metadata key %s=%r.", k, v
+                    )
+        return result
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class QuantumAttemptStatus(enum.Enum):
+    """Enum summarizing an attempt to run a quantum."""
+
+    UNKNOWN = -3
+    """The status of this attempt is unknown.
+
+    This usually means no logs or metadata were written, and it at least could
+    not be determined whether the quantum was blocked by an upstream failure
+    (if it was definitely blocked, `BLOCKED` is set instead).
+    """
+
+    LOGS_MISSING = -2
+    """Task metadata was written for this attempt but logs were not.
+
+    This is a rare condition that requires a hard failure (i.e. the kind that
+    can prevent a ``finally`` block from running or I/O from being durable) at
+    a very precise time.
+    """
+
+    FAILED = -1
+    """Execution of the quantum failed.
+
+    This is always set if the task metadata dataset was not written but logs
+    were, as is the case when a Python exception is caught and handled by the
+    execution system. It may also be set in cases where logs were not written
+    either, but other information was available (e.g. from higher-level
+    orchestration tooling) to mark it as a failure.
+    """
+
+    BLOCKED = 0
+    """This quantum was not executed because an upstream quantum failed.
+
+    Upstream quanta with status `UNKNOWN` or `FAILED` are considered blockers;
+    `LOGS_MISSING` is not.
+    """
+
+    SUCCESSFUL = 1
+    """This quantum was successfully executed.
+
+    Quanta may be considered successful even if they do not write any outputs
+    or shortcut early by raising `NoWorkFound` or one of its variants. They
+    may even be considered successful if they raise
+    `AnnotatedPartialOutputsError` if the executor is configured to treat that
+    exception as a non-failure. See `QuantumSuccessCaveats` for details on how
+    these "successes with caveats" are reported.
+    """
+
+
 class GetSetDictMetadataHolder(Protocol):
     """Protocol for objects that have a ``metadata`` attribute that satisfies
     `GetSetDictMetadata`.
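The new ExceptionInfo class is a plain Pydantic record, so it round-trips through JSON with standard Pydantic v2 calls. The snippet below is only an illustration: ExceptionInfoSketch is a hypothetical stand-in that mirrors the fields added above, and the exception name and metadata values are made up.

import pydantic

class ExceptionInfoSketch(pydantic.BaseModel):
    # Mirrors the fields added to lsst/pipe/base/_status.py above.
    type_name: str
    message: str
    metadata: dict[str, float | int | str | bool | None]

# Round-trip through JSON, as provenance tooling would when the record is
# stored alongside captured logs.
info = ExceptionInfoSketch(
    type_name="lsst.pipe.base.RepeatableQuantumError",
    message="PSF fit did not converge",
    metadata={"n_iterations": 25, "chi2": 13.7},
)
restored = ExceptionInfoSketch.model_validate_json(info.model_dump_json())
assert restored == info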
lsst/pipe/base/log_capture.py
CHANGED
@@ -29,28 +29,105 @@ from __future__ import annotations
 
 __all__ = ["LogCapture"]
 
+import dataclasses
 import logging
 import os
 import shutil
 import tempfile
+import uuid
 from collections.abc import Iterator
 from contextlib import contextmanager, suppress
 from logging import FileHandler
 
-
-from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
+import pydantic
 
-from .
+from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
+from lsst.daf.butler.logging import (
+    ButlerLogRecord,
+    ButlerLogRecordHandler,
+    ButlerLogRecords,
+    ButlerMDC,
+    JsonLogFormatter,
+)
+
+from ._status import ExceptionInfo, InvalidQuantumError
+from ._task_metadata import TaskMetadata
 from .automatic_connection_constants import METADATA_OUTPUT_TEMPLATE
 from .pipeline_graph import TaskNode
 
 _LOG = logging.getLogger(__name__)
 
 
-class
-    """
+class _ExecutionLogRecordsExtra(pydantic.BaseModel):
+    """Extra information about a quantum's execution stored with logs.
+
+    This middleware-private model includes information that is not directly
+    available via any public interface, as it is used exclusively for
+    provenance extraction and then made available through the provenance
+    quantum graph.
+    """
+
+    exception: ExceptionInfo | None = None
+    """Exception information for this quantum, if it failed.
+    """
+
+    metadata: TaskMetadata | None = None
+    """Metadata for this quantum, if it failed.
+
+    Metadata datasets are written if and only if a quantum succeeds, but we
+    still want to capture metadata from failed attempts, so we store it in the
+    log dataset. This field is always `None` when the quantum succeeds,
+    because in that case the metadata is already stored separately.
+    """
+
+    previous_process_quanta: list[uuid.UUID] = pydantic.Field(default_factory=list)
+    """The IDs of other quanta previously executed in the same process as this
+    one.
+    """
+
+    logs: list[ButlerLogRecord] = pydantic.Field(default_factory=list)
+    """Logs for this attempt.
+
+    This is always empty for the most recent attempt, because that stores logs
+    in the main section of the butler log records.
+    """
+
+    previous_attempts: list[_ExecutionLogRecordsExtra] = pydantic.Field(default_factory=list)
+    """Information about previous attempts to run this task within the same
+    `~lsst.daf.butler.CollectionType.RUN` collection.
+
+    This is always empty for any attempt other than the most recent one,
+    as all previous attempts are flattened into one list.
+    """
+
+    def attach_previous_attempt(self, log_records: ButlerLogRecords) -> None:
+        """Attach logs from a previous attempt to this struct.
+
+        Parameters
+        ----------
+        log_records : `ButlerLogRecords`
+            Logs from a past attempt to run a quantum.
+        """
+        previous = self.model_validate(log_records.extra)
+        previous.logs.extend(log_records)
+        self.previous_attempts.extend(previous.previous_attempts)
+        self.previous_attempts.append(previous)
+        previous.previous_attempts.clear()
+
+
+@dataclasses.dataclass
+class _LogCaptureContext:
+    """Controls for log capture returned by the `LogCapture.capture_logging`
+    context manager.
+    """
 
     store: bool = True
+    """Whether to store logs at all."""
+
+    extra: _ExecutionLogRecordsExtra = dataclasses.field(default_factory=_ExecutionLogRecordsExtra)
+    """Extra information about the quantum's execution to store for provenance
+    extraction.
+    """
 
 
 class LogCapture:
@@ -88,7 +165,7 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[
+    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -121,7 +198,7 @@
             metadata_ref = quantum.outputs[METADATA_OUTPUT_TEMPLATE.format(label=task_node.label)][0]
             mdc["RUN"] = metadata_ref.run
 
-        ctx =
+        ctx = _LogCaptureContext()
         log_dataset_name = (
             task_node.log_output.dataset_type_name if task_node.log_output is not None else None
         )
@@ -154,6 +231,12 @@
                 # Ensure that the logs are stored in butler.
                 logging.getLogger().removeHandler(log_handler_file)
                 log_handler_file.close()
+                if ctx.extra:
+                    with open(log_file, "a") as log_stream:
+                        ButlerLogRecords.write_streaming_extra(
+                            log_stream,
+                            ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                        )
                 if ctx.store:
                     self._ingest_log_records(quantum, log_dataset_name, log_file)
                 shutil.rmtree(tmpdir, ignore_errors=True)
@@ -165,7 +248,15 @@
            try:
                with ButlerMDC.set_mdc(mdc):
                    yield ctx
+            except:
+                raise
+            else:
+                # If the quantum succeeded, we don't need to save the
+                # metadata in the logs, because we'll have saved them in
+                # the metadata.
+                ctx.extra.metadata = None
            finally:
+                log_handler_memory.records.extra = ctx.extra.model_dump()
                # Ensure that the logs are stored in butler.
                logging.getLogger().removeHandler(log_handler_memory)
                if ctx.store:
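The attach_previous_attempt logic above keeps retry history flat: each older attempt's own previous_attempts are folded into the newest attempt's list rather than nested. Below is a minimal self-contained sketch of that flattening, with a simplified hypothetical model (AttemptExtraSketch) standing in for _ExecutionLogRecordsExtra and a plain dict standing in for the stored log-records "extra" payload; it is not the package's code.

import pydantic

class AttemptExtraSketch(pydantic.BaseModel):
    # Simplified stand-in for _ExecutionLogRecordsExtra: just enough fields to
    # show how earlier attempts end up in one flat list.
    logs: list[str] = pydantic.Field(default_factory=list)
    previous_attempts: list["AttemptExtraSketch"] = pydantic.Field(default_factory=list)

    def attach_previous_attempt(self, extra_payload: dict, old_logs: list[str]) -> None:
        # Validate the "extra" payload saved with an older log dataset, fold its
        # own previous attempts into this one, then append it with its history cleared.
        previous = self.model_validate(extra_payload)
        previous.logs.extend(old_logs)
        self.previous_attempts.extend(previous.previous_attempts)
        self.previous_attempts.append(previous)
        previous.previous_attempts.clear()

AttemptExtraSketch.model_rebuild()

current = AttemptExtraSketch()
current.attach_previous_attempt({"previous_attempts": [{"logs": ["attempt 0"]}]}, ["attempt 1"])
# Both earlier attempts now sit in one flat list on the newest attempt.
assert [a.logs for a in current.previous_attempts] == [["attempt 0"], ["attempt 1"]]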
lsst/pipe/base/pipeline_graph/expressions.py
CHANGED
@@ -45,13 +45,13 @@ import dataclasses
 import functools
 from typing import TYPE_CHECKING, Any, Literal, TypeAlias
 
-from lsst.daf.butler.
+from lsst.daf.butler.queries.expressions.parser.ply import lex, yacc
 
 from ._exceptions import InvalidExpressionError
 
 if TYPE_CHECKING:
-    from lsst.daf.butler.
-    from lsst.daf.butler.
+    from lsst.daf.butler.queries.expressions.parser.parserLex import LexToken
+    from lsst.daf.butler.queries.expressions.parser.parserYacc import YaccProduction
 
 
 class _ParserLex:
lsst/pipe/base/quantum_graph/_common.py
CHANGED
@@ -28,6 +28,7 @@
 from __future__ import annotations
 
 __all__ = (
+    "FORMAT_VERSION",
     "BaseQuantumGraph",
     "BaseQuantumGraphReader",
     "BipartiteEdgeInfo",
@@ -60,6 +61,7 @@ import pydantic
 import zstandard
 
 from lsst.daf.butler import DataCoordinate, DataIdValue
+from lsst.daf.butler._rubin import generate_uuidv7
 from lsst.resources import ResourcePath, ResourcePathExpression
 
 from ..pipeline_graph import DatasetTypeNode, Edge, PipelineGraph, TaskImportMode, TaskNode
@@ -91,6 +93,19 @@ DataCoordinateValues: TypeAlias = list[DataIdValue]
 
 _T = TypeVar("_T", bound=pydantic.BaseModel)
 
+FORMAT_VERSION: int = 1
+"""
+File format version number for new files.
+
+This applies to both predicted and provenance QGs, since they usually change
+in concert.
+
+CHANGELOG:
+
+- 0: Initial version.
+- 1: Switched from internal integer IDs to UUIDs in all models.
+"""
+
 
 class IncompleteQuantumGraphError(RuntimeError):
     pass
@@ -99,7 +114,7 @@ class IncompleteQuantumGraphError(RuntimeError):
 class HeaderModel(pydantic.BaseModel):
     """Data model for the header of a quantum graph file."""
 
-    version: int =
+    version: int = FORMAT_VERSION
     """File format / data model version number."""
 
     graph_type: str = ""
@@ -157,6 +172,11 @@
     quantum graph file).
     """
 
+    provenance_dataset_id: uuid.UUID = pydantic.Field(default_factory=generate_uuidv7)
+    """The dataset ID for provenance quantum graph when it is ingested into
+    a butler repository.
+    """
+
     @classmethod
     def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> HeaderModel:
         """Extract a header from an old `QuantumGraph` instance.
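HeaderModel.version now defaults to the shared FORMAT_VERSION constant, whose docstring carries its own changelog. How readers treat the version field is not shown in this diff; the snippet below is only a generic sketch of the usual pattern, and SUPPORTED_VERSIONS is an assumed example rather than anything defined by the package.

# Hypothetical reader-side check; FORMAT_VERSION matches the constant added
# above, SUPPORTED_VERSIONS is an assumed example.
FORMAT_VERSION: int = 1
SUPPORTED_VERSIONS = frozenset({0, 1})

def check_header_version(version: int) -> None:
    # Reject files written by a newer (or otherwise unknown) format revision.
    if version not in SUPPORTED_VERSIONS:
        raise RuntimeError(
            f"Quantum graph file has format version {version}; this build reads "
            f"versions {sorted(SUPPORTED_VERSIONS)} and writes version {FORMAT_VERSION}."
        )

check_header_version(1)  # accepted
# check_header_version(2) would raise RuntimeError.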
lsst/pipe/base/quantum_graph/_multiblock.py
CHANGED
@@ -323,10 +323,11 @@ class AddressReader:
     rows: dict[uuid.UUID, AddressRow] = dataclasses.field(default_factory=dict)
     """Rows that have already been read."""
 
-    rows_by_index: dict[int, AddressRow] = dataclasses.field(default_factory=dict)
-    """Rows that have already been read, keyed by integer index."""
-
     pages: list[AddressPage] = dataclasses.field(default_factory=list)
+    """Descriptions of the file offsets and integer row indexes of pages and
+    flags for whether they have been read already.
+    """
+
     page_bounds: dict[int, PageBounds] = dataclasses.field(default_factory=dict)
     """Mapping from page index to page boundary information."""
 
@@ -502,32 +503,23 @@ class AddressReader:
         self.pages.clear()
         return self.rows
 
-    def find(self, key: uuid.UUID
+    def find(self, key: uuid.UUID) -> AddressRow:
         """Read the row for the given UUID or integer index.
 
         Parameters
         ----------
-        key : `uuid.UUID`
-            UUID
+        key : `uuid.UUID`
+            UUID to find.
 
         Returns
         -------
         row : `AddressRow`
             Addresses for the given UUID.
         """
-
-            case uuid.UUID():
-                return self._find_uuid(key)
-            case int():
-                return self._find_index(key)
-            case _:
-                raise TypeError(f"Invalid argument: {key}.")
-
-    def _find_uuid(self, target: uuid.UUID) -> AddressRow:
-        if (row := self.rows.get(target)) is not None:
+        if (row := self.rows.get(key)) is not None:
             return row
         if self.n_rows == 0 or not self.pages:
-            raise LookupError(f"Address for {
+            raise LookupError(f"Address for {key} not found.")
 
         # Use a binary search to find the page containing the target UUID.
         left = 0
@@ -535,35 +527,19 @@
         while left <= right:
             mid = left + ((right - left) // 2)
             self._read_page(mid)
-            if (row := self.rows.get(
+            if (row := self.rows.get(key)) is not None:
                 return row
             bounds = self.page_bounds[mid]
-            if
+            if key.int < bounds.uuid_int_begin:
                 right = mid - 1
-            elif
+            elif key.int > bounds.uuid_int_end:
                 left = mid + 1
             else:
                 # Should have been on this page, but it wasn't.
-                raise LookupError(f"Address for {
+                raise LookupError(f"Address for {key} not found.")
 
         # Ran out of pages to search.
-        raise LookupError(f"Address for {
-
-    def _find_index(self, target: int) -> AddressRow:
-        # First shortcut if we've already loaded this row.
-        if (row := self.rows_by_index.get(target)) is not None:
-            return row
-        if target < 0 or target >= self.n_rows:
-            raise LookupError(f"Address for index {target} not found.")
-        # Since all indexes should be present, we can predict the right page
-        # exactly.
-        page_index = target // self.rows_per_page
-        self._read_page(page_index)
-        try:
-            return self.rows_by_index[target]
-        except KeyError:
-            _LOG.debug("Index find failed: %s should have been in page %s.", target, page_index)
-            raise LookupError(f"Address for {target} not found.") from None
+        raise LookupError(f"Address for {key} not found.")
 
     def _read_page(self, page_index: int, page_stream: BytesIO | None = None) -> bool:
         page = self.pages[page_index]
@@ -586,7 +562,6 @@
     def _read_row(self, page_stream: BytesIO) -> AddressRow:
         row = AddressRow.read(page_stream, self.n_addresses, self.int_size)
         self.rows[row.key] = row
-        self.rows_by_index[row.index] = row
         _LOG.debug("Read address row %s.", row)
         return row
 
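With the integer-index lookup removed, AddressReader.find resolves a UUID by binary-searching pages whose bounds are expressed as UUID integer values (uuid.UUID.int). Below is a self-contained sketch of that search; PageSketch is a hypothetical stand-in for the reader's page and bounds bookkeeping, and the real method additionally caches rows it has already read.

import uuid
from dataclasses import dataclass

@dataclass
class PageSketch:
    # Stand-in for a page of address rows: contiguous UUID-int bounds plus the
    # rows the page contains. Pages are assumed to be sorted by their bounds.
    uuid_int_begin: int
    uuid_int_end: int
    rows: dict[uuid.UUID, str]

def find_sketch(pages: list[PageSketch], key: uuid.UUID) -> str:
    # Binary search over pages by UUID integer value, mirroring the rewritten
    # AddressReader.find.
    left, right = 0, len(pages) - 1
    while left <= right:
        mid = left + ((right - left) // 2)
        page = pages[mid]
        if (row := page.rows.get(key)) is not None:
            return row
        if key.int < page.uuid_int_begin:
            right = mid - 1
        elif key.int > page.uuid_int_end:
            left = mid + 1
        else:
            # The key falls inside this page's bounds but is not present.
            raise LookupError(f"Address for {key} not found.")
    raise LookupError(f"Address for {key} not found.")

ids = sorted((uuid.uuid4() for _ in range(4)), key=lambda u: u.int)
pages = [
    PageSketch(ids[0].int, ids[1].int, {ids[0]: "row-0", ids[1]: "row-1"}),
    PageSketch(ids[2].int, ids[3].int, {ids[2]: "row-2", ids[3]: "row-3"}),
]
assert find_sketch(pages, ids[2]) == "row-2"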