lsst-pipe-base 29.2025.4500-py3-none-any.whl → 29.2025.4700-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +156 -11
- lsst/pipe/base/log_capture.py +98 -7
- lsst/pipe/base/pipeline_graph/expressions.py +3 -3
- lsst/pipe/base/quantum_graph/_common.py +21 -1
- lsst/pipe/base/quantum_graph/_multiblock.py +14 -39
- lsst/pipe/base/quantum_graph/_predicted.py +90 -90
- lsst/pipe/base/quantum_graph/_provenance.py +345 -200
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +19 -19
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +201 -72
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +45 -35
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +15 -17
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +57 -149
- lsst/pipe/base/quantum_graph_builder.py +0 -1
- lsst/pipe/base/quantum_provenance_graph.py +2 -44
- lsst/pipe/base/single_quantum_executor.py +43 -9
- lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/RECORD +29 -29
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/zip-safe +0 -0
@@ -32,10 +32,12 @@ __all__ = (
     "ProvenanceDatasetModel",
     "ProvenanceInitQuantumInfo",
     "ProvenanceInitQuantumModel",
+    "ProvenanceLogRecordsModel",
     "ProvenanceQuantumGraph",
     "ProvenanceQuantumGraphReader",
     "ProvenanceQuantumInfo",
     "ProvenanceQuantumModel",
+    "ProvenanceTaskMetadataModel",
 )


@@ -45,7 +47,7 @@ import uuid
 from collections import Counter
 from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, TypedDict
+from typing import TYPE_CHECKING, Any, TypedDict, TypeVar

 import astropy.table
 import networkx
@@ -53,35 +55,28 @@ import numpy as np
 import pydantic

 from lsst.daf.butler import DataCoordinate
+from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
 from lsst.resources import ResourcePathExpression
 from lsst.utils.packages import Packages

-from .._status import QuantumSuccessCaveats
+from .._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
+from .._task_metadata import TaskMetadata
 from ..pipeline_graph import PipelineGraph, TaskImportMode, TaskInitNode
-from ..quantum_provenance_graph import ExceptionInfo, QuantumRunStatus
 from ..resource_usage import QuantumResourceUsage
 from ._common import (
     BaseQuantumGraph,
     BaseQuantumGraphReader,
     ConnectionName,
     DataCoordinateValues,
-    DatasetIndex,
     DatasetInfo,
     DatasetTypeName,
     HeaderModel,
-    QuantumIndex,
     QuantumInfo,
     TaskLabel,
 )
-from ._multiblock import AddressReader, MultiblockReader
+from ._multiblock import MultiblockReader
 from ._predicted import PredictedDatasetModel, PredictedQuantumDatasetsModel

-if TYPE_CHECKING:
-    from lsst.daf.butler.logging import ButlerLogRecords
-
-    from .._task_metadata import TaskMetadata
-
-
 DATASET_ADDRESS_INDEX = 0
 QUANTUM_ADDRESS_INDEX = 1
 LOG_ADDRESS_INDEX = 2
@@ -92,6 +87,8 @@ QUANTUM_MB_NAME = "quanta"
 LOG_MB_NAME = "logs"
 METADATA_MB_NAME = "metadata"

+_I = TypeVar("_I", bound=uuid.UUID | int)
+

 class ProvenanceDatasetInfo(DatasetInfo):
     """A typed dictionary that annotates the attributes of the NetworkX graph
@@ -108,13 +105,13 @@ class ProvenanceDatasetInfo(DatasetInfo):
     dataset_id: uuid.UUID
     """Unique identifier for the dataset."""

-
-    """Whether this dataset
-    run.
+    produced: bool
+    """Whether this dataset was produced (vs. only predicted).

     This is always `True` for overall input datasets. It is also `True` for
     datasets that were produced and then removed before/during transfer back to
-    the central butler repository
+    the central butler repository, so it may not reflect the continued
+    existence of the dataset.
     """


@@ -131,17 +128,38 @@ class ProvenanceQuantumInfo(QuantumInfo):
     `ProvenanceQuantumGraph.quantum_only_xgraph`
     """

-    status: QuantumRunStatus
-    """Enumerated status for the quantum.
+    status: QuantumAttemptStatus
+    """Enumerated status for the quantum.
+
+    This corresponds to the last attempt to run this quantum, or
+    `QuantumAttemptStatus.BLOCKED` if there were no attempts.
+    """

     caveats: QuantumSuccessCaveats | None
-    """Flags indicating caveats on successful quanta.
+    """Flags indicating caveats on successful quanta.
+
+    This corresponds to the last attempt to run this quantum.
+    """

     exception: ExceptionInfo | None
-    """Information about an exception raised when the quantum was executing.
+    """Information about an exception raised when the quantum was executing.
+
+    This corresponds to the last attempt to run this quantum.
+    """

     resource_usage: QuantumResourceUsage | None
-    """Resource usage information (timing, memory use) for this quantum.
+    """Resource usage information (timing, memory use) for this quantum.
+
+    This corresponds to the last attempt to run this quantum.
+    """
+
+    attempts: list[ProvenanceQuantumAttemptModel]
+    """Information about each attempt to run this quantum.
+
+    An entry is added even if the quantum merely *should* have been attempted;
+    an empty `list` is used only for quanta that were blocked by an upstream
+    failure.
+    """


 class ProvenanceInitQuantumInfo(TypedDict):
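The node attributes above make the full retry history available alongside the last-attempt summary. A minimal consumer sketch, assuming `graph` is a fully-read `ProvenanceQuantumGraph` and `quantum_id` is a known quantum UUID (both names hypothetical):

    info = graph.quantum_only_xgraph.nodes[quantum_id]
    # "status", "caveats", "exception", and "resource_usage" describe only the
    # last attempt; "attempts" holds one entry per attempt, oldest first.
    if info["status"] is QuantumAttemptStatus.BLOCKED:
        assert not info["attempts"]  # blocked quanta were never attempted
    else:
        for attempt in info["attempts"]:
            print(attempt.attempt, attempt.status)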
@@ -173,25 +191,23 @@ class ProvenanceInitQuantumInfo(TypedDict):
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""

-
-    """Whether this dataset
-    run.
+    produced: bool
+    """Whether this dataset was produced (vs. only predicted).

     This is always `True` for overall input datasets. It is also `True` for
     datasets that were produced and then removed before/during transfer back to
-    the central butler repository
+    the central butler repository, so it may not reflect the continued
+    existence of the dataset.
     """

-    producer:
-    """
+    producer: uuid.UUID | None = None
+    """ID of the quantum that produced this dataset.

     This is `None` for overall inputs to the graph.
     """

-    consumers: list[
-    """
-    dataset.
-    """
+    consumers: list[uuid.UUID] = pydantic.Field(default_factory=list)
+    """IDs of quanta that were predicted to consume this dataset."""

     @property
     def node_id(self) -> uuid.UUID:
@@ -202,8 +218,8 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
     def from_predicted(
         cls,
         predicted: PredictedDatasetModel,
-        producer:
-        consumers: Iterable[
+        producer: uuid.UUID | None = None,
+        consumers: Iterable[uuid.UUID] = (),
     ) -> ProvenanceDatasetModel:
         """Construct from a predicted dataset model.

@@ -211,12 +227,10 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
         ----------
         predicted : `PredictedDatasetModel`
             Information about the dataset from the predicted graph.
-        producer : `
-
-
-
-            Internal IDs of the quanta that were predicted to consume this
-            dataset.
+        producer : `uuid.UUID` or `None`, optional
+            ID of the quantum that was predicted to produce this dataset.
+        consumers : `~collections.abc.Iterable` [`uuid.UUID`], optional
+            IDs of the quanta that were predicted to consume this dataset.

         Returns
         -------
@@ -225,7 +239,7 @@ class ProvenanceDatasetModel(PredictedDatasetModel):

         Notes
         -----
-        This initializes `
+        This initializes `produced` to `True` when ``producer is None`` and
         `False` otherwise, on the assumption that it will be updated later.
         """
         return cls.model_construct(
@@ -233,21 +247,18 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
             dataset_type_name=predicted.dataset_type_name,
             data_coordinate=predicted.data_coordinate,
             run=predicted.run,
-
+            produced=(producer is None),  # if it's not produced by this QG, it's an overall input
             producer=producer,
             consumers=list(consumers),
         )

-    def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
+    def _add_to_graph(self, graph: ProvenanceQuantumGraph) -> None:
         """Add this dataset and its edges to quanta to a provenance graph.

         Parameters
         ----------
         graph : `ProvenanceQuantumGraph`
             Graph to update in place.
-        address_reader : `AddressReader`
-            Reader object that can be used to look up UUIDs from integer
-            indexes.

         Notes
         -----
@@ -268,17 +279,14 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
             dataset_type_name=self.dataset_type_name,
             pipeline_node=dataset_type_node,
             run=self.run,
-
+            produced=self.produced,
         )
-        producer_id: uuid.UUID | None = None
         if self.producer is not None:
-
-
-        for consumer_index in self.consumers:
-            consumer_id = address_reader.find(consumer_index).key
+            graph._bipartite_xgraph.add_edge(self.producer, self.dataset_id)
+        for consumer_id in self.consumers:
             graph._bipartite_xgraph.add_edge(self.dataset_id, consumer_id)
-            if
-            graph._quantum_only_xgraph.add_edge(
+            if self.producer is not None:
+                graph._quantum_only_xgraph.add_edge(self.producer, consumer_id)
         graph._datasets_by_type[self.dataset_type_name][data_id] = self.dataset_id

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
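With `producer` and `consumers` now carried as UUIDs, the integer-index lookups through the removed `AddressReader` disappear from edge construction. A hypothetical use of `from_predicted` showing the `produced` initialization described in the Notes above (all field values invented, and `PredictedDatasetModel`'s constructor arguments are assumed from the fields visible in this diff):

    import uuid

    predicted = PredictedDatasetModel(
        dataset_id=uuid.uuid4(),
        dataset_type_name="calexp",
        data_coordinate=[903342, "i"],
        run="demo/run",
    )
    intermediate = ProvenanceDatasetModel.from_predicted(predicted, producer=uuid.uuid4())
    overall_input = ProvenanceDatasetModel.from_predicted(predicted)
    assert intermediate.produced is False  # has a producer; not yet known to be written
    assert overall_input.produced is True  # no producer, so it is an overall input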
@@ -327,24 +335,15 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
         return super().model_validate_strings(*args, **kwargs)


-class ProvenanceQuantumModel(pydantic.BaseModel):
-    """Data model for the quanta in a provenance quantum graph file."""
-
-    quantum_id: uuid.UUID
-    """Unique identifier for the quantum."""
-
-    task_label: TaskLabel
-    """Name of the type of this dataset.
-
-    This is always a parent dataset type name, not a component.
-
-    Note that full dataset type definitions are stored in the pipeline graph.
+class ProvenanceQuantumAttemptModel(pydantic.BaseModel):
+    """Data model for a now-superseded attempt to run a quantum in a
+    provenance quantum graph file.
     """

-
-    """
+    attempt: int = 0
+    """Counter incremented for every attempt to execute this quantum."""

-    status:
+    status: QuantumAttemptStatus = QuantumAttemptStatus.UNKNOWN
     """Enumerated status for the quantum."""

     caveats: QuantumSuccessCaveats | None = None
@@ -353,18 +352,202 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
     exception: ExceptionInfo | None = None
     """Information about an exception raised when the quantum was executing."""

-
-    """
-
+    resource_usage: QuantumResourceUsage | None = None
+    """Resource usage information (timing, memory use) for this quantum."""
+
+    previous_process_quanta: list[uuid.UUID] = pydantic.Field(default_factory=list)
+    """The IDs of other quanta previously executed in the same process as this
+    one.
+    """
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceLogRecordsModel(pydantic.BaseModel):
+    """Data model for storing execution logs in a provenance quantum graph
+    file.
     """

-
-    """
-
+    attempts: list[list[ButlerLogRecord] | None] = pydantic.Field(default_factory=list)
+    """Logs from attempts to run this task, ordered chronologically from first
+    to last.
     """

-
-
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceTaskMetadataModel(pydantic.BaseModel):
+    """Data model for storing task metadata in a provenance quantum graph
+    file.
+    """
+
+    attempts: list[TaskMetadata | None] = pydantic.Field(default_factory=list)
+    """Metadata from attempts to run this task, ordered chronologically from
+    first to last.
+    """
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceQuantumModel(pydantic.BaseModel):
+    """Data model for the quanta in a provenance quantum graph file."""
+
+    quantum_id: uuid.UUID
+    """Unique identifier for the quantum."""
+
+    task_label: TaskLabel
+    """Name of the type of this dataset."""
+
+    data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
+    """The full values (required and implied) of this dataset's data ID."""
+
+    inputs: dict[ConnectionName, list[uuid.UUID]] = pydantic.Field(default_factory=dict)
+    """IDs of the datasets predicted to be consumed by this quantum, grouped by
+    connection name.
+    """
+
+    outputs: dict[ConnectionName, list[uuid.UUID]] = pydantic.Field(default_factory=dict)
+    """IDs of the datasets predicted to be produced by this quantum, grouped by
+    connection name.
+    """
+
+    attempts: list[ProvenanceQuantumAttemptModel] = pydantic.Field(default_factory=list)
+    """Provenance for all attempts to execute this quantum, ordered
+    chronologically from first to last.
+
+    An entry is added even if the quantum merely *should* have been attempted;
+    an empty `list` is used only for quanta that were blocked by an upstream
+    failure.
+    """

     @property
     def node_id(self) -> uuid.UUID:
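The two container models give logs and task metadata the same per-attempt shape, with `None` marking an attempt whose dataset was never written. A small sketch of how the `attempts` lists are meant to be read (values invented):

    # A quantum tried twice: logs from the first attempt were lost, and the
    # second attempt ran but emitted no log records.
    records = ProvenanceLogRecordsModel(attempts=[None, []])
    assert records.attempts[0] is None
    assert records.attempts[1] == []

    # Task metadata follows the same convention: one entry per attempt.
    metadata = ProvenanceTaskMetadataModel(attempts=[None])
    assert len(metadata.attempts) == 1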
@@ -372,17 +555,13 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
         return self.quantum_id

     @classmethod
-    def from_predicted(
-        cls, predicted: PredictedQuantumDatasetsModel, indices: Mapping[uuid.UUID, int]
-    ) -> ProvenanceQuantumModel:
+    def from_predicted(cls, predicted: PredictedQuantumDatasetsModel) -> ProvenanceQuantumModel:
         """Construct from a predicted quantum model.

         Parameters
         ----------
         predicted : `PredictedQuantumDatasetsModel`
             Information about the quantum from the predicted graph.
-        indices : `~collections.abc.Mapping` [`uuid.UUID`, `int`]
-            Mapping from quantum or dataset UUID to internal integer ID.

         Returns
         -------
@@ -390,11 +569,11 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             Provenance quantum model.
         """
         inputs = {
-            connection_name: [
+            connection_name: [d.dataset_id for d in predicted_inputs]
             for connection_name, predicted_inputs in predicted.inputs.items()
         }
         outputs = {
-            connection_name: [
+            connection_name: [d.dataset_id for d in predicted_outputs]
             for connection_name, predicted_outputs in predicted.outputs.items()
         }
         return cls(
@@ -405,16 +584,13 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             outputs=outputs,
         )

-    def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
+    def _add_to_graph(self, graph: ProvenanceQuantumGraph) -> None:
         """Add this quantum and its edges to datasets to a provenance graph.

         Parameters
         ----------
         graph : `ProvenanceQuantumGraph`
             Graph to update in place.
-        address_reader : `AddressReader`
-            Reader object that can be used to look up UUIDs from integer
-            indexes.

         Notes
         -----
@@ -429,28 +605,32 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
         """
         task_node = graph.pipeline_graph.tasks[self.task_label]
         data_id = DataCoordinate.from_full_values(task_node.dimensions, tuple(self.data_coordinate))
+        last_attempt = (
+            self.attempts[-1]
+            if self.attempts
+            else ProvenanceQuantumAttemptModel(status=QuantumAttemptStatus.BLOCKED)
+        )
         graph._bipartite_xgraph.add_node(
             self.quantum_id,
             data_id=data_id,
             task_label=self.task_label,
             pipeline_node=task_node,
-            status=
-            caveats=
-            exception=
-            resource_usage=
+            status=last_attempt.status,
+            caveats=last_attempt.caveats,
+            exception=last_attempt.exception,
+            resource_usage=last_attempt.resource_usage,
+            attempts=self.attempts,
         )
-        for connection_name,
+        for connection_name, dataset_ids in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
-            for
-                dataset_id = address_reader.find(dataset_index).key
+            for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(dataset_id, self.quantum_id, is_read=True)
                 graph._bipartite_xgraph.edges[dataset_id, self.quantum_id].setdefault(
                     "pipeline_edges", []
                 ).append(read_edge)
-        for connection_name,
+        for connection_name, dataset_ids in self.outputs.items():
             write_edge = task_node.get_output_edge(connection_name)
-            for
-                dataset_id = address_reader.find(dataset_index).key
+            for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(
                     self.quantum_id,
                     dataset_id,
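The `last_attempt` fallback above is what keeps the summary node attributes well-defined for quanta that never ran. Restated in isolation, using only names from this hunk:

    # No recorded attempts means the quantum was blocked upstream; a synthetic
    # BLOCKED attempt then supplies the node's status/caveats/exception values.
    attempts: list[ProvenanceQuantumAttemptModel] = []
    last = attempts[-1] if attempts else ProvenanceQuantumAttemptModel(status=QuantumAttemptStatus.BLOCKED)
    assert last.status is QuantumAttemptStatus.BLOCKED and last.caveats is None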
@@ -529,28 +709,24 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
     Note that full dataset type definitions are stored in the pipeline graph.
     """

-    inputs: dict[ConnectionName,
-    """
-
+    inputs: dict[ConnectionName, uuid.UUID] = pydantic.Field(default_factory=dict)
+    """IDs of the datasets predicted to be consumed by this quantum, grouped by
+    connection name.
     """

-    outputs: dict[ConnectionName,
-    """
-
+    outputs: dict[ConnectionName, uuid.UUID] = pydantic.Field(default_factory=dict)
+    """IDs of the datasets predicted to be produced by this quantum, grouped by
+    connection name.
     """

     @classmethod
-    def from_predicted(
-        cls, predicted: PredictedQuantumDatasetsModel, indices: Mapping[uuid.UUID, int]
-    ) -> ProvenanceInitQuantumModel:
+    def from_predicted(cls, predicted: PredictedQuantumDatasetsModel) -> ProvenanceInitQuantumModel:
         """Construct from a predicted quantum model.

         Parameters
         ----------
         predicted : `PredictedQuantumDatasetsModel`
             Information about the quantum from the predicted graph.
-        indices : `~collections.abc.Mapping` [`uuid.UUID`, `int`]
-            Mapping from quantum or dataset UUID to internal integer ID.

         Returns
         -------
@@ -558,11 +734,11 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
             Provenance init quantum model.
         """
         inputs = {
-            connection_name:
+            connection_name: predicted_inputs[0].dataset_id
             for connection_name, predicted_inputs in predicted.inputs.items()
         }
         outputs = {
-            connection_name:
+            connection_name: predicted_outputs[0].dataset_id
             for connection_name, predicted_outputs in predicted.outputs.items()
         }
         return cls(
@@ -572,21 +748,13 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
             outputs=outputs,
         )

-    def _add_to_graph(
-        self,
-        graph: ProvenanceQuantumGraph,
-        address_reader: AddressReader,
-        empty_data_id: DataCoordinate,
-    ) -> None:
+    def _add_to_graph(self, graph: ProvenanceQuantumGraph, empty_data_id: DataCoordinate) -> None:
         """Add this quantum and its edges to datasets to a provenance graph.

         Parameters
         ----------
         graph : `ProvenanceQuantumGraph`
             Graph to update in place.
-        address_reader : `AddressReader`
-            Reader object that can be used to look up UUIDs from integer
-            indexes.
         empty_data_id : `lsst.daf.butler.DataCoordinate`
             The empty data ID for the appropriate dimension universe.

|
|
|
602
770
|
graph._bipartite_xgraph.add_node(
|
|
603
771
|
self.quantum_id, data_id=empty_data_id, task_label=self.task_label, pipeline_node=task_init_node
|
|
604
772
|
)
|
|
605
|
-
for connection_name,
|
|
773
|
+
for connection_name, dataset_id in self.inputs.items():
|
|
606
774
|
read_edge = task_init_node.get_input_edge(connection_name)
|
|
607
|
-
dataset_id = address_reader.find(dataset_index).key
|
|
608
775
|
graph._bipartite_xgraph.add_edge(dataset_id, self.quantum_id, is_read=True)
|
|
609
776
|
graph._bipartite_xgraph.edges[dataset_id, self.quantum_id].setdefault(
|
|
610
777
|
"pipeline_edges", []
|
|
611
778
|
).append(read_edge)
|
|
612
|
-
for connection_name,
|
|
779
|
+
for connection_name, dataset_id in self.outputs.items():
|
|
613
780
|
write_edge = task_init_node.get_output_edge(connection_name)
|
|
614
|
-
dataset_id = address_reader.find(dataset_index).key
|
|
615
781
|
graph._bipartite_xgraph.add_edge(
|
|
616
782
|
self.quantum_id,
|
|
617
783
|
dataset_id,
|
|
@@ -673,20 +839,17 @@ class ProvenanceInitQuantaModel(pydantic.RootModel):
     root: list[ProvenanceInitQuantumModel] = pydantic.Field(default_factory=list)
     """List of special "init" quanta, one for each task."""

-    def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
+    def _add_to_graph(self, graph: ProvenanceQuantumGraph) -> None:
         """Add this quantum and its edges to datasets to a provenance graph.

         Parameters
         ----------
         graph : `ProvenanceQuantumGraph`
             Graph to update in place.
-        address_reader : `AddressReader`
-            Reader object that can be used to look up UUIDs from integer
-            indexes.
         """
         empty_data_id = DataCoordinate.make_empty(graph.pipeline_graph.universe)
         for init_quantum in self.root:
-            init_quantum._add_to_graph(graph,
+            init_quantum._add_to_graph(graph, empty_data_id=empty_data_id)

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
    # when we inherit those docstrings in our public classes.
@@ -881,7 +1044,7 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         for task_label, quanta_for_task in self.quanta_by_task.items():
             if not self.header.n_task_quanta[task_label]:
                 continue
-            status_counts = Counter[QuantumRunStatus](
+            status_counts = Counter[QuantumAttemptStatus](
                 self._quantum_only_xgraph.nodes[q]["status"] for q in quanta_for_task.values()
             )
             caveat_counts = Counter[QuantumSuccessCaveats | None](
@@ -901,11 +1064,11 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
             rows.append(
                 {
                     "Task": task_label,
-                    "Unknown": status_counts.get(
-                    "Successful": status_counts.get(
+                    "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
+                    "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
                     "Caveats": caveats,
-                    "Blocked": status_counts.get(
-                    "Failed": status_counts.get(
+                    "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
+                    "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
                     "TOTAL": len(quanta_for_task),
                     "EXPECTED": self.header.n_task_quanta[task_label],
                 }
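The summary table tallies the new `QuantumAttemptStatus` enum with a `Counter`, using `.get(..., 0)` so that statuses absent for a task report as zero without inserting keys into the counter. A standalone sketch of the pattern:

    from collections import Counter

    counts = Counter(
        [QuantumAttemptStatus.SUCCESSFUL, QuantumAttemptStatus.SUCCESSFUL, QuantumAttemptStatus.FAILED]
    )
    assert counts.get(QuantumAttemptStatus.SUCCESSFUL, 0) == 2
    assert counts.get(QuantumAttemptStatus.BLOCKED, 0) == 0  # absent status -> 0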
@@ -988,7 +1151,7 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
     the `graph` attribute.

     The various ``read_*`` methods in this class update the `graph` attribute
-    in place
+    in place.
     """

     graph: ProvenanceQuantumGraph = dataclasses.field(init=False)
@@ -1037,30 +1200,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
     def __post_init__(self) -> None:
         self.graph = ProvenanceQuantumGraph(self.header, self.pipeline_graph)

-    def read_init_quanta(self) -> ProvenanceQuantumGraphReader:
+    def read_init_quanta(self) -> None:
         """Read the thin graph, with all edge information and categorization of
         quanta by task label.
-
-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
         """
         init_quanta = self._read_single_block("init_quanta", ProvenanceInitQuantaModel)
        for init_quantum in init_quanta.root:
             self.graph._init_quanta[init_quantum.task_label] = init_quantum.quantum_id
-        init_quanta._add_to_graph(self.graph
-        return self
+        init_quanta._add_to_graph(self.graph)

-    def read_full_graph(self) -> ProvenanceQuantumGraphReader:
+    def read_full_graph(self) -> None:
         """Read all bipartite edges and all quantum and dataset node
         attributes, fully populating the `graph` attribute.

-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
-
         Notes
         -----
         This does not read logs, metadata, or packages; those must always be
@@ -1069,49 +1221,37 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
         self.read_init_quanta()
         self.read_datasets()
         self.read_quanta()
-        return self

-    def read_datasets(self, datasets: Iterable[uuid.UUID
+    def read_datasets(self, datasets: Iterable[uuid.UUID] | None = None) -> None:
         """Read information about the given datasets.

         Parameters
         ----------
-        datasets : `~collections.abc.Iterable` [`uuid.UUID`
-            Iterable of dataset IDs
-
-            ignored.
-
-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
+        datasets : `~collections.abc.Iterable` [`uuid.UUID`], optional
+            Iterable of dataset IDs to load. If not provided, all datasets
+            will be loaded. The UUIDs and indices of quanta will be ignored.
         """
-
+        self._read_nodes(datasets, DATASET_ADDRESS_INDEX, DATASET_MB_NAME, ProvenanceDatasetModel)

-    def read_quanta(self, quanta: Iterable[uuid.UUID
+    def read_quanta(self, quanta: Iterable[uuid.UUID] | None = None) -> None:
         """Read information about the given quanta.

         Parameters
         ----------
-        quanta : `~collections.abc.Iterable` [`uuid.UUID`
-            Iterable of quantum IDs
-
-
-
-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
+        quanta : `~collections.abc.Iterable` [`uuid.UUID`], optional
+            Iterable of quantum IDs to load. If not provided, all quanta will
+            be loaded. The UUIDs and indices of datasets and special init
+            quanta will be ignored.
         """
-
+        self._read_nodes(quanta, QUANTUM_ADDRESS_INDEX, QUANTUM_MB_NAME, ProvenanceQuantumModel)

     def _read_nodes(
         self,
-        nodes: Iterable[uuid.UUID
+        nodes: Iterable[uuid.UUID] | None,
         address_index: int,
         mb_name: str,
         model_type: type[ProvenanceDatasetModel] | type[ProvenanceQuantumModel],
-    ) ->
+    ) -> None:
         node: ProvenanceDatasetModel | ProvenanceQuantumModel | None
         if nodes is None:
             self.address_reader.read_all()
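Because the `read_*` methods now return `None`, the method-chaining style documented in the removed docstrings no longer works; calls become sequential statements that mutate `reader.graph` in place. A hypothetical caller (reader construction elided):

    def load_full_graph(reader: ProvenanceQuantumGraphReader) -> ProvenanceQuantumGraph:
        # Formerly chainable via the returned ``self``; now each call just
        # updates ``reader.graph`` in place, as read_full_graph itself does.
        reader.read_init_quanta()
        reader.read_datasets()
        reader.read_quanta()
        return reader.graph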
@@ -1128,7 +1268,8 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # Use the old node to reduce memory usage (since it might
                     # also have other outstanding reference holders).
                     continue
-                node._add_to_graph(self.graph
+                node._add_to_graph(self.graph)
+            return
         with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
@@ -1140,12 +1281,9 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     address_row.addresses[address_index], model_type, self.decompressor
                 )
                 if node is not None:
-                    node._add_to_graph(self.graph
-        return self
+                    node._add_to_graph(self.graph)

-    def fetch_logs(
-        self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
-    ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords]:
+    def fetch_logs(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[ButlerLogRecords | None]]:
         """Fetch log datasets.

         Parameters
@@ -1156,25 +1294,28 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):

         Returns
         -------
-        logs : `dict` [ `uuid.UUID`, `
-
+        logs : `dict` [ `uuid.UUID`, `list` [\
+                `lsst.daf.butler.ButlerLogRecords` or `None`] ]
+            Logs for the given IDs. Each value is a list of
+            `lsst.daf.butler.ButlerLogRecords` instances representing different
+            execution attempts, ordered chronologically from first to last.
+            Attempts where logs were missing will have `None` in this list.
         """
-
-
-        result: dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords] = {}
+        result: dict[uuid.UUID, list[ButlerLogRecords | None]] = {}
         with MultiblockReader.open_in_zip(self.zf, LOG_MB_NAME, int_size=self.header.int_size) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
-
-                    address_row.addresses[LOG_ADDRESS_INDEX],
+                logs_by_attempt = mb_reader.read_model(
+                    address_row.addresses[LOG_ADDRESS_INDEX], ProvenanceLogRecordsModel, self.decompressor
                 )
-                if
-                    result[node_id_or_index] =
+                if logs_by_attempt is not None:
+                    result[node_id_or_index] = [
+                        ButlerLogRecords.from_records(attempt_logs) if attempt_logs is not None else None
+                        for attempt_logs in logs_by_attempt.attempts
+                    ]
         return result

-    def fetch_metadata(
-        self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
-    ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata]:
+    def fetch_metadata(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[TaskMetadata | None]]:
         """Fetch metadata datasets.

         Parameters
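Both fetchers now return one entry per execution attempt rather than a single object per node. A hypothetical consumer of the new shapes, assuming `reader` is an open `ProvenanceQuantumGraphReader` and `quantum_id` is a known UUID:

    for n, records in enumerate(reader.fetch_logs([quantum_id]).get(quantum_id, [])):
        if records is None:
            print(f"attempt {n}: logs missing")
        else:
            print(f"attempt {n}: {len(records)} log records")

    for n, md in enumerate(reader.fetch_metadata([quantum_id]).get(quantum_id, [])):
        print(f"attempt {n}: metadata", "missing" if md is None else "present")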
@@ -1185,22 +1326,26 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):

         Returns
         -------
-        metadata : `dict` [ `uuid.UUID`, `TaskMetadata`]
-            Metadata for the given IDs.
+        metadata : `dict` [ `uuid.UUID`, `list` [`.TaskMetadata`] ]
+            Metadata for the given IDs. Each value is a list of
+            `.TaskMetadata` instances representing different execution
+            attempts, ordered chronologically from first to last. Attempts
+            where metadata was missing (not written even in the fallback extra
+            provenance in the logs) will have `None` in this list.
         """
-
-
-        result: dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata] = {}
+        result: dict[uuid.UUID, list[TaskMetadata | None]] = {}
         with MultiblockReader.open_in_zip(
             self.zf, METADATA_MB_NAME, int_size=self.header.int_size
         ) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
-
-                    address_row.addresses[METADATA_ADDRESS_INDEX],
+                metadata_by_attempt = mb_reader.read_model(
+                    address_row.addresses[METADATA_ADDRESS_INDEX],
+                    ProvenanceTaskMetadataModel,
+                    self.decompressor,
                 )
-                if
-                    result[node_id_or_index] =
+                if metadata_by_attempt is not None:
+                    result[node_id_or_index] = metadata_by_attempt.attempts
         return result

     def fetch_packages(self) -> Packages: