lsst-pipe-base 30.0.0rc3__py3-none-any.whl → 30.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. lsst/pipe/base/_instrument.py +25 -15
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/_task_metadata.py +2 -2
  5. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
  6. lsst/pipe/base/automatic_connection_constants.py +20 -1
  7. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  8. lsst/pipe/base/cli/cmd/commands.py +149 -4
  9. lsst/pipe/base/connectionTypes.py +72 -160
  10. lsst/pipe/base/connections.py +6 -9
  11. lsst/pipe/base/execution_reports.py +0 -5
  12. lsst/pipe/base/graph/graph.py +11 -10
  13. lsst/pipe/base/graph/quantumNode.py +4 -4
  14. lsst/pipe/base/graph_walker.py +8 -10
  15. lsst/pipe/base/log_capture.py +1 -1
  16. lsst/pipe/base/log_on_close.py +4 -7
  17. lsst/pipe/base/pipeline.py +5 -6
  18. lsst/pipe/base/pipelineIR.py +2 -8
  19. lsst/pipe/base/pipelineTask.py +5 -7
  20. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  21. lsst/pipe/base/pipeline_graph/_edges.py +32 -22
  22. lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
  23. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
  24. lsst/pipe/base/pipeline_graph/expressions.py +2 -2
  25. lsst/pipe/base/pipeline_graph/io.py +7 -10
  26. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  27. lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
  28. lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
  29. lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
  30. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  31. lsst/pipe/base/prerequisite_helpers.py +2 -1
  32. lsst/pipe/base/quantum_graph/_common.py +15 -17
  33. lsst/pipe/base/quantum_graph/_multiblock.py +36 -20
  34. lsst/pipe/base/quantum_graph/_predicted.py +7 -3
  35. lsst/pipe/base/quantum_graph/_provenance.py +501 -61
  36. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  37. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +187 -240
  38. lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
  39. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
  40. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +15 -7
  41. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
  42. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +19 -34
  43. lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
  44. lsst/pipe/base/quantum_graph/aggregator/_writer.py +3 -3
  45. lsst/pipe/base/quantum_graph/formatter.py +74 -4
  46. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  47. lsst/pipe/base/quantum_graph/visualization.py +5 -1
  48. lsst/pipe/base/quantum_graph_builder.py +21 -8
  49. lsst/pipe/base/quantum_graph_skeleton.py +31 -29
  50. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  51. lsst/pipe/base/separable_pipeline_executor.py +1 -1
  52. lsst/pipe/base/single_quantum_executor.py +15 -8
  53. lsst/pipe/base/struct.py +4 -0
  54. lsst/pipe/base/testUtils.py +3 -3
  55. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  56. lsst/pipe/base/version.py +1 -1
  57. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/METADATA +3 -3
  58. lsst_pipe_base-30.0.1.dist-info/RECORD +129 -0
  59. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/WHEEL +1 -1
  60. lsst_pipe_base-30.0.0rc3.dist-info/RECORD +0 -127
  61. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/entry_points.txt +0 -0
  62. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/COPYRIGHT +0 -0
  63. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/LICENSE +0 -0
  64. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/bsd_license.txt +0 -0
  65. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  66. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/top_level.txt +0 -0
  67. {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.0.1.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/_provenance.py

@@ -38,13 +38,14 @@ __all__ = (
     "ProvenanceQuantumGraphWriter",
     "ProvenanceQuantumInfo",
     "ProvenanceQuantumModel",
+    "ProvenanceQuantumReport",
     "ProvenanceQuantumScanData",
     "ProvenanceQuantumScanModels",
     "ProvenanceQuantumScanStatus",
+    "ProvenanceReport",
     "ProvenanceTaskMetadataModel",
 )
 
-
 import dataclasses
 import enum
 import itertools
@@ -53,16 +54,16 @@ import uuid
 from collections import Counter
 from collections.abc import Callable, Iterable, Iterator, Mapping
 from contextlib import ExitStack, contextmanager
-from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, TypeVar
+from typing import TYPE_CHECKING, Any, TypedDict
 
 import astropy.table
 import networkx
 import numpy as np
 import pydantic
 
-from lsst.daf.butler import DataCoordinate
+from lsst.daf.butler import Butler, DataCoordinate
 from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
-from lsst.resources import ResourcePathExpression
+from lsst.resources import ResourcePath, ResourcePathExpression
 from lsst.utils.iteration import ensure_iterable
 from lsst.utils.logging import LsstLogAdapter, getLogger
 from lsst.utils.packages import Packages
@@ -94,9 +95,14 @@ from ._predicted import (
     PredictedQuantumGraphComponents,
 )
 
-_T = TypeVar("_T")
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
 
-LoopWrapper: TypeAlias = Callable[[Iterable[_T]], Iterable[_T]]
+
+type LoopWrapper[T] = Callable[[Iterable[T]], Iterable[T]]
 
 _LOG = getLogger(__file__)
 
@@ -111,7 +117,7 @@ LOG_MB_NAME = "logs"
 METADATA_MB_NAME = "metadata"
 
 
-def pass_through(arg: _T) -> _T:
+def pass_through[T](arg: T) -> T:
     return arg
 
 
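The two deletions above replace `TypeVar`/`TypeAlias` declarations with Python 3.12 type-parameter syntax (PEP 695). A minimal sketch of how the new declarations behave, using only names defined in these hunks:

    from collections.abc import Callable, Iterable

    type LoopWrapper[T] = Callable[[Iterable[T]], Iterable[T]]

    def pass_through[T](arg: T) -> T:
        return arg

    # A LoopWrapper wraps an iterable and yields it back (e.g. a progress
    # bar); pass_through is the no-op default.
    wrapper: LoopWrapper[int] = pass_through
    assert list(wrapper([1, 2, 3])) == [1, 2, 3]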
@@ -186,6 +192,12 @@ class ProvenanceQuantumInfo(QuantumInfo):
     failure.
     """
 
+    metadata_id: uuid.UUID
+    """ID of this quantum's metadata dataset."""
+
+    log_id: uuid.UUID
+    """ID of this quantum's log dataset."""
+
 
 class ProvenanceInitQuantumInfo(TypedDict):
     """A typed dictionary that annotates the attributes of the NetworkX graph
@@ -212,6 +224,9 @@ class ProvenanceInitQuantumInfo(TypedDict):
     pipeline_node: TaskInitNode
     """Node in the pipeline graph for this task's init-only step."""
 
+    config_id: uuid.UUID
+    """ID of this task's config dataset."""
+
 
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""
@@ -543,6 +558,131 @@ class ProvenanceTaskMetadataModel(pydantic.BaseModel):
             return super().model_validate_strings(*args, **kwargs)
 
 
+class ProvenanceQuantumReport(pydantic.BaseModel):
+    """A Pydantic model used to report information about a single (generally
+    problematic) quantum.
+    """
+
+    quantum_id: uuid.UUID
+    data_id: dict[str, int | str]
+    attempts: list[ProvenanceQuantumAttemptModel]
+
+    @classmethod
+    def from_info(cls, quantum_id: uuid.UUID, quantum_info: ProvenanceQuantumInfo) -> ProvenanceQuantumReport:
+        """Construct from a provenance quantum graph node.
+
+        Parameters
+        ----------
+        quantum_id : `uuid.UUID`
+            Unique ID for the quantum.
+        quantum_info : `ProvenanceQuantumInfo`
+            Node attributes for this quantum.
+        """
+        return cls(
+            quantum_id=quantum_id,
+            data_id=dict(quantum_info["data_id"].mapping),
+            attempts=quantum_info["attempts"],
+        )
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceReport(pydantic.RootModel):
+    """A Pydantic model that groups quantum information by task label, then
+    status (as a string), and then exception type.
+    """
+
+    root: dict[TaskLabel, dict[str, dict[str | None, list[ProvenanceQuantumReport]]]] = {}
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
 class ProvenanceQuantumModel(pydantic.BaseModel):
     """Data model for the quanta in a provenance quantum graph file."""
 
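For orientation, a minimal sketch of how the two new report models nest; the task label, status key, and data ID values are illustrative:

    import uuid

    report = ProvenanceQuantumReport(
        quantum_id=uuid.uuid4(),
        data_id={"instrument": "HSC", "visit": 903334},  # illustrative data ID
        attempts=[],
    )
    # ProvenanceReport nests reports by task label, then status name, then
    # exception type name (None when no exception was recorded).
    full = ProvenanceReport(root={"isr": {"FAILED": {None: [report]}}})
    print(full.model_dump_json(indent=2))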
@@ -646,6 +786,8 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             resource_usage=last_attempt.resource_usage,
             attempts=self.attempts,
         )
+        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
+        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for connection_name, dataset_ids in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
             for dataset_id in dataset_ids:
@@ -655,6 +797,30 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_ids in self.outputs.items():
             write_edge = task_node.get_output_edge(connection_name)
+            if connection_name == acc.METADATA_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_metadata,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+            if connection_name == acc.LOG_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_log,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
             for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(
                     self.quantum_id,
@@ -663,8 +829,6 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                     # There can only be one pipeline edge for an output.
                     pipeline_edges=[write_edge],
                 )
-        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
-        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for dataset_id in graph._bipartite_xgraph.predecessors(self.quantum_id):
             for upstream_quantum_id in graph._bipartite_xgraph.predecessors(dataset_id):
                 graph._quantum_only_xgraph.add_edge(upstream_quantum_id, self.quantum_id)
@@ -803,6 +967,15 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_id in self.outputs.items():
             write_edge = task_init_node.get_output_edge(connection_name)
+            graph._bipartite_xgraph.add_node(
+                dataset_id,
+                data_id=empty_data_id,
+                dataset_type_name=write_edge.dataset_type_name,
+                pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                run=graph.header.output_run,
+                produced=True,
+            )
+            graph._datasets_by_type[write_edge.dataset_type_name][empty_data_id] = dataset_id
             graph._bipartite_xgraph.add_edge(
                 self.quantum_id,
                 dataset_id,
@@ -810,6 +983,8 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 # There can only be one pipeline edge for an output.
                 pipeline_edges=[write_edge],
             )
+            if write_edge.connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.nodes[self.quantum_id]["config_id"] = dataset_id
         graph._init_quanta[self.task_label] = self.quantum_id
 
     # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
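A sketch of reading the new ``config_id`` attribute back from a loaded graph; the "isr" label is illustrative, and ``bipartite_xgraph`` is assumed to be the public view of the internal graph used above:

    # The init quantum for a task carries the ID of its config dataset.
    init_quantum_id = graph.init_quanta["isr"]
    config_dataset_id = graph.bipartite_xgraph.nodes[init_quantum_id]["config_id"]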
@@ -954,6 +1129,83 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
             dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
         }
 
+    @classmethod
+    @contextmanager
+    def from_args(
+        cls,
+        repo_or_filename: str,
+        /,
+        collection: str | None = None,
+        *,
+        quanta: Iterable[uuid.UUID] | None = None,
+        datasets: Iterable[uuid.UUID] | None = None,
+        writeable: bool = False,
+    ) -> Iterator[tuple[ProvenanceQuantumGraph, Butler | None]]:
+        """Construct a `ProvenanceQuantumGraph` from CLI-friendly arguments
+        for a file or butler-ingested graph dataset.
+
+        Parameters
+        ----------
+        repo_or_filename : `str`
+            Either a provenance quantum graph filename or a butler repository
+            path or alias.
+        collection : `str`, optional
+            Collection to search; presence indicates that the first argument
+            is a butler repository, not a filename.
+        quanta : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, \
+                optional
+            IDs of the quanta to load, or `None` to load all.
+        datasets : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, \
+                optional
+            IDs of the datasets to load, or `None` to load all.
+        writeable : `bool`, optional
+            Whether the butler should be constructed with write support.
+
+        Returns
+        -------
+        context : `contextlib.AbstractContextManager`
+            A context manager that yields a tuple of
+
+            - the `ProvenanceQuantumGraph`
+            - the `Butler` constructed (or `None`)
+
+            when entered.
+        """
+        exit_stack = ExitStack()
+        if collection is not None:
+            try:
+                butler = exit_stack.enter_context(
+                    Butler.from_config(repo_or_filename, collections=[collection], writeable=writeable)
+                )
+            except Exception as err:
+                err.add_note(
+                    f"Expected {repo_or_filename!r} to be a butler repository path or alias because a "
+                    f"collection ({collection}) was provided."
+                )
+                raise
+            with exit_stack:
+                graph = butler.get(
+                    acc.PROVENANCE_DATASET_TYPE_NAME, parameters={"quanta": quanta, "datasets": datasets}
+                )
+                yield graph, butler
+        else:
+            try:
+                reader = exit_stack.enter_context(ProvenanceQuantumGraphReader.open(repo_or_filename))
+            except Exception as err:
+                err.add_note(
+                    f"Expected {repo_or_filename!r} to be a provenance quantum graph filename "
+                    "because no collection was provided."
+                )
+                raise
+            with exit_stack:
+                if quanta is None:
+                    reader.read_quanta()
+                elif quanta:
+                    reader.read_quanta(quanta)
+                if datasets is None:
+                    reader.read_datasets()
+                elif datasets:
+                    reader.read_datasets(datasets)
+                yield reader.graph, None
+
     @property
     def init_quanta(self) -> Mapping[TaskLabel, uuid.UUID]:
         """A mapping from task label to the ID of the special init quantum for
@@ -994,6 +1246,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         types in the pipeline graph are included, even if none of their
         datasets were loaded (i.e. nested mappings may be empty).
 
+        Reading a quantum also populates its log and metadata datasets.
+
         The returned object may be an internal dictionary; as the type
         annotation indicates, it should not be modified in place.
         """
@@ -1032,7 +1286,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         `ProvenanceQuantumGraphReader.read_quanta`) or datasets (via
         `ProvenanceQuantumGraphReader.read_datasets`) will load those nodes
         with full attributes and edges to adjacent nodes with no attributes.
-        Loading quanta necessary to populate edge attributes.
+        Loading quanta is necessary to populate edge attributes.
+        Reading a quantum also populates its log and metadata datasets.
 
         Node attributes are described by the
         `ProvenanceQuantumInfo`, `ProvenanceInitQuantumInfo`, and
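Per the added docstring lines, reading a quantum now also populates its log and metadata dataset nodes, and their IDs land on the quantum node itself. A sketch, assuming ``quantum_only_xgraph`` is the public view of the quantum-only graph used above:

    for task_label, quanta in graph.quanta_by_task.items():
        for data_id, quantum_id in quanta.items():
            info = graph.quantum_only_xgraph.nodes[quantum_id]
            print(task_label, data_id, info["metadata_id"], info["log_id"])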
@@ -1047,10 +1302,16 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """
         return self._bipartite_xgraph.copy(as_view=True)
 
-    def make_quantum_table(self) -> astropy.table.Table:
+    def make_quantum_table(self, drop_unused_columns: bool = True) -> astropy.table.Table:
         """Construct an `astropy.table.Table` with a tabular summary of the
         quanta.
 
+        Parameters
+        ----------
+        drop_unused_columns : `bool`, optional
+            Whether to drop columns for rare states that did not actually
+            occur in this run.
+
         Returns
         -------
         table : `astropy.table.Table`
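A short sketch of the new keyword; by default, columns for rare states that never occurred are dropped:

    table = graph.make_quantum_table()  # unused rare-status columns dropped
    full = graph.make_quantum_table(drop_unused_columns=False)  # all columns
    table.pprint_all()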
@@ -1086,28 +1347,30 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
                 caveats = f"{code.concise()}({count})"  # type: ignore[union-attr]
             else:
                 caveats = ""
-            rows.append(
+            row: dict[str, Any] = {
+                "Task": task_label,
+                "Caveats": caveats,
+            }
+            for status in QuantumAttemptStatus:
+                row[status.title] = status_counts.get(status, 0)
+            row.update(
                 {
-                    "Task": task_label,
-                    "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
-                    "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
-                    "Caveats": caveats,
-                    "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
-                    "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
                     "TOTAL": len(quanta_for_task),
                     "EXPECTED": self.header.n_task_quanta[task_label],
                 }
             )
-        return astropy.table.Table(rows)
+            rows.append(row)
+        table = astropy.table.Table(rows)
+        if drop_unused_columns:
+            for status in QuantumAttemptStatus:
+                if status.is_rare and not table[status.title].any():
+                    del table[status.title]
+        return table
 
     def make_exception_table(self) -> astropy.table.Table:
         """Construct an `astropy.table.Table` with counts for each exception
         type raised by each task.
 
-        At present this only includes information from partial-outputs-error
-        successes, since exception information for failures is not tracked.
-        This may change in the future.
-
         Returns
         -------
         table : `astropy.table.Table`
@@ -1115,13 +1378,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """
         rows = []
         for task_label, quanta_for_task in self.quanta_by_task.items():
-            counts_by_type = Counter(
-                exc_info.type_name
-                for q in quanta_for_task.values()
-                if (exc_info := self._quantum_only_xgraph.nodes[q]["exception"]) is not None
-            )
-            for type_name, count in counts_by_type.items():
-                rows.append({"Task": task_label, "Exception": type_name, "Count": count})
+            success_counts = Counter[str]()
+            failed_counts = Counter[str]()
+            for quantum_id in quanta_for_task.values():
+                quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
+                exc_info = quantum_info["exception"]
+                if exc_info is not None:
+                    if quantum_info["status"] is QuantumAttemptStatus.SUCCESSFUL:
+                        success_counts[exc_info.type_name] += 1
+                    else:
+                        failed_counts[exc_info.type_name] += 1
+            for type_name in sorted(success_counts.keys() | failed_counts.keys()):
+                rows.append(
+                    {
+                        "Task": task_label,
+                        "Exception": type_name,
+                        "Successes": success_counts.get(type_name, 0),
+                        "Failures": failed_counts.get(type_name, 0),
+                    }
+                )
         return astropy.table.Table(rows)
 
     def make_task_resource_usage_table(
@@ -1164,6 +1439,171 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         array = np.array(rows, dtype=row_dtype)
         return astropy.table.Table(array, units=QuantumResourceUsage.get_units())
 
+    def make_status_report(
+        self,
+        states: Iterable[QuantumAttemptStatus] = (
+            QuantumAttemptStatus.FAILED,
+            QuantumAttemptStatus.ABORTED,
+            QuantumAttemptStatus.ABORTED_SUCCESS,
+        ),
+        *,
+        also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
+        with_caveats: QuantumSuccessCaveats | None = QuantumSuccessCaveats.PARTIAL_OUTPUTS_ERROR,
+        data_id_table_dir: ResourcePathExpression | None = None,
+    ) -> ProvenanceReport:
+        """Make a JSON- or YAML-friendly report of all quanta with the given
+        states.
+
+        Parameters
+        ----------
+        states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            A quantum is included if it has any of these states. Defaults to
+            states that clearly represent problems.
+        also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            Additional states to consider; unioned with ``states``. This is
+            provided so users can easily request additional states while also
+            getting the defaults.
+        with_caveats : `..QuantumSuccessCaveats` or `None`, optional
+            If `..QuantumAttemptStatus.SUCCESSFUL` is in ``states``, only
+            include quanta with these caveat flags. May be set to `None`
+            to report on all successful quanta.
+        data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            If provided, a directory to write data ID tables (in ECSV format)
+            with all of the data IDs with the given states, for use with the
+            ``--data-id-tables`` argument to the quantum graph builder.
+            Subdirectories for each task and status will be created within
+            this directory, with one file for each exception type (or
+            ``UNKNOWN`` when there is no exception).
+
+        Returns
+        -------
+        report : `ProvenanceReport`
+            A Pydantic model that groups quanta by task label and exception
+            type.
+        """
+        states = set(ensure_iterable(states))
+        states.update(ensure_iterable(also))
+        result = ProvenanceReport(root={})
+        if data_id_table_dir is not None:
+            data_id_table_dir = ResourcePath(data_id_table_dir)
+        for task_label, quanta_for_task in self.quanta_by_task.items():
+            reports_for_task: dict[str, dict[str | None, list[ProvenanceQuantumReport]]] = {}
+            table_rows_for_task: dict[str, dict[str | None, list[tuple[int | str, ...]]]] = {}
+            for quantum_id in quanta_for_task.values():
+                quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
+                quantum_status = quantum_info["status"]
+                if quantum_status not in states:
+                    continue
+                if (
+                    quantum_status is QuantumAttemptStatus.SUCCESSFUL
+                    and with_caveats is not None
+                    and (quantum_info["caveats"] is None or not (quantum_info["caveats"] & with_caveats))
+                ):
+                    continue
+                key1 = quantum_status.name
+                exc_info = quantum_info["exception"]
+                key2 = exc_info.type_name if exc_info is not None else None
+                reports_for_task.setdefault(key1, {}).setdefault(key2, []).append(
+                    ProvenanceQuantumReport.from_info(quantum_id, quantum_info)
+                )
+                if data_id_table_dir:
+                    table_rows_for_task.setdefault(key1, {}).setdefault(key2, []).append(
+                        quantum_info["data_id"].required_values
+                    )
+            if reports_for_task:
+                result.root[task_label] = reports_for_task
+            if table_rows_for_task:
+                assert data_id_table_dir is not None, "table_rows_for_task should be empty"
+                for status_name, table_rows_for_status in table_rows_for_task.items():
+                    dir_for_task_and_status = data_id_table_dir.join(task_label, forceDirectory=True).join(
+                        status_name, forceDirectory=True
+                    )
+                    if dir_for_task_and_status.isLocal:
+                        dir_for_task_and_status.mkdir()
+                    for exc_name, data_id_rows in table_rows_for_status.items():
+                        table = astropy.table.Table(
+                            rows=data_id_rows,
+                            names=list(self.pipeline_graph.tasks[task_label].dimensions.required),
+                        )
+                        filename = f"{exc_name}.ecsv" if exc_name is not None else "UNKNOWN.ecsv"
+                        with dir_for_task_and_status.join(filename).open("w") as stream:
+                            table.write(stream, format="ecsv")
+        return result
+
+    def make_many_reports(
+        self,
+        states: Iterable[QuantumAttemptStatus] = (
+            QuantumAttemptStatus.FAILED,
+            QuantumAttemptStatus.ABORTED,
+            QuantumAttemptStatus.ABORTED_SUCCESS,
+        ),
+        *,
+        status_report_file: ResourcePathExpression | None = None,
+        print_quantum_table: bool = False,
+        print_exception_table: bool = False,
+        also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
+        with_caveats: QuantumSuccessCaveats | None = None,
+        data_id_table_dir: ResourcePathExpression | None = None,
+    ) -> None:
+        """Write multiple reports.
+
+        Parameters
+        ----------
+        states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            A quantum is included in the status report and data ID tables if
+            it has any of these states. Defaults to states that clearly
+            represent problems.
+        status_report_file : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            Filename for the JSON status report (see `make_status_report`).
+        print_quantum_table : `bool`, optional
+            If `True`, print a quantum summary table (counts only) to STDOUT.
+        print_exception_table : `bool`, optional
+            If `True`, print an exception-type summary table (counts only) to
+            STDOUT.
+        also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
+                `..QuantumAttemptStatus`, optional
+            Additional states to consider in the status report and data ID
+            tables; unioned with ``states``. This is provided so users can
+            easily request additional states while also getting the defaults.
+        with_caveats : `..QuantumSuccessCaveats` or `None`, optional
+            Only include quanta with these caveat flags in the status report
+            and data ID tables. May be set to `None` to report on all
+            successful quanta (an empty sequence reports on only quanta with
+            no caveats). If provided, `QuantumAttemptStatus.SUCCESSFUL` is
+            automatically included in ``states``.
+        data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
+                optional
+            If provided, a directory to write data ID tables (in ECSV format)
+            with all of the data IDs with the given states, for use with the
+            ``--data-id-tables`` argument to the quantum graph builder.
+            Subdirectories for each task and status will be created within
+            this directory, with one file for each exception type (or
+            ``UNKNOWN`` when there is no exception).
+        """
+        if status_report_file is not None or data_id_table_dir is not None:
+            status_report = self.make_status_report(
+                states, also=also, with_caveats=with_caveats, data_id_table_dir=data_id_table_dir
+            )
+            if status_report_file is not None:
+                status_report_file = ResourcePath(status_report_file)
+                if status_report_file.isLocal:
+                    status_report_file.dirname().mkdir()
+                with status_report_file.open("w") as stream:
+                    stream.write(status_report.model_dump_json(indent=2))
+        if print_quantum_table:
+            quantum_table = self.make_quantum_table()
+            quantum_table.pprint_all()
+            print("")
+        if print_exception_table:
+            exception_table = self.make_exception_table()
+            exception_table.pprint_all()
+            print("")
+
 
 @dataclasses.dataclass
 class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
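A usage sketch for the two new reporting methods; the output paths are illustrative:

    # Include partial-outputs successes alongside the default problem states.
    report = graph.make_status_report(
        also=QuantumAttemptStatus.SUCCESSFUL,
        data_id_table_dir="data_id_tables",
    )
    with open("status_report.json", "w") as stream:
        stream.write(report.model_dump_json(indent=2))

    # Or write the status report and print both summary tables in one call.
    graph.make_many_reports(
        status_report_file="status_report.json",
        print_quantum_table=True,
        print_exception_table=True,
    )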
@@ -1294,19 +1734,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # also have other outstanding reference holders).
                     continue
                 node._add_to_graph(self.graph)
-            return
-        with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
-            for node_id_or_index in nodes:
-                address_row = self.address_reader.find(node_id_or_index)
-                if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
-                    # Use the old node to reduce memory usage (since it might
-                    # also have other outstanding reference holders).
-                    continue
-                node = mb_reader.read_model(
-                    address_row.addresses[address_index], model_type, self.decompressor
-                )
-                if node is not None:
-                    node._add_to_graph(self.graph)
+        else:
+            with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
+                for node_id_or_index in nodes:
+                    address_row = self.address_reader.find(node_id_or_index)
+                    if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
+                        # Use the old node to reduce memory usage (since it
+                        # might also have other outstanding reference holders).
+                        continue
+                    node = mb_reader.read_model(
+                        address_row.addresses[address_index], model_type, self.decompressor
+                    )
+                    if node is not None:
+                        node._add_to_graph(self.graph)
 
     def fetch_logs(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[ButlerLogRecords | None]]:
         """Fetch log datasets.
@@ -1588,7 +2028,7 @@ class ProvenanceQuantumGraphWriter:
         """
         predicted_quantum = self._predicted_quanta[quantum_id]
         provenance_models = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs, assume_complete=True
+            predicted_quantum, metadata, logs, incomplete=False
        )
         scan_data = provenance_models.to_scan_data(predicted_quantum, compressor=self.compressor)
         self.write_scan_data(scan_data)
@@ -1665,8 +2105,8 @@ class ProvenanceQuantumScanStatus(enum.Enum):
     enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
     to stop trying for now.
 
-    This state means a later run with `ScannerConfig.assume_complete` is
-    required.
+    This state means `ProvenanceQuantumScanModels.from_metadata_and_logs` must
+    be run again with ``incomplete=False``.
     """
 
     SUCCESSFUL = enum.auto()
@@ -1721,7 +2161,7 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         logs: ButlerLogRecords | None,
         *,
-        assume_complete: bool = True,
+        incomplete: bool = False,
     ) -> ProvenanceQuantumScanModels:
         """Construct provenance information from task metadata and logs.
 
@@ -1733,8 +2173,8 @@ class ProvenanceQuantumScanModels:
             Task metadata.
         logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
             Task logs.
-        assume_complete : `bool`, optional
-            If `False`, treat execution failures as possibly-incomplete quanta
+        incomplete : `bool`, optional
+            If `True`, treat execution failures as possibly-incomplete quanta
             and do not fully process them; instead just set the status to
             `ProvenanceQuantumScanStatus.ABANDONED` and return.
 
@@ -1752,8 +2192,8 @@ class ProvenanceQuantumScanModels:
         """
         self = ProvenanceQuantumScanModels(predicted.quantum_id)
         last_attempt = ProvenanceQuantumAttemptModel()
-        self._process_logs(predicted, logs, last_attempt, assume_complete=assume_complete)
-        self._process_metadata(predicted, metadata, last_attempt, assume_complete=assume_complete)
+        self._process_logs(predicted, logs, last_attempt, incomplete=incomplete)
+        self._process_metadata(predicted, metadata, last_attempt, incomplete=incomplete)
         if self.status is ProvenanceQuantumScanStatus.ABANDONED:
             return self
         self._reconcile_attempts(last_attempt)
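A sketch of the renamed keyword, assuming a predicted quantum with its metadata and logs in hand:

    # Old call sites passed assume_complete=False; the equivalent is now:
    models = ProvenanceQuantumScanModels.from_metadata_and_logs(
        predicted, metadata, logs, incomplete=True
    )
    if models.status is ProvenanceQuantumScanStatus.ABANDONED:
        pass  # scan again later, or rerun with incomplete=False to finalize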
@@ -1766,15 +2206,15 @@ class ProvenanceQuantumScanModels:
         logs: ButlerLogRecords | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-        assume_complete: bool,
+        incomplete: bool,
     ) -> None:
         (predicted_log_dataset,) = predicted.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
         if logs is None:
             self.output_existence[predicted_log_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             # Set the attempt's run status to FAILED, since the default is
             # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
@@ -1832,15 +2272,15 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-        assume_complete: bool,
+        incomplete: bool,
     ) -> None:
         (predicted_metadata_dataset,) = predicted.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
         if metadata is None:
             self.output_existence[predicted_metadata_dataset.dataset_id] = False
-            if assume_complete:
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             self.status = ProvenanceQuantumScanStatus.SUCCESSFUL
             self.output_existence[predicted_metadata_dataset.dataset_id] = True
@@ -1875,7 +2315,7 @@ class ProvenanceQuantumScanModels:
                 # But we found the metadata! Either that hard error happened
                 # at a very unlucky time (in between those two writes), or
                 # something even weirder happened.
-                self.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
+                self.attempts[-1].status = QuantumAttemptStatus.ABORTED_SUCCESS
             else:
                 self.attempts[-1].status = QuantumAttemptStatus.FAILED
             if len(self.metadata.attempts) < len(self.attempts):
@@ -1985,7 +2425,7 @@ class ProvenanceQuantumScanData:
     """Serialized logs."""
 
     is_compressed: bool = False
-    """Whether the `quantum`, `metadata`, and `log` attributes are
+    """Whether the ``quantum``, ``metadata``, and ``log`` attributes are
     compressed.
     """
 