lsst-pipe-base 30.2026.300__py3-none-any.whl → 30.2026.500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. lsst/pipe/base/_instrument.py +21 -12
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/automatic_connection_constants.py +20 -1
  5. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  6. lsst/pipe/base/cli/cmd/commands.py +149 -4
  7. lsst/pipe/base/connectionTypes.py +72 -160
  8. lsst/pipe/base/connections.py +3 -6
  9. lsst/pipe/base/execution_reports.py +0 -5
  10. lsst/pipe/base/graph/graph.py +9 -8
  11. lsst/pipe/base/log_capture.py +1 -1
  12. lsst/pipe/base/pipeline.py +5 -6
  13. lsst/pipe/base/pipelineIR.py +1 -7
  14. lsst/pipe/base/pipelineTask.py +5 -7
  15. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  16. lsst/pipe/base/pipeline_graph/_edges.py +30 -18
  17. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +10 -2
  18. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  19. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  20. lsst/pipe/base/prerequisite_helpers.py +2 -1
  21. lsst/pipe/base/quantum_graph/_common.py +3 -1
  22. lsst/pipe/base/quantum_graph/_multiblock.py +29 -13
  23. lsst/pipe/base/quantum_graph/_predicted.py +7 -0
  24. lsst/pipe/base/quantum_graph/_provenance.py +498 -56
  25. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  26. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -1
  27. lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
  28. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
  29. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +14 -6
  30. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
  31. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +14 -13
  32. lsst/pipe/base/quantum_graph/aggregator/_writer.py +2 -2
  33. lsst/pipe/base/quantum_graph/formatter.py +74 -4
  34. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  35. lsst/pipe/base/quantum_graph_builder.py +1 -8
  36. lsst/pipe/base/quantum_graph_skeleton.py +29 -27
  37. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  38. lsst/pipe/base/separable_pipeline_executor.py +6 -7
  39. lsst/pipe/base/single_quantum_executor.py +7 -7
  40. lsst/pipe/base/struct.py +4 -0
  41. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  42. lsst/pipe/base/version.py +1 -1
  43. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/METADATA +2 -1
  44. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/RECORD +52 -51
  45. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/WHEEL +1 -1
  46. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/entry_points.txt +0 -0
  47. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/COPYRIGHT +0 -0
  48. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/LICENSE +0 -0
  49. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/bsd_license.txt +0 -0
  50. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/gpl-v3.0.txt +0 -0
  51. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/top_level.txt +0 -0
  52. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/zip-safe +0 -0
@@ -35,7 +35,7 @@ from abc import ABCMeta, abstractmethod
35
35
  from collections.abc import Sequence
36
36
  from typing import TYPE_CHECKING, Any, Self, cast, final
37
37
 
38
- from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter
38
+ from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter, FormatterV2
39
39
  from lsst.daf.butler.registry import DataIdError
40
40
  from lsst.pex.config import Config, RegistryField
41
41
  from lsst.resources import ResourcePath, ResourcePathExpression
@@ -109,6 +109,10 @@ class Instrument(metaclass=ABCMeta):
109
109
  If `True` (`False` is default), update existing records if they
110
110
  differ from the new ones.
111
111
 
112
+ Returns
113
+ -------
114
+ None
115
+
112
116
  Raises
113
117
  ------
114
118
  lsst.daf.butler.registry.ConflictingDefinitionError
@@ -127,13 +131,6 @@ class Instrument(metaclass=ABCMeta):
127
131
  the level of individual dimension entries; new detectors and filters
128
132
  should be added, but changes to any existing record should not be.
129
133
  This can generally be achieved via a block like
130
-
131
- .. code-block:: python
132
-
133
- with registry.transaction():
134
- registry.syncDimensionData("instrument", ...)
135
- registry.syncDimensionData("detector", ...)
136
- self.registerFilters(registry)
137
134
  """
138
135
  raise NotImplementedError()
139
136
 
@@ -314,7 +311,7 @@ class Instrument(metaclass=ABCMeta):
314
311
  return instrument_cls(collection_prefix=collection_prefix)
315
312
 
316
313
  @staticmethod
317
- def importAll(registry: Registry) -> None:
314
+ def importAll(registry: Registry) -> dict[str, type[Instrument]]:
318
315
  """Import all the instruments known to this registry.
319
316
 
320
317
  This will ensure that all metadata translators have been registered.
@@ -324,20 +321,31 @@ class Instrument(metaclass=ABCMeta):
324
321
  registry : `lsst.daf.butler.Registry`
325
322
  Butler registry to query to find the information.
326
323
 
324
+ Returns
325
+ -------
326
+ imported : `dict` [`str`, `type` [`Instrument`]]
327
+ A mapping containing all the instrument classes that were loaded
328
+ successfully, keyed by their butler names.
329
+
327
330
  Notes
328
331
  -----
329
332
  It is allowed for a particular instrument class to fail on import.
330
333
  This might simply indicate that a particular obs package has
331
334
  not been setup.
332
335
  """
336
+ imported: dict[str, type[Instrument]] = {}
333
337
  records = list(registry.queryDimensionRecords("instrument"))
334
338
  for record in records:
335
339
  cls = record.class_name
340
+ instrument_name: str = cast(str, record.name)
336
341
  with contextlib.suppress(Exception):
337
- doImportType(cls)
342
+ instr = doImportType(cls)
343
+ assert issubclass(instr, Instrument)
344
+ imported[instrument_name] = instr
345
+ return imported
338
346
 
339
347
  @abstractmethod
340
- def getRawFormatter(self, dataId: DataId) -> type[Formatter]:
348
+ def getRawFormatter(self, dataId: DataId) -> type[Formatter | FormatterV2]:
341
349
  """Return the Formatter class that should be used to read a particular
342
350
  raw file.
343
351
 
@@ -348,7 +356,8 @@ class Instrument(metaclass=ABCMeta):
348
356
 
349
357
  Returns
350
358
  -------
351
- formatter : `lsst.daf.butler.Formatter` class
359
+ formatter : `type` \
360
+ [`lsst.daf.butler.Formatter` | `lsst.daf.butler.FormatterV2` ]
352
361
  Class to be used that reads the file into the correct
353
362
  Python object for the raw data.
354
363
  """
@@ -380,8 +380,8 @@ class QuantumContext:
380
380
  if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef`
381
381
  or a single `~lsst.daf.butler.DatasetRef`. If ``values.NAME`` is
382
382
  None, no output is written.
383
- dataset : `OutputQuantizedConnection` or `list`[`DatasetRef`] \
384
- or `DatasetRef`
383
+ dataset : `OutputQuantizedConnection` or `list` \
384
+ [`lsst.daf.butler.DatasetRef`] or `lsst.daf.butler.DatasetRef`
385
385
  This argument may either be an `InputQuantizedConnection` which
386
386
  describes all the inputs of a quantum, a list of
387
387
  `lsst.daf.butler.DatasetRef`, or a single
@@ -460,7 +460,7 @@ class QuantumContext:
460
460
 
461
461
  Parameters
462
462
  ----------
463
- ref : `DatasetRef`
463
+ ref : `lsst.daf.butler.DatasetRef`
464
464
  The dataset to attach provenance to. This dataset must have been
465
465
  retrieved by this quantum context.
466
466
  extra : `dict` [ `str`, `int` | `float` | `str` | `bool` ]
lsst/pipe/base/_status.py CHANGED
@@ -275,15 +275,23 @@ class ExceptionInfo(pydantic.BaseModel):
275
275
  class QuantumAttemptStatus(enum.Enum):
276
276
  """Enum summarizing an attempt to run a quantum."""
277
277
 
278
+ ABORTED = -4
279
+ """The quantum failed with a hard error that prevented both logs and
280
+ metadata from being written.
281
+
282
+ This state is only set if information from higher-level tooling (e.g. BPS)
283
+ is available to distinguish it from ``UNKNOWN``.
284
+ """
285
+
278
286
  UNKNOWN = -3
279
287
  """The status of this attempt is unknown.
280
288
 
281
- This usually means no logs or metadata were written, and it at least could
282
- not be determined whether the quantum was blocked by an upstream failure
283
- (if it was definitely blocked, `BLOCKED` is set instead).
289
+ This means no logs or metadata were written, and it at least could not be
290
+ determined whether the quantum was blocked by an upstream failure (if it
291
+ was definitely blocked, `BLOCKED` is set instead).
284
292
  """
285
293
 
286
- LOGS_MISSING = -2
294
+ ABORTED_SUCCESS = -2
287
295
  """Task metadata was written for this attempt but logs were not.
288
296
 
289
297
  This is a rare condition that requires a hard failure (i.e. the kind that
@@ -292,20 +300,21 @@ class QuantumAttemptStatus(enum.Enum):
292
300
  """
293
301
 
294
302
  FAILED = -1
295
- """Execution of the quantum failed.
303
+ """Execution of the quantum failed gracefully.
296
304
 
297
305
  This is always set if the task metadata dataset was not written but logs
298
306
  were, as is the case when a Python exception is caught and handled by the
299
- execution system. It may also be set in cases where logs were not written
300
- either, but other information was available (e.g. from higher-level
301
- orchestration tooling) to mark it as a failure.
307
+ execution system.
308
+
309
+ This status guarantees that the task log dataset was produced but the
310
+ metadata dataset was not.
302
311
  """
303
312
 
304
313
  BLOCKED = 0
305
314
  """This quantum was not executed because an upstream quantum failed.
306
315
 
307
- Upstream quanta with status `UNKNOWN` or `FAILED` are considered blockers;
308
- `LOGS_MISSING` is not.
316
+ Upstream quanta with status `UNKNOWN`, `FAILED`, or `ABORTED` are
317
+ considered blockers; `ABORTED_SUCCESS` is not.
309
318
  """
310
319
 
311
320
  SUCCESSFUL = 1
@@ -319,6 +328,30 @@ class QuantumAttemptStatus(enum.Enum):
319
328
  these "successes with caveats" are reported.
320
329
  """
321
330
 
331
+ @property
332
+ def has_metadata(self) -> bool:
333
+ """Whether the task metadata dataset was produced."""
334
+ return self is self.SUCCESSFUL or self is self.ABORTED_SUCCESS
335
+
336
+ @property
337
+ def has_log(self) -> bool:
338
+ """Whether the log dataset was produced."""
339
+ return self is self.SUCCESSFUL or self is self.FAILED
340
+
341
+ @property
342
+ def title(self) -> str:
343
+ """A version of this status' name suitable for use as a title in a plot
344
+ or table.
345
+ """
346
+ return self.name.capitalize().replace("_", " ")
347
+
348
+ @property
349
+ def is_rare(self) -> bool:
350
+ """Whether this status is rare enough that it should only be listed
351
+ when it actually occurs.
352
+ """
353
+ return self in (self.ABORTED, self.ABORTED_SUCCESS, self.UNKNOWN)
354
+
322
355
 
323
356
  class GetSetDictMetadataHolder(Protocol):
324
357
  """Protocol for objects that have a ``metadata`` attribute that satisfies
@@ -26,7 +26,7 @@
26
26
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
27
 
28
28
  """Constants used to define the connections automatically added for each
29
- PipelineTask by the execution system.
29
+ PipelineTask by the execution system, as well as other special dataset types.
30
30
  """
31
31
 
32
32
  from __future__ import annotations
@@ -43,6 +43,10 @@ __all__ = (
43
43
  "METADATA_OUTPUT_TEMPLATE",
44
44
  "PACKAGES_INIT_OUTPUT_NAME",
45
45
  "PACKAGES_INIT_OUTPUT_STORAGE_CLASS",
46
+ "PROVENANCE_DATASET_TYPE_NAME",
47
+ "PROVENANCE_STORAGE_CLASS",
48
+ "RESOURCE_USAGE_STORAGE_CLASS",
49
+ "RESOURCE_USAGE_TEMPLATE",
46
50
  )
47
51
 
48
52
 
@@ -91,3 +95,18 @@ type names.
91
95
  METADATA_OUTPUT_STORAGE_CLASS: str = "TaskMetadata"
92
96
  """Name of the storage class for task metadata output datasets.
93
97
  """
98
+
99
+ PROVENANCE_DATASET_TYPE_NAME: str = "run_provenance"
100
+ """Name of the dataset used to store per-RUN provenance."""
101
+
102
+ PROVENANCE_STORAGE_CLASS: str = "ProvenanceQuantumGraph"
103
+ """Name of the storage class used to store provenance."""
104
+
105
+ RESOURCE_USAGE_TEMPLATE: str = "{label}_resource_usage"
106
+ """String template used to form the name of the resource usage dataset type for
107
+ a task.
108
+ """
109
+
110
+ RESOURCE_USAGE_STORAGE_CLASS: str = "ArrowAstropy"
111
+ """Storage class of the resource usage dataset type for a task.
112
+ """
@@ -25,6 +25,22 @@
25
25
  # You should have received a copy of the GNU General Public License
26
26
  # along with this program. If not, see <https://www.gnu.org/licenses/>.
27
27
 
28
- __all__ = ["register_instrument", "transfer_from_graph", "zip_from_graph", "retrieve_artifacts_for_quanta", "aggregate_graph"]
28
+ __all__ = [
29
+ "register_instrument",
30
+ "transfer_from_graph",
31
+ "zip_from_graph",
32
+ "retrieve_artifacts_for_quanta",
33
+ "aggregate_graph",
34
+ "ingest_graph",
35
+ "provenance_report",
36
+ ]
29
37
 
30
- from .commands import (register_instrument, retrieve_artifacts_for_quanta, transfer_from_graph, zip_from_graph, aggregate_graph)
38
+ from .commands import (
39
+ register_instrument,
40
+ retrieve_artifacts_for_quanta,
41
+ transfer_from_graph,
42
+ zip_from_graph,
43
+ aggregate_graph,
44
+ ingest_graph,
45
+ provenance_report,
46
+ )
@@ -25,6 +25,9 @@
25
25
  # You should have received a copy of the GNU General Public License
26
26
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
27
 
28
+ import functools
29
+ import operator
30
+ from collections.abc import Iterable
28
31
  from typing import Any
29
32
 
30
33
  import click
@@ -40,6 +43,7 @@ from lsst.daf.butler.cli.opt import (
40
43
  from lsst.daf.butler.cli.utils import ButlerCommand, split_commas, unwrap
41
44
 
42
45
  from ... import script
46
+ from ..._status import QuantumAttemptStatus, QuantumSuccessCaveats
43
47
  from ...quantum_graph import aggregator
44
48
  from ..opt import instrument_argument, update_output_chain_option
45
49
 
@@ -161,7 +165,7 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
161
165
 
162
166
  @click.command(short_help="Scan for the outputs of an active or completed quantum graph.", cls=ButlerCommand)
163
167
  @click.argument("predicted_graph", required=True)
164
- @repo_argument(required=True, help="Path to the central butler repository.")
168
+ @repo_argument(required=True, help="Path or alias for the butler repository.")
165
169
  @click.option(
166
170
  "-o",
167
171
  "--output",
@@ -181,9 +185,9 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
181
185
  help="Number of processes to use.",
182
186
  )
183
187
  @click.option(
184
- "--complete/--incomplete",
185
- "assume_complete",
186
- default=_AGGREGATOR_DEFAULTS.assume_complete,
188
+ "--incomplete/--complete",
189
+ "incomplete",
190
+ default=_AGGREGATOR_DEFAULTS.incomplete,
187
191
  help="Whether execution is still incomplete (and failures may still be retried)",
188
192
  )
189
193
  @click.option(
@@ -249,6 +253,14 @@ _AGGREGATOR_DEFAULTS = aggregator.AggregatorConfig()
249
253
  default=_AGGREGATOR_DEFAULTS.mock_storage_classes,
250
254
  help="Enable support for storage classes created by the lsst.pipe.base.tests.mocks package.",
251
255
  )
256
+ @click.option(
257
+ "--promise-ingest-graph/--no-promise-ingest-graph",
258
+ default=_AGGREGATOR_DEFAULTS.promise_ingest_graph,
259
+ help=(
260
+ "Promise to run 'butler ingest-graph' later, allowing aggregate-graph "
261
+ "to skip metadata/log/config ingestion for now."
262
+ ),
263
+ )
252
264
  def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
253
265
  """Scan for quantum graph's outputs to gather provenance, ingest datasets
254
266
  into the central butler repository, and delete datasets that are no
@@ -268,3 +280,136 @@ def aggregate_graph(predicted_graph: str, repo: str, **kwargs: Any) -> None:
268
280
  # When this exception is raised, we'll have already logged the relevant
269
281
  # traceback from a separate worker.
270
282
  raise click.ClickException(str(err)) from None
283
+
284
+
285
+ @click.command(
286
+ short_help="Ingest a provenance quantum graph into a butler.",
287
+ cls=ButlerCommand,
288
+ )
289
+ @repo_argument(required=True, help="Path or alias for the butler repository.")
290
+ @click.argument("provenance_graph", required=False)
291
+ @transfer_option(default="move")
292
+ @click.option("--batch-size", default=10000, help="How many datasets to process in each transaction.")
293
+ @click.option(
294
+ "--output-run",
295
+ default=None,
296
+ help=(
297
+ "Name of the output RUN collection. Must be provided if the provenance graph is not"
298
+ " provided (so the graph can be found in the butler)."
299
+ ),
300
+ )
301
+ def ingest_graph(
302
+ *,
303
+ repo: str,
304
+ provenance_graph: str | None,
305
+ transfer: str | None,
306
+ batch_size: int,
307
+ output_run: str | None,
308
+ ) -> None:
309
+ """Ingest a provenance graph into a butler repository."""
310
+ from ...quantum_graph.ingest_graph import ingest_graph as ingest_graph_py
311
+
312
+ ingest_graph_py(repo, provenance_graph, transfer=transfer, batch_size=batch_size, output_run=output_run)
313
+
314
+
315
+ @click.command(
316
+ short_help="Print and write provenance reports.",
317
+ cls=ButlerCommand,
318
+ )
319
+ @click.argument("repo_or_qg")
320
+ @click.argument("collection", required=False, default=None)
321
+ @click.option(
322
+ "--state",
323
+ multiple=True,
324
+ type=click.Choice(QuantumAttemptStatus),
325
+ help=(
326
+ "Additional quantum state to include in the status report and data ID tables "
327
+ "(FAILED, ABORTED, and ABORTED_SUCCESS are included by default)."
328
+ ),
329
+ )
330
+ @click.option(
331
+ "--no-state",
332
+ multiple=True,
333
+ type=str,
334
+ metavar="STATE",
335
+ help="Quantum state to drop from the status report and data ID tables (same options as --state).",
336
+ )
337
+ @click.option(
338
+ "--status-report",
339
+ default=None,
340
+ metavar="URI",
341
+ help="File or URI (.json) for a detailed report (with data IDs) on quanta with certain states.",
342
+ )
343
+ @click.option(
344
+ "--quantum-table/--no-quantum-table",
345
+ default=True,
346
+ help="Whether to print summary of quantum status counts to STDOUT.",
347
+ )
348
+ @click.option(
349
+ "--exception-table/--no-exception-table",
350
+ default=True,
351
+ help="Whether to print summary of exception type counts to STDOUT.",
352
+ )
353
+ @click.option(
354
+ "--caveat",
355
+ multiple=True,
356
+ type=click.Choice(QuantumSuccessCaveats),
357
+ help=(
358
+ "Include successful quanta in the status report if they have this caveat. "
359
+ "May be passed multiple times; any matching caveat is included. "
360
+ "Passing this option implicitly adds '--state SUCCESSFUL'."
361
+ ),
362
+ )
363
+ @click.option(
364
+ "--data-id-table-dir",
365
+ default=None,
366
+ metavar="URI",
367
+ help=(
368
+ "Directory (may be a URI) for a tree of data ID tables for each "
369
+ "task label, status, and exception type combination in the status report."
370
+ ),
371
+ )
372
+ def provenance_report(
373
+ *,
374
+ repo_or_qg: str,
375
+ collection: str | None,
376
+ state: Iterable[QuantumAttemptStatus],
377
+ no_state: Iterable[str],
378
+ status_report: str | None,
379
+ quantum_table: bool = False,
380
+ exception_table: bool = False,
381
+ caveat: Iterable[QuantumSuccessCaveats],
382
+ data_id_table_dir: str | None,
383
+ ) -> None:
384
+ """Read a provenance quantum graph from a butler or file and use it to
385
+ generate reports.
386
+
387
+ REPO_OR_QG is a path or alias for the butler repository (if reading an
388
+ ingested graph, as indicated by passing COLLECTION), or the path to a
389
+ provenance quantum graph file.
390
+ """
391
+ from ...quantum_graph import ProvenanceQuantumGraph
392
+
393
+ states = set(state)
394
+ states.add(QuantumAttemptStatus.FAILED)
395
+ states.add(QuantumAttemptStatus.ABORTED)
396
+ states.add(QuantumAttemptStatus.ABORTED_SUCCESS)
397
+ for state_name in no_state:
398
+ states.discard(QuantumAttemptStatus.__members__[state_name])
399
+ with_caveats: QuantumSuccessCaveats | None = None
400
+ if caveat:
401
+ states.add(QuantumAttemptStatus.SUCCESSFUL)
402
+ with_caveats = functools.reduce(
403
+ operator.__or__,
404
+ caveat,
405
+ QuantumSuccessCaveats.NO_CAVEATS,
406
+ )
407
+ with ProvenanceQuantumGraph.from_args(repo_or_qg, collection=collection, datasets=()) as (graph, _):
408
+ graph.make_many_reports(
409
+ status_report_file=status_report,
410
+ states=states,
411
+ print_quantum_table=quantum_table,
412
+ print_exception_table=exception_table,
413
+ with_caveats=with_caveats,
414
+ data_id_table_dir=data_id_table_dir,
415
+ )