lsst-pipe-base 30.2026.400__py3-none-any.whl → 30.2026.500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. lsst/pipe/base/_instrument.py +17 -5
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +14 -0
  4. lsst/pipe/base/automatic_connection_constants.py +11 -0
  5. lsst/pipe/base/cli/cmd/__init__.py +2 -0
  6. lsst/pipe/base/cli/cmd/commands.py +108 -1
  7. lsst/pipe/base/graph/graph.py +9 -8
  8. lsst/pipe/base/log_capture.py +1 -1
  9. lsst/pipe/base/pipeline.py +2 -2
  10. lsst/pipe/base/pipelineIR.py +1 -1
  11. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  12. lsst/pipe/base/pipeline_graph/_edges.py +11 -11
  13. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +2 -2
  14. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  15. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  16. lsst/pipe/base/prerequisite_helpers.py +2 -1
  17. lsst/pipe/base/quantum_graph/_multiblock.py +29 -13
  18. lsst/pipe/base/quantum_graph/_provenance.py +411 -19
  19. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  20. lsst/pipe/base/quantum_graph/formatter.py +4 -4
  21. lsst/pipe/base/quantum_graph/ingest_graph.py +61 -4
  22. lsst/pipe/base/quantum_graph_builder.py +1 -8
  23. lsst/pipe/base/quantum_graph_skeleton.py +29 -27
  24. lsst/pipe/base/quantum_provenance_graph.py +12 -10
  25. lsst/pipe/base/separable_pipeline_executor.py +1 -1
  26. lsst/pipe/base/single_quantum_executor.py +1 -1
  27. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  28. lsst/pipe/base/version.py +1 -1
  29. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/METADATA +1 -1
  30. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/RECORD +38 -38
  31. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/WHEEL +1 -1
  32. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/entry_points.txt +0 -0
  33. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/COPYRIGHT +0 -0
  34. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/LICENSE +0 -0
  35. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/bsd_license.txt +0 -0
  36. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/gpl-v3.0.txt +0 -0
  37. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/top_level.txt +0 -0
  38. {lsst_pipe_base-30.2026.400.dist-info → lsst_pipe_base-30.2026.500.dist-info}/zip-safe +0 -0
@@ -38,13 +38,14 @@ __all__ = (
38
38
  "ProvenanceQuantumGraphWriter",
39
39
  "ProvenanceQuantumInfo",
40
40
  "ProvenanceQuantumModel",
41
+ "ProvenanceQuantumReport",
41
42
  "ProvenanceQuantumScanData",
42
43
  "ProvenanceQuantumScanModels",
43
44
  "ProvenanceQuantumScanStatus",
45
+ "ProvenanceReport",
44
46
  "ProvenanceTaskMetadataModel",
45
47
  )
46
48
 
47
-
48
49
  import dataclasses
49
50
  import enum
50
51
  import itertools
@@ -60,9 +61,9 @@ import networkx
60
61
  import numpy as np
61
62
  import pydantic
62
63
 
63
- from lsst.daf.butler import DataCoordinate
64
+ from lsst.daf.butler import Butler, DataCoordinate
64
65
  from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
65
- from lsst.resources import ResourcePathExpression
66
+ from lsst.resources import ResourcePath, ResourcePathExpression
66
67
  from lsst.utils.iteration import ensure_iterable
67
68
  from lsst.utils.logging import LsstLogAdapter, getLogger
68
69
  from lsst.utils.packages import Packages
@@ -559,6 +560,131 @@ class ProvenanceTaskMetadataModel(pydantic.BaseModel):
559
560
  return super().model_validate_strings(*args, **kwargs)
560
561
 
561
562
 
563
+ class ProvenanceQuantumReport(pydantic.BaseModel):
564
+ """A Pydantic model used to report information about a single
565
+ (generally problematic) quantum.
566
+ """
567
+
568
+ quantum_id: uuid.UUID
569
+ data_id: dict[str, int | str]
570
+ attempts: list[ProvenanceQuantumAttemptModel]
571
+
572
+ @classmethod
573
+ def from_info(cls, quantum_id: uuid.UUID, quantum_info: ProvenanceQuantumInfo) -> ProvenanceQuantumReport:
574
+ """Construct from a provenance quantum graph node.
575
+
576
+ Parameters
577
+ ----------
578
+ quantum_id : `uuid.UUID`
579
+ Unique ID for the quantum.
580
+ quantum_info : `ProvenanceQuantumInfo`
581
+ Node attributes for this quantum.
582
+ """
583
+ return cls(
584
+ quantum_id=quantum_id,
585
+ data_id=dict(quantum_info["data_id"].mapping),
586
+ attempts=quantum_info["attempts"],
587
+ )
588
+
589
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
590
+ # when we inherit those docstrings in our public classes.
591
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
592
+
593
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
594
+ """See `pydantic.BaseModel.copy`."""
595
+ return super().copy(*args, **kwargs)
596
+
597
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
598
+ """See `pydantic.BaseModel.model_dump`."""
599
+ return super().model_dump(*args, **kwargs)
600
+
601
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
602
+ """See `pydantic.BaseModel.model_dump_json`."""
603
+ return super().model_dump(*args, **kwargs)
604
+
605
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
606
+ """See `pydantic.BaseModel.model_copy`."""
607
+ return super().model_copy(*args, **kwargs)
608
+
609
+ @classmethod
610
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
611
+ """See `pydantic.BaseModel.model_construct`."""
612
+ return super().model_construct(*args, **kwargs)
613
+
614
+ @classmethod
615
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
616
+ """See `pydantic.BaseModel.model_json_schema`."""
617
+ return super().model_json_schema(*args, **kwargs)
618
+
619
+ @classmethod
620
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
621
+ """See `pydantic.BaseModel.model_validate`."""
622
+ return super().model_validate(*args, **kwargs)
623
+
624
+ @classmethod
625
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
626
+ """See `pydantic.BaseModel.model_validate_json`."""
627
+ return super().model_validate_json(*args, **kwargs)
628
+
629
+ @classmethod
630
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
631
+ """See `pydantic.BaseModel.model_validate_strings`."""
632
+ return super().model_validate_strings(*args, **kwargs)
633
+
634
+
635
+ class ProvenanceReport(pydantic.RootModel):
636
+ """A Pydantic model that groups quantum information by task label, then
637
+ status (as a string), and then exception type.
638
+ """
639
+
640
+ root: dict[TaskLabel, dict[str, dict[str | None, list[ProvenanceQuantumReport]]]] = {}
641
+
642
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
643
+ # when we inherit those docstrings in our public classes.
644
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
645
+
646
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
647
+ """See `pydantic.BaseModel.copy`."""
648
+ return super().copy(*args, **kwargs)
649
+
650
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
651
+ """See `pydantic.BaseModel.model_dump`."""
652
+ return super().model_dump(*args, **kwargs)
653
+
654
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
655
+ """See `pydantic.BaseModel.model_dump_json`."""
656
+ return super().model_dump(*args, **kwargs)
657
+
658
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
659
+ """See `pydantic.BaseModel.model_copy`."""
660
+ return super().model_copy(*args, **kwargs)
661
+
662
+ @classmethod
663
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
664
+ """See `pydantic.BaseModel.model_construct`."""
665
+ return super().model_construct(*args, **kwargs)
666
+
667
+ @classmethod
668
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
669
+ """See `pydantic.BaseModel.model_json_schema`."""
670
+ return super().model_json_schema(*args, **kwargs)
671
+
672
+ @classmethod
673
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
674
+ """See `pydantic.BaseModel.model_validate`."""
675
+ return super().model_validate(*args, **kwargs)
676
+
677
+ @classmethod
678
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
679
+ """See `pydantic.BaseModel.model_validate_json`."""
680
+ return super().model_validate_json(*args, **kwargs)
681
+
682
+ @classmethod
683
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
684
+ """See `pydantic.BaseModel.model_validate_strings`."""
685
+ return super().model_validate_strings(*args, **kwargs)
686
+
687
+
562
688
  class ProvenanceQuantumModel(pydantic.BaseModel):
563
689
  """Data model for the quanta in a provenance quantum graph file."""
564
690
 
@@ -1005,6 +1131,83 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1005
1131
  dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
1006
1132
  }
1007
1133
 
1134
+ @classmethod
1135
+ @contextmanager
1136
+ def from_args(
1137
+ cls,
1138
+ repo_or_filename: str,
1139
+ /,
1140
+ collection: str | None = None,
1141
+ *,
1142
+ quanta: Iterable[uuid.UUID] | None = None,
1143
+ datasets: Iterable[uuid.UUID] | None = None,
1144
+ writeable: bool = False,
1145
+ ) -> Iterator[tuple[ProvenanceQuantumGraph, Butler | None]]:
1146
+ """Construct a `ProvenanceQuantumGraph` from CLI-friendly arguments for
1147
+ a file or butler-ingested graph dataset.
1148
+
1149
+ Parameters
1150
+ ----------
1151
+ repo_or_filename : `str`
1152
+ Either a provenance quantum graph filename or a butler repository
1153
+ path or alias.
1154
+ collection : `str` or `None`, optional
1155
+ Collection to search; presence indicates that the first argument
1156
+ is a butler repository, not a filename.
1157
+ quanta : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, optional
1158
+ IDs of the quanta to load, or `None` to load all.
1159
+ datasets : `~collections.abc.Iterable` [ `uuid.UUID` ] or `None`, optional
1160
+ IDs of the datasets to load, or `None` to load all.
1161
+ writeable : `bool`, optional
1162
+ Whether the butler should be constructed with write support.
1163
+
1164
+ Returns
1165
+ -------
1166
+ context : `contextlib.AbstractContextManager`
1167
+ A context manager that yields a tuple of
1168
+
1169
+ - the `ProvenanceQuantumGraph`
1170
+ - the `Butler` constructed (or `None`)
1171
+
1172
+ when entered.
1173
+ """
1174
+ exit_stack = ExitStack()
1175
+ if collection is not None:
1176
+ try:
1177
+ butler = exit_stack.enter_context(
1178
+ Butler.from_config(repo_or_filename, collections=[collection], writeable=writeable)
1179
+ )
1180
+ except Exception as err:
1181
+ err.add_note(
1182
+ f"Expected {repo_or_filename!r} to be a butler repository path or alias because a "
1183
+ f"collection ({collection}) was provided."
1184
+ )
1185
+ raise
1186
+ with exit_stack:
1187
+ graph = butler.get(
1188
+ acc.PROVENANCE_DATASET_TYPE_NAME, parameters={"quanta": quanta, "datasets": datasets}
1189
+ )
1190
+ yield graph, butler
1191
+ else:
1192
+ try:
1193
+ reader = exit_stack.enter_context(ProvenanceQuantumGraphReader.open(repo_or_filename))
1194
+ except Exception as err:
1195
+ err.add_note(
1196
+ f"Expected a {repo_or_filename} to be a provenance quantum graph filename "
1197
+ f"because no collection was provided."
1198
+ )
1199
+ raise
1200
+ with exit_stack:
1201
+ if quanta is None:
1202
+ reader.read_quanta()
1203
+ elif not quanta:
1204
+ reader.read_quanta(quanta)
1205
+ if datasets is None:
1206
+ reader.read_datasets()
1207
+ elif not datasets:
1208
+ reader.read_datasets(datasets)
1209
+ yield reader.graph, None
1210
+
1008
1211
  @property
1009
1212
  def init_quanta(self) -> Mapping[TaskLabel, uuid.UUID]:
1010
1213
  """A mapping from task label to the ID of the special init quantum for
@@ -1101,10 +1304,16 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1101
1304
  """
1102
1305
  return self._bipartite_xgraph.copy(as_view=True)
1103
1306
 
1104
- def make_quantum_table(self) -> astropy.table.Table:
1307
+ def make_quantum_table(self, drop_unused_columns: bool = True) -> astropy.table.Table:
1105
1308
  """Construct an `astropy.table.Table` with a tabular summary of the
1106
1309
  quanta.
1107
1310
 
1311
+ Parameters
1312
+ ----------
1313
+ drop_unused_columns : `bool`, optional
1314
+ Whether to drop columns for rare states that did not actually
1315
+ occur in this run.
1316
+
1108
1317
  Returns
1109
1318
  -------
1110
1319
  table : `astropy.table.Table`
@@ -1140,19 +1349,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1140
1349
  caveats = f"{code.concise()}({count})" # type: ignore[union-attr]
1141
1350
  else:
1142
1351
  caveats = ""
1143
- rows.append(
1352
+ row: dict[str, Any] = {
1353
+ "Task": task_label,
1354
+ "Caveats": caveats,
1355
+ }
1356
+ for status in QuantumAttemptStatus:
1357
+ row[status.title] = status_counts.get(status, 0)
1358
+ row.update(
1144
1359
  {
1145
- "Task": task_label,
1146
- "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
1147
- "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
1148
- "Caveats": caveats,
1149
- "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
1150
- "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
1151
1360
  "TOTAL": len(quanta_for_task),
1152
1361
  "EXPECTED": self.header.n_task_quanta[task_label],
1153
1362
  }
1154
1363
  )
1155
- return astropy.table.Table(rows)
1364
+ rows.append(row)
1365
+ table = astropy.table.Table(rows)
1366
+ if drop_unused_columns:
1367
+ for status in QuantumAttemptStatus:
1368
+ if status.is_rare and not table[status.title].any():
1369
+ del table[status.title]
1370
+ return table
1156
1371
 
1157
1372
  def make_exception_table(self) -> astropy.table.Table:
1158
1373
  """Construct an `astropy.table.Table` with counts for each exception
@@ -1165,13 +1380,25 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1165
1380
  """
1166
1381
  rows = []
1167
1382
  for task_label, quanta_for_task in self.quanta_by_task.items():
1168
- counts_by_type = Counter(
1169
- exc_info.type_name
1170
- for q in quanta_for_task.values()
1171
- if (exc_info := self._quantum_only_xgraph.nodes[q]["exception"]) is not None
1172
- )
1173
- for type_name, count in counts_by_type.items():
1174
- rows.append({"Task": task_label, "Exception": type_name, "Count": count})
1383
+ success_counts = Counter[str]()
1384
+ failed_counts = Counter[str]()
1385
+ for quantum_id in quanta_for_task.values():
1386
+ quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
1387
+ exc_info = quantum_info["exception"]
1388
+ if exc_info is not None:
1389
+ if quantum_info["status"] is QuantumAttemptStatus.SUCCESSFUL:
1390
+ success_counts[exc_info.type_name] += 1
1391
+ else:
1392
+ failed_counts[exc_info.type_name] += 1
1393
+ for type_name in sorted(success_counts.keys() | failed_counts.keys()):
1394
+ rows.append(
1395
+ {
1396
+ "Task": task_label,
1397
+ "Exception": type_name,
1398
+ "Successes": success_counts.get(type_name, 0),
1399
+ "Failures": failed_counts.get(type_name, 0),
1400
+ }
1401
+ )
1175
1402
  return astropy.table.Table(rows)
1176
1403
 
1177
1404
  def make_task_resource_usage_table(
@@ -1214,6 +1441,171 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
1214
1441
  array = np.array(rows, dtype=row_dtype)
1215
1442
  return astropy.table.Table(array, units=QuantumResourceUsage.get_units())
1216
1443
 
1444
+ def make_status_report(
1445
+ self,
1446
+ states: Iterable[QuantumAttemptStatus] = (
1447
+ QuantumAttemptStatus.FAILED,
1448
+ QuantumAttemptStatus.ABORTED,
1449
+ QuantumAttemptStatus.ABORTED_SUCCESS,
1450
+ ),
1451
+ *,
1452
+ also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
1453
+ with_caveats: QuantumSuccessCaveats | None = QuantumSuccessCaveats.PARTIAL_OUTPUTS_ERROR,
1454
+ data_id_table_dir: ResourcePathExpression | None = None,
1455
+ ) -> ProvenanceReport:
1456
+ """Make a JSON- or YAML-friendly report of all quanta with the given
1457
+ states.
1458
+
1459
+ Parameters
1460
+ ----------
1461
+ states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
1462
+ `..QuantumAttemptStatus`, optional
1463
+ A quantum is included if it has any of these states. Defaults to
1464
+ states that clearly represent problems.
1465
+ also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
1466
+ `..QuantumAttemptStatus`, optional
1467
+ Additional states to consider; unioned with ``states``. This is
1468
+ provided so users can easily request additional states while also
1469
+ getting the defaults.
1470
+ with_caveats : `..QuantumSuccessCaveats` or `None`, optional
1471
+ If `..QuantumAttemptStatus.SUCCESSFUL` is in ``states``, only
1472
+ include quanta with these caveat flags. May be set to `None`
1473
+ to report on all successful quanta.
1474
+ data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
1475
+ optional
1476
+ If provided, a directory to write data ID tables (in ECSV format)
1477
+ with all of the data IDs with the given states, for use with the
1478
+ ``--data-id-tables`` argument to the quantum graph builder.
1479
+ Subdirectories for each task and status will be created within this
1480
+ directory, with one file for each exception type (or ``UNKNOWN``
1481
+ when there is no exception).
1482
+
1483
+ Returns
1484
+ -------
1485
+ report : `ProvenanceReport`
1486
+ A Pydantic model that groups quanta by task label and exception
1487
+ type.
1488
+ """
1489
+ states = set(ensure_iterable(states))
1490
+ states.update(ensure_iterable(also))
1491
+ result = ProvenanceReport(root={})
1492
+ if data_id_table_dir is not None:
1493
+ data_id_table_dir = ResourcePath(data_id_table_dir)
1494
+ for task_label, quanta_for_task in self.quanta_by_task.items():
1495
+ reports_for_task: dict[str, dict[str | None, list[ProvenanceQuantumReport]]] = {}
1496
+ table_rows_for_task: dict[str, dict[str | None, list[tuple[int | str, ...]]]] = {}
1497
+ for quantum_id in quanta_for_task.values():
1498
+ quantum_info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
1499
+ quantum_status = quantum_info["status"]
1500
+ if quantum_status not in states:
1501
+ continue
1502
+ if (
1503
+ quantum_status is QuantumAttemptStatus.SUCCESSFUL
1504
+ and with_caveats is not None
1505
+ and (quantum_info["caveats"] is None or not (quantum_info["caveats"] & with_caveats))
1506
+ ):
1507
+ continue
1508
+ key1 = quantum_status.name
1509
+ exc_info = quantum_info["exception"]
1510
+ key2 = exc_info.type_name if exc_info is not None else None
1511
+ reports_for_task.setdefault(key1, {}).setdefault(key2, []).append(
1512
+ ProvenanceQuantumReport.from_info(quantum_id, quantum_info)
1513
+ )
1514
+ if data_id_table_dir:
1515
+ table_rows_for_task.setdefault(key1, {}).setdefault(key2, []).append(
1516
+ quantum_info["data_id"].required_values
1517
+ )
1518
+ if reports_for_task:
1519
+ result.root[task_label] = reports_for_task
1520
+ if table_rows_for_task:
1521
+ assert data_id_table_dir is not None, "table_rows_for_task should be empty"
1522
+ for status_name, table_rows_for_status in table_rows_for_task.items():
1523
+ dir_for_task_and_status = data_id_table_dir.join(task_label, forceDirectory=True).join(
1524
+ status_name, forceDirectory=True
1525
+ )
1526
+ if dir_for_task_and_status.isLocal:
1527
+ dir_for_task_and_status.mkdir()
1528
+ for exc_name, data_id_rows in table_rows_for_status.items():
1529
+ table = astropy.table.Table(
1530
+ rows=data_id_rows,
1531
+ names=list(self.pipeline_graph.tasks[task_label].dimensions.required),
1532
+ )
1533
+ filename = f"{exc_name}.ecsv" if exc_name is not None else "UNKNOWN.ecsv"
1534
+ with dir_for_task_and_status.join(filename).open("w") as stream:
1535
+ table.write(stream, format="ecsv")
1536
+ return result
1537
+
1538
+ def make_many_reports(
1539
+ self,
1540
+ states: Iterable[QuantumAttemptStatus] = (
1541
+ QuantumAttemptStatus.FAILED,
1542
+ QuantumAttemptStatus.ABORTED,
1543
+ QuantumAttemptStatus.ABORTED_SUCCESS,
1544
+ ),
1545
+ *,
1546
+ status_report_file: ResourcePathExpression | None = None,
1547
+ print_quantum_table: bool = False,
1548
+ print_exception_table: bool = False,
1549
+ also: QuantumAttemptStatus | Iterable[QuantumAttemptStatus] = (),
1550
+ with_caveats: QuantumSuccessCaveats | None = None,
1551
+ data_id_table_dir: ResourcePathExpression | None = None,
1552
+ ) -> None:
1553
+ """Write multiple reports.
1554
+
1555
+ Parameters
1556
+ ----------
1557
+ states : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
1558
+ `..QuantumAttemptStatus`, optional
1559
+ A quantum is included in the status report and data ID tables if it
1560
+ has any of these states. Defaults to states that clearly represent
1561
+ problems.
1562
+ status_report_file : convertible to `~lsst.resources.ResourcePath`, \
1563
+ optional
1564
+ Filename for the JSON status report (see `make_status_report`).
1565
+ print_quantum_table : `bool`, optional
1566
+ If `True`, print a quantum summary table (counts only) to STDOUT.
1567
+ print_exception_table : `bool`, optional
1568
+ If `True`, print an exception-type summary table (counts only) to
1569
+ STDOUT.
1570
+ also : `~collections.abc.Iterable` [`..QuantumAttemptStatus`] or \
1571
+ `..QuantumAttemptStatus`, optional
1572
+ Additional states to consider in the status report and data ID
1573
+ tables; unioned with ``states``. This is provided so users can
1574
+ easily request additional states while also getting the defaults.
1575
+ with_caveats : `..QuantumSuccessCaveats` or `None`, optional
1576
+ Only include quanta with these caveat flags in the status report
1577
+ and data ID tables. May be set to `None` to report on all
1578
+ successful quanta (an empty sequence reports on only quanta with no
1579
+ caveats). If provided, `QuantumAttemptStatus.SUCCESSFUL` is
1580
+ automatically included in ``states``.
1581
+ data_id_table_dir : convertible to `~lsst.resources.ResourcePath`, \
1582
+ optional
1583
+ If provided, a directory to write data ID tables (in ECSV format)
1584
+ with all of the data IDs with the given states, for use with the
1585
+ ``--data-id-tables`` argument to the quantum graph builder.
1586
+ Subdirectories for each task and status will be created within this
1587
+ directory, with one file for each exception type (or ``UNKNOWN``
1588
+ when there is no exception).
1589
+ """
1590
+ if status_report_file is not None or data_id_table_dir is not None:
1591
+ status_report = self.make_status_report(
1592
+ states, also=also, with_caveats=with_caveats, data_id_table_dir=data_id_table_dir
1593
+ )
1594
+ if status_report_file is not None:
1595
+ status_report_file = ResourcePath(status_report_file)
1596
+ if status_report_file.isLocal:
1597
+ status_report_file.dirname().mkdir()
1598
+ with ResourcePath(status_report_file).open("w") as stream:
1599
+ stream.write(status_report.model_dump_json(indent=2))
1600
+ if print_quantum_table:
1601
+ quantum_table = self.make_quantum_table()
1602
+ quantum_table.pprint_all()
1603
+ print("")
1604
+ if print_exception_table:
1605
+ exception_table = self.make_exception_table()
1606
+ exception_table.pprint_all()
1607
+ print("")
1608
+
1217
1609
 
1218
1610
  @dataclasses.dataclass
1219
1611
  class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
@@ -140,4 +140,3 @@ from ._communicators import FatalWorkerError
140
140
  # - having the worker logs go to separate files is actually very nice, and it's
141
141
  # more efficient if they just do that themselves, and that's not something
142
142
  # our logging CLI can actually do, AFAICT.
143
-
@@ -60,13 +60,13 @@ class _ProvenanceFormatterParameters(pydantic.BaseModel):
60
60
 
61
61
  @pydantic.field_validator("quanta", mode="before")
62
62
  @classmethod
63
- def quanta_to_list(cls, v: Any) -> list[uuid.UUID]:
64
- return list(v)
63
+ def quanta_to_list(cls, v: Any) -> list[uuid.UUID] | None:
64
+ return list(v) if v is not None else None
65
65
 
66
66
  @pydantic.field_validator("datasets", mode="before")
67
67
  @classmethod
68
- def datasets_to_list(cls, v: Any) -> list[uuid.UUID]:
69
- return list(v)
68
+ def datasets_to_list(cls, v: Any) -> list[uuid.UUID] | None:
69
+ return list(v) if v is not None else None
70
70
 
71
71
  @property
72
72
  def nodes(self) -> list[uuid.UUID]:
@@ -62,6 +62,7 @@ __all__ = ("ingest_graph",)
62
62
 
63
63
  import dataclasses
64
64
  import itertools
65
+ import os
65
66
  import uuid
66
67
  from collections.abc import Iterator
67
68
  from contextlib import contextmanager
@@ -107,10 +108,13 @@ def ingest_graph(
107
108
  butler_config : `str`
108
109
  Path or alias for the butler repository, or a butler repository config
109
110
  object.
110
- uri : convertible to `lsst.resources.ResourcePath` or `None`, optional
111
+ uri : `lsst.resources.ResourcePathExpression` or `None`, optional
111
112
  Location of the provenance quantum graph to ingest. `None` indicates
112
113
  that the quantum graph has already been ingested, but other ingests
113
114
  and/or deletions failed and need to be resumed.
115
+ transfer : `str` or `None`, optional
116
+ Transfer mode to use when ingesting graph. Matches those supported
117
+ by `lsst.resources.ResourcePath.transfer_from`.
114
118
  batch_size : `int`, optional
115
119
  Number of datasets to process in each transaction.
116
120
  output_run : `str`, optional
@@ -120,8 +124,9 @@ def ingest_graph(
120
124
 
121
125
  Notes
122
126
  -----
123
- After this operation, no further processing may be done in the
124
- `~lsst.daf.butler.CollectionType.RUN` collection.
127
+ After this operation, any further processing done in the
128
+ `~lsst.daf.butler.CollectionType.RUN` collection will not be included in
129
+ the provenance.
125
130
 
126
131
  If this process is interrupted, it can pick up where it left off if run
127
132
  again (at the cost of some duplicate work to figure out how much progress
@@ -134,6 +139,32 @@ def ingest_graph(
134
139
  helper.forget_ingested_datasets(batch_size=batch_size)
135
140
  helper.ingest_graph_dataset(uri, transfer=transfer)
136
141
  helper.clean_and_reingest_datasets(batch_size=batch_size)
142
+ if helper.directories_to_delete:
143
+ _LOG.info(
144
+ "Deleting %d directories after checking that they are empty.",
145
+ len(helper.directories_to_delete),
146
+ )
147
+ n_deleted: int = 0
148
+ for top in sorted(helper.directories_to_delete):
149
+ nonempty: set[str] = set()
150
+ for root, dirnames, filenames in os.walk(top, topdown=False):
151
+ if filenames:
152
+ nonempty.add(root)
153
+ for dirname in dirnames:
154
+ dirpath = os.path.join(root, dirname)
155
+ if dirpath in nonempty:
156
+ nonempty.add(root)
157
+ else:
158
+ os.rmdir(dirpath)
159
+ if nonempty:
160
+ _LOG.warning(
161
+ "Directory %r was not deleted because it unexpectedly still had files in it.",
162
+ top,
163
+ )
164
+ else:
165
+ os.rmdir(root)
166
+ n_deleted += 1
167
+ _LOG.info("Deleted %d directories.", n_deleted)
137
168
 
138
169
 
139
170
  @dataclasses.dataclass
@@ -144,6 +175,7 @@ class _GraphIngester:
144
175
  graph_already_ingested: bool
145
176
  n_datasets: int
146
177
  datasets_already_ingested: set[uuid.UUID] = dataclasses.field(default_factory=set)
178
+ directories_to_delete: set[str] = dataclasses.field(default_factory=set)
147
179
 
148
180
  @property
149
181
  def output_run(self) -> str:
@@ -308,7 +340,7 @@ class _GraphIngester:
308
340
  if not to_process:
309
341
  return 0
310
342
  _LOG.verbose(
311
- "Deleting and deleting a %d-dataset batch; %d/%d complete.",
343
+ "Deleting and re-ingesting a %d-dataset batch; %d/%d complete.",
312
344
  len(to_process),
313
345
  n_current,
314
346
  self.n_datasets,
@@ -331,6 +363,20 @@ class _GraphIngester:
331
363
  raise status.exception
332
364
  file_dataset = FileDataset(refs=expanded_refs, path=direct_uri, formatter=ProvenanceFormatter)
333
365
  self.butler.ingest(file_dataset, transfer=None)
366
+ if len(original_uris) == len(expanded_refs):
367
+ for uri, ref in zip(original_uris, expanded_refs):
368
+ if uri.isLocal:
369
+ if (
370
+ parent_dir := self.find_dataset_type_directory(uri.ospath, ref.datasetType.name)
371
+ ) is not None:
372
+ self.directories_to_delete.add(parent_dir)
373
+ elif any(uri.isLocal for uri in original_uris):
374
+ _LOG.warning(
375
+ "Not attempting to delete empty metadata/log/config directories because the number "
376
+ "of paths (%s) did not match the number of datasets (%s).",
377
+ len(original_uris),
378
+ len(expanded_refs),
379
+ )
334
380
  n = len(to_process)
335
381
  to_process.clear()
336
382
  return n
@@ -354,3 +400,14 @@ class _GraphIngester:
354
400
  datastore_records={},
355
401
  dataset_types=dataset_types,
356
402
  )
403
+
404
+ def find_dataset_type_directory(self, ospath: str, dataset_type: str) -> str | None:
405
+ dir_components: list[str] = []
406
+ for component in os.path.dirname(ospath).split(os.path.sep):
407
+ dir_components.append(component)
408
+ # If the full dataset type name is in a single directory path
409
+ # component, we guess that directory can only have datasets of
410
+ # that type.
411
+ if dataset_type in component:
412
+ return os.path.sep.join(dir_components)
413
+ return None
@@ -380,8 +380,6 @@ class QuantumGraphBuilder(ABC):
380
380
 
381
381
  Parameters
382
382
  ----------
383
- metadata : `~collections.abc.Mapping`, optional
384
- Flexible metadata to add to the quantum graph.
385
383
  attach_datastore_records : `bool`, optional
386
384
  Whether to include datastore records in the graph. Required for
387
385
  `lsst.daf.butler.QuantumBackedButler` execution.
@@ -887,11 +885,6 @@ class QuantumGraphBuilder(ABC):
887
885
  Identifier for this quantum in the graph.
888
886
  skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
889
887
  Preliminary quantum graph, to be modified in-place.
890
- skypix_bounds_builder : `~prerequisite_helpers.SkyPixBoundsBuilder`
891
- An object that accumulates the appropriate spatial bounds for a
892
- quantum.
893
- timespan_builder : `~prerequisite_helpers.TimespanBuilder`
894
- An object that accumulates the appropriate timespan for a quantum.
895
888
 
896
889
  Returns
897
890
  -------
@@ -1144,7 +1137,7 @@ class QuantumGraphBuilder(ABC):
1144
1137
  "outputs" attributes on all quantum nodes, as added by
1145
1138
  `_resolve_task_quanta`, as well as a "datastore_records" attribute
1146
1139
  as added by `_attach_datastore_records`.
1147
- metadata : `Mapping`
1140
+ metadata : `~collections.abc.Mapping`
1148
1141
  Flexible metadata to add to the graph.
1149
1142
 
1150
1143
  Returns