lsst-pipe-base 30.2026.300-py3-none-any.whl → 30.2026.400-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- lsst/pipe/base/_instrument.py +4 -7
- lsst/pipe/base/_status.py +29 -10
- lsst/pipe/base/automatic_connection_constants.py +9 -1
- lsst/pipe/base/cli/cmd/__init__.py +16 -2
- lsst/pipe/base/cli/cmd/commands.py +42 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +3 -6
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/pipeline.py +3 -4
- lsst/pipe/base/pipelineIR.py +0 -6
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_edges.py +19 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
- lsst/pipe/base/quantum_graph/_common.py +3 -1
- lsst/pipe/base/quantum_graph/_predicted.py +7 -0
- lsst/pipe/base/quantum_graph/_provenance.py +87 -37
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -1
- lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +14 -6
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +14 -13
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +2 -2
- lsst/pipe/base/quantum_graph/formatter.py +70 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
- lsst/pipe/base/quantum_provenance_graph.py +17 -2
- lsst/pipe/base/separable_pipeline_executor.py +5 -6
- lsst/pipe/base/single_quantum_executor.py +6 -6
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +40 -39
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
lsst/pipe/base/pipeline_graph/_pipeline_graph.py

```diff
@@ -897,6 +897,10 @@ class PipelineGraph:
             New config objects or overrides to apply to copies of the current
             config objects, with task labels as the keywords.
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         ValueError
```
```diff
@@ -1755,6 +1759,10 @@ class PipelineGraph:
             not considered part of the pipeline graph in other respects, but it
             does get written with other provenance datasets).
 
+        Returns
+        -------
+        None
+
         Raises
         ------
         lsst.daf.butler.MissingDatasetTypeError
```
lsst/pipe/base/quantum_graph/_common.py

```diff
@@ -453,8 +453,10 @@ class BaseQuantumGraphWriter:
         cdict_data: bytes | None = None,
         zstd_level: int = 10,
     ) -> Iterator[Self]:
-        uri = ResourcePath(uri)
+        uri = ResourcePath(uri, forceDirectory=False)
         address_writer = AddressWriter()
+        if uri.isLocal:
+            os.makedirs(uri.dirname().ospath, exist_ok=True)
         cdict = zstandard.ZstdCompressionDict(cdict_data) if cdict_data is not None else None
         compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
         with uri.open(mode="wb") as stream:
```
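The writer now normalizes the output URI as a file and pre-creates missing parent directories for local paths. A minimal sketch of the new behavior, using a hypothetical local path:

```python
import os

from lsst.resources import ResourcePath

# Hypothetical local output path; the directory "run1" may not exist yet.
uri = ResourcePath("/tmp/qg-demo/run1/graph.qg", forceDirectory=False)
if uri.isLocal:
    # Before this change the writer would fail with FileNotFoundError
    # when the parent directory was missing.
    os.makedirs(uri.dirname().ospath, exist_ok=True)
with uri.open(mode="wb") as stream:
    stream.write(b"\x00")  # placeholder payload
```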
lsst/pipe/base/quantum_graph/_predicted.py

```diff
@@ -110,6 +110,13 @@ if TYPE_CHECKING:
     from ..config import PipelineTaskConfig
     from ..graph import QgraphSummary, QuantumGraph
 
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _LOG = logging.getLogger(__name__)
 
 
```
lsst/pipe/base/quantum_graph/_provenance.py

```diff
@@ -94,6 +94,13 @@ from ._predicted import (
     PredictedQuantumGraphComponents,
 )
 
+# Sphinx needs imports for type annotations of base class members.
+if "sphinx" in sys.modules:
+    import zipfile  # noqa: F401
+
+    from ._multiblock import AddressReader, Decompressor  # noqa: F401
+
+
 _T = TypeVar("_T")
 
 LoopWrapper: TypeAlias = Callable[[Iterable[_T]], Iterable[_T]]
```
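The same guard is added to both `_predicted.py` and `_provenance.py`: imports normally confined to `TYPE_CHECKING` are repeated at runtime when Sphinx is the importer, so autodoc can resolve annotations on inherited members. A runnable sketch of the pattern, using only the stdlib `zipfile` import from the diff:

```python
import sys
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Static analysis sees this import; it costs nothing at runtime.
    import zipfile

if "sphinx" in sys.modules:
    # Sphinx resolves annotations at runtime, so repeat the import only
    # during documentation builds.
    import zipfile  # noqa: F401

def read_member(zf: "zipfile.ZipFile", name: str) -> bytes:
    """Hypothetical helper whose annotation autodoc must resolve."""
    with zf.open(name) as f:
        return f.read()
```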
```diff
@@ -186,6 +193,12 @@ class ProvenanceQuantumInfo(QuantumInfo):
     failure.
     """
 
+    metadata_id: uuid.UUID
+    """ID of this quantum's metadata dataset."""
+
+    log_id: uuid.UUID
+    """ID of this quantum's log dataset."""
+
 
 class ProvenanceInitQuantumInfo(TypedDict):
     """A typed dictionary that annotates the attributes of the NetworkX graph
```
```diff
@@ -212,6 +225,9 @@ class ProvenanceInitQuantumInfo(TypedDict):
     pipeline_node: TaskInitNode
    """Node in the pipeline graph for this task's init-only step."""
 
+    config_id: uuid.UUID
+    """ID of this task's config dataset."""
+
 
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""
```
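Together with `metadata_id` and `log_id` above, the new `config_id` field means each quantum node carries the IDs of its special outputs once the quantum has been read. A hedged sketch of the lookup (access goes through the private networkx view named in the diff; any public accessor may differ):

```python
import uuid

def special_output_ids(graph, quantum_id: uuid.UUID) -> tuple[uuid.UUID, uuid.UUID]:
    """Return (metadata_id, log_id) for a quantum that has already been read.

    ``graph`` is assumed to be a ProvenanceQuantumGraph populated via
    ProvenanceQuantumGraphReader.read_quanta; the keys only exist after the
    quantum itself has been read.
    """
    info = graph._quantum_only_xgraph.nodes[quantum_id]  # private attribute per the diff
    return info["metadata_id"], info["log_id"]
```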
```diff
@@ -646,6 +662,8 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             resource_usage=last_attempt.resource_usage,
             attempts=self.attempts,
         )
+        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
+        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for connection_name, dataset_ids in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
             for dataset_id in dataset_ids:
```
```diff
@@ -655,6 +673,30 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_ids in self.outputs.items():
             write_edge = task_node.get_output_edge(connection_name)
+            if connection_name == acc.METADATA_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_metadata,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["metadata_id"] = dataset_ids[0]
+            if connection_name == acc.LOG_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.add_node(
+                    dataset_ids[0],
+                    data_id=data_id,
+                    dataset_type_name=write_edge.dataset_type_name,
+                    pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                    run=graph.header.output_run,
+                    produced=last_attempt.status.has_log,
+                )
+                graph._datasets_by_type[write_edge.dataset_type_name][data_id] = dataset_ids[0]
+                graph._bipartite_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
+                graph._quantum_only_xgraph.nodes[self.quantum_id]["log_id"] = dataset_ids[0]
             for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(
                     self.quantum_id,
```
```diff
@@ -663,8 +705,6 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
                     # There can only be one pipeline edge for an output.
                     pipeline_edges=[write_edge],
                 )
-        graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
-        graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
         for dataset_id in graph._bipartite_xgraph.predecessors(self.quantum_id):
             for upstream_quantum_id in graph._bipartite_xgraph.predecessors(dataset_id):
                 graph._quantum_only_xgraph.add_edge(upstream_quantum_id, self.quantum_id)
```
```diff
@@ -803,6 +843,15 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 ).append(read_edge)
         for connection_name, dataset_id in self.outputs.items():
             write_edge = task_init_node.get_output_edge(connection_name)
+            graph._bipartite_xgraph.add_node(
+                dataset_id,
+                data_id=empty_data_id,
+                dataset_type_name=write_edge.dataset_type_name,
+                pipeline_node=graph.pipeline_graph.dataset_types[write_edge.dataset_type_name],
+                run=graph.header.output_run,
+                produced=True,
+            )
+            graph._datasets_by_type[write_edge.dataset_type_name][empty_data_id] = dataset_id
             graph._bipartite_xgraph.add_edge(
                 self.quantum_id,
                 dataset_id,
```
```diff
@@ -810,6 +859,8 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
                 # There can only be one pipeline edge for an output.
                 pipeline_edges=[write_edge],
             )
+            if write_edge.connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
+                graph._bipartite_xgraph.nodes[self.quantum_id]["config_id"] = dataset_id
         graph._init_quanta[self.task_label] = self.quantum_id
 
 # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
```
```diff
@@ -994,6 +1045,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         types in the pipeline graph are included, even if none of their
         datasets were loaded (i.e. nested mappings may be empty).
 
+        Reading a quantum also populates its log and metadata datasets.
+
         The returned object may be an internal dictionary; as the type
         annotation indicates, it should not be modified in place.
         """
```
```diff
@@ -1032,7 +1085,8 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         `ProvenanceQuantumGraphReader.read_quanta`) or datasets (via
         `ProvenanceQuantumGraphReader.read_datasets`) will load those nodes
         with full attributes and edges to adjacent nodes with no attributes.
-        Loading quanta necessary to populate edge attributes.
+        Loading quanta is necessary to populate edge attributes.
+        Reading a quantum also populates its log and metadata datasets.
 
         Node attributes are described by the
         `ProvenanceQuantumInfo`, `ProvenanceInitQuantumInfo`, and
```
```diff
@@ -1104,10 +1158,6 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         """Construct an `astropy.table.Table` with counts for each exception
         type raised by each task.
 
-        At present this only includes information from partial-outputs-error
-        successes, since exception information for failures is not tracked.
-        This may change in the future.
-
         Returns
         -------
         table : `astropy.table.Table`
```
```diff
@@ -1294,19 +1344,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # also have other outstanding reference holders).
                     continue
                 node._add_to_graph(self.graph)
-
-
-
-
-
-
-
-
-
-
-
-
-
+        else:
+            with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
+                for node_id_or_index in nodes:
+                    address_row = self.address_reader.find(node_id_or_index)
+                    if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
+                        # Use the old node to reduce memory usage (since it
+                        # might also have other outstanding reference holders).
+                        continue
+                    node = mb_reader.read_model(
+                        address_row.addresses[address_index], model_type, self.decompressor
+                    )
+                    if node is not None:
+                        node._add_to_graph(self.graph)
 
     def fetch_logs(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[ButlerLogRecords | None]]:
         """Fetch log datasets.
```
```diff
@@ -1588,7 +1638,7 @@ class ProvenanceQuantumGraphWriter:
         """
         predicted_quantum = self._predicted_quanta[quantum_id]
         provenance_models = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs,
+            predicted_quantum, metadata, logs, incomplete=False
         )
         scan_data = provenance_models.to_scan_data(predicted_quantum, compressor=self.compressor)
         self.write_scan_data(scan_data)
```
```diff
@@ -1665,8 +1715,8 @@ class ProvenanceQuantumScanStatus(enum.Enum):
     enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
     to stop trying for now.
 
-    This state means
-
+    This state means `ProvenanceQuantumScanModels.from_metadata_and_logs` must
+    be run again with ``incomplete=False``.
     """
 
     SUCCESSFUL = enum.auto()
```
```diff
@@ -1721,7 +1771,7 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         logs: ButlerLogRecords | None,
         *,
-
+        incomplete: bool = False,
     ) -> ProvenanceQuantumScanModels:
         """Construct provenance information from task metadata and logs.
 
```
```diff
@@ -1733,8 +1783,8 @@ class ProvenanceQuantumScanModels:
             Task metadata.
         logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
             Task logs.
-
-            If `
+        incomplete : `bool`, optional
+            If `True`, treat execution failures as possibly-incomplete quanta
             and do not fully process them; instead just set the status to
             `ProvenanceQuantumScanStatus.ABANDONED` and return.
 
```
```diff
@@ -1752,8 +1802,8 @@ class ProvenanceQuantumScanModels:
         """
         self = ProvenanceQuantumScanModels(predicted.quantum_id)
         last_attempt = ProvenanceQuantumAttemptModel()
-        self._process_logs(predicted, logs, last_attempt,
-        self._process_metadata(predicted, metadata, last_attempt,
+        self._process_logs(predicted, logs, last_attempt, incomplete=incomplete)
+        self._process_metadata(predicted, metadata, last_attempt, incomplete=incomplete)
         if self.status is ProvenanceQuantumScanStatus.ABANDONED:
             return self
         self._reconcile_attempts(last_attempt)
```
```diff
@@ -1766,15 +1816,15 @@ class ProvenanceQuantumScanModels:
         logs: ButlerLogRecords | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-
+        incomplete: bool,
     ) -> None:
         (predicted_log_dataset,) = predicted.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
         if logs is None:
             self.output_existence[predicted_log_dataset.dataset_id] = False
-            if
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             # Set the attempt's run status to FAILED, since the default is
             # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
```
```diff
@@ -1832,15 +1882,15 @@ class ProvenanceQuantumScanModels:
         metadata: TaskMetadata | None,
         last_attempt: ProvenanceQuantumAttemptModel,
         *,
-
+        incomplete: bool,
     ) -> None:
         (predicted_metadata_dataset,) = predicted.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
         if metadata is None:
             self.output_existence[predicted_metadata_dataset.dataset_id] = False
-            if
-                self.status = ProvenanceQuantumScanStatus.FAILED
-            else:
+            if incomplete:
                 self.status = ProvenanceQuantumScanStatus.ABANDONED
+            else:
+                self.status = ProvenanceQuantumScanStatus.FAILED
         else:
             self.status = ProvenanceQuantumScanStatus.SUCCESSFUL
             self.output_existence[predicted_metadata_dataset.dataset_id] = True
```
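Both `_process_logs` and `_process_metadata` now branch the same way on the new flag: while the run may still be retried (`incomplete=True`), a missing log or metadata output marks the quantum ABANDONED rather than FAILED. A distilled, runnable sketch of the decision (the enum is a stand-in for `ProvenanceQuantumScanStatus`):

```python
from enum import Enum, auto

class ScanStatus(Enum):  # stand-in for ProvenanceQuantumScanStatus
    SUCCESSFUL = auto()
    FAILED = auto()
    ABANDONED = auto()

def status_for_missing_output(incomplete: bool) -> ScanStatus:
    # Mirrors the branch added to both methods: a missing output is only
    # final (FAILED) once the caller asserts the run is complete.
    return ScanStatus.ABANDONED if incomplete else ScanStatus.FAILED

assert status_for_missing_output(incomplete=True) is ScanStatus.ABANDONED
assert status_for_missing_output(incomplete=False) is ScanStatus.FAILED
```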
```diff
@@ -1875,7 +1925,7 @@ class ProvenanceQuantumScanModels:
                 # But we found the metadata! Either that hard error happened
                 # at a very unlucky time (in between those two writes), or
                 # something even weirder happened.
-                self.attempts[-1].status = QuantumAttemptStatus.
+                self.attempts[-1].status = QuantumAttemptStatus.ABORTED_SUCCESS
             else:
                 self.attempts[-1].status = QuantumAttemptStatus.FAILED
             if len(self.metadata.attempts) < len(self.attempts):
```
lsst/pipe/base/quantum_graph/aggregator/_communicators.py

```diff
@@ -318,6 +318,12 @@ Report: TypeAlias = (
 )
 
 
+def _disable_resources_parallelism() -> None:
+    os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
+    os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
+    os.environ["LSST_S3_USE_THREADS"] = "False"
+
+
 class SupervisorCommunicator:
     """A helper object that lets the supervisor direct the other workers.
 
```
```diff
@@ -364,7 +370,7 @@ class SupervisorCommunicator:
         # starts its shutdown.
         self._write_requests: (
             Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
-        ) = context.make_queue() if config.
+        ) = context.make_queue() if config.is_writing_provenance else None
         # All other workers use this queue to send many different kinds of
         # reports the supervisor. The supervisor waits for a _DONE sentinal
         # from each worker before it finishes its shutdown.
```
```diff
@@ -433,6 +439,7 @@ class SupervisorCommunicator:
         self._expect_empty_queue(self._compression_dict)
 
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.progress.__enter__()
         # We make the low-level logger in __enter__ instead of __init__ only
         # because that's the pattern used by true workers (where it matters).
```
```diff
@@ -581,6 +588,7 @@ class WorkerCommunicator:
         self._cancel_event = supervisor._cancel_event
 
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.log = make_worker_log(self.name, self.config)
         self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
         self._exit_stack = ExitStack().__enter__()
```
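Both communicator types call the new helper first thing in `__enter__`, presumably so every aggregator process pins `lsst.resources` to serial I/O before its own worker pools spawn. A runnable sketch of the idea (the `Worker` class is illustrative):

```python
import os

def _disable_resources_parallelism() -> None:
    # Values mirror the diff: one transfer worker, no custom executor,
    # serial S3 I/O.
    os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
    os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
    os.environ["LSST_S3_USE_THREADS"] = "False"

class Worker:
    """Illustrative stand-in for the communicator classes in the diff."""

    def __enter__(self) -> "Worker":
        _disable_resources_parallelism()  # set env before any I/O starts
        return self

    def __exit__(self, *exc: object) -> None:
        return None

with Worker():
    assert os.environ["LSST_RESOURCES_NUM_WORKERS"] == "1"
```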
lsst/pipe/base/quantum_graph/aggregator/_config.py

```diff
@@ -29,6 +29,8 @@ from __future__ import annotations
 
 __all__ = ("AggregatorConfig",)
 
+import sys
+from typing import TYPE_CHECKING, Any
 
 import pydantic
 
```
```diff
@@ -60,11 +62,13 @@ class AggregatorConfig(pydantic.BaseModel):
     n_processes: int = 1
     """Number of processes the scanner should use."""
 
-
-    """If `True`,
-
-
-
+    incomplete: bool = False
+    """If `True`, do not expect the graph to have been executed to completion
+    yet, and only ingest the outputs of successful quanta.
+
+    This disables writing the provenance quantum graph, since this is likely to
+    be wasted effort that just complicates a follow-up run with
+    ``incomplete=False`` later.
     """
 
     defensive_ingest: bool = False
```
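The new field interacts with `output_path` through the `is_writing_provenance` property added further down this file. A runnable stand-in reproducing just those pieces (field defaults follow the diff; the real model has many more fields):

```python
import pydantic

class AggregatorConfigSketch(pydantic.BaseModel):
    """Stand-in reproducing only the fields behind is_writing_provenance."""

    output_path: str | None = None  # assumed default; the field itself is real
    incomplete: bool = False
    promise_ingest_graph: bool = False

    @property
    def is_writing_provenance(self) -> bool:
        # Verbatim logic from the property added in this release.
        return self.output_path is not None and not self.incomplete

cfg = AggregatorConfigSketch(output_path="provenance.qg", incomplete=True)
assert not cfg.is_writing_provenance  # incomplete runs skip provenance output
```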
```diff
@@ -95,11 +99,10 @@ class AggregatorConfig(pydantic.BaseModel):
     """
 
     dry_run: bool = False
-    """If `True`, do not actually perform any
-    ingests.
+    """If `True`, do not actually perform any central butler ingests.
 
-    Most log messages concerning
-
+    Most log messages concerning ingests will still be emitted in order to
+    provide a better emulation of a real run.
     """
 
     interactive_status: bool = False
```
```diff
@@ -137,3 +140,69 @@ class AggregatorConfig(pydantic.BaseModel):
     """Enable support for storage classes by created by the
     lsst.pipe.base.tests.mocks package.
     """
+
+    promise_ingest_graph: bool = False
+    """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
+    will be run later to ingest metadata/log/config datasets, and will not
+    ingest them itself. This means that if `~.ingest_graph.ingest_graph` is
+    not run, those files will be abandoned in the butler storage root without
+    being present in the butler database, but it will speed up both processes.
+
+    It is *usually* safe to build a quantum graph for downstream processing
+    before or while running `~.ingest_graph.ingest_graph`, because
+    metadata/log/config datasets are rarely used as inputs. To check, use
+    ``pipetask build ... --show inputs`` to show the overall-inputs to the
+    graph and scan for these dataset types.
+    """
+
+    @property
+    def is_writing_provenance(self) -> bool:
+        """Whether the aggregator is configured to write the provenance quantum
+        graph.
+        """
+        return self.output_path is not None and not self.incomplete
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
```
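A hedged sketch of the two-phase flow that `promise_ingest_graph` enables. `aggregate_graph`'s parameters are taken from the `_supervisor.py` hunks further down; the import locations and the exact entry point of the new `ingest_graph` module are assumptions:

```python
# Assumed imports; the public re-export points may differ.
from lsst.pipe.base.quantum_graph.aggregator._config import AggregatorConfig
from lsst.pipe.base.quantum_graph.aggregator._supervisor import aggregate_graph

# Phase 1: scan and ingest regular outputs now; metadata/log/config files
# stay in the butler storage root but out of the registry for the moment.
# (Any additional required configuration is elided.)
config = AggregatorConfig(promise_ingest_graph=True)
aggregate_graph("predicted.qg", "/repo/butler.yaml", config)

# Phase 2, possibly while downstream graph building is already underway:
# the new lsst.pipe.base.quantum_graph.ingest_graph module ingests the
# promised metadata/log/config datasets; see that module for its entry point.
```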
lsst/pipe/base/quantum_graph/aggregator/_ingester.py

```diff
@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError
 
 from ...pipeline_graph import TaskImportMode
 from .._common import DatastoreName
-from .._predicted import
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
 from ._communicators import IngesterCommunicator
 
 
```
```diff
@@ -170,7 +170,7 @@ class Ingester(AbstractContextManager):
         for ingest_request in self.comms.poll():
             self.n_producers_pending += 1
             self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
-            self.
+            self.update_outputs_pending(refs=ingest_request.refs, records=ingest_request.records)
             if self.n_datasets_pending > self.comms.config.ingest_batch_size:
                 self.ingest()
         self.comms.log.info("All ingest requests received.")
```
```diff
@@ -266,31 +266,32 @@ class Ingester(AbstractContextManager):
         else:
             del self.records_pending[datastore_name]
 
-    def
-        self,
+    def update_outputs_pending(
+        self,
+        refs: list[DatasetRef],
+        records: dict[DatastoreName, DatastoreRecordData],
     ) -> None:
         """Add an ingest request to the pending-ingest data structures.
 
         Parameters
         ----------
-
-            Registry information about
+        refs : `list` [ `lsst.daf.butler.DatasetRef` ]
+            Registry information about regular quantum-output datasets.
         records : `dict` [ `str`, \
             `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
             Datastore information about the datasets.
         """
-        n_given = len(
+        n_given = len(refs)
         if self.already_ingested is not None:
-
-            kept = {
+            refs = [ref for ref in refs if ref.id not in self.already_ingested]
+            kept = {ref.id for ref in refs}
             self.n_datasets_skipped += n_given - len(kept)
             records = {
                 datastore_name: filtered_records
                 for datastore_name, original_records in records.items()
                 if (filtered_records := original_records.subset(kept)) is not None
             }
-        for
-            ref = self.predicted.make_dataset_ref(dataset)
+        for ref in refs:
             self.refs_pending[ref.datasetType.dimensions].append(ref)
         for datastore_name, datastore_records in records.items():
             if (existing_records := self.records_pending.get(datastore_name)) is not None:
```
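The rewritten dedup logic now works directly on `DatasetRef` objects. A distilled, runnable sketch with simple stand-ins for the butler types:

```python
import uuid
from dataclasses import dataclass

@dataclass
class Ref:
    """Stand-in for lsst.daf.butler.DatasetRef (only the id is needed here)."""

    id: uuid.UUID

def filter_already_ingested(
    refs: list[Ref], already_ingested: set[uuid.UUID]
) -> tuple[list[Ref], set[uuid.UUID]]:
    # Mirrors update_outputs_pending: keep only refs not yet ingested and
    # collect the surviving IDs so datastore records can be subset to match.
    kept_refs = [ref for ref in refs if ref.id not in already_ingested]
    kept_ids = {ref.id for ref in kept_refs}
    return kept_refs, kept_ids

a, b = uuid.uuid4(), uuid.uuid4()
kept_refs, kept_ids = filter_already_ingested([Ref(a), Ref(b)], {a})
assert kept_ids == {b} and len(kept_refs) == 1
```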
lsst/pipe/base/quantum_graph/aggregator/_scanner.py

```diff
@@ -223,7 +223,7 @@ class Scanner(AbstractContextManager):
         logs = self._read_log(predicted_quantum)
         metadata = self._read_metadata(predicted_quantum)
         result = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs,
+            predicted_quantum, metadata, logs, incomplete=self.comms.config.incomplete
         )
         if result.status is ProvenanceQuantumScanStatus.ABANDONED:
             self.comms.log.debug("Abandoning scan for failed quantum %s.", quantum_id)
```
```diff
@@ -233,7 +233,7 @@ class Scanner(AbstractContextManager):
             if predicted_output.dataset_id not in result.output_existence:
                 result.output_existence[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
         to_ingest = self._make_ingest_request(predicted_quantum, result)
-        if self.comms.config.
+        if self.comms.config.is_writing_provenance:
             to_write = result.to_scan_data(predicted_quantum, compressor=self.compressor)
             self.comms.request_write(to_write)
         self.comms.request_ingest(to_ingest)
```
```diff
@@ -261,15 +261,23 @@ class Scanner(AbstractContextManager):
         predicted_outputs_by_id = {
             d.dataset_id: d for d in itertools.chain.from_iterable(predicted_quantum.outputs.values())
         }
-        to_ingest_predicted: list[PredictedDatasetModel] = []
         to_ingest_refs: list[DatasetRef] = []
+        to_ignore: set[uuid.UUID] = set()
+        if self.comms.config.promise_ingest_graph:
+            if result.status is ProvenanceQuantumScanStatus.INIT:
+                if predicted_quantum.task_label:  # i.e. not the 'packages' producer
+                    to_ignore.add(
+                        predicted_quantum.outputs[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME][0].dataset_id
+                    )
+            else:
+                to_ignore.add(predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME][0].dataset_id)
+                to_ignore.add(predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME][0].dataset_id)
         for dataset_id, was_produced in result.output_existence.items():
-            if was_produced:
+            if was_produced and dataset_id not in to_ignore:
                 predicted_output = predicted_outputs_by_id[dataset_id]
-                to_ingest_predicted.append(predicted_output)
                 to_ingest_refs.append(self.reader.components.make_dataset_ref(predicted_output))
         to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
-        return IngestRequest(result.quantum_id,
+        return IngestRequest(result.quantum_id, to_ingest_refs, to_ingest_records)
 
     def _read_metadata(self, predicted_quantum: PredictedQuantumDatasetsModel) -> TaskMetadata | None:
         """Attempt to read the metadata dataset for a quantum.
```
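Distilling the new `to_ignore` logic: under `promise_ingest_graph`, init quanta withhold their config dataset (except the label-less 'packages' producer, which withholds nothing) and regular quanta withhold metadata and logs. A runnable sketch (connection names are stand-ins for the `acc` constants):

```python
def promised_connections(is_init: bool, has_task_label: bool) -> set[str]:
    # Connection names below are stand-ins for the acc.* constants.
    if is_init:
        # The label-less init producer (the 'packages' quantum) withholds
        # nothing; every other init quantum withholds its config dataset.
        return {"config"} if has_task_label else set()
    # Regular quanta withhold their metadata and log outputs.
    return {"metadata", "log"}

assert promised_connections(is_init=True, has_task_label=True) == {"config"}
assert promised_connections(is_init=False, has_task_label=True) == {"metadata", "log"}
```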
lsst/pipe/base/quantum_graph/aggregator/_structs.py

```diff
@@ -32,10 +32,10 @@ __all__ = ("IngestRequest", "ScanReport")
 import dataclasses
 import uuid
 
+from lsst.daf.butler import DatasetRef
 from lsst.daf.butler.datastore.record_data import DatastoreRecordData
 
 from .._common import DatastoreName
-from .._predicted import PredictedDatasetModel
 from .._provenance import ProvenanceQuantumScanStatus
 
 
```
```diff
@@ -57,11 +57,11 @@ class IngestRequest:
     producer_id: uuid.UUID
     """ID of the quantum that produced these datasets."""
 
-
+    refs: list[DatasetRef]
     """Registry information about the datasets."""
 
     records: dict[DatastoreName, DatastoreRecordData]
     """Datastore information about the datasets."""
 
     def __bool__(self) -> bool:
-        return bool(self.
+        return bool(self.refs or self.records)
```
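`IngestRequest` now carries fully-formed `DatasetRef` objects instead of predicted-dataset models, and stays falsy when empty so no-op requests can be skipped. A runnable stand-in:

```python
import dataclasses
import uuid

@dataclasses.dataclass
class IngestRequestSketch:
    """Stand-in for IngestRequest with simplified field types."""

    producer_id: uuid.UUID
    refs: list     # list[DatasetRef] in the real class
    records: dict  # dict[DatastoreName, DatastoreRecordData] in the real class

    def __bool__(self) -> bool:
        # Verbatim logic from the diff: a request with nothing to ingest
        # is falsy, so callers can skip queueing it.
        return bool(self.refs or self.records)

empty = IngestRequestSketch(uuid.uuid4(), [], {})
assert not empty
```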
lsst/pipe/base/quantum_graph/aggregator/_supervisor.py

```diff
@@ -117,6 +117,18 @@ class Supervisor:
             self.comms.request_scan(ready_set.pop())
         for scan_return in self.comms.poll():
             self.handle_report(scan_return)
+        if self.comms.config.incomplete:
+            quantum_or_quanta = "quanta" if self.n_abandoned != 1 else "quantum"
+            self.comms.progress.log.info(
+                "%d %s incomplete/failed abandoned; re-run with incomplete=False to finish.",
+                self.n_abandoned,
+                quantum_or_quanta,
+            )
+        self.comms.progress.log.info(
+            "Scanning complete after %0.1fs; waiting for workers to finish.",
+            self.comms.progress.elapsed_time,
+        )
+        self.comms.wait_for_workers_to_finish()
 
     def handle_report(self, scan_report: ScanReport) -> None:
         """Handle a report from a scanner.
```
```diff
@@ -134,7 +146,7 @@
             self.comms.log.debug("Scan complete for %s: quantum failed.", scan_report.quantum_id)
             blocked_quanta = self.walker.fail(scan_report.quantum_id)
             for blocked_quantum_id in blocked_quanta:
-                if self.comms.config.
+                if self.comms.config.is_writing_provenance:
                     self.comms.request_write(
                         ProvenanceQuantumScanData(
                             blocked_quantum_id, status=ProvenanceQuantumScanStatus.BLOCKED
```
|
|
|
172
184
|
writer: Worker | None = None
|
|
173
185
|
with SupervisorCommunicator(log, config.n_processes, ctx, config) as comms:
|
|
174
186
|
comms.progress.log.verbose("Starting workers.")
|
|
175
|
-
if config.
|
|
187
|
+
if config.is_writing_provenance:
|
|
176
188
|
writer_comms = WriterCommunicator(comms)
|
|
177
189
|
writer = ctx.make_worker(
|
|
178
190
|
target=Writer.run,
|
|
@@ -198,17 +210,6 @@ def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorCon
|
|
|
198
210
|
ingester.start()
|
|
199
211
|
supervisor = Supervisor(predicted_path, comms)
|
|
200
212
|
supervisor.loop()
|
|
201
|
-
log.info(
|
|
202
|
-
"Scanning complete after %0.1fs; waiting for workers to finish.",
|
|
203
|
-
comms.progress.elapsed_time,
|
|
204
|
-
)
|
|
205
|
-
comms.wait_for_workers_to_finish()
|
|
206
|
-
if supervisor.n_abandoned:
|
|
207
|
-
raise RuntimeError(
|
|
208
|
-
f"{supervisor.n_abandoned} {'quanta' if supervisor.n_abandoned > 1 else 'quantum'} "
|
|
209
|
-
"abandoned because they did not succeed. Re-run with assume_complete=True after all retry "
|
|
210
|
-
"attempts have been exhausted."
|
|
211
|
-
)
|
|
212
213
|
for w in scanners:
|
|
213
214
|
w.join()
|
|
214
215
|
ingester.join()
|