PyPI - lsst-pipe-base - Versions diffs - 30.2026.200__py3-none-any.whl → 30.2026.400__py3-none-any.whl - Mend

lsst-pipe-base 30.2026.200__py3-none-any.whl → 30.2026.400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

lsst/pipe/base/quantum_graph/aggregator/_communicators.py CHANGED Viewed

@@ -51,16 +51,17 @@ import time
 import uuid
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterable, Iterator
-from contextlib import AbstractContextManager, ExitStack, contextmanager
+from contextlib import ExitStack
 from traceback import format_exception
 from types import TracebackType
 from typing import Any, Literal, Self, TypeAlias, TypeVar, Union
-from lsst.utils.logging import VERBOSE, LsstLogAdapter
+from lsst.utils.logging import LsstLogAdapter
+from .._provenance import ProvenanceQuantumScanData
 from ._config import AggregatorConfig
 from ._progress import ProgressManager, make_worker_log
-from ._structs import IngestRequest, ScanReport, WriteRequest
+from ._structs import IngestRequest, ScanReport
 _T = TypeVar("_T")
@@ -317,6 +318,12 @@ Report: TypeAlias = (
 )
+def _disable_resources_parallelism() -> None:
+    os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
+    os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
+    os.environ["LSST_S3_USE_THREADS"] = "False"
 class SupervisorCommunicator:
     """A helper object that lets the supervisor direct the other workers.
@@ -361,9 +368,9 @@ class SupervisorCommunicator:
         # scanner and the supervisor send one sentinal when done, and the
         # writer waits for (n_scanners + 1) sentinals to arrive before it
         # starts its shutdown.
-        self._write_requests: Queue[WriteRequest | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None = (
-            context.make_queue() if config.output_path is not None else None
-        )
+        self._write_requests: (
+            Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
+        ) = context.make_queue() if config.is_writing_provenance else None
         # All other workers use this queue to send many different kinds of
         # reports the supervisor.  The supervisor waits for a _DONE sentinal
         # from each worker before it finishes its shutdown.
@@ -432,6 +439,7 @@ class SupervisorCommunicator:
         self._expect_empty_queue(self._compression_dict)
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.progress.__enter__()
         # We make the low-level logger in __enter__ instead of __init__ only
         # because that's the pattern used by true workers (where it matters).
@@ -461,12 +469,12 @@ class SupervisorCommunicator:
         """
         self._scan_requests.put(_ScanRequest(quantum_id), block=False)
-    def request_write(self, request: WriteRequest) -> None:
+    def request_write(self, request: ProvenanceQuantumScanData) -> None:
         """Send a request to the writer to write provenance for the given scan.
         Parameters
         ----------
-        request : `WriteRequest`
+        request : `ProvenanceQuantumScanData`
             Information from scanning a quantum (or knowing you don't have to,
             in the case of blocked quanta).
         """
@@ -580,6 +588,7 @@ class WorkerCommunicator:
         self._cancel_event = supervisor._cancel_event
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.log = make_worker_log(self.name, self.config)
         self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
         self._exit_stack = ExitStack().__enter__()
@@ -621,6 +630,11 @@ class WorkerCommunicator:
         self._exit_stack.__exit__(exc_type, exc_value, traceback)
         return True
+    @property
+    def exit_stack(self) -> ExitStack:
+        """A `contextlib.ExitStack` tied to the communicator."""
+        return self._exit_stack
     def log_progress(self, level: int, message: str) -> None:
         """Send a high-level log message to the supervisor.
@@ -633,44 +647,6 @@ class WorkerCommunicator:
         """
         self._reports.put(_ProgressLog(message=message, level=level), block=False)
-    def enter(
-        self,
-        cm: AbstractContextManager[_T],
-        on_close: str | None = None,
-        level: int = VERBOSE,
-        is_progress_log: bool = False,
-    ) -> _T:
-        """Enter a context manager that will be exited when the communicator's
-        context is exited.
-        Parameters
-        ----------
-        cm : `contextlib.AbstractContextManager`
-            A context manager to enter.
-        on_close : `str`, optional
-            A log message to emit (on the worker's logger) just before the
-            given context manager is exited.  This can be used to indicate
-            what's going on when an ``__exit__`` implementation has a lot of
-            work to do (e.g. moving a large file into a zip archive).
-        level : `int`, optional
-            Level for the ``on_close`` log message.
-        is_progress_log : `bool`, optional
-            If `True`, send the ``on_close`` message to the supervisor via
-            `log_progress` as well as the worker's logger.
-        """
-        if on_close is None:
-            return self._exit_stack.enter_context(cm)
-        @contextmanager
-        def wrapper() -> Iterator[_T]:
-            with cm as result:
-                yield result
-                self.log.log(level, on_close)
-                if is_progress_log:
-                    self.log_progress(level, on_close)
-        return self._exit_stack.enter_context(wrapper())
     def check_for_cancel(self) -> None:
         """Check for a cancel signal from the supervisor and raise
         `FatalWorkerError` if it is present.
@@ -728,12 +704,12 @@ class ScannerCommunicator(WorkerCommunicator):
         else:
             self._reports.put(_IngestReport(1), block=False)
-    def request_write(self, request: WriteRequest) -> None:
+    def request_write(self, request: ProvenanceQuantumScanData) -> None:
         """Ask the writer to write provenance for a quantum.
         Parameters
         ----------
-        request : `WriteRequest`
+        request : `ProvenanceQuantumScanData`
             Result of scanning a quantum.
         """
         assert self._write_requests is not None, "Writer should not be used if writing is disabled."
@@ -913,12 +889,12 @@ class WriterCommunicator(WorkerCommunicator):
         self._reports.put(_Sentinel.WRITER_DONE, block=False)
         return result
-    def poll(self) -> Iterator[WriteRequest]:
+    def poll(self) -> Iterator[ProvenanceQuantumScanData]:
         """Poll for writer requests from the scanner workers and supervisor.
         Yields
         ------
-        request : `WriteRequest`
+        request : `ProvenanceQuantumScanData`
             The result of a quantum scan.
         Notes

lsst/pipe/base/quantum_graph/aggregator/_config.py CHANGED Viewed

@@ -29,6 +29,8 @@ from __future__ import annotations
 __all__ = ("AggregatorConfig",)
+import sys
+from typing import TYPE_CHECKING, Any
 import pydantic
@@ -60,11 +62,13 @@ class AggregatorConfig(pydantic.BaseModel):
     n_processes: int = 1
     """Number of processes the scanner should use."""
-    assume_complete: bool = True
-    """If `True`, the aggregator can assume all quanta have run to completion
-    (including any automatic retries).  If `False`, only successes can be
-    considered final, and quanta that appear to have failed or to have not been
-    executed are ignored.
+    incomplete: bool = False
+    """If `True`, do not expect the graph to have been executed to completion
+    yet, and only ingest the outputs of successful quanta.
+    This disables writing the provenance quantum graph, since this is likely to
+    be wasted effort that just complicates a follow-up run with
+    ``incomplete=False`` later.
     """
     defensive_ingest: bool = False
@@ -95,11 +99,10 @@ class AggregatorConfig(pydantic.BaseModel):
     """
     dry_run: bool = False
-    """If `True`, do not actually perform any deletions or central butler
-    ingests.
+    """If `True`, do not actually perform any central butler ingests.
-    Most log messages concerning deletions and ingests will still be emitted in
-    order to provide a better emulation of a real run.
+    Most log messages concerning ingests will still be emitted in order to
+    provide a better emulation of a real run.
     """
     interactive_status: bool = False
@@ -137,3 +140,69 @@ class AggregatorConfig(pydantic.BaseModel):
     """Enable support for storage classes by created by the
     lsst.pipe.base.tests.mocks package.
     """
+    promise_ingest_graph: bool = False
+    """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
+    will be run later to ingest metadata/log/config datasets, and will not
+    ingest them itself.  This means that if `~.ingest_graph.ingest_graph` is
+    not run, those files will be abandoned in the butler storage root without
+    being present in the butler database, but it will speed up both processes.
+    It is *usually* safe to build a quantum graph for downstream processing
+    before or while running `~.ingest_graph.ingest_graph`, because
+    metadata/log/config datasets are rarely used as inputs.  To check, use
+    ``pipetask build ... --show inputs`` to show the overall-inputs to the
+    graph and scan for these dataset types.
+    """
+    @property
+    def is_writing_provenance(self) -> bool:
+        """Whether the aggregator is configured to write the provenance quantum
+        graph.
+        """
+        return self.output_path is not None and not self.incomplete
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)

lsst/pipe/base/quantum_graph/aggregator/_ingester.py CHANGED Viewed

@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError
 from ...pipeline_graph import TaskImportMode
 from .._common import DatastoreName
-from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
 from ._communicators import IngesterCommunicator
@@ -170,7 +170,7 @@ class Ingester(AbstractContextManager):
             for ingest_request in self.comms.poll():
                 self.n_producers_pending += 1
                 self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
-                self.update_pending(ingest_request.datasets, ingest_request.records)
+                self.update_outputs_pending(refs=ingest_request.refs, records=ingest_request.records)
                 if self.n_datasets_pending > self.comms.config.ingest_batch_size:
                     self.ingest()
             self.comms.log.info("All ingest requests received.")
@@ -266,31 +266,32 @@ class Ingester(AbstractContextManager):
             else:
                 del self.records_pending[datastore_name]
-    def update_pending(
-        self, datasets: list[PredictedDatasetModel], records: dict[DatastoreName, DatastoreRecordData]
+    def update_outputs_pending(
+        self,
+        refs: list[DatasetRef],
+        records: dict[DatastoreName, DatastoreRecordData],
     ) -> None:
         """Add an ingest request to the pending-ingest data structures.
         Parameters
         ----------
-        datasets : `list` [ `PredictedDatasetModel` ]
-            Registry information about the datasets.
+        refs : `list` [ `lsst.daf.butler.DatasetRef` ]
+            Registry information about regular quantum-output datasets.
         records : `dict` [ `str`, \
                 `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
             Datastore information about the datasets.
         """
-        n_given = len(datasets)
+        n_given = len(refs)
         if self.already_ingested is not None:
-            datasets = [d for d in datasets if d.dataset_id not in self.already_ingested]
-            kept = {d.dataset_id for d in datasets}
+            refs = [ref for ref in refs if ref.id not in self.already_ingested]
+            kept = {ref.id for ref in refs}
             self.n_datasets_skipped += n_given - len(kept)
             records = {
                 datastore_name: filtered_records
                 for datastore_name, original_records in records.items()
                 if (filtered_records := original_records.subset(kept)) is not None
             }
-        for dataset in datasets:
-            ref = self.predicted.make_dataset_ref(dataset)
+        for ref in refs:
             self.refs_pending[ref.datasetType.dimensions].append(ref)
         for datastore_name, datastore_records in records.items():
             if (existing_records := self.records_pending.get(datastore_name)) is not None:

lsst-pipe-base 30.2026.200__py3-none-any.whl → 30.2026.400__py3-none-any.whl

lsst-pipe-base 30.2026.200__py3-none-any.whl → 30.2026.400__py3-none-any.whl