lsst-pipe-base 30.2026.200__py3-none-any.whl → 30.2026.400__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. lsst/pipe/base/_instrument.py +10 -12
  2. lsst/pipe/base/_status.py +29 -10
  3. lsst/pipe/base/automatic_connection_constants.py +9 -1
  4. lsst/pipe/base/cli/cmd/__init__.py +16 -2
  5. lsst/pipe/base/cli/cmd/commands.py +42 -4
  6. lsst/pipe/base/connectionTypes.py +72 -160
  7. lsst/pipe/base/connections.py +3 -6
  8. lsst/pipe/base/execution_reports.py +0 -5
  9. lsst/pipe/base/log_capture.py +8 -4
  10. lsst/pipe/base/log_on_close.py +79 -0
  11. lsst/pipe/base/mp_graph_executor.py +51 -15
  12. lsst/pipe/base/pipeline.py +3 -4
  13. lsst/pipe/base/pipelineIR.py +0 -6
  14. lsst/pipe/base/pipelineTask.py +5 -7
  15. lsst/pipe/base/pipeline_graph/_edges.py +19 -7
  16. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
  17. lsst/pipe/base/quantum_graph/_common.py +7 -4
  18. lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
  19. lsst/pipe/base/quantum_graph/_predicted.py +111 -10
  20. lsst/pipe/base/quantum_graph/_provenance.py +727 -26
  21. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +26 -50
  22. lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
  23. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
  24. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +48 -234
  25. lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
  26. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +24 -18
  27. lsst/pipe/base/quantum_graph/aggregator/_writer.py +33 -350
  28. lsst/pipe/base/quantum_graph/formatter.py +171 -0
  29. lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
  30. lsst/pipe/base/quantum_graph_executor.py +116 -13
  31. lsst/pipe/base/quantum_provenance_graph.py +17 -2
  32. lsst/pipe/base/separable_pipeline_executor.py +18 -2
  33. lsst/pipe/base/single_quantum_executor.py +59 -41
  34. lsst/pipe/base/struct.py +4 -0
  35. lsst/pipe/base/version.py +1 -1
  36. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
  37. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +45 -42
  38. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
  39. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
  40. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
  41. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
  42. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
  43. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
  44. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
  45. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
@@ -51,16 +51,17 @@ import time
51
51
  import uuid
52
52
  from abc import ABC, abstractmethod
53
53
  from collections.abc import Callable, Iterable, Iterator
54
- from contextlib import AbstractContextManager, ExitStack, contextmanager
54
+ from contextlib import ExitStack
55
55
  from traceback import format_exception
56
56
  from types import TracebackType
57
57
  from typing import Any, Literal, Self, TypeAlias, TypeVar, Union
58
58
 
59
- from lsst.utils.logging import VERBOSE, LsstLogAdapter
59
+ from lsst.utils.logging import LsstLogAdapter
60
60
 
61
+ from .._provenance import ProvenanceQuantumScanData
61
62
  from ._config import AggregatorConfig
62
63
  from ._progress import ProgressManager, make_worker_log
63
- from ._structs import IngestRequest, ScanReport, WriteRequest
64
+ from ._structs import IngestRequest, ScanReport
64
65
 
65
66
  _T = TypeVar("_T")
66
67
 
@@ -317,6 +318,12 @@ Report: TypeAlias = (
317
318
  )
318
319
 
319
320
 
321
+ def _disable_resources_parallelism() -> None:
322
+ os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
323
+ os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
324
+ os.environ["LSST_S3_USE_THREADS"] = "False"
325
+
326
+
320
327
  class SupervisorCommunicator:
321
328
  """A helper object that lets the supervisor direct the other workers.
322
329
 
@@ -361,9 +368,9 @@ class SupervisorCommunicator:
361
368
  # scanner and the supervisor send one sentinal when done, and the
362
369
  # writer waits for (n_scanners + 1) sentinals to arrive before it
363
370
  # starts its shutdown.
364
- self._write_requests: Queue[WriteRequest | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None = (
365
- context.make_queue() if config.output_path is not None else None
366
- )
371
+ self._write_requests: (
372
+ Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
373
+ ) = context.make_queue() if config.is_writing_provenance else None
367
374
  # All other workers use this queue to send many different kinds of
368
375
  # reports the supervisor. The supervisor waits for a _DONE sentinal
369
376
  # from each worker before it finishes its shutdown.
@@ -432,6 +439,7 @@ class SupervisorCommunicator:
432
439
  self._expect_empty_queue(self._compression_dict)
433
440
 
434
441
  def __enter__(self) -> Self:
442
+ _disable_resources_parallelism()
435
443
  self.progress.__enter__()
436
444
  # We make the low-level logger in __enter__ instead of __init__ only
437
445
  # because that's the pattern used by true workers (where it matters).
@@ -461,12 +469,12 @@ class SupervisorCommunicator:
461
469
  """
462
470
  self._scan_requests.put(_ScanRequest(quantum_id), block=False)
463
471
 
464
- def request_write(self, request: WriteRequest) -> None:
472
+ def request_write(self, request: ProvenanceQuantumScanData) -> None:
465
473
  """Send a request to the writer to write provenance for the given scan.
466
474
 
467
475
  Parameters
468
476
  ----------
469
- request : `WriteRequest`
477
+ request : `ProvenanceQuantumScanData`
470
478
  Information from scanning a quantum (or knowing you don't have to,
471
479
  in the case of blocked quanta).
472
480
  """
@@ -580,6 +588,7 @@ class WorkerCommunicator:
580
588
  self._cancel_event = supervisor._cancel_event
581
589
 
582
590
  def __enter__(self) -> Self:
591
+ _disable_resources_parallelism()
583
592
  self.log = make_worker_log(self.name, self.config)
584
593
  self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
585
594
  self._exit_stack = ExitStack().__enter__()
@@ -621,6 +630,11 @@ class WorkerCommunicator:
621
630
  self._exit_stack.__exit__(exc_type, exc_value, traceback)
622
631
  return True
623
632
 
633
+ @property
634
+ def exit_stack(self) -> ExitStack:
635
+ """A `contextlib.ExitStack` tied to the communicator."""
636
+ return self._exit_stack
637
+
624
638
  def log_progress(self, level: int, message: str) -> None:
625
639
  """Send a high-level log message to the supervisor.
626
640
 
@@ -633,44 +647,6 @@ class WorkerCommunicator:
633
647
  """
634
648
  self._reports.put(_ProgressLog(message=message, level=level), block=False)
635
649
 
636
- def enter(
637
- self,
638
- cm: AbstractContextManager[_T],
639
- on_close: str | None = None,
640
- level: int = VERBOSE,
641
- is_progress_log: bool = False,
642
- ) -> _T:
643
- """Enter a context manager that will be exited when the communicator's
644
- context is exited.
645
-
646
- Parameters
647
- ----------
648
- cm : `contextlib.AbstractContextManager`
649
- A context manager to enter.
650
- on_close : `str`, optional
651
- A log message to emit (on the worker's logger) just before the
652
- given context manager is exited. This can be used to indicate
653
- what's going on when an ``__exit__`` implementation has a lot of
654
- work to do (e.g. moving a large file into a zip archive).
655
- level : `int`, optional
656
- Level for the ``on_close`` log message.
657
- is_progress_log : `bool`, optional
658
- If `True`, send the ``on_close`` message to the supervisor via
659
- `log_progress` as well as the worker's logger.
660
- """
661
- if on_close is None:
662
- return self._exit_stack.enter_context(cm)
663
-
664
- @contextmanager
665
- def wrapper() -> Iterator[_T]:
666
- with cm as result:
667
- yield result
668
- self.log.log(level, on_close)
669
- if is_progress_log:
670
- self.log_progress(level, on_close)
671
-
672
- return self._exit_stack.enter_context(wrapper())
673
-
674
650
  def check_for_cancel(self) -> None:
675
651
  """Check for a cancel signal from the supervisor and raise
676
652
  `FatalWorkerError` if it is present.
@@ -728,12 +704,12 @@ class ScannerCommunicator(WorkerCommunicator):
728
704
  else:
729
705
  self._reports.put(_IngestReport(1), block=False)
730
706
 
731
- def request_write(self, request: WriteRequest) -> None:
707
+ def request_write(self, request: ProvenanceQuantumScanData) -> None:
732
708
  """Ask the writer to write provenance for a quantum.
733
709
 
734
710
  Parameters
735
711
  ----------
736
- request : `WriteRequest`
712
+ request : `ProvenanceQuantumScanData`
737
713
  Result of scanning a quantum.
738
714
  """
739
715
  assert self._write_requests is not None, "Writer should not be used if writing is disabled."
@@ -913,12 +889,12 @@ class WriterCommunicator(WorkerCommunicator):
913
889
  self._reports.put(_Sentinel.WRITER_DONE, block=False)
914
890
  return result
915
891
 
916
- def poll(self) -> Iterator[WriteRequest]:
892
+ def poll(self) -> Iterator[ProvenanceQuantumScanData]:
917
893
  """Poll for writer requests from the scanner workers and supervisor.
918
894
 
919
895
  Yields
920
896
  ------
921
- request : `WriteRequest`
897
+ request : `ProvenanceQuantumScanData`
922
898
  The result of a quantum scan.
923
899
 
924
900
  Notes
@@ -29,6 +29,8 @@ from __future__ import annotations
29
29
 
30
30
  __all__ = ("AggregatorConfig",)
31
31
 
32
+ import sys
33
+ from typing import TYPE_CHECKING, Any
32
34
 
33
35
  import pydantic
34
36
 
@@ -60,11 +62,13 @@ class AggregatorConfig(pydantic.BaseModel):
60
62
  n_processes: int = 1
61
63
  """Number of processes the scanner should use."""
62
64
 
63
- assume_complete: bool = True
64
- """If `True`, the aggregator can assume all quanta have run to completion
65
- (including any automatic retries). If `False`, only successes can be
66
- considered final, and quanta that appear to have failed or to have not been
67
- executed are ignored.
65
+ incomplete: bool = False
66
+ """If `True`, do not expect the graph to have been executed to completion
67
+ yet, and only ingest the outputs of successful quanta.
68
+
69
+ This disables writing the provenance quantum graph, since this is likely to
70
+ be wasted effort that just complicates a follow-up run with
71
+ ``incomplete=False`` later.
68
72
  """
69
73
 
70
74
  defensive_ingest: bool = False
@@ -95,11 +99,10 @@ class AggregatorConfig(pydantic.BaseModel):
95
99
  """
96
100
 
97
101
  dry_run: bool = False
98
- """If `True`, do not actually perform any deletions or central butler
99
- ingests.
102
+ """If `True`, do not actually perform any central butler ingests.
100
103
 
101
- Most log messages concerning deletions and ingests will still be emitted in
102
- order to provide a better emulation of a real run.
104
+ Most log messages concerning ingests will still be emitted in order to
105
+ provide a better emulation of a real run.
103
106
  """
104
107
 
105
108
  interactive_status: bool = False
@@ -137,3 +140,69 @@ class AggregatorConfig(pydantic.BaseModel):
137
140
  """Enable support for storage classes by created by the
138
141
  lsst.pipe.base.tests.mocks package.
139
142
  """
143
+
144
+ promise_ingest_graph: bool = False
145
+ """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
146
+ will be run later to ingest metadata/log/config datasets, and will not
147
+ ingest them itself. This means that if `~.ingest_graph.ingest_graph` is
148
+ not run, those files will be abandoned in the butler storage root without
149
+ being present in the butler database, but it will speed up both processes.
150
+
151
+ It is *usually* safe to build a quantum graph for downstream processing
152
+ before or while running `~.ingest_graph.ingest_graph`, because
153
+ metadata/log/config datasets are rarely used as inputs. To check, use
154
+ ``pipetask build ... --show inputs`` to show the overall-inputs to the
155
+ graph and scan for these dataset types.
156
+ """
157
+
158
+ @property
159
+ def is_writing_provenance(self) -> bool:
160
+ """Whether the aggregator is configured to write the provenance quantum
161
+ graph.
162
+ """
163
+ return self.output_path is not None and not self.incomplete
164
+
165
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
166
+ # when we inherit those docstrings in our public classes.
167
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
168
+
169
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
170
+ """See `pydantic.BaseModel.copy`."""
171
+ return super().copy(*args, **kwargs)
172
+
173
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
174
+ """See `pydantic.BaseModel.model_dump`."""
175
+ return super().model_dump(*args, **kwargs)
176
+
177
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
178
+ """See `pydantic.BaseModel.model_dump_json`."""
179
+ return super().model_dump(*args, **kwargs)
180
+
181
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
182
+ """See `pydantic.BaseModel.model_copy`."""
183
+ return super().model_copy(*args, **kwargs)
184
+
185
+ @classmethod
186
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
187
+ """See `pydantic.BaseModel.model_construct`."""
188
+ return super().model_construct(*args, **kwargs)
189
+
190
+ @classmethod
191
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
192
+ """See `pydantic.BaseModel.model_json_schema`."""
193
+ return super().model_json_schema(*args, **kwargs)
194
+
195
+ @classmethod
196
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
197
+ """See `pydantic.BaseModel.model_validate`."""
198
+ return super().model_validate(*args, **kwargs)
199
+
200
+ @classmethod
201
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
202
+ """See `pydantic.BaseModel.model_validate_json`."""
203
+ return super().model_validate_json(*args, **kwargs)
204
+
205
+ @classmethod
206
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
207
+ """See `pydantic.BaseModel.model_validate_strings`."""
208
+ return super().model_validate_strings(*args, **kwargs)
@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError
43
43
 
44
44
  from ...pipeline_graph import TaskImportMode
45
45
  from .._common import DatastoreName
46
- from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
46
+ from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
47
47
  from ._communicators import IngesterCommunicator
48
48
 
49
49
 
@@ -170,7 +170,7 @@ class Ingester(AbstractContextManager):
170
170
  for ingest_request in self.comms.poll():
171
171
  self.n_producers_pending += 1
172
172
  self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
173
- self.update_pending(ingest_request.datasets, ingest_request.records)
173
+ self.update_outputs_pending(refs=ingest_request.refs, records=ingest_request.records)
174
174
  if self.n_datasets_pending > self.comms.config.ingest_batch_size:
175
175
  self.ingest()
176
176
  self.comms.log.info("All ingest requests received.")
@@ -266,31 +266,32 @@ class Ingester(AbstractContextManager):
266
266
  else:
267
267
  del self.records_pending[datastore_name]
268
268
 
269
- def update_pending(
270
- self, datasets: list[PredictedDatasetModel], records: dict[DatastoreName, DatastoreRecordData]
269
+ def update_outputs_pending(
270
+ self,
271
+ refs: list[DatasetRef],
272
+ records: dict[DatastoreName, DatastoreRecordData],
271
273
  ) -> None:
272
274
  """Add an ingest request to the pending-ingest data structures.
273
275
 
274
276
  Parameters
275
277
  ----------
276
- datasets : `list` [ `PredictedDatasetModel` ]
277
- Registry information about the datasets.
278
+ refs : `list` [ `lsst.daf.butler.DatasetRef` ]
279
+ Registry information about regular quantum-output datasets.
278
280
  records : `dict` [ `str`, \
279
281
  `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
280
282
  Datastore information about the datasets.
281
283
  """
282
- n_given = len(datasets)
284
+ n_given = len(refs)
283
285
  if self.already_ingested is not None:
284
- datasets = [d for d in datasets if d.dataset_id not in self.already_ingested]
285
- kept = {d.dataset_id for d in datasets}
286
+ refs = [ref for ref in refs if ref.id not in self.already_ingested]
287
+ kept = {ref.id for ref in refs}
286
288
  self.n_datasets_skipped += n_given - len(kept)
287
289
  records = {
288
290
  datastore_name: filtered_records
289
291
  for datastore_name, original_records in records.items()
290
292
  if (filtered_records := original_records.subset(kept)) is not None
291
293
  }
292
- for dataset in datasets:
293
- ref = self.predicted.make_dataset_ref(dataset)
294
+ for ref in refs:
294
295
  self.refs_pending[ref.datasetType.dimensions].append(ref)
295
296
  for datastore_name, datastore_records in records.items():
296
297
  if (existing_records := self.records_pending.get(datastore_name)) is not None: