lsst-pipe-base 30.2026.200-py3-none-any.whl → 30.2026.400-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +10 -12
- lsst/pipe/base/_status.py +29 -10
- lsst/pipe/base/automatic_connection_constants.py +9 -1
- lsst/pipe/base/cli/cmd/__init__.py +16 -2
- lsst/pipe/base/cli/cmd/commands.py +42 -4
- lsst/pipe/base/connectionTypes.py +72 -160
- lsst/pipe/base/connections.py +3 -6
- lsst/pipe/base/execution_reports.py +0 -5
- lsst/pipe/base/log_capture.py +8 -4
- lsst/pipe/base/log_on_close.py +79 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/pipeline.py +3 -4
- lsst/pipe/base/pipelineIR.py +0 -6
- lsst/pipe/base/pipelineTask.py +5 -7
- lsst/pipe/base/pipeline_graph/_edges.py +19 -7
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
- lsst/pipe/base/quantum_graph/_common.py +7 -4
- lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
- lsst/pipe/base/quantum_graph/_predicted.py +111 -10
- lsst/pipe/base/quantum_graph/_provenance.py +727 -26
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +26 -50
- lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +48 -234
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +24 -18
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +33 -350
- lsst/pipe/base/quantum_graph/formatter.py +171 -0
- lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_provenance_graph.py +17 -2
- lsst/pipe/base/separable_pipeline_executor.py +18 -2
- lsst/pipe/base/single_quantum_executor.py +59 -41
- lsst/pipe/base/struct.py +4 -0
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +45 -42
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/aggregator/_communicators.py

@@ -51,16 +51,17 @@ import time
 import uuid
 from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterable, Iterator
-from contextlib import
+from contextlib import ExitStack
 from traceback import format_exception
 from types import TracebackType
 from typing import Any, Literal, Self, TypeAlias, TypeVar, Union

-from lsst.utils.logging import
+from lsst.utils.logging import LsstLogAdapter

+from .._provenance import ProvenanceQuantumScanData
 from ._config import AggregatorConfig
 from ._progress import ProgressManager, make_worker_log
-from ._structs import IngestRequest, ScanReport
+from ._structs import IngestRequest, ScanReport

 _T = TypeVar("_T")

@@ -317,6 +318,12 @@ Report: TypeAlias = (
 )


+def _disable_resources_parallelism() -> None:
+    os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
+    os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
+    os.environ["LSST_S3_USE_THREADS"] = "False"
+
+
 class SupervisorCommunicator:
     """A helper object that lets the supervisor direct the other workers.

@@ -361,9 +368,9 @@ class SupervisorCommunicator:
         # scanner and the supervisor send one sentinal when done, and the
         # writer waits for (n_scanners + 1) sentinals to arrive before it
         # starts its shutdown.
-        self._write_requests:
-
-        )
+        self._write_requests: (
+            Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
+        ) = context.make_queue() if config.is_writing_provenance else None
         # All other workers use this queue to send many different kinds of
         # reports the supervisor. The supervisor waits for a _DONE sentinal
         # from each worker before it finishes its shutdown.
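The comments in this hunk describe the queue-shutdown protocol: each scanner and the supervisor put one sentinel on the write-request queue when they are done, and the writer drains until it has seen n_scanners + 1 sentinels before shutting down. A minimal, generic sketch of that pattern, with illustrative names that are not the package's own:

import enum
import queue


class Sentinel(enum.Enum):
    NO_MORE_WRITE_REQUESTS = enum.auto()


def drain(requests: "queue.Queue[str | Sentinel]", n_producers: int) -> list[str]:
    """Collect items until one sentinel per producer has arrived."""
    received: list[str] = []
    remaining = n_producers
    while remaining > 0:
        item = requests.get()
        if item is Sentinel.NO_MORE_WRITE_REQUESTS:
            remaining -= 1
        else:
            received.append(item)
    return received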
@@ -432,6 +439,7 @@ class SupervisorCommunicator:
         self._expect_empty_queue(self._compression_dict)

     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.progress.__enter__()
         # We make the low-level logger in __enter__ instead of __init__ only
         # because that's the pattern used by true workers (where it matters).
@@ -461,12 +469,12 @@ class SupervisorCommunicator:
         """
         self._scan_requests.put(_ScanRequest(quantum_id), block=False)

-    def request_write(self, request:
+    def request_write(self, request: ProvenanceQuantumScanData) -> None:
         """Send a request to the writer to write provenance for the given scan.

         Parameters
         ----------
-        request : `
+        request : `ProvenanceQuantumScanData`
             Information from scanning a quantum (or knowing you don't have to,
             in the case of blocked quanta).
         """
@@ -580,6 +588,7 @@ class WorkerCommunicator:
         self._cancel_event = supervisor._cancel_event

     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.log = make_worker_log(self.name, self.config)
         self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
         self._exit_stack = ExitStack().__enter__()
@@ -621,6 +630,11 @@ class WorkerCommunicator:
         self._exit_stack.__exit__(exc_type, exc_value, traceback)
         return True

+    @property
+    def exit_stack(self) -> ExitStack:
+        """A `contextlib.ExitStack` tied to the communicator."""
+        return self._exit_stack
+
     def log_progress(self, level: int, message: str) -> None:
         """Send a high-level log message to the supervisor.

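The new exit_stack property exposes the communicator's internal contextlib.ExitStack so callers can tie resources to the communicator's lifetime. A hypothetical caller (not code from this package) might use it like this:

from contextlib import ExitStack


def stage_scratch_files(exit_stack: ExitStack, paths: list[str]) -> list:
    """Open files that should stay open until the owning context exits."""
    handles = [exit_stack.enter_context(open(path, "rb")) for path in paths]
    # ExitStack unwinds in LIFO order, so this callback runs before the files
    # opened above are closed.
    exit_stack.callback(print, "scratch files released")
    return handles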
@@ -633,44 +647,6 @@ class WorkerCommunicator:
         """
         self._reports.put(_ProgressLog(message=message, level=level), block=False)

-    def enter(
-        self,
-        cm: AbstractContextManager[_T],
-        on_close: str | None = None,
-        level: int = VERBOSE,
-        is_progress_log: bool = False,
-    ) -> _T:
-        """Enter a context manager that will be exited when the communicator's
-        context is exited.
-
-        Parameters
-        ----------
-        cm : `contextlib.AbstractContextManager`
-            A context manager to enter.
-        on_close : `str`, optional
-            A log message to emit (on the worker's logger) just before the
-            given context manager is exited. This can be used to indicate
-            what's going on when an ``__exit__`` implementation has a lot of
-            work to do (e.g. moving a large file into a zip archive).
-        level : `int`, optional
-            Level for the ``on_close`` log message.
-        is_progress_log : `bool`, optional
-            If `True`, send the ``on_close`` message to the supervisor via
-            `log_progress` as well as the worker's logger.
-        """
-        if on_close is None:
-            return self._exit_stack.enter_context(cm)
-
-        @contextmanager
-        def wrapper() -> Iterator[_T]:
-            with cm as result:
-                yield result
-                self.log.log(level, on_close)
-                if is_progress_log:
-                    self.log_progress(level, on_close)
-
-        return self._exit_stack.enter_context(wrapper())
-
     def check_for_cancel(self) -> None:
         """Check for a cancel signal from the supervisor and raise
         `FatalWorkerError` if it is present.
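The removed enter helper wrapped an arbitrary context manager so that a log message was emitted just before the wrapped manager exited. This release also adds a new lsst/pipe/base/log_on_close.py module (listed above but not shown in this diff), which presumably takes over that role; the sketch below only reproduces the removed behaviour with standard-library pieces and illustrative names.

import logging
from collections.abc import Iterator
from contextlib import AbstractContextManager, ExitStack, contextmanager
from typing import TypeVar

_T = TypeVar("_T")


@contextmanager
def _log_when_closing(
    cm: AbstractContextManager[_T], log: logging.Logger, message: str, level: int = logging.INFO
) -> Iterator[_T]:
    # Emit ``message`` after the body finishes but before ``cm.__exit__``
    # runs, mirroring the removed ``WorkerCommunicator.enter`` logic.
    with cm as result:
        yield result
        log.log(level, message)


def enter_with_log(
    stack: ExitStack, cm: AbstractContextManager[_T], log: logging.Logger, message: str
) -> _T:
    """Enter ``cm`` on ``stack`` and log ``message`` just before it exits."""
    return stack.enter_context(_log_when_closing(cm, log, message))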
@@ -728,12 +704,12 @@ class ScannerCommunicator(WorkerCommunicator):
         else:
             self._reports.put(_IngestReport(1), block=False)

-    def request_write(self, request:
+    def request_write(self, request: ProvenanceQuantumScanData) -> None:
         """Ask the writer to write provenance for a quantum.

         Parameters
         ----------
-        request : `
+        request : `ProvenanceQuantumScanData`
             Result of scanning a quantum.
         """
         assert self._write_requests is not None, "Writer should not be used if writing is disabled."
@@ -913,12 +889,12 @@ class WriterCommunicator(WorkerCommunicator):
         self._reports.put(_Sentinel.WRITER_DONE, block=False)
         return result

-    def poll(self) -> Iterator[
+    def poll(self) -> Iterator[ProvenanceQuantumScanData]:
         """Poll for writer requests from the scanner workers and supervisor.

         Yields
         ------
-        request : `
+        request : `ProvenanceQuantumScanData`
             The result of a quantum scan.

         Notes
lsst/pipe/base/quantum_graph/aggregator/_config.py

@@ -29,6 +29,8 @@ from __future__ import annotations

 __all__ = ("AggregatorConfig",)

+import sys
+from typing import TYPE_CHECKING, Any

 import pydantic

@@ -60,11 +62,13 @@ class AggregatorConfig(pydantic.BaseModel):
     n_processes: int = 1
     """Number of processes the scanner should use."""

-
-    """If `True`,
-
-
-
+    incomplete: bool = False
+    """If `True`, do not expect the graph to have been executed to completion
+    yet, and only ingest the outputs of successful quanta.
+
+    This disables writing the provenance quantum graph, since this is likely to
+    be wasted effort that just complicates a follow-up run with
+    ``incomplete=False`` later.
     """

     defensive_ingest: bool = False
@@ -95,11 +99,10 @@ class AggregatorConfig(pydantic.BaseModel):
     """

     dry_run: bool = False
-    """If `True`, do not actually perform any
-    ingests.
+    """If `True`, do not actually perform any central butler ingests.

-    Most log messages concerning
-
+    Most log messages concerning ingests will still be emitted in order to
+    provide a better emulation of a real run.
     """

     interactive_status: bool = False
@@ -137,3 +140,69 @@ class AggregatorConfig(pydantic.BaseModel):
     """Enable support for storage classes by created by the
     lsst.pipe.base.tests.mocks package.
     """
+
+    promise_ingest_graph: bool = False
+    """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
+    will be run later to ingest metadata/log/config datasets, and will not
+    ingest them itself. This means that if `~.ingest_graph.ingest_graph` is
+    not run, those files will be abandoned in the butler storage root without
+    being present in the butler database, but it will speed up both processes.
+
+    It is *usually* safe to build a quantum graph for downstream processing
+    before or while running `~.ingest_graph.ingest_graph`, because
+    metadata/log/config datasets are rarely used as inputs. To check, use
+    ``pipetask build ... --show inputs`` to show the overall-inputs to the
+    graph and scan for these dataset types.
+    """
+
+    @property
+    def is_writing_provenance(self) -> bool:
+        """Whether the aggregator is configured to write the provenance quantum
+        graph.
+        """
+        return self.output_path is not None and not self.incomplete
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
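Taken together, the new incomplete option and the is_writing_provenance property decide whether a provenance quantum graph is written at all. The sketch below is a hedged usage example: it assumes keyword construction behaves as for any pydantic model, that output_path accepts None or a path-like string (implied by the property body but not shown in this diff), and that the remaining fields keep their defaults.

from lsst.pipe.base.quantum_graph.aggregator._config import AggregatorConfig

# Incomplete run: only successful quanta are ingested and no provenance
# quantum graph is written, regardless of the output path.
cfg = AggregatorConfig(incomplete=True, dry_run=True, output_path=None)
assert not cfg.is_writing_provenance

# Complete run with an output path: the provenance quantum graph is written.
cfg = AggregatorConfig(n_processes=4, output_path="provenance.qgraph")
assert cfg.is_writing_provenance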
lsst/pipe/base/quantum_graph/aggregator/_ingester.py

@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError

 from ...pipeline_graph import TaskImportMode
 from .._common import DatastoreName
-from .._predicted import
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
 from ._communicators import IngesterCommunicator

@@ -170,7 +170,7 @@ class Ingester(AbstractContextManager):
         for ingest_request in self.comms.poll():
             self.n_producers_pending += 1
             self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
-            self.
+            self.update_outputs_pending(refs=ingest_request.refs, records=ingest_request.records)
             if self.n_datasets_pending > self.comms.config.ingest_batch_size:
                 self.ingest()
         self.comms.log.info("All ingest requests received.")
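The loop above accumulates dataset refs and datastore records from ingest requests and flushes them to the central butler once the pending count exceeds config.ingest_batch_size. A stripped-down, generic sketch of that batching pattern (types and the flush body are stand-ins, not the package's API):

from dataclasses import dataclass, field


@dataclass
class BatchingIngester:
    batch_size: int
    pending: list = field(default_factory=list)

    def add(self, refs: list) -> None:
        """Buffer refs and flush once the batch size is exceeded."""
        self.pending.extend(refs)
        if len(self.pending) > self.batch_size:
            self.flush()

    def flush(self) -> None:
        # A real implementation would hand the batch to the butler here
        # (registry inserts plus datastore record transfers); the sketch only
        # reports and clears the buffer.
        print(f"ingesting {len(self.pending)} datasets")
        self.pending.clear()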
@@ -266,31 +266,32 @@ class Ingester(AbstractContextManager):
             else:
                 del self.records_pending[datastore_name]

-    def
-        self,
+    def update_outputs_pending(
+        self,
+        refs: list[DatasetRef],
+        records: dict[DatastoreName, DatastoreRecordData],
     ) -> None:
         """Add an ingest request to the pending-ingest data structures.

         Parameters
         ----------
-
-            Registry information about
+        refs : `list` [ `lsst.daf.butler.DatasetRef` ]
+            Registry information about regular quantum-output datasets.
         records : `dict` [ `str`, \
             `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
             Datastore information about the datasets.
         """
-        n_given = len(
+        n_given = len(refs)
         if self.already_ingested is not None:
-
-            kept = {
+            refs = [ref for ref in refs if ref.id not in self.already_ingested]
+            kept = {ref.id for ref in refs}
             self.n_datasets_skipped += n_given - len(kept)
             records = {
                 datastore_name: filtered_records
                 for datastore_name, original_records in records.items()
                 if (filtered_records := original_records.subset(kept)) is not None
             }
-        for
-            ref = self.predicted.make_dataset_ref(dataset)
+        for ref in refs:
             self.refs_pending[ref.datasetType.dimensions].append(ref)
         for datastore_name, datastore_records in records.items():
             if (existing_records := self.records_pending.get(datastore_name)) is not None: