lsst-pipe-base 30.0.1rc1__py3-none-any.whl → 30.2025.5200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +20 -31
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +10 -43
- lsst/pipe/base/_task_metadata.py +2 -2
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +3 -8
- lsst/pipe/base/automatic_connection_constants.py +1 -20
- lsst/pipe/base/cli/cmd/__init__.py +2 -18
- lsst/pipe/base/cli/cmd/commands.py +4 -149
- lsst/pipe/base/connectionTypes.py +160 -72
- lsst/pipe/base/connections.py +9 -6
- lsst/pipe/base/execution_reports.py +5 -0
- lsst/pipe/base/graph/graph.py +10 -11
- lsst/pipe/base/graph/quantumNode.py +4 -4
- lsst/pipe/base/graph_walker.py +10 -8
- lsst/pipe/base/log_capture.py +5 -9
- lsst/pipe/base/mp_graph_executor.py +15 -51
- lsst/pipe/base/pipeline.py +6 -5
- lsst/pipe/base/pipelineIR.py +8 -2
- lsst/pipe/base/pipelineTask.py +7 -5
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +22 -32
- lsst/pipe/base/pipeline_graph/_mapping_views.py +7 -4
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +7 -14
- lsst/pipe/base/pipeline_graph/expressions.py +2 -2
- lsst/pipe/base/pipeline_graph/io.py +10 -7
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +12 -13
- lsst/pipe/base/pipeline_graph/visualization/_layout.py +18 -16
- lsst/pipe/base/pipeline_graph/visualization/_merge.py +7 -4
- lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +0 -7
- lsst/pipe/base/prerequisite_helpers.py +1 -2
- lsst/pipe/base/quantum_graph/_common.py +20 -19
- lsst/pipe/base/quantum_graph/_multiblock.py +31 -37
- lsst/pipe/base/quantum_graph/_predicted.py +13 -111
- lsst/pipe/base/quantum_graph/_provenance.py +45 -1136
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +1 -0
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +289 -204
- lsst/pipe/base/quantum_graph/aggregator/_config.py +9 -87
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -13
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +235 -49
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +116 -6
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +39 -29
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +351 -34
- lsst/pipe/base/quantum_graph/visualization.py +1 -5
- lsst/pipe/base/quantum_graph_builder.py +8 -21
- lsst/pipe/base/quantum_graph_executor.py +13 -116
- lsst/pipe/base/quantum_graph_skeleton.py +29 -31
- lsst/pipe/base/quantum_provenance_graph.py +12 -29
- lsst/pipe/base/separable_pipeline_executor.py +3 -19
- lsst/pipe/base/single_quantum_executor.py +42 -67
- lsst/pipe/base/struct.py +0 -4
- lsst/pipe/base/testUtils.py +3 -3
- lsst/pipe/base/tests/mocks/_storage_class.py +1 -2
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/METADATA +3 -3
- lsst_pipe_base-30.2025.5200.dist-info/RECORD +125 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/WHEEL +1 -1
- lsst/pipe/base/log_on_close.py +0 -76
- lsst/pipe/base/quantum_graph/aggregator/_workers.py +0 -303
- lsst/pipe/base/quantum_graph/formatter.py +0 -171
- lsst/pipe/base/quantum_graph/ingest_graph.py +0 -413
- lsst_pipe_base-30.0.1rc1.dist-info/RECORD +0 -129
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5200.dist-info}/zip-safe +0 -0
|
@@ -27,16 +27,68 @@
|
|
|
27
27
|
|
|
28
28
|
from __future__ import annotations
|
|
29
29
|
|
|
30
|
-
__all__ = (
|
|
30
|
+
__all__ = (
|
|
31
|
+
"InProgressScan",
|
|
32
|
+
"IngestRequest",
|
|
33
|
+
"ScanReport",
|
|
34
|
+
"ScanStatus",
|
|
35
|
+
"WriteRequest",
|
|
36
|
+
)
|
|
31
37
|
|
|
32
38
|
import dataclasses
|
|
39
|
+
import enum
|
|
33
40
|
import uuid
|
|
34
41
|
|
|
35
|
-
from lsst.daf.butler import DatasetRef
|
|
36
42
|
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
|
|
37
43
|
|
|
38
44
|
from .._common import DatastoreName
|
|
39
|
-
from ..
|
|
45
|
+
from .._predicted import PredictedDatasetModel
|
|
46
|
+
from .._provenance import (
|
|
47
|
+
ProvenanceLogRecordsModel,
|
|
48
|
+
ProvenanceQuantumAttemptModel,
|
|
49
|
+
ProvenanceTaskMetadataModel,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ScanStatus(enum.Enum):
|
|
54
|
+
"""Status enum for quantum scanning.
|
|
55
|
+
|
|
56
|
+
Note that this records the status for the *scanning* which is distinct
|
|
57
|
+
from the status of the quantum's execution.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
INCOMPLETE = enum.auto()
|
|
61
|
+
"""The quantum is not necessarily done running, and cannot be scanned
|
|
62
|
+
conclusively yet.
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
ABANDONED = enum.auto()
|
|
66
|
+
"""The quantum's execution appears to have failed but we cannot rule out
|
|
67
|
+
the possibility that it could be recovered, but we've also waited long
|
|
68
|
+
enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
|
|
69
|
+
to stop trying for now.
|
|
70
|
+
|
|
71
|
+
This state means a later run with `ScannerConfig.assume_complete` is
|
|
72
|
+
required.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
SUCCESSFUL = enum.auto()
|
|
76
|
+
"""The quantum was conclusively scanned and was executed successfully,
|
|
77
|
+
unblocking scans for downstream quanta.
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
FAILED = enum.auto()
|
|
81
|
+
"""The quantum was conclusively scanned and failed execution, blocking
|
|
82
|
+
scans for downstream quanta.
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
BLOCKED = enum.auto()
|
|
86
|
+
"""A quantum upstream of this one failed."""
|
|
87
|
+
|
|
88
|
+
INIT = enum.auto()
|
|
89
|
+
"""Init quanta need special handling, because they don't have logs and
|
|
90
|
+
metadata.
|
|
91
|
+
"""
|
|
40
92
|
|
|
41
93
|
|
|
42
94
|
@dataclasses.dataclass
|
|
@@ -46,7 +98,7 @@ class ScanReport:
|
|
|
46
98
|
quantum_id: uuid.UUID
|
|
47
99
|
"""Unique ID of the quantum."""
|
|
48
100
|
|
|
49
|
-
status:
|
|
101
|
+
status: ScanStatus
|
|
50
102
|
"""Combined status of the scan and the execution of the quantum."""
|
|
51
103
|
|
|
52
104
|
|
|
@@ -57,11 +109,69 @@ class IngestRequest:
|
|
|
57
109
|
producer_id: uuid.UUID
|
|
58
110
|
"""ID of the quantum that produced these datasets."""
|
|
59
111
|
|
|
60
|
-
|
|
112
|
+
datasets: list[PredictedDatasetModel]
|
|
61
113
|
"""Registry information about the datasets."""
|
|
62
114
|
|
|
63
115
|
records: dict[DatastoreName, DatastoreRecordData]
|
|
64
116
|
"""Datastore information about the datasets."""
|
|
65
117
|
|
|
66
118
|
def __bool__(self) -> bool:
|
|
67
|
-
return bool(self.
|
|
119
|
+
return bool(self.datasets or self.records)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclasses.dataclass
|
|
123
|
+
class InProgressScan:
|
|
124
|
+
"""A struct that represents a quantum that is being scanned."""
|
|
125
|
+
|
|
126
|
+
quantum_id: uuid.UUID
|
|
127
|
+
"""Unique ID for the quantum."""
|
|
128
|
+
|
|
129
|
+
status: ScanStatus
|
|
130
|
+
"""Combined status for the scan and the execution of the quantum."""
|
|
131
|
+
|
|
132
|
+
attempts: list[ProvenanceQuantumAttemptModel] = dataclasses.field(default_factory=list)
|
|
133
|
+
"""Provenance information about each attempt to run the quantum."""
|
|
134
|
+
|
|
135
|
+
outputs: dict[uuid.UUID, bool] = dataclasses.field(default_factory=dict)
|
|
136
|
+
"""Unique IDs of the output datasets mapped to whether they were actually
|
|
137
|
+
produced.
|
|
138
|
+
"""
|
|
139
|
+
|
|
140
|
+
metadata: ProvenanceTaskMetadataModel = dataclasses.field(default_factory=ProvenanceTaskMetadataModel)
|
|
141
|
+
"""Task metadata information for each attempt.
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
logs: ProvenanceLogRecordsModel = dataclasses.field(default_factory=ProvenanceLogRecordsModel)
|
|
145
|
+
"""Log records for each attempt.
|
|
146
|
+
"""
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclasses.dataclass
|
|
150
|
+
class WriteRequest:
|
|
151
|
+
"""A struct that represents a request to write provenance for a quantum."""
|
|
152
|
+
|
|
153
|
+
quantum_id: uuid.UUID
|
|
154
|
+
"""Unique ID for the quantum."""
|
|
155
|
+
|
|
156
|
+
status: ScanStatus
|
|
157
|
+
"""Combined status for the scan and the execution of the quantum."""
|
|
158
|
+
|
|
159
|
+
existing_outputs: set[uuid.UUID] = dataclasses.field(default_factory=set)
|
|
160
|
+
"""Unique IDs of the output datasets that were actually written."""
|
|
161
|
+
|
|
162
|
+
quantum: bytes = b""
|
|
163
|
+
"""Serialized quantum provenance model.
|
|
164
|
+
|
|
165
|
+
This may be empty for quanta that had no attempts.
|
|
166
|
+
"""
|
|
167
|
+
|
|
168
|
+
metadata: bytes = b""
|
|
169
|
+
"""Serialized task metadata."""
|
|
170
|
+
|
|
171
|
+
logs: bytes = b""
|
|
172
|
+
"""Serialized logs."""
|
|
173
|
+
|
|
174
|
+
is_compressed: bool = False
|
|
175
|
+
"""Whether the `quantum`, `metadata`, and `logs` attributes are
|
|
176
|
+
compressed.
|
|
177
|
+
"""
|
|
@@ -42,18 +42,19 @@ from lsst.utils.usage import get_peak_mem_usage
|
|
|
42
42
|
from ...graph_walker import GraphWalker
|
|
43
43
|
from ...pipeline_graph import TaskImportMode
|
|
44
44
|
from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
|
|
45
|
-
from .._provenance import ProvenanceQuantumScanData, ProvenanceQuantumScanStatus
|
|
46
45
|
from ._communicators import (
|
|
47
46
|
IngesterCommunicator,
|
|
48
47
|
ScannerCommunicator,
|
|
48
|
+
SpawnProcessContext,
|
|
49
49
|
SupervisorCommunicator,
|
|
50
|
+
ThreadingContext,
|
|
51
|
+
Worker,
|
|
50
52
|
WriterCommunicator,
|
|
51
53
|
)
|
|
52
54
|
from ._config import AggregatorConfig
|
|
53
55
|
from ._ingester import Ingester
|
|
54
56
|
from ._scanner import Scanner
|
|
55
|
-
from ._structs import ScanReport
|
|
56
|
-
from ._workers import SpawnWorkerFactory, ThreadWorkerFactory
|
|
57
|
+
from ._structs import ScanReport, ScanStatus, WriteRequest
|
|
57
58
|
from ._writer import Writer
|
|
58
59
|
|
|
59
60
|
|
|
@@ -115,17 +116,6 @@ class Supervisor:
|
|
|
115
116
|
self.comms.request_scan(ready_set.pop())
|
|
116
117
|
for scan_return in self.comms.poll():
|
|
117
118
|
self.handle_report(scan_return)
|
|
118
|
-
if self.comms.config.incomplete:
|
|
119
|
-
quantum_or_quanta = "quanta" if self.n_abandoned != 1 else "quantum"
|
|
120
|
-
self.comms.progress.log.info(
|
|
121
|
-
"%d %s incomplete/failed abandoned; re-run with incomplete=False to finish.",
|
|
122
|
-
self.n_abandoned,
|
|
123
|
-
quantum_or_quanta,
|
|
124
|
-
)
|
|
125
|
-
self.comms.progress.log.info(
|
|
126
|
-
"Scanning complete after %0.1fs; waiting for workers to finish.",
|
|
127
|
-
self.comms.progress.elapsed_time,
|
|
128
|
-
)
|
|
129
119
|
|
|
130
120
|
def handle_report(self, scan_report: ScanReport) -> None:
|
|
131
121
|
"""Handle a report from a scanner.
|
|
@@ -136,22 +126,18 @@ class Supervisor:
|
|
|
136
126
|
Information about the scan.
|
|
137
127
|
"""
|
|
138
128
|
match scan_report.status:
|
|
139
|
-
case
|
|
129
|
+
case ScanStatus.SUCCESSFUL | ScanStatus.INIT:
|
|
140
130
|
self.comms.log.debug("Scan complete for %s: quantum succeeded.", scan_report.quantum_id)
|
|
141
131
|
self.walker.finish(scan_report.quantum_id)
|
|
142
|
-
case
|
|
132
|
+
case ScanStatus.FAILED:
|
|
143
133
|
self.comms.log.debug("Scan complete for %s: quantum failed.", scan_report.quantum_id)
|
|
144
134
|
blocked_quanta = self.walker.fail(scan_report.quantum_id)
|
|
145
135
|
for blocked_quantum_id in blocked_quanta:
|
|
146
|
-
if self.comms.config.
|
|
147
|
-
self.comms.request_write(
|
|
148
|
-
ProvenanceQuantumScanData(
|
|
149
|
-
blocked_quantum_id, status=ProvenanceQuantumScanStatus.BLOCKED
|
|
150
|
-
)
|
|
151
|
-
)
|
|
136
|
+
if self.comms.config.output_path is not None:
|
|
137
|
+
self.comms.request_write(WriteRequest(blocked_quantum_id, status=ScanStatus.BLOCKED))
|
|
152
138
|
self.comms.progress.scans.update(1)
|
|
153
139
|
self.comms.progress.quantum_ingests.update(len(blocked_quanta))
|
|
154
|
-
case
|
|
140
|
+
case ScanStatus.ABANDONED:
|
|
155
141
|
self.comms.log.debug("Abandoning scan for %s: quantum has not succeeded (yet).")
|
|
156
142
|
self.walker.fail(scan_report.quantum_id)
|
|
157
143
|
self.n_abandoned += 1
|
|
@@ -175,31 +161,55 @@ def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorCon
|
|
|
175
161
|
Configuration for the aggregator.
|
|
176
162
|
"""
|
|
177
163
|
log = getLogger("lsst.pipe.base.quantum_graph.aggregator")
|
|
178
|
-
|
|
179
|
-
|
|
164
|
+
ctx = ThreadingContext() if config.n_processes == 1 else SpawnProcessContext()
|
|
165
|
+
scanners: list[Worker] = []
|
|
166
|
+
ingester: Worker
|
|
167
|
+
writer: Worker | None = None
|
|
168
|
+
with SupervisorCommunicator(log, config.n_processes, ctx, config) as comms:
|
|
180
169
|
comms.progress.log.verbose("Starting workers.")
|
|
181
|
-
if config.
|
|
170
|
+
if config.output_path is not None:
|
|
182
171
|
writer_comms = WriterCommunicator(comms)
|
|
183
|
-
|
|
172
|
+
writer = ctx.make_worker(
|
|
184
173
|
target=Writer.run,
|
|
185
174
|
args=(predicted_path, writer_comms),
|
|
186
175
|
name=writer_comms.name,
|
|
187
176
|
)
|
|
177
|
+
writer.start()
|
|
188
178
|
for scanner_id in range(config.n_processes):
|
|
189
179
|
scanner_comms = ScannerCommunicator(comms, scanner_id)
|
|
190
|
-
|
|
180
|
+
worker = ctx.make_worker(
|
|
191
181
|
target=Scanner.run,
|
|
192
182
|
args=(predicted_path, butler_path, scanner_comms),
|
|
193
183
|
name=scanner_comms.name,
|
|
194
184
|
)
|
|
185
|
+
worker.start()
|
|
186
|
+
scanners.append(worker)
|
|
195
187
|
ingester_comms = IngesterCommunicator(comms)
|
|
196
|
-
|
|
188
|
+
ingester = ctx.make_worker(
|
|
197
189
|
target=Ingester.run,
|
|
198
190
|
args=(predicted_path, butler_path, ingester_comms),
|
|
199
191
|
name=ingester_comms.name,
|
|
200
192
|
)
|
|
193
|
+
ingester.start()
|
|
201
194
|
supervisor = Supervisor(predicted_path, comms)
|
|
202
195
|
supervisor.loop()
|
|
196
|
+
log.info(
|
|
197
|
+
"Scanning complete after %0.1fs; waiting for workers to finish.",
|
|
198
|
+
comms.progress.elapsed_time,
|
|
199
|
+
)
|
|
200
|
+
comms.wait_for_workers_to_finish()
|
|
201
|
+
if supervisor.n_abandoned:
|
|
202
|
+
raise RuntimeError(
|
|
203
|
+
f"{supervisor.n_abandoned} {'quanta' if supervisor.n_abandoned > 1 else 'quantum'} "
|
|
204
|
+
"abandoned because they did not succeed. Re-run with assume_complete=True after all retry "
|
|
205
|
+
"attempts have been exhausted."
|
|
206
|
+
)
|
|
207
|
+
for w in scanners:
|
|
208
|
+
w.join()
|
|
209
|
+
ingester.join()
|
|
210
|
+
if writer is not None and writer.is_alive():
|
|
211
|
+
log.info("Waiting for writer process to close (garbage collecting can be very slow).")
|
|
212
|
+
writer.join()
|
|
203
213
|
# We can't get memory usage for children until they've joined.
|
|
204
214
|
parent_mem, child_mem = get_peak_mem_usage()
|
|
205
215
|
# This is actually an upper bound on the peak (since the peaks could be
|