lsst-pipe-base 30.0.0rc3__py3-none-any.whl → 30.2025.5100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +5 -6
- lsst/pipe/base/log_capture.py +79 -39
- lsst/pipe/base/mp_graph_executor.py +15 -51
- lsst/pipe/base/quantum_graph/_common.py +3 -4
- lsst/pipe/base/quantum_graph/_multiblock.py +16 -6
- lsst/pipe/base/quantum_graph/_predicted.py +10 -104
- lsst/pipe/base/quantum_graph/_provenance.py +6 -657
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +50 -18
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +229 -35
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +113 -3
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +5 -10
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +348 -31
- lsst/pipe/base/quantum_graph_executor.py +13 -116
- lsst/pipe/base/separable_pipeline_executor.py +2 -18
- lsst/pipe/base/single_quantum_executor.py +35 -53
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/METADATA +1 -1
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/RECORD +26 -28
- lsst/pipe/base/log_on_close.py +0 -79
- lsst/pipe/base/quantum_graph/formatter.py +0 -101
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.0rc3.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/aggregator/_supervisor.py

@@ -42,7 +42,6 @@ from lsst.utils.usage import get_peak_mem_usage
 from ...graph_walker import GraphWalker
 from ...pipeline_graph import TaskImportMode
 from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
-from .._provenance import ProvenanceQuantumScanData, ProvenanceQuantumScanStatus
 from ._communicators import (
     IngesterCommunicator,
     ScannerCommunicator,
@@ -55,7 +54,7 @@ from ._communicators import (
 from ._config import AggregatorConfig
 from ._ingester import Ingester
 from ._scanner import Scanner
-from ._structs import ScanReport
+from ._structs import ScanReport, ScanStatus, WriteRequest
 from ._writer import Writer
 
 
@@ -127,22 +126,18 @@ class Supervisor:
             Information about the scan.
         """
         match scan_report.status:
-            case …
+            case ScanStatus.SUCCESSFUL | ScanStatus.INIT:
                 self.comms.log.debug("Scan complete for %s: quantum succeeded.", scan_report.quantum_id)
                 self.walker.finish(scan_report.quantum_id)
-            case …
+            case ScanStatus.FAILED:
                 self.comms.log.debug("Scan complete for %s: quantum failed.", scan_report.quantum_id)
                 blocked_quanta = self.walker.fail(scan_report.quantum_id)
                 for blocked_quantum_id in blocked_quanta:
                     if self.comms.config.output_path is not None:
-                        self.comms.request_write(
-                            ProvenanceQuantumScanData(
-                                blocked_quantum_id, status=ProvenanceQuantumScanStatus.BLOCKED
-                            )
-                        )
+                        self.comms.request_write(WriteRequest(blocked_quantum_id, status=ScanStatus.BLOCKED))
                 self.comms.progress.scans.update(1)
                 self.comms.progress.quantum_ingests.update(len(blocked_quanta))
-            case …
+            case ScanStatus.ABANDONED:
                 self.comms.log.debug("Abandoning scan for %s: quantum has not succeeded (yet).")
                 self.walker.fail(scan_report.quantum_id)
                 self.n_abandoned += 1
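
This hunk is the consumer side of a refactor visible throughout the diff: the writer-facing provenance types (`ProvenanceQuantumScanData`, `ProvenanceQuantumScanStatus`) are replaced by the aggregator's own `ScanStatus` enum and `WriteRequest` struct from `_structs`. Below is a minimal, self-contained sketch of the resulting dispatch pattern; the `ScanStatus` member names and the `WriteRequest(quantum_id, status=...)` call shape come from the diff, while the enum values, the `Walker` protocol, and the `handle_scan` helper are illustrative stand-ins, not the package's definitions.

```python
import dataclasses
import enum
import uuid
from typing import Callable, Iterable, Protocol


class ScanStatus(enum.Enum):
    """Scan outcomes named in the diff; the string values are placeholders."""

    SUCCESSFUL = "successful"
    INIT = "init"
    FAILED = "failed"
    BLOCKED = "blocked"
    ABANDONED = "abandoned"


@dataclasses.dataclass
class WriteRequest:
    """Simplified payload handed to the writer for one quantum."""

    quantum_id: uuid.UUID
    status: ScanStatus


class Walker(Protocol):
    """Stand-in for the GraphWalker interface the Supervisor uses."""

    def finish(self, quantum_id: uuid.UUID) -> None: ...
    def fail(self, quantum_id: uuid.UUID) -> Iterable[uuid.UUID]: ...


def handle_scan(
    quantum_id: uuid.UUID,
    status: ScanStatus,
    walker: Walker,
    request_write: Callable[[WriteRequest], None],
) -> None:
    # Mirrors the Supervisor's match statement in the hunk above.
    match status:
        case ScanStatus.SUCCESSFUL | ScanStatus.INIT:
            walker.finish(quantum_id)
        case ScanStatus.FAILED:
            # Everything downstream of a failed quantum can never run, so
            # each blocked quantum gets a lightweight BLOCKED write request
            # instead of a real scan result.
            for blocked_id in walker.fail(quantum_id):
                request_write(WriteRequest(blocked_id, status=ScanStatus.BLOCKED))
        case ScanStatus.ABANDONED:
            walker.fail(quantum_id)
```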
lsst/pipe/base/quantum_graph/aggregator/_writer.py

@@ -30,14 +30,130 @@ from __future__ import annotations
 __all__ = ("Writer",)
 
 import dataclasses
+import itertools
+import logging
+import operator
+import uuid
+from typing import TypeVar
 
+import networkx
 import zstandard
 
-from …
+from lsst.utils.packages import Packages
+
+from ... import automatic_connection_constants as acc
 from ...pipeline_graph import TaskImportMode
-from ..…
-from ..…
+from .._common import BaseQuantumGraphWriter
+from .._multiblock import Compressor, MultiblockWriter
+from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+from .._provenance import (
+    DATASET_ADDRESS_INDEX,
+    DATASET_MB_NAME,
+    LOG_ADDRESS_INDEX,
+    LOG_MB_NAME,
+    METADATA_ADDRESS_INDEX,
+    METADATA_MB_NAME,
+    QUANTUM_ADDRESS_INDEX,
+    QUANTUM_MB_NAME,
+    ProvenanceDatasetModel,
+    ProvenanceInitQuantaModel,
+    ProvenanceInitQuantumModel,
+    ProvenanceQuantumModel,
+)
 from ._communicators import WriterCommunicator
+from ._structs import WriteRequest
+
+
+@dataclasses.dataclass
+class _DataWriters:
+    """A struct of low-level writer objects for the main components of a
+    provenance quantum graph.
+
+    Parameters
+    ----------
+    comms : `WriterCommunicator`
+        Communicator helper object for the writer.
+    predicted : `.PredictedQuantumGraphComponents`
+        Components of the predicted graph.
+    indices : `dict` [ `uuid.UUID`, `int` ]
+        Mapping from UUID to internal integer ID, including both quanta and
+        datasets.
+    compressor : `Compressor`
+        Object that can compress `bytes`.
+    cdict_data : `bytes` or `None`, optional
+        Bytes representation of the compression dictionary used by the
+        compressor.
+    """
+
+    def __init__(
+        self,
+        comms: WriterCommunicator,
+        predicted: PredictedQuantumGraphComponents,
+        indices: dict[uuid.UUID, int],
+        compressor: Compressor,
+        cdict_data: bytes | None = None,
+    ) -> None:
+        assert comms.config.output_path is not None
+        header = predicted.header.model_copy()
+        header.graph_type = "provenance"
+        self.graph = comms.enter(
+            BaseQuantumGraphWriter.open(
+                comms.config.output_path,
+                header,
+                predicted.pipeline_graph,
+                indices,
+                address_filename="nodes",
+                compressor=compressor,
+                cdict_data=cdict_data,
+            ),
+            on_close="Finishing writing provenance quantum graph.",
+            is_progress_log=True,
+        )
+        self.graph.address_writer.addresses = [{}, {}, {}, {}]
+        self.logs = comms.enter(
+            MultiblockWriter.open_in_zip(self.graph.zf, LOG_MB_NAME, header.int_size, use_tempfile=True),
+            on_close="Copying logs into zip archive.",
+            is_progress_log=True,
+        )
+        self.graph.address_writer.addresses[LOG_ADDRESS_INDEX] = self.logs.addresses
+        self.metadata = comms.enter(
+            MultiblockWriter.open_in_zip(self.graph.zf, METADATA_MB_NAME, header.int_size, use_tempfile=True),
+            on_close="Copying metadata into zip archive.",
+            is_progress_log=True,
+        )
+        self.graph.address_writer.addresses[METADATA_ADDRESS_INDEX] = self.metadata.addresses
+        self.datasets = comms.enter(
+            MultiblockWriter.open_in_zip(self.graph.zf, DATASET_MB_NAME, header.int_size, use_tempfile=True),
+            on_close="Copying dataset provenance into zip archive.",
+            is_progress_log=True,
+        )
+        self.graph.address_writer.addresses[DATASET_ADDRESS_INDEX] = self.datasets.addresses
+        self.quanta = comms.enter(
+            MultiblockWriter.open_in_zip(self.graph.zf, QUANTUM_MB_NAME, header.int_size, use_tempfile=True),
+            on_close="Copying quantum provenance into zip archive.",
+            is_progress_log=True,
+        )
+        self.graph.address_writer.addresses[QUANTUM_ADDRESS_INDEX] = self.quanta.addresses
+
+    graph: BaseQuantumGraphWriter
+    """The parent graph writer."""
+
+    datasets: MultiblockWriter
+    """A writer for dataset provenance."""
+
+    quanta: MultiblockWriter
+    """A writer for quantum provenance."""
+
+    metadata: MultiblockWriter
+    """A writer for metadata content."""
+
+    logs: MultiblockWriter
+    """A writer for log content."""
+
+    @property
+    def compressor(self) -> Compressor:
+        """Object that should be used to compress all JSON blocks."""
+        return self.graph.compressor
 
 
 @dataclasses.dataclass
@@ -55,7 +171,40 @@ class Writer:
     predicted: PredictedQuantumGraphComponents = dataclasses.field(init=False)
     """Components of the predicted quantum graph."""
 
-…
+    existing_init_outputs: dict[uuid.UUID, set[uuid.UUID]] = dataclasses.field(default_factory=dict)
+    """Mapping that tracks which init-outputs exist.
+
+    This mapping is updated as scanners inform the writer about init-output
+    existence, since we want to write that provenance information out only at
+    the end.
+    """
+
+    indices: dict[uuid.UUID, int] = dataclasses.field(default_factory=dict)
+    """Mapping from UUID to internal integer ID, including both quanta and
+    datasets.
+
+    This is fully initialized at construction.
+    """
+
+    output_dataset_ids: set[uuid.UUID] = dataclasses.field(default_factory=set)
+    """The IDs of all datasets that are produced by this graph.
+
+    This is fully initialized at construction.
+    """
+
+    overall_inputs: dict[uuid.UUID, PredictedDatasetModel] = dataclasses.field(default_factory=dict)
+    """All datasets that are not produced by any quantum in this graph."""
+
+    xgraph: networkx.DiGraph = dataclasses.field(default_factory=networkx.DiGraph)
+    """A bipartite NetworkX graph linking datasets to quanta and quanta to
+    datasets.
+
+    This is fully initialized at construction. There are no node or edge
+    attributes in this graph; we only need it to store adjacency information
+    with datasets as well as with quanta.
+    """
+
+    pending_compression_training: list[WriteRequest] = dataclasses.field(default_factory=list)
     """Unprocessed quantum scans that are being accumulated in order to
     build a compression dictionary.
     """
@@ -71,6 +220,58 @@ class Writer:
         self.comms.check_for_cancel()
         reader.read_quantum_datasets()
         self.predicted = reader.components
+        for predicted_init_quantum in self.predicted.init_quanta.root:
+            self.existing_init_outputs[predicted_init_quantum.quantum_id] = set()
+        self.comms.check_for_cancel()
+        self.comms.log.info("Generating integer indexes and identifying outputs.")
+        self._populate_indices_and_outputs()
+        self.comms.check_for_cancel()
+        self._populate_xgraph_and_inputs()
+        self.comms.check_for_cancel()
+        self.comms.log_progress(
+            # We add one here for 'packages', which we do ingest but don't
+            # record provenance for.
+            logging.INFO,
+            f"Graph has {len(self.output_dataset_ids) + 1} predicted output dataset(s).",
+        )
+
+    def _populate_indices_and_outputs(self) -> None:
+        all_uuids = set(self.predicted.quantum_datasets.keys())
+        for quantum in self.comms.periodically_check_for_cancel(
+            itertools.chain(
+                self.predicted.init_quanta.root,
+                self.predicted.quantum_datasets.values(),
+            )
+        ):
+            if not quantum.task_label:
+                # Skip the 'packages' producer quantum.
+                continue
+            all_uuids.update(quantum.iter_input_dataset_ids())
+            self.output_dataset_ids.update(quantum.iter_output_dataset_ids())
+        all_uuids.update(self.output_dataset_ids)
+        self.indices = {
+            node_id: node_index
+            for node_index, node_id in self.comms.periodically_check_for_cancel(
+                enumerate(sorted(all_uuids, key=operator.attrgetter("int")))
+            )
+        }
+
+    def _populate_xgraph_and_inputs(self) -> None:
+        for predicted_quantum in self.comms.periodically_check_for_cancel(
+            itertools.chain(
+                self.predicted.init_quanta.root,
+                self.predicted.quantum_datasets.values(),
+            )
+        ):
+            if not predicted_quantum.task_label:
+                # Skip the 'packages' producer quantum.
+                continue
+            for predicted_input in itertools.chain.from_iterable(predicted_quantum.inputs.values()):
+                self.xgraph.add_edge(predicted_input.dataset_id, predicted_quantum.quantum_id)
+                if predicted_input.dataset_id not in self.output_dataset_ids:
+                    self.overall_inputs.setdefault(predicted_input.dataset_id, predicted_input)
+            for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
+                self.xgraph.add_edge(predicted_quantum.quantum_id, predicted_output.dataset_id)
 
     @staticmethod
     def run(predicted_path: str, comms: WriterCommunicator) -> None:
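
`_populate_xgraph_and_inputs` uses a plain `networkx.DiGraph` purely as adjacency storage: input edges point dataset → quantum, output edges point quantum → dataset, so `successors(dataset_id)` yields a dataset's consumers, and any dataset that only ever appears as an input is an overall input. A toy illustration of that bipartite layout follows; the UUIDs and names are made up, and only the `networkx` calls match the diff.

```python
import uuid

import networkx

xgraph = networkx.DiGraph()

raw_input, calibrated = uuid.uuid4(), uuid.uuid4()
calibrate_quantum = uuid.uuid4()

# Input edge (dataset -> quantum) and output edge (quantum -> dataset),
# the two add_edge patterns used in the hunk above.
xgraph.add_edge(raw_input, calibrate_quantum)
xgraph.add_edge(calibrate_quantum, calibrated)

# A dataset's consumers are its successors; this is what the writer later
# passes as `consumers=` when building ProvenanceDatasetModel instances.
assert list(xgraph.successors(raw_input)) == [calibrate_quantum]

# A dataset with no incoming edge was produced by no quantum in the graph,
# i.e. it is an overall input.
assert xgraph.in_degree(raw_input) == 0
assert xgraph.in_degree(calibrated) == 1
```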
@@ -94,59 +295,52 @@ class Writer:
 
     def loop(self) -> None:
         """Run the main loop for the writer."""
-…
+        data_writers: _DataWriters | None = None
         if not self.comms.config.zstd_dict_size:
-…
+            data_writers = self.make_data_writers()
         self.comms.log.info("Polling for write requests from scanners.")
         for request in self.comms.poll():
-            if …
+            if data_writers is None:
                 self.pending_compression_training.append(request)
                 if len(self.pending_compression_training) >= self.comms.config.zstd_dict_n_inputs:
-…
+                    data_writers = self.make_data_writers()
             else:
-…
-…
-…
-…
-                self.comms.log.info("Writing init outputs.")
-                qg_writer.write_init_outputs(assume_existence=False)
+                self.process_request(request, data_writers)
+        if data_writers is None:
+            data_writers = self.make_data_writers()
+        self.write_init_outputs(data_writers)
 
-    def …
+    def make_data_writers(self) -> _DataWriters:
         """Make a compression dictionary, open the low-level writers, and
         write any accumulated scans that were needed to make the compression
         dictionary.
 
         Returns
         -------
-…
+        data_writers : `_DataWriters`
             Low-level writers struct.
         """
         cdict = self.make_compression_dictionary()
         self.comms.send_compression_dict(cdict.as_bytes())
         assert self.comms.config.output_path is not None
-        self.comms.log.info("Opening output files…
-…
-        self.comms…
-…
-…
-…
-            zstd_level=self.comms.config.zstd_level,
+        self.comms.log.info("Opening output files.")
+        data_writers = _DataWriters(
+            self.comms,
+            self.predicted,
+            self.indices,
+            compressor=zstandard.ZstdCompressor(self.comms.config.zstd_level, cdict),
             cdict_data=cdict.as_bytes(),
-            loop_wrapper=self.comms.periodically_check_for_cancel,
-            log=self.comms.log,
         )
         self.comms.check_for_cancel()
         self.comms.log.info("Compressing and writing queued scan requests.")
         for request in self.pending_compression_training:
-…
-            self.comms.report_write()
+            self.process_request(request, data_writers)
         del self.pending_compression_training
         self.comms.check_for_cancel()
-        self.…
-…
-        qg_writer.write_packages()
+        self.write_overall_inputs(data_writers)
+        self.write_packages(data_writers)
         self.comms.log.info("Returning to write request loop.")
-        return
+        return data_writers
 
     def make_compression_dictionary(self) -> zstandard.ZstdCompressionDict:
         """Make the compression dictionary.
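
The two-phase `loop` above exists because zstandard dictionary compression needs training samples before anything can be written: the writer buffers incoming requests in `pending_compression_training` until `zstd_dict_n_inputs` have arrived, trains a dictionary, opens the output files, and then flushes the backlog. A rough sketch of that train-then-compress flow with the `zstandard` API is below; the sample payloads and size budgets are invented for the example.

```python
import zstandard

# Stand-ins for the JSON payloads of the buffered write requests.
samples = [
    f'{{"quantum_id": "{i:032x}", "status": "successful", "outputs": {list(range(20))}}}'.encode()
    for i in range(2000)
]

# Train a shared dictionary on the buffered payloads (16 KiB budget here).
cdict = zstandard.train_dictionary(16 * 1024, samples)

# Every later block is compressed with the same dictionary, as in
# make_data_writers(); the dictionary bytes (cdict.as_bytes()) must be
# stored alongside the blocks so readers can decompress them.
compressor = zstandard.ZstdCompressor(level=10, dict_data=cdict)
block = compressor.compress(samples[0])

decompressor = zstandard.ZstdDecompressor(dict_data=cdict)
assert decompressor.decompress(block) == samples[0]
```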
@@ -182,3 +376,126 @@ class Writer:
             training_inputs.append(write_request.metadata)
             training_inputs.append(write_request.logs)
         return zstandard.train_dictionary(self.comms.config.zstd_dict_size, training_inputs)
+
+    def write_init_outputs(self, data_writers: _DataWriters) -> None:
+        """Write provenance for init-output datasets and init-quanta.
+
+        Parameters
+        ----------
+        data_writers : `_DataWriters`
+            Low-level writers struct.
+        """
+        self.comms.log.info("Writing init outputs.")
+        init_quanta = ProvenanceInitQuantaModel()
+        for predicted_init_quantum in self.predicted.init_quanta.root:
+            if not predicted_init_quantum.task_label:
+                # Skip the 'packages' producer quantum.
+                continue
+            existing_outputs = self.existing_init_outputs[predicted_init_quantum.quantum_id]
+            for predicted_output in itertools.chain.from_iterable(predicted_init_quantum.outputs.values()):
+                provenance_output = ProvenanceDatasetModel.from_predicted(
+                    predicted_output,
+                    producer=predicted_init_quantum.quantum_id,
+                    consumers=self.xgraph.successors(predicted_output.dataset_id),
+                )
+                provenance_output.produced = predicted_output.dataset_id in existing_outputs
+                data_writers.datasets.write_model(
+                    provenance_output.dataset_id, provenance_output, data_writers.compressor
+                )
+            init_quanta.root.append(ProvenanceInitQuantumModel.from_predicted(predicted_init_quantum))
+        data_writers.graph.write_single_model("init_quanta", init_quanta)
+
+    def write_overall_inputs(self, data_writers: _DataWriters) -> None:
+        """Write provenance for overall-input datasets.
+
+        Parameters
+        ----------
+        data_writers : `_DataWriters`
+            Low-level writers struct.
+        """
+        self.comms.log.info("Writing overall inputs.")
+        for predicted_input in self.comms.periodically_check_for_cancel(self.overall_inputs.values()):
+            if predicted_input.dataset_id not in data_writers.datasets.addresses:
+                data_writers.datasets.write_model(
+                    predicted_input.dataset_id,
+                    ProvenanceDatasetModel.from_predicted(
+                        predicted_input,
+                        producer=None,
+                        consumers=self.xgraph.successors(predicted_input.dataset_id),
+                    ),
+                    data_writers.compressor,
+                )
+        del self.overall_inputs
+
+    @staticmethod
+    def write_packages(data_writers: _DataWriters) -> None:
+        """Write package version information to the provenance graph.
+
+        Parameters
+        ----------
+        data_writers : `_DataWriters`
+            Low-level writers struct.
+        """
+        packages = Packages.fromSystem(include_all=True)
+        data = packages.toBytes("json")
+        data_writers.graph.write_single_block("packages", data)
+
+    def process_request(self, request: WriteRequest, data_writers: _DataWriters) -> None:
+        """Process a `WriteRequest` into `_ScanData`.
+
+        Parameters
+        ----------
+        request : `WriteRequest`
+            Result of a quantum scan.
+        data_writers : `_DataWriters`
+            Low-level writers struct.
+        """
+        if (existing_init_outputs := self.existing_init_outputs.get(request.quantum_id)) is not None:
+            self.comms.log.debug("Handling init-output scan for %s.", request.quantum_id)
+            existing_init_outputs.update(request.existing_outputs)
+            self.comms.report_write()
+            return
+        self.comms.log.debug("Handling quantum scan for %s.", request.quantum_id)
+        predicted_quantum = self.predicted.quantum_datasets[request.quantum_id]
+        outputs: dict[uuid.UUID, bytes] = {}
+        for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
+            provenance_output = ProvenanceDatasetModel.from_predicted(
+                predicted_output,
+                producer=predicted_quantum.quantum_id,
+                consumers=self.xgraph.successors(predicted_output.dataset_id),
+            )
+            provenance_output.produced = provenance_output.dataset_id in request.existing_outputs
+            outputs[provenance_output.dataset_id] = data_writers.compressor.compress(
+                provenance_output.model_dump_json().encode()
+            )
+        if not request.quantum:
+            request.quantum = (
+                ProvenanceQuantumModel.from_predicted(predicted_quantum).model_dump_json().encode()
+            )
+            if request.is_compressed:
+                request.quantum = data_writers.compressor.compress(request.quantum)
+        if not request.is_compressed:
+            request.quantum = data_writers.compressor.compress(request.quantum)
+            if request.metadata:
+                request.metadata = data_writers.compressor.compress(request.metadata)
+            if request.logs:
+                request.logs = data_writers.compressor.compress(request.logs)
+        self.comms.log.debug("Writing quantum %s.", request.quantum_id)
+        data_writers.quanta.write_bytes(request.quantum_id, request.quantum)
+        for dataset_id, dataset_data in outputs.items():
+            data_writers.datasets.write_bytes(dataset_id, dataset_data)
+        if request.metadata:
+            (metadata_output,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
+            address = data_writers.metadata.write_bytes(request.quantum_id, request.metadata)
+            data_writers.metadata.addresses[metadata_output.dataset_id] = address
+        if request.logs:
+            (log_output,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
+            address = data_writers.logs.write_bytes(request.quantum_id, request.logs)
+            data_writers.logs.addresses[log_output.dataset_id] = address
+        # We shouldn't need this predicted quantum anymore; delete it in the
+        # hopes that'll free up some memory.
+        del self.predicted.quantum_datasets[request.quantum_id]
+        self.comms.report_write()
+
+
+_T = TypeVar("_T")
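
Note how `process_request` stores each metadata and log block twice in the address map: `write_bytes` records the block under the quantum's UUID and returns its address, which is then aliased under the corresponding output dataset's UUID, so readers can locate the block from either ID. A toy version of that bookkeeping is sketched below; `MiniBlockWriter` is a hypothetical stand-in, not the package's `MultiblockWriter`.

```python
import io
import uuid


class MiniBlockWriter:
    """Toy append-only block store mapping UUID keys to (offset, size)."""

    def __init__(self) -> None:
        self._buffer = io.BytesIO()
        self.addresses: dict[uuid.UUID, tuple[int, int]] = {}

    def write_bytes(self, key: uuid.UUID, data: bytes) -> tuple[int, int]:
        address = (self._buffer.tell(), len(data))
        self._buffer.write(data)
        self.addresses[key] = address
        return address


logs = MiniBlockWriter()
quantum_id, log_dataset_id = uuid.uuid4(), uuid.uuid4()

# Write the compressed log block once, keyed by the quantum ID...
address = logs.write_bytes(quantum_id, b"compressed log records")
# ...then alias the same address under the log dataset's UUID, as
# process_request does for both metadata and log outputs.
logs.addresses[log_dataset_id] = address
assert logs.addresses[quantum_id] == logs.addresses[log_dataset_id]
```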
lsst/pipe/base/quantum_graph_executor.py

@@ -27,113 +27,23 @@
 
 from __future__ import annotations
 
-__all__ = ["…
+__all__ = ["QuantumExecutor", "QuantumGraphExecutor"]
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
-
-from lsst.daf.butler import Quantum
+from typing import TYPE_CHECKING
 
 from .quantum_reports import QuantumReport, Report
 
 if TYPE_CHECKING:
     import uuid
 
-    from lsst.daf.butler…
+    from lsst.daf.butler import Quantum
 
-    from ._task_metadata import TaskMetadata
     from .graph import QuantumGraph
     from .pipeline_graph import TaskNode
     from .quantum_graph import PredictedQuantumGraph
 
 
-class QuantumExecutionResult(tuple[Quantum, QuantumReport | None]):
-    """A result struct that captures information about a single quantum's
-    execution.
-
-    Parameters
-    ----------
-    quantum : `lsst.daf.butler.Quantum`
-        Quantum that was executed.
-    report : `.quantum_reports.QuantumReport`
-        Report with basic information about the execution.
-    task_metadata : `TaskMetadata`, optional
-        Metadata saved by the task and executor during execution.
-    skipped_existing : `bool`, optional
-        If `True`, this quantum was not executed because it appeared to have
-        already been executed successfully.
-    adjusted_no_work : `bool`, optional
-        If `True`, this quantum was not executed because the
-        `PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
-
-    Notes
-    -----
-    For backwards compatibility, this class is a two-element tuple that allows
-    the ``quantum`` and ``report`` attributes to be unpacked. Additional
-    regular attributes may be added by executors (but the tuple must remain
-    only two elements to enable the current unpacking interface).
-    """
-
-    def __new__(
-        cls,
-        quantum: Quantum,
-        report: QuantumReport | None,
-        *,
-        task_metadata: TaskMetadata | None = None,
-        skipped_existing: bool | None = None,
-        adjusted_no_work: bool | None = None,
-    ) -> Self:
-        return super().__new__(cls, (quantum, report))
-
-    # We need to define both __init__ and __new__ because tuple inheritance
-    # requires __new__ and numpydoc requires __init__.
-
-    def __init__(
-        self,
-        quantum: Quantum,
-        report: QuantumReport | None,
-        *,
-        task_metadata: TaskMetadata | None = None,
-        skipped_existing: bool | None = None,
-        adjusted_no_work: bool | None = None,
-    ):
-        self._task_metadata = task_metadata
-        self._skipped_existing = skipped_existing
-        self._adjusted_no_work = adjusted_no_work
-
-    @property
-    def quantum(self) -> Quantum:
-        """The quantum actually executed."""
-        return self[0]
-
-    @property
-    def report(self) -> QuantumReport | None:
-        """Structure describing the status of the execution of a quantum.
-
-        This is `None` if the implementation does not support this feature.
-        """
-        return self[1]
-
-    @property
-    def task_metadata(self) -> TaskMetadata | None:
-        """Metadata saved by the task and executor during execution."""
-        return self._task_metadata
-
-    @property
-    def skipped_existing(self) -> bool | None:
-        """If `True`, this quantum was not executed because it appeared to have
-        already been executed successfully.
-        """
-        return self._skipped_existing
-
-    @property
-    def adjusted_no_work(self) -> bool | None:
-        """If `True`, this quantum was not executed because the
-        `PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
-        """
-        return self._adjusted_no_work
-
-
 class QuantumExecutor(ABC):
     """Class which abstracts execution of a single Quantum.
 
@@ -145,14 +55,8 @@ class QuantumExecutor(ABC):
 
     @abstractmethod
     def execute(
-        self,
-…
-        /,
-        quantum: Quantum,
-        quantum_id: uuid.UUID | None = None,
-        *,
-        log_records: ButlerLogRecords | None = None,
-    ) -> QuantumExecutionResult:
+        self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
+    ) -> tuple[Quantum, QuantumReport | None]:
         """Execute single quantum.
 
         Parameters
@@ -163,18 +67,15 @@ class QuantumExecutor(ABC):
             Quantum for this execution.
         quantum_id : `uuid.UUID` or `None`, optional
             The ID of the quantum to be executed.
-        log_records : `lsst.daf.butler.ButlerLogRecords`, optional
-            Container that should be used to store logs in memory before
-            writing them to the butler. This disables streaming log (since
-            we'd have to store them in memory anyway), but it permits the
-            caller to prepend logs to be stored in the butler and allows task
-            logs to be inspected by the caller after execution is complete.
 
         Returns
         -------
-…
-…
-…
+        quantum : `~lsst.daf.butler.Quantum`
+            The quantum actually executed.
+        report : `~.quantum_reports.QuantumReport`
+            Structure describing the status of the execution of a quantum.
+            `None` is returned if implementation does not support this
+            feature.
 
         Notes
         -----
@@ -192,9 +93,7 @@ class QuantumGraphExecutor(ABC):
     """
 
     @abstractmethod
-    def execute(
-        self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
-    ) -> None:
+    def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
         """Execute whole graph.
 
         Implementation of this method depends on particular execution model
@@ -204,10 +103,8 @@ class QuantumGraphExecutor(ABC):
 
         Parameters
         ----------
-        graph : `.QuantumGraph`
+        graph : `.QuantumGraph`
             Execution graph.
-        provenance_graph_file : `str`, optional
-            A filename to write provenance to.
         """
         raise NotImplementedError()
 
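
The executor changes above remove the `QuantumExecutionResult` tuple subclass, the `log_records` argument, and `provenance_graph_file`, restoring the simpler interface: `QuantumExecutor.execute` takes `(task_node, quantum, quantum_id)` and returns a plain `(quantum, report)` tuple, with provenance now handled by the aggregator machinery shown earlier in this diff. A minimal subclass conforming to the new abstract signature is sketched below; the no-op body is illustrative only, not code from the package.

```python
import uuid

from lsst.daf.butler import Quantum
from lsst.pipe.base.pipeline_graph import TaskNode
from lsst.pipe.base.quantum_graph_executor import QuantumExecutor
from lsst.pipe.base.quantum_reports import QuantumReport


class NoOpQuantumExecutor(QuantumExecutor):
    """Executor stub matching the simplified abstract signature."""

    def execute(
        self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
    ) -> tuple[Quantum, QuantumReport | None]:
        # A real implementation would run the task here; returning None for
        # the report is allowed when reporting is unsupported.
        return quantum, None
```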