lsst-pipe-base 29.2025.4500__py3-none-any.whl → 29.2025.4700__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +156 -11
- lsst/pipe/base/log_capture.py +98 -7
- lsst/pipe/base/pipeline_graph/expressions.py +3 -3
- lsst/pipe/base/quantum_graph/_common.py +21 -1
- lsst/pipe/base/quantum_graph/_multiblock.py +14 -39
- lsst/pipe/base/quantum_graph/_predicted.py +90 -90
- lsst/pipe/base/quantum_graph/_provenance.py +345 -200
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +19 -19
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +201 -72
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +45 -35
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +15 -17
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +57 -149
- lsst/pipe/base/quantum_graph_builder.py +0 -1
- lsst/pipe/base/quantum_provenance_graph.py +2 -44
- lsst/pipe/base/single_quantum_executor.py +43 -9
- lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/RECORD +29 -29
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/zip-safe +0 -0
|
@@ -39,10 +39,10 @@ from lsst.daf.butler import ButlerLogRecords, DatasetRef, QuantumBackedButler
|
|
|
39
39
|
from lsst.utils.iteration import ensure_iterable
|
|
40
40
|
|
|
41
41
|
from ... import automatic_connection_constants as acc
|
|
42
|
-
from ..._status import QuantumSuccessCaveats
|
|
42
|
+
from ..._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
|
|
43
43
|
from ..._task_metadata import TaskMetadata
|
|
44
|
+
from ...log_capture import _ExecutionLogRecordsExtra
|
|
44
45
|
from ...pipeline_graph import PipelineGraph, TaskImportMode
|
|
45
|
-
from ...quantum_provenance_graph import ExceptionInfo
|
|
46
46
|
from ...resource_usage import QuantumResourceUsage
|
|
47
47
|
from .._multiblock import Compressor
|
|
48
48
|
from .._predicted import (
|
|
@@ -50,8 +50,9 @@ from .._predicted import (
|
|
|
50
50
|
PredictedQuantumDatasetsModel,
|
|
51
51
|
PredictedQuantumGraphReader,
|
|
52
52
|
)
|
|
53
|
+
from .._provenance import ProvenanceInitQuantumModel, ProvenanceQuantumAttemptModel, ProvenanceQuantumModel
|
|
53
54
|
from ._communicators import ScannerCommunicator
|
|
54
|
-
from ._structs import IngestRequest, ScanReport,
|
|
55
|
+
from ._structs import IngestRequest, InProgressScan, ScanReport, ScanStatus, WriteRequest
|
|
55
56
|
|
|
56
57
|
|
|
57
58
|
@dataclasses.dataclass
|
|
@@ -179,12 +180,12 @@ class Scanner:
|
|
|
179
180
|
Returns
|
|
180
181
|
-------
|
|
181
182
|
exists : `bool`
|
|
182
|
-
Whether the dataset exists
|
|
183
|
+
Whether the dataset exists.
|
|
183
184
|
"""
|
|
184
185
|
ref = self.reader.components.make_dataset_ref(predicted)
|
|
185
186
|
return self.qbb.stored(ref)
|
|
186
187
|
|
|
187
|
-
def scan_quantum(self, quantum_id: uuid.UUID) ->
|
|
188
|
+
def scan_quantum(self, quantum_id: uuid.UUID) -> InProgressScan:
|
|
188
189
|
"""Scan for a quantum's completion and error status, and its output
|
|
189
190
|
datasets' existence.
|
|
190
191
|
|
|
@@ -195,11 +196,11 @@ class Scanner:
|
|
|
195
196
|
|
|
196
197
|
Returns
|
|
197
198
|
-------
|
|
198
|
-
result : `
|
|
199
|
+
result : `InProgressScan`
|
|
199
200
|
Scan result struct.
|
|
200
201
|
"""
|
|
201
202
|
if (predicted_quantum := self.init_quanta.get(quantum_id)) is not None:
|
|
202
|
-
result =
|
|
203
|
+
result = InProgressScan(predicted_quantum.quantum_id, status=ScanStatus.INIT)
|
|
203
204
|
self.comms.log.debug("Created init scan for %s (%s)", quantum_id, predicted_quantum.task_label)
|
|
204
205
|
else:
|
|
205
206
|
self.reader.read_quantum_datasets([quantum_id])
|
|
@@ -210,48 +211,69 @@ class Scanner:
|
|
|
210
211
|
predicted_quantum.task_label,
|
|
211
212
|
predicted_quantum.data_coordinate,
|
|
212
213
|
)
|
|
213
|
-
result =
|
|
214
|
+
result = InProgressScan(predicted_quantum.quantum_id, ScanStatus.INCOMPLETE)
|
|
214
215
|
del self.reader.components.quantum_datasets[quantum_id]
|
|
215
|
-
|
|
216
|
-
if not self.
|
|
216
|
+
last_attempt = ProvenanceQuantumAttemptModel()
|
|
217
|
+
if not self._read_log(predicted_quantum, result, last_attempt):
|
|
217
218
|
self.comms.log.debug("Abandoning scan for %s; no log dataset.", quantum_id)
|
|
218
|
-
result.status = ScanStatus.ABANDONED
|
|
219
219
|
self.comms.report_scan(ScanReport(result.quantum_id, result.status))
|
|
220
220
|
return result
|
|
221
|
-
|
|
222
|
-
if result.metadata:
|
|
223
|
-
result.status = ScanStatus.SUCCESSFUL
|
|
224
|
-
result.existing_outputs.add(metadata_id)
|
|
225
|
-
elif self.comms.config.assume_complete:
|
|
226
|
-
result.status = ScanStatus.FAILED
|
|
227
|
-
else:
|
|
221
|
+
if not self._read_metadata(predicted_quantum, result, last_attempt):
|
|
228
222
|
# We found the log dataset, but no metadata; this means the
|
|
229
223
|
# quantum failed, but a retry might still happen that could
|
|
230
224
|
# turn it into a success if we can't yet assume the run is
|
|
231
225
|
# complete.
|
|
232
226
|
self.comms.log.debug("Abandoning scan for %s.", quantum_id)
|
|
233
|
-
result.status = ScanStatus.ABANDONED
|
|
234
227
|
self.comms.report_scan(ScanReport(result.quantum_id, result.status))
|
|
235
228
|
return result
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
|
|
239
|
-
if predicted_output.dataset_id not in result.existing_outputs and self.scan_dataset(
|
|
240
|
-
predicted_output
|
|
241
|
-
):
|
|
242
|
-
result.existing_outputs.add(predicted_output.dataset_id)
|
|
243
|
-
to_ingest = self._make_ingest_request(predicted_quantum, result)
|
|
244
|
-
self.comms.report_scan(ScanReport(result.quantum_id, result.status))
|
|
229
|
+
last_attempt.attempt = len(result.attempts)
|
|
230
|
+
result.attempts.append(last_attempt)
|
|
245
231
|
assert result.status is not ScanStatus.INCOMPLETE
|
|
246
232
|
assert result.status is not ScanStatus.ABANDONED
|
|
233
|
+
|
|
234
|
+
if len(result.logs.attempts) < len(result.attempts):
|
|
235
|
+
# Logs were not found for this attempt; must have been a hard error
|
|
236
|
+
# that kept the `finally` block from running or otherwise
|
|
237
|
+
# interrupted the writing of the logs.
|
|
238
|
+
result.logs.attempts.append(None)
|
|
239
|
+
if result.status is ScanStatus.SUCCESSFUL:
|
|
240
|
+
# But we found the metadata! Either that hard error happened
|
|
241
|
+
# at a very unlucky time (in between those two writes), or
|
|
242
|
+
# something even weirder happened.
|
|
243
|
+
result.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
|
|
244
|
+
else:
|
|
245
|
+
result.attempts[-1].status = QuantumAttemptStatus.FAILED
|
|
246
|
+
if len(result.metadata.attempts) < len(result.attempts):
|
|
247
|
+
# Metadata missing usually just means a failure. In any case, the
|
|
248
|
+
# status will already be correct, either because it was set to a
|
|
249
|
+
# failure when we read the logs, or left at UNKNOWN if there were
|
|
250
|
+
# no logs. Note that scanners never process BLOCKED quanta at all.
|
|
251
|
+
result.metadata.attempts.append(None)
|
|
252
|
+
assert len(result.logs.attempts) == len(result.attempts) or len(result.metadata.attempts) == len(
|
|
253
|
+
result.attempts
|
|
254
|
+
), (
|
|
255
|
+
"The only way we can add more than one quantum attempt is by "
|
|
256
|
+
"extracting info stored with the logs, and that always appends "
|
|
257
|
+
"a log attempt and a metadata attempt, so this must be a bug in "
|
|
258
|
+
"the scanner."
|
|
259
|
+
)
|
|
260
|
+
# Scan for output dataset existence, skipping any the metadata reported
|
|
261
|
+
# on as well as the metadata and logs themselves (since we just
|
|
262
|
+
# checked those).
|
|
263
|
+
for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
|
|
264
|
+
if predicted_output.dataset_id not in result.outputs:
|
|
265
|
+
result.outputs[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
|
|
266
|
+
to_ingest = self._make_ingest_request(predicted_quantum, result)
|
|
247
267
|
if self.comms.config.output_path is not None:
|
|
248
|
-
self.
|
|
268
|
+
to_write = self._make_write_request(predicted_quantum, result)
|
|
269
|
+
self.comms.request_write(to_write)
|
|
249
270
|
self.comms.request_ingest(to_ingest)
|
|
271
|
+
self.comms.report_scan(ScanReport(result.quantum_id, result.status))
|
|
250
272
|
self.comms.log.debug("Finished scan for %s.", quantum_id)
|
|
251
273
|
return result
|
|
252
274
|
|
|
253
275
|
def _make_ingest_request(
|
|
254
|
-
self, predicted_quantum: PredictedQuantumDatasetsModel, result:
|
|
276
|
+
self, predicted_quantum: PredictedQuantumDatasetsModel, result: InProgressScan
|
|
255
277
|
) -> IngestRequest:
|
|
256
278
|
"""Make an ingest request from a quantum scan.
|
|
257
279
|
|
|
@@ -259,7 +281,7 @@ class Scanner:
|
|
|
259
281
|
----------
|
|
260
282
|
predicted_quantum : `PredictedQuantumDatasetsModel`
|
|
261
283
|
Information about the predicted quantum.
|
|
262
|
-
result : `
|
|
284
|
+
result : `InProgressScan`
|
|
263
285
|
Result of a quantum scan.
|
|
264
286
|
|
|
265
287
|
Returns
|
|
@@ -272,16 +294,60 @@ class Scanner:
|
|
|
272
294
|
}
|
|
273
295
|
to_ingest_predicted: list[PredictedDatasetModel] = []
|
|
274
296
|
to_ingest_refs: list[DatasetRef] = []
|
|
275
|
-
for dataset_id in result.
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
297
|
+
for dataset_id, was_produced in result.outputs.items():
|
|
298
|
+
if was_produced:
|
|
299
|
+
predicted_output = predicted_outputs_by_id[dataset_id]
|
|
300
|
+
to_ingest_predicted.append(predicted_output)
|
|
301
|
+
to_ingest_refs.append(self.reader.components.make_dataset_ref(predicted_output))
|
|
279
302
|
to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
|
|
280
303
|
return IngestRequest(result.quantum_id, to_ingest_predicted, to_ingest_records)
|
|
281
304
|
|
|
282
|
-
def
|
|
283
|
-
self, predicted_quantum: PredictedQuantumDatasetsModel, result:
|
|
284
|
-
) ->
|
|
305
|
+
def _make_write_request(
|
|
306
|
+
self, predicted_quantum: PredictedQuantumDatasetsModel, result: InProgressScan
|
|
307
|
+
) -> WriteRequest:
|
|
308
|
+
"""Make a write request from a quantum scan.
|
|
309
|
+
|
|
310
|
+
Parameters
|
|
311
|
+
----------
|
|
312
|
+
predicted_quantum : `PredictedQuantumDatasetsModel`
|
|
313
|
+
Information about the predicted quantum.
|
|
314
|
+
result : `InProgressScan`
|
|
315
|
+
Result of a quantum scan.
|
|
316
|
+
|
|
317
|
+
Returns
|
|
318
|
+
-------
|
|
319
|
+
write_request : `WriteRequest`
|
|
320
|
+
A request to be sent to the writer.
|
|
321
|
+
"""
|
|
322
|
+
quantum: ProvenanceInitQuantumModel | ProvenanceQuantumModel
|
|
323
|
+
if result.status is ScanStatus.INIT:
|
|
324
|
+
quantum = ProvenanceInitQuantumModel.from_predicted(predicted_quantum)
|
|
325
|
+
else:
|
|
326
|
+
quantum = ProvenanceQuantumModel.from_predicted(predicted_quantum)
|
|
327
|
+
quantum.attempts = result.attempts
|
|
328
|
+
request = WriteRequest(
|
|
329
|
+
result.quantum_id,
|
|
330
|
+
result.status,
|
|
331
|
+
existing_outputs={
|
|
332
|
+
dataset_id for dataset_id, was_produced in result.outputs.items() if was_produced
|
|
333
|
+
},
|
|
334
|
+
quantum=quantum.model_dump_json().encode(),
|
|
335
|
+
logs=result.logs.model_dump_json().encode() if result.logs.attempts else b"",
|
|
336
|
+
metadata=result.metadata.model_dump_json().encode() if result.metadata.attempts else b"",
|
|
337
|
+
)
|
|
338
|
+
if self.compressor is not None:
|
|
339
|
+
request.quantum = self.compressor.compress(request.quantum)
|
|
340
|
+
request.logs = self.compressor.compress(request.logs) if request.logs else b""
|
|
341
|
+
request.metadata = self.compressor.compress(request.metadata) if request.metadata else b""
|
|
342
|
+
request.is_compressed = True
|
|
343
|
+
return request
|
|
344
|
+
|
|
345
|
+
def _read_metadata(
|
|
346
|
+
self,
|
|
347
|
+
predicted_quantum: PredictedQuantumDatasetsModel,
|
|
348
|
+
result: InProgressScan,
|
|
349
|
+
last_attempt: ProvenanceQuantumAttemptModel,
|
|
350
|
+
) -> bool:
|
|
285
351
|
"""Attempt to read the metadata dataset for a quantum to extract
|
|
286
352
|
provenance information from it.
|
|
287
353
|
|
|
@@ -289,55 +355,68 @@ class Scanner:
|
|
|
289
355
|
----------
|
|
290
356
|
predicted_quantum : `PredictedQuantumDatasetsModel`
|
|
291
357
|
Information about the predicted quantum.
|
|
292
|
-
result : `
|
|
358
|
+
result : `InProgressScan`
|
|
293
359
|
Result object to be modified in-place.
|
|
360
|
+
last_attempt : `ProvenanceQuantumAttemptModel`
|
|
361
|
+
Structure to fill in with information about the last attempt to
|
|
362
|
+
run this quantum.
|
|
294
363
|
|
|
295
364
|
Returns
|
|
296
365
|
-------
|
|
297
|
-
|
|
298
|
-
|
|
366
|
+
complete : `bool`
|
|
367
|
+
Whether the quantum is complete.
|
|
299
368
|
"""
|
|
300
|
-
assert not result.metadata, "We shouldn't be scanning again if we already read the metadata."
|
|
301
369
|
(predicted_dataset,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
|
|
302
370
|
ref = self.reader.components.make_dataset_ref(predicted_dataset)
|
|
303
371
|
try:
|
|
304
372
|
# This assumes QBB metadata writes are atomic, which should be the
|
|
305
373
|
# case. If it's not we'll probably get pydantic validation errors
|
|
306
374
|
# here.
|
|
307
|
-
|
|
375
|
+
metadata: TaskMetadata = self.qbb.get(ref, storageClass="TaskMetadata")
|
|
308
376
|
except FileNotFoundError:
|
|
309
|
-
|
|
310
|
-
|
|
377
|
+
result.outputs[ref.id] = False
|
|
378
|
+
if self.comms.config.assume_complete:
|
|
379
|
+
result.status = ScanStatus.FAILED
|
|
380
|
+
else:
|
|
381
|
+
result.status = ScanStatus.ABANDONED
|
|
382
|
+
return False
|
|
311
383
|
else:
|
|
384
|
+
result.status = ScanStatus.SUCCESSFUL
|
|
385
|
+
result.outputs[ref.id] = True
|
|
386
|
+
last_attempt.status = QuantumAttemptStatus.SUCCESSFUL
|
|
312
387
|
try:
|
|
313
388
|
# Int conversion guards against spurious conversion to
|
|
314
389
|
# float that can apparently sometimes happen in
|
|
315
390
|
# TaskMetadata.
|
|
316
|
-
|
|
391
|
+
last_attempt.caveats = QuantumSuccessCaveats(int(metadata["quantum"]["caveats"]))
|
|
317
392
|
except LookupError:
|
|
318
393
|
pass
|
|
319
394
|
try:
|
|
320
|
-
|
|
321
|
-
|
|
395
|
+
last_attempt.exception = ExceptionInfo._from_metadata(
|
|
396
|
+
metadata[predicted_quantum.task_label]["failure"]
|
|
322
397
|
)
|
|
323
398
|
except LookupError:
|
|
324
399
|
pass
|
|
325
400
|
try:
|
|
326
|
-
|
|
327
|
-
uuid.UUID(id_str)
|
|
328
|
-
}
|
|
401
|
+
for id_str in ensure_iterable(metadata["quantum"].getArray("outputs")):
|
|
402
|
+
result.outputs[uuid.UUID(id_str)]
|
|
329
403
|
except LookupError:
|
|
330
404
|
pass
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
405
|
+
else:
|
|
406
|
+
# If the metadata told us what it wrote, anything not in that
|
|
407
|
+
# list was not written.
|
|
408
|
+
for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
|
|
409
|
+
result.outputs.setdefault(predicted_output.dataset_id, False)
|
|
410
|
+
last_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(metadata)
|
|
411
|
+
result.metadata.attempts.append(metadata)
|
|
412
|
+
return True
|
|
413
|
+
|
|
414
|
+
def _read_log(
|
|
415
|
+
self,
|
|
416
|
+
predicted_quantum: PredictedQuantumDatasetsModel,
|
|
417
|
+
result: InProgressScan,
|
|
418
|
+
last_attempt: ProvenanceQuantumAttemptModel,
|
|
419
|
+
) -> bool:
|
|
341
420
|
"""Attempt to read the log dataset for a quantum to test for the
|
|
342
421
|
quantum's completion (the log is always written last) and aggregate
|
|
343
422
|
the log content in the provenance quantum graph.
|
|
@@ -346,26 +425,76 @@ class Scanner:
|
|
|
346
425
|
----------
|
|
347
426
|
predicted_quantum : `PredictedQuantumDatasetsModel`
|
|
348
427
|
Information about the predicted quantum.
|
|
349
|
-
result : `
|
|
428
|
+
result : `InProgressScan`
|
|
350
429
|
Result object to be modified in-place.
|
|
430
|
+
last_attempt : `ProvenanceQuantumAttemptModel`
|
|
431
|
+
Structure to fill in with information about the last attempt to
|
|
432
|
+
run this quantum.
|
|
351
433
|
|
|
352
434
|
Returns
|
|
353
435
|
-------
|
|
354
|
-
|
|
355
|
-
|
|
436
|
+
complete : `bool`
|
|
437
|
+
Whether the quantum is complete.
|
|
356
438
|
"""
|
|
357
439
|
(predicted_dataset,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
|
|
358
440
|
ref = self.reader.components.make_dataset_ref(predicted_dataset)
|
|
359
441
|
try:
|
|
360
442
|
# This assumes QBB log writes are atomic, which should be the case.
|
|
361
443
|
# If it's not we'll probably get pydantic validation errors here.
|
|
362
|
-
|
|
444
|
+
log_records: ButlerLogRecords = self.qbb.get(ref)
|
|
363
445
|
except FileNotFoundError:
|
|
364
|
-
|
|
365
|
-
|
|
446
|
+
result.outputs[ref.id] = False
|
|
447
|
+
if self.comms.config.assume_complete:
|
|
448
|
+
result.status = ScanStatus.FAILED
|
|
449
|
+
else:
|
|
450
|
+
result.status = ScanStatus.ABANDONED
|
|
451
|
+
return False
|
|
452
|
+
else:
|
|
453
|
+
# Set the attempt's run status to FAILED, since the default is
|
|
454
|
+
# UNKNOWN (i.e. logs *and* metadata are missing) and we now know
|
|
455
|
+
# the logs exist. This will usually get replaced by SUCCESSFUL
|
|
456
|
+
# when we look for metadata next.
|
|
457
|
+
last_attempt.status = QuantumAttemptStatus.FAILED
|
|
458
|
+
result.outputs[ref.id] = True
|
|
459
|
+
if log_records.extra:
|
|
460
|
+
log_extra = _ExecutionLogRecordsExtra.model_validate(log_records.extra)
|
|
461
|
+
self._extract_from_log_extra(log_extra, result, last_attempt=last_attempt)
|
|
462
|
+
result.logs.attempts.append(list(log_records))
|
|
463
|
+
return True
|
|
464
|
+
|
|
465
|
+
def _extract_from_log_extra(
|
|
466
|
+
self,
|
|
467
|
+
log_extra: _ExecutionLogRecordsExtra,
|
|
468
|
+
result: InProgressScan,
|
|
469
|
+
last_attempt: ProvenanceQuantumAttemptModel | None,
|
|
470
|
+
) -> None:
|
|
471
|
+
for previous_attempt_log_extra in log_extra.previous_attempts:
|
|
472
|
+
self._extract_from_log_extra(previous_attempt_log_extra, result, last_attempt=None)
|
|
473
|
+
quantum_attempt: ProvenanceQuantumAttemptModel
|
|
474
|
+
if last_attempt is None:
|
|
475
|
+
# This is not the last attempt, so it must be a failure.
|
|
476
|
+
quantum_attempt = ProvenanceQuantumAttemptModel(
|
|
477
|
+
attempt=len(result.attempts), status=QuantumAttemptStatus.FAILED
|
|
478
|
+
)
|
|
479
|
+
# We also need to get the logs from this extra provenance, since
|
|
480
|
+
# they won't be the main section of the log records.
|
|
481
|
+
result.logs.attempts.append(log_extra.logs)
|
|
482
|
+
# The special last attempt is only appended after we attempt to
|
|
483
|
+
# read metadata later, but we have to append this one now.
|
|
484
|
+
result.attempts.append(quantum_attempt)
|
|
366
485
|
else:
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
486
|
+
assert not log_extra.logs, "Logs for the last attempt should not be stored in the extra JSON."
|
|
487
|
+
quantum_attempt = last_attempt
|
|
488
|
+
if log_extra.exception is not None or log_extra.metadata is not None or last_attempt is None:
|
|
489
|
+
# We won't be getting a separate metadata dataset, so anything we
|
|
490
|
+
# might get from the metadata has to come from this extra
|
|
491
|
+
# provenance in the logs.
|
|
492
|
+
quantum_attempt.exception = log_extra.exception
|
|
493
|
+
if log_extra.metadata is not None:
|
|
494
|
+
quantum_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(log_extra.metadata)
|
|
495
|
+
result.metadata.attempts.append(log_extra.metadata)
|
|
496
|
+
else:
|
|
497
|
+
result.metadata.attempts.append(None)
|
|
498
|
+
# Regardless of whether this is the last attempt or not, we can only
|
|
499
|
+
# get the previous_process_quanta from the log extra.
|
|
500
|
+
quantum_attempt.previous_process_quanta.extend(log_extra.previous_process_quanta)
|
|
@@ -28,10 +28,11 @@
|
|
|
28
28
|
from __future__ import annotations
|
|
29
29
|
|
|
30
30
|
__all__ = (
|
|
31
|
+
"InProgressScan",
|
|
31
32
|
"IngestRequest",
|
|
32
33
|
"ScanReport",
|
|
33
|
-
"ScanResult",
|
|
34
34
|
"ScanStatus",
|
|
35
|
+
"WriteRequest",
|
|
35
36
|
)
|
|
36
37
|
|
|
37
38
|
import dataclasses
|
|
@@ -40,11 +41,13 @@ import uuid
|
|
|
40
41
|
|
|
41
42
|
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
|
|
42
43
|
|
|
43
|
-
from ..._status import QuantumSuccessCaveats
|
|
44
|
-
from ...quantum_provenance_graph import ExceptionInfo, QuantumRunStatus
|
|
45
|
-
from ...resource_usage import QuantumResourceUsage
|
|
46
44
|
from .._common import DatastoreName
|
|
47
45
|
from .._predicted import PredictedDatasetModel
|
|
46
|
+
from .._provenance import (
|
|
47
|
+
ProvenanceLogRecordsModel,
|
|
48
|
+
ProvenanceQuantumAttemptModel,
|
|
49
|
+
ProvenanceTaskMetadataModel,
|
|
50
|
+
)
|
|
48
51
|
|
|
49
52
|
|
|
50
53
|
class ScanStatus(enum.Enum):
|
|
@@ -117,8 +120,8 @@ class IngestRequest:
|
|
|
117
120
|
|
|
118
121
|
|
|
119
122
|
@dataclasses.dataclass
|
|
120
|
-
class
|
|
121
|
-
"""A struct that represents
|
|
123
|
+
class InProgressScan:
|
|
124
|
+
"""A struct that represents a quantum that is being scanned."""
|
|
122
125
|
|
|
123
126
|
quantum_id: uuid.UUID
|
|
124
127
|
"""Unique ID for the quantum."""
|
|
@@ -126,42 +129,49 @@ class ScanResult:
|
|
|
126
129
|
status: ScanStatus
|
|
127
130
|
"""Combined status for the scan and the execution of the quantum."""
|
|
128
131
|
|
|
129
|
-
|
|
130
|
-
"""
|
|
132
|
+
attempts: list[ProvenanceQuantumAttemptModel] = dataclasses.field(default_factory=list)
|
|
133
|
+
"""Provenance information about each attempt to run the quantum."""
|
|
134
|
+
|
|
135
|
+
outputs: dict[uuid.UUID, bool] = dataclasses.field(default_factory=dict)
|
|
136
|
+
"""Unique IDs of the output datasets mapped to whether they were actually
|
|
137
|
+
produced.
|
|
138
|
+
"""
|
|
139
|
+
|
|
140
|
+
metadata: ProvenanceTaskMetadataModel = dataclasses.field(default_factory=ProvenanceTaskMetadataModel)
|
|
141
|
+
"""Task metadata information for each attempt.
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
logs: ProvenanceLogRecordsModel = dataclasses.field(default_factory=ProvenanceLogRecordsModel)
|
|
145
|
+
"""Log records for each attempt.
|
|
146
|
+
"""
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclasses.dataclass
|
|
150
|
+
class WriteRequest:
|
|
151
|
+
"""A struct that represents a request to write provenance for a quantum."""
|
|
131
152
|
|
|
132
|
-
|
|
133
|
-
"""
|
|
153
|
+
quantum_id: uuid.UUID
|
|
154
|
+
"""Unique ID for the quantum."""
|
|
134
155
|
|
|
135
|
-
|
|
136
|
-
"""
|
|
156
|
+
status: ScanStatus
|
|
157
|
+
"""Combined status for the scan and the execution of the quantum."""
|
|
137
158
|
|
|
138
159
|
existing_outputs: set[uuid.UUID] = dataclasses.field(default_factory=set)
|
|
139
160
|
"""Unique IDs of the output datasets that were actually written."""
|
|
140
161
|
|
|
162
|
+
quantum: bytes = b""
|
|
163
|
+
"""Serialized quantum provenance model.
|
|
164
|
+
|
|
165
|
+
This may be empty for quanta that had no attempts.
|
|
166
|
+
"""
|
|
167
|
+
|
|
141
168
|
metadata: bytes = b""
|
|
142
|
-
"""
|
|
169
|
+
"""Serialized task metadata."""
|
|
143
170
|
|
|
144
|
-
|
|
145
|
-
"""
|
|
171
|
+
logs: bytes = b""
|
|
172
|
+
"""Serialized logs."""
|
|
146
173
|
|
|
147
174
|
is_compressed: bool = False
|
|
148
|
-
"""Whether the `metadata
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
"""Translate the scan status and metadata/log presence into a run
|
|
152
|
-
status.
|
|
153
|
-
"""
|
|
154
|
-
if self.status is ScanStatus.BLOCKED:
|
|
155
|
-
return QuantumRunStatus.BLOCKED
|
|
156
|
-
if self.status is ScanStatus.INIT:
|
|
157
|
-
return QuantumRunStatus.SUCCESSFUL
|
|
158
|
-
if self.log:
|
|
159
|
-
if self.metadata:
|
|
160
|
-
return QuantumRunStatus.SUCCESSFUL
|
|
161
|
-
else:
|
|
162
|
-
return QuantumRunStatus.FAILED
|
|
163
|
-
else:
|
|
164
|
-
if self.metadata:
|
|
165
|
-
return QuantumRunStatus.LOGS_MISSING
|
|
166
|
-
else:
|
|
167
|
-
return QuantumRunStatus.METADATA_MISSING
|
|
175
|
+
"""Whether the `quantum`, `metadata`, and `logs` attributes are
|
|
176
|
+
compressed.
|
|
177
|
+
"""
|
|
@@ -30,6 +30,7 @@ from __future__ import annotations
|
|
|
30
30
|
__all__ = ("aggregate_graph",)
|
|
31
31
|
|
|
32
32
|
import dataclasses
|
|
33
|
+
import itertools
|
|
33
34
|
import uuid
|
|
34
35
|
|
|
35
36
|
import astropy.units as u
|
|
@@ -53,7 +54,7 @@ from ._communicators import (
|
|
|
53
54
|
from ._config import AggregatorConfig
|
|
54
55
|
from ._ingester import Ingester
|
|
55
56
|
from ._scanner import Scanner
|
|
56
|
-
from ._structs import ScanReport,
|
|
57
|
+
from ._structs import ScanReport, ScanStatus, WriteRequest
|
|
57
58
|
from ._writer import Writer
|
|
58
59
|
|
|
59
60
|
|
|
@@ -87,19 +88,15 @@ class Supervisor:
|
|
|
87
88
|
reader.read_init_quanta()
|
|
88
89
|
self.predicted = reader.components
|
|
89
90
|
self.comms.progress.log.info("Analyzing predicted graph.")
|
|
90
|
-
|
|
91
|
-
quantum_index: quantum_id for quantum_id, quantum_index in self.predicted.quantum_indices.items()
|
|
92
|
-
}
|
|
93
|
-
xgraph = networkx.DiGraph(
|
|
94
|
-
[(uuid_by_index[a], uuid_by_index[b]) for a, b in self.predicted.thin_graph.edges]
|
|
95
|
-
)
|
|
91
|
+
xgraph = networkx.DiGraph(self.predicted.thin_graph.edges)
|
|
96
92
|
# Make sure all quanta are in the graph, even if they don't have any
|
|
97
93
|
# quantum-only edges.
|
|
98
|
-
|
|
94
|
+
for thin_quantum in itertools.chain.from_iterable(self.predicted.thin_graph.quanta.values()):
|
|
95
|
+
xgraph.add_node(thin_quantum.quantum_id)
|
|
99
96
|
# Add init quanta as nodes without edges, because the scanner should
|
|
100
97
|
# only be run after init outputs are all written and hence we don't
|
|
101
98
|
# care when we process them.
|
|
102
|
-
for init_quantum in self.predicted.init_quanta.root
|
|
99
|
+
for init_quantum in self.predicted.init_quanta.root:
|
|
103
100
|
xgraph.add_node(init_quantum.quantum_id)
|
|
104
101
|
self.walker = GraphWalker(xgraph)
|
|
105
102
|
|
|
@@ -107,9 +104,10 @@ class Supervisor:
|
|
|
107
104
|
"""Scan the outputs of the quantum graph to gather provenance and
|
|
108
105
|
ingest outputs.
|
|
109
106
|
"""
|
|
110
|
-
self.
|
|
111
|
-
|
|
112
|
-
|
|
107
|
+
n_quanta = self.predicted.header.n_quanta + len(self.predicted.init_quanta.root)
|
|
108
|
+
self.comms.progress.scans.total = n_quanta
|
|
109
|
+
self.comms.progress.writes.total = n_quanta
|
|
110
|
+
self.comms.progress.quantum_ingests.total = n_quanta
|
|
113
111
|
ready_set: set[uuid.UUID] = set()
|
|
114
112
|
for ready_quanta in self.walker:
|
|
115
113
|
self.comms.log.debug("Sending %d new quanta to scan queue.", len(ready_quanta))
|
|
@@ -136,9 +134,9 @@ class Supervisor:
|
|
|
136
134
|
blocked_quanta = self.walker.fail(scan_report.quantum_id)
|
|
137
135
|
for blocked_quantum_id in blocked_quanta:
|
|
138
136
|
if self.comms.config.output_path is not None:
|
|
139
|
-
self.comms.request_write(
|
|
140
|
-
self.comms.progress.
|
|
141
|
-
self.comms.progress.
|
|
137
|
+
self.comms.request_write(WriteRequest(blocked_quantum_id, status=ScanStatus.BLOCKED))
|
|
138
|
+
self.comms.progress.scans.update(1)
|
|
139
|
+
self.comms.progress.quantum_ingests.update(len(blocked_quanta))
|
|
142
140
|
case ScanStatus.ABANDONED:
|
|
143
141
|
self.comms.log.debug("Abandoning scan for %s: quantum has not succeeded (yet).")
|
|
144
142
|
self.walker.fail(scan_report.quantum_id)
|
|
@@ -147,7 +145,7 @@ class Supervisor:
|
|
|
147
145
|
raise AssertionError(
|
|
148
146
|
f"Unexpected status {unexpected!r} in scanner loop for {scan_report.quantum_id}."
|
|
149
147
|
)
|
|
150
|
-
self.comms.progress.
|
|
148
|
+
self.comms.progress.scans.update(1)
|
|
151
149
|
|
|
152
150
|
|
|
153
151
|
def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorConfig) -> None:
|
|
@@ -159,7 +157,7 @@ def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorCon
|
|
|
159
157
|
Path to the predicted quantum graph.
|
|
160
158
|
butler_path : `str`
|
|
161
159
|
Path or alias to the central butler repository.
|
|
162
|
-
config: `AggregatorConfig`
|
|
160
|
+
config : `AggregatorConfig`
|
|
163
161
|
Configuration for the aggregator.
|
|
164
162
|
"""
|
|
165
163
|
log = getLogger("lsst.pipe.base.quantum_graph.aggregator")
|