lsst-pipe-base 29.2025.4800__py3-none-any.whl → 30.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +6 -5
- lsst/pipe/base/caching_limited_butler.py +3 -0
- lsst/pipe/base/log_capture.py +39 -79
- lsst/pipe/base/log_on_close.py +79 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/quantum_graph/_common.py +4 -3
- lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
- lsst/pipe/base/quantum_graph/_predicted.py +106 -12
- lsst/pipe/base/quantum_graph/_provenance.py +657 -6
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +18 -50
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +14 -3
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -232
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -113
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +10 -5
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +31 -348
- lsst/pipe/base/quantum_graph/formatter.py +101 -0
- lsst/pipe/base/quantum_graph_builder.py +12 -1
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_graph_skeleton.py +1 -7
- lsst/pipe/base/script/register_instrument.py +4 -4
- lsst/pipe/base/script/retrieve_artifacts_for_quanta.py +5 -6
- lsst/pipe/base/script/transfer_from_graph.py +42 -42
- lsst/pipe/base/script/zip_from_graph.py +7 -8
- lsst/pipe/base/separable_pipeline_executor.py +18 -2
- lsst/pipe/base/simple_pipeline_executor.py +4 -3
- lsst/pipe/base/single_quantum_executor.py +70 -34
- lsst/pipe/base/tests/mocks/_repo.py +44 -16
- lsst/pipe/base/tests/simpleQGraph.py +43 -35
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/RECORD +39 -37
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/zip-safe +0 -0
|
@@ -51,16 +51,17 @@ import time
|
|
|
51
51
|
import uuid
|
|
52
52
|
from abc import ABC, abstractmethod
|
|
53
53
|
from collections.abc import Callable, Iterable, Iterator
|
|
54
|
-
from contextlib import
|
|
54
|
+
from contextlib import ExitStack
|
|
55
55
|
from traceback import format_exception
|
|
56
56
|
from types import TracebackType
|
|
57
57
|
from typing import Any, Literal, Self, TypeAlias, TypeVar, Union
|
|
58
58
|
|
|
59
|
-
from lsst.utils.logging import
|
|
59
|
+
from lsst.utils.logging import LsstLogAdapter
|
|
60
60
|
|
|
61
|
+
from .._provenance import ProvenanceQuantumScanData
|
|
61
62
|
from ._config import AggregatorConfig
|
|
62
63
|
from ._progress import ProgressManager, make_worker_log
|
|
63
|
-
from ._structs import IngestRequest, ScanReport
|
|
64
|
+
from ._structs import IngestRequest, ScanReport
|
|
64
65
|
|
|
65
66
|
_T = TypeVar("_T")
|
|
66
67
|
|
|
@@ -361,9 +362,9 @@ class SupervisorCommunicator:
|
|
|
361
362
|
# scanner and the supervisor send one sentinel when done, and the
|
|
362
363
|
# writer waits for (n_scanners + 1) sentinels to arrive before it
|
|
363
364
|
# starts its shutdown.
|
|
364
|
-
self._write_requests:
|
|
365
|
-
|
|
366
|
-
)
|
|
365
|
+
self._write_requests: (
|
|
366
|
+
Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
|
|
367
|
+
) = context.make_queue() if config.output_path is not None else None
|
|
367
368
|
# All other workers use this queue to send many different kinds of
|
|
368
369
|
# reports to the supervisor. The supervisor waits for a _DONE sentinel
|
|
369
370
|
# from each worker before it finishes its shutdown.
|
|
@@ -461,12 +462,12 @@ class SupervisorCommunicator:
|
|
|
461
462
|
"""
|
|
462
463
|
self._scan_requests.put(_ScanRequest(quantum_id), block=False)
|
|
463
464
|
|
|
464
|
-
def request_write(self, request:
|
|
465
|
+
def request_write(self, request: ProvenanceQuantumScanData) -> None:
|
|
465
466
|
"""Send a request to the writer to write provenance for the given scan.
|
|
466
467
|
|
|
467
468
|
Parameters
|
|
468
469
|
----------
|
|
469
|
-
request : `
|
|
470
|
+
request : `ProvenanceQuantumScanData`
|
|
470
471
|
Information from scanning a quantum (or knowing you don't have to,
|
|
471
472
|
in the case of blocked quanta).
|
|
472
473
|
"""
|
|
@@ -621,6 +622,11 @@ class WorkerCommunicator:
|
|
|
621
622
|
self._exit_stack.__exit__(exc_type, exc_value, traceback)
|
|
622
623
|
return True
|
|
623
624
|
|
|
625
|
+
@property
|
|
626
|
+
def exit_stack(self) -> ExitStack:
|
|
627
|
+
"""A `contextlib.ExitStack` tied to the communicator."""
|
|
628
|
+
return self._exit_stack
|
|
629
|
+
|
|
624
630
|
def log_progress(self, level: int, message: str) -> None:
|
|
625
631
|
"""Send a high-level log message to the supervisor.
|
|
626
632
|
|
|
@@ -633,44 +639,6 @@ class WorkerCommunicator:
|
|
|
633
639
|
"""
|
|
634
640
|
self._reports.put(_ProgressLog(message=message, level=level), block=False)
|
|
635
641
|
|
|
636
|
-
def enter(
|
|
637
|
-
self,
|
|
638
|
-
cm: AbstractContextManager[_T],
|
|
639
|
-
on_close: str | None = None,
|
|
640
|
-
level: int = VERBOSE,
|
|
641
|
-
is_progress_log: bool = False,
|
|
642
|
-
) -> _T:
|
|
643
|
-
"""Enter a context manager that will be exited when the communicator's
|
|
644
|
-
context is exited.
|
|
645
|
-
|
|
646
|
-
Parameters
|
|
647
|
-
----------
|
|
648
|
-
cm : `contextlib.AbstractContextManager`
|
|
649
|
-
A context manager to enter.
|
|
650
|
-
on_close : `str`, optional
|
|
651
|
-
A log message to emit (on the worker's logger) just before the
|
|
652
|
-
given context manager is exited. This can be used to indicate
|
|
653
|
-
what's going on when an ``__exit__`` implementation has a lot of
|
|
654
|
-
work to do (e.g. moving a large file into a zip archive).
|
|
655
|
-
level : `int`, optional
|
|
656
|
-
Level for the ``on_close`` log message.
|
|
657
|
-
is_progress_log : `bool`, optional
|
|
658
|
-
If `True`, send the ``on_close`` message to the supervisor via
|
|
659
|
-
`log_progress` as well as the worker's logger.
|
|
660
|
-
"""
|
|
661
|
-
if on_close is None:
|
|
662
|
-
return self._exit_stack.enter_context(cm)
|
|
663
|
-
|
|
664
|
-
@contextmanager
|
|
665
|
-
def wrapper() -> Iterator[_T]:
|
|
666
|
-
with cm as result:
|
|
667
|
-
yield result
|
|
668
|
-
self.log.log(level, on_close)
|
|
669
|
-
if is_progress_log:
|
|
670
|
-
self.log_progress(level, on_close)
|
|
671
|
-
|
|
672
|
-
return self._exit_stack.enter_context(wrapper())
|
|
673
|
-
|
|
674
642
|
def check_for_cancel(self) -> None:
|
|
675
643
|
"""Check for a cancel signal from the supervisor and raise
|
|
676
644
|
`FatalWorkerError` if it is present.
|
|
@@ -728,12 +696,12 @@ class ScannerCommunicator(WorkerCommunicator):
|
|
|
728
696
|
else:
|
|
729
697
|
self._reports.put(_IngestReport(1), block=False)
|
|
730
698
|
|
|
731
|
-
def request_write(self, request:
|
|
699
|
+
def request_write(self, request: ProvenanceQuantumScanData) -> None:
|
|
732
700
|
"""Ask the writer to write provenance for a quantum.
|
|
733
701
|
|
|
734
702
|
Parameters
|
|
735
703
|
----------
|
|
736
|
-
request : `
|
|
704
|
+
request : `ProvenanceQuantumScanData`
|
|
737
705
|
Result of scanning a quantum.
|
|
738
706
|
"""
|
|
739
707
|
assert self._write_requests is not None, "Writer should not be used if writing is disabled."
|
|
@@ -913,12 +881,12 @@ class WriterCommunicator(WorkerCommunicator):
|
|
|
913
881
|
self._reports.put(_Sentinel.WRITER_DONE, block=False)
|
|
914
882
|
return result
|
|
915
883
|
|
|
916
|
-
def poll(self) -> Iterator[
|
|
884
|
+
def poll(self) -> Iterator[ProvenanceQuantumScanData]:
|
|
917
885
|
"""Poll for writer requests from the scanner workers and supervisor.
|
|
918
886
|
|
|
919
887
|
Yields
|
|
920
888
|
------
|
|
921
|
-
request : `
|
|
889
|
+
request : `ProvenanceQuantumScanData`
|
|
922
890
|
The result of a quantum scan.
|
|
923
891
|
|
|
924
892
|
Notes
|
|
@@ -34,6 +34,8 @@ import logging
|
|
|
34
34
|
import time
|
|
35
35
|
import uuid
|
|
36
36
|
from collections import defaultdict
|
|
37
|
+
from contextlib import AbstractContextManager
|
|
38
|
+
from typing import Any, Literal, Self
|
|
37
39
|
|
|
38
40
|
from lsst.daf.butler import Butler, CollectionType, DatasetRef, DimensionGroup
|
|
39
41
|
from lsst.daf.butler.datastore.record_data import DatastoreRecordData
|
|
@@ -46,7 +48,7 @@ from ._communicators import IngesterCommunicator
|
|
|
46
48
|
|
|
47
49
|
|
|
48
50
|
@dataclasses.dataclass
|
|
49
|
-
class Ingester:
|
|
51
|
+
class Ingester(AbstractContextManager):
|
|
50
52
|
"""A helper class for the provenance aggregator that handles ingestion into
|
|
51
53
|
the central butler repository.
|
|
52
54
|
"""
|
|
@@ -107,6 +109,16 @@ class Ingester:
|
|
|
107
109
|
self.comms.log.verbose("Initializing butler.")
|
|
108
110
|
self.butler = Butler.from_config(self.butler_path, writeable=not self.comms.config.dry_run)
|
|
109
111
|
|
|
112
|
+
def __enter__(self) -> Self:
|
|
113
|
+
return self
|
|
114
|
+
|
|
115
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
|
|
116
|
+
try:
|
|
117
|
+
self.butler.close()
|
|
118
|
+
except Exception:
|
|
119
|
+
self.comms.log.exception("An exception occurred during Ingester exit")
|
|
120
|
+
return False
|
|
121
|
+
|
|
110
122
|
@property
|
|
111
123
|
def n_datasets_pending(self) -> int:
|
|
112
124
|
"""The number of butler datasets currently pending."""
|
|
@@ -130,8 +142,7 @@ class Ingester:
|
|
|
130
142
|
This method is designed to run as the ``target`` in
|
|
131
143
|
`WorkerContext.make_worker`.
|
|
132
144
|
"""
|
|
133
|
-
with comms:
|
|
134
|
-
ingester = Ingester(predicted_path, butler_path, comms)
|
|
145
|
+
with comms, Ingester(predicted_path, butler_path, comms) as ingester:
|
|
135
146
|
ingester.loop()
|
|
136
147
|
|
|
137
148
|
def loop(self) -> None:
|
|
@@ -32,31 +32,29 @@ __all__ = ("Scanner",)
|
|
|
32
32
|
import dataclasses
|
|
33
33
|
import itertools
|
|
34
34
|
import uuid
|
|
35
|
+
from contextlib import AbstractContextManager
|
|
36
|
+
from typing import Any, Literal, Self
|
|
35
37
|
|
|
36
38
|
import zstandard
|
|
37
39
|
|
|
38
40
|
from lsst.daf.butler import ButlerLogRecords, DatasetRef, QuantumBackedButler
|
|
39
|
-
from lsst.utils.iteration import ensure_iterable
|
|
40
41
|
|
|
41
42
|
from ... import automatic_connection_constants as acc
|
|
42
|
-
from ..._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
|
|
43
43
|
from ..._task_metadata import TaskMetadata
|
|
44
|
-
from ...log_capture import _ExecutionLogRecordsExtra
|
|
45
44
|
from ...pipeline_graph import PipelineGraph, TaskImportMode
|
|
46
|
-
from ...resource_usage import QuantumResourceUsage
|
|
47
45
|
from .._multiblock import Compressor
|
|
48
46
|
from .._predicted import (
|
|
49
47
|
PredictedDatasetModel,
|
|
50
48
|
PredictedQuantumDatasetsModel,
|
|
51
49
|
PredictedQuantumGraphReader,
|
|
52
50
|
)
|
|
53
|
-
from .._provenance import
|
|
51
|
+
from .._provenance import ProvenanceQuantumScanModels, ProvenanceQuantumScanStatus
|
|
54
52
|
from ._communicators import ScannerCommunicator
|
|
55
|
-
from ._structs import IngestRequest,
|
|
53
|
+
from ._structs import IngestRequest, ScanReport
|
|
56
54
|
|
|
57
55
|
|
|
58
56
|
@dataclasses.dataclass
|
|
59
|
-
class Scanner:
|
|
57
|
+
class Scanner(AbstractContextManager):
|
|
60
58
|
"""A helper class for the provenance aggregator that reads metadata and log
|
|
61
59
|
files and scans for which outputs exist.
|
|
62
60
|
"""
|
|
@@ -92,7 +90,7 @@ class Scanner:
|
|
|
92
90
|
if self.comms.config.mock_storage_classes:
|
|
93
91
|
import lsst.pipe.base.tests.mocks # noqa: F401
|
|
94
92
|
self.comms.log.verbose("Reading from predicted quantum graph.")
|
|
95
|
-
self.reader = self.comms.
|
|
93
|
+
self.reader = self.comms.exit_stack.enter_context(
|
|
96
94
|
PredictedQuantumGraphReader.open(self.predicted_path, import_mode=TaskImportMode.DO_NOT_IMPORT)
|
|
97
95
|
)
|
|
98
96
|
self.reader.read_dimension_data()
|
|
@@ -101,6 +99,16 @@ class Scanner:
|
|
|
101
99
|
self.qbb = self.make_qbb(self.butler_path, self.reader.pipeline_graph)
|
|
102
100
|
self.init_quanta = {q.quantum_id: q for q in self.reader.components.init_quanta.root}
|
|
103
101
|
|
|
102
|
+
def __enter__(self) -> Self:
|
|
103
|
+
return self
|
|
104
|
+
|
|
105
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
|
|
106
|
+
try:
|
|
107
|
+
self.qbb.close()
|
|
108
|
+
except Exception:
|
|
109
|
+
self.comms.log.exception("An exception occurred during Ingester exit")
|
|
110
|
+
return False
|
|
111
|
+
|
|
104
112
|
@staticmethod
|
|
105
113
|
def make_qbb(butler_config: str, pipeline_graph: PipelineGraph) -> QuantumBackedButler:
|
|
106
114
|
"""Make quantum-backed butler that can operate on the outputs of the
|
|
@@ -155,8 +163,7 @@ class Scanner:
|
|
|
155
163
|
This method is designed to run as the ``target`` in
|
|
156
164
|
`WorkerContext.make_worker`.
|
|
157
165
|
"""
|
|
158
|
-
with comms:
|
|
159
|
-
scanner = Scanner(predicted_path, butler_path, comms)
|
|
166
|
+
with comms, Scanner(predicted_path, butler_path, comms) as scanner:
|
|
160
167
|
scanner.loop()
|
|
161
168
|
|
|
162
169
|
def loop(self) -> None:
|
|
@@ -185,7 +192,7 @@ class Scanner:
|
|
|
185
192
|
ref = self.reader.components.make_dataset_ref(predicted)
|
|
186
193
|
return self.qbb.stored(ref)
|
|
187
194
|
|
|
188
|
-
def scan_quantum(self, quantum_id: uuid.UUID) ->
|
|
195
|
+
def scan_quantum(self, quantum_id: uuid.UUID) -> ProvenanceQuantumScanModels:
|
|
189
196
|
"""Scan for a quantum's completion and error status, and its output
|
|
190
197
|
datasets' existence.
|
|
191
198
|
|
|
@@ -196,76 +203,38 @@ class Scanner:
|
|
|
196
203
|
|
|
197
204
|
Returns
|
|
198
205
|
-------
|
|
199
|
-
result : `
|
|
206
|
+
result : `ProvenanceQuantumScanModels`
|
|
200
207
|
Scan result struct.
|
|
201
208
|
"""
|
|
202
209
|
if (predicted_quantum := self.init_quanta.get(quantum_id)) is not None:
|
|
203
|
-
result =
|
|
210
|
+
result = ProvenanceQuantumScanModels(
|
|
211
|
+
predicted_quantum.quantum_id, status=ProvenanceQuantumScanStatus.INIT
|
|
212
|
+
)
|
|
204
213
|
self.comms.log.debug("Created init scan for %s (%s)", quantum_id, predicted_quantum.task_label)
|
|
205
214
|
else:
|
|
206
215
|
self.reader.read_quantum_datasets([quantum_id])
|
|
207
|
-
predicted_quantum = self.reader.components.quantum_datasets
|
|
216
|
+
predicted_quantum = self.reader.components.quantum_datasets.pop(quantum_id)
|
|
208
217
|
self.comms.log.debug(
|
|
209
218
|
"Scanning %s (%s@%s)",
|
|
210
219
|
quantum_id,
|
|
211
220
|
predicted_quantum.task_label,
|
|
212
221
|
predicted_quantum.data_coordinate,
|
|
213
222
|
)
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
if not self._read_metadata(predicted_quantum, result, last_attempt):
|
|
222
|
-
# We found the log dataset, but no metadata; this means the
|
|
223
|
-
# quantum failed, but a retry might still happen that could
|
|
224
|
-
# turn it into a success if we can't yet assume the run is
|
|
225
|
-
# complete.
|
|
226
|
-
self.comms.log.debug("Abandoning scan for %s.", quantum_id)
|
|
223
|
+
logs = self._read_log(predicted_quantum)
|
|
224
|
+
metadata = self._read_metadata(predicted_quantum)
|
|
225
|
+
result = ProvenanceQuantumScanModels.from_metadata_and_logs(
|
|
226
|
+
predicted_quantum, metadata, logs, assume_complete=self.comms.config.assume_complete
|
|
227
|
+
)
|
|
228
|
+
if result.status is ProvenanceQuantumScanStatus.ABANDONED:
|
|
229
|
+
self.comms.log.debug("Abandoning scan for failed quantum %s.", quantum_id)
|
|
227
230
|
self.comms.report_scan(ScanReport(result.quantum_id, result.status))
|
|
228
231
|
return result
|
|
229
|
-
last_attempt.attempt = len(result.attempts)
|
|
230
|
-
result.attempts.append(last_attempt)
|
|
231
|
-
assert result.status is not ScanStatus.INCOMPLETE
|
|
232
|
-
assert result.status is not ScanStatus.ABANDONED
|
|
233
|
-
|
|
234
|
-
if len(result.logs.attempts) < len(result.attempts):
|
|
235
|
-
# Logs were not found for this attempt; must have been a hard error
|
|
236
|
-
# that kept the `finally` block from running or otherwise
|
|
237
|
-
# interrupted the writing of the logs.
|
|
238
|
-
result.logs.attempts.append(None)
|
|
239
|
-
if result.status is ScanStatus.SUCCESSFUL:
|
|
240
|
-
# But we found the metadata! Either that hard error happened
|
|
241
|
-
# at a very unlucky time (in between those two writes), or
|
|
242
|
-
# something even weirder happened.
|
|
243
|
-
result.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
|
|
244
|
-
else:
|
|
245
|
-
result.attempts[-1].status = QuantumAttemptStatus.FAILED
|
|
246
|
-
if len(result.metadata.attempts) < len(result.attempts):
|
|
247
|
-
# Metadata missing usually just means a failure. In any case, the
|
|
248
|
-
# status will already be correct, either because it was set to a
|
|
249
|
-
# failure when we read the logs, or left at UNKNOWN if there were
|
|
250
|
-
# no logs. Note that scanners never process BLOCKED quanta at all.
|
|
251
|
-
result.metadata.attempts.append(None)
|
|
252
|
-
assert len(result.logs.attempts) == len(result.attempts) or len(result.metadata.attempts) == len(
|
|
253
|
-
result.attempts
|
|
254
|
-
), (
|
|
255
|
-
"The only way we can add more than one quantum attempt is by "
|
|
256
|
-
"extracting info stored with the logs, and that always appends "
|
|
257
|
-
"a log attempt and a metadata attempt, so this must be a bug in "
|
|
258
|
-
"the scanner."
|
|
259
|
-
)
|
|
260
|
-
# Scan for output dataset existence, skipping any the metadata reported
|
|
261
|
-
# on as well as and the metadata and logs themselves (since we just
|
|
262
|
-
# checked those).
|
|
263
232
|
for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
|
|
264
|
-
if predicted_output.dataset_id not in result.
|
|
265
|
-
result.
|
|
233
|
+
if predicted_output.dataset_id not in result.output_existence:
|
|
234
|
+
result.output_existence[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
|
|
266
235
|
to_ingest = self._make_ingest_request(predicted_quantum, result)
|
|
267
236
|
if self.comms.config.output_path is not None:
|
|
268
|
-
to_write =
|
|
237
|
+
to_write = result.to_scan_data(predicted_quantum, compressor=self.compressor)
|
|
269
238
|
self.comms.request_write(to_write)
|
|
270
239
|
self.comms.request_ingest(to_ingest)
|
|
271
240
|
self.comms.report_scan(ScanReport(result.quantum_id, result.status))
|
|
@@ -273,7 +242,7 @@ class Scanner:
|
|
|
273
242
|
return result
|
|
274
243
|
|
|
275
244
|
def _make_ingest_request(
|
|
276
|
-
self, predicted_quantum: PredictedQuantumDatasetsModel, result:
|
|
245
|
+
self, predicted_quantum: PredictedQuantumDatasetsModel, result: ProvenanceQuantumScanModels
|
|
277
246
|
) -> IngestRequest:
|
|
278
247
|
"""Make an ingest request from a quantum scan.
|
|
279
248
|
|
|
@@ -281,7 +250,7 @@ class Scanner:
|
|
|
281
250
|
----------
|
|
282
251
|
predicted_quantum : `PredictedQuantumDatasetsModel`
|
|
283
252
|
Information about the predicted quantum.
|
|
284
|
-
result : `
|
|
253
|
+
result : `ProvenanceQuantumScanModels`
|
|
285
254
|
Result of a quantum scan.
|
|
286
255
|
|
|
287
256
|
Returns
|
|
@@ -294,7 +263,7 @@ class Scanner:
|
|
|
294
263
|
}
|
|
295
264
|
to_ingest_predicted: list[PredictedDatasetModel] = []
|
|
296
265
|
to_ingest_refs: list[DatasetRef] = []
|
|
297
|
-
for dataset_id, was_produced in result.
|
|
266
|
+
for dataset_id, was_produced in result.output_existence.items():
|
|
298
267
|
if was_produced:
|
|
299
268
|
predicted_output = predicted_outputs_by_id[dataset_id]
|
|
300
269
|
to_ingest_predicted.append(predicted_output)
|
|
@@ -302,69 +271,18 @@ class Scanner:
|
|
|
302
271
|
to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
|
|
303
272
|
return IngestRequest(result.quantum_id, to_ingest_predicted, to_ingest_records)
|
|
304
273
|
|
|
305
|
-
def
|
|
306
|
-
|
|
307
|
-
) -> WriteRequest:
|
|
308
|
-
"""Make a write request from a quantum scan.
|
|
309
|
-
|
|
310
|
-
Parameters
|
|
311
|
-
----------
|
|
312
|
-
predicted_quantum : `PredictedQuantumDatasetsModel`
|
|
313
|
-
Information about the predicted quantum.
|
|
314
|
-
result : `InProgressScan`
|
|
315
|
-
Result of a quantum scan.
|
|
316
|
-
|
|
317
|
-
Returns
|
|
318
|
-
-------
|
|
319
|
-
write_request : `WriteRequest`
|
|
320
|
-
A request to be sent to the writer.
|
|
321
|
-
"""
|
|
322
|
-
quantum: ProvenanceInitQuantumModel | ProvenanceQuantumModel
|
|
323
|
-
if result.status is ScanStatus.INIT:
|
|
324
|
-
quantum = ProvenanceInitQuantumModel.from_predicted(predicted_quantum)
|
|
325
|
-
else:
|
|
326
|
-
quantum = ProvenanceQuantumModel.from_predicted(predicted_quantum)
|
|
327
|
-
quantum.attempts = result.attempts
|
|
328
|
-
request = WriteRequest(
|
|
329
|
-
result.quantum_id,
|
|
330
|
-
result.status,
|
|
331
|
-
existing_outputs={
|
|
332
|
-
dataset_id for dataset_id, was_produced in result.outputs.items() if was_produced
|
|
333
|
-
},
|
|
334
|
-
quantum=quantum.model_dump_json().encode(),
|
|
335
|
-
logs=result.logs.model_dump_json().encode() if result.logs.attempts else b"",
|
|
336
|
-
metadata=result.metadata.model_dump_json().encode() if result.metadata.attempts else b"",
|
|
337
|
-
)
|
|
338
|
-
if self.compressor is not None:
|
|
339
|
-
request.quantum = self.compressor.compress(request.quantum)
|
|
340
|
-
request.logs = self.compressor.compress(request.logs) if request.logs else b""
|
|
341
|
-
request.metadata = self.compressor.compress(request.metadata) if request.metadata else b""
|
|
342
|
-
request.is_compressed = True
|
|
343
|
-
return request
|
|
344
|
-
|
|
345
|
-
def _read_metadata(
|
|
346
|
-
self,
|
|
347
|
-
predicted_quantum: PredictedQuantumDatasetsModel,
|
|
348
|
-
result: InProgressScan,
|
|
349
|
-
last_attempt: ProvenanceQuantumAttemptModel,
|
|
350
|
-
) -> bool:
|
|
351
|
-
"""Attempt to read the metadata dataset for a quantum to extract
|
|
352
|
-
provenance information from it.
|
|
274
|
+
def _read_metadata(self, predicted_quantum: PredictedQuantumDatasetsModel) -> TaskMetadata | None:
|
|
275
|
+
"""Attempt to read the metadata dataset for a quantum.
|
|
353
276
|
|
|
354
277
|
Parameters
|
|
355
278
|
----------
|
|
356
279
|
predicted_quantum : `PredictedQuantumDatasetsModel`
|
|
357
280
|
Information about the predicted quantum.
|
|
358
|
-
result : `InProgressScan`
|
|
359
|
-
Result object to be modified in-place.
|
|
360
|
-
last_attempt : `ScanningProvenanceQuantumAttemptModel`
|
|
361
|
-
Structure to fill in with information about the last attempt to
|
|
362
|
-
run this quantum.
|
|
363
281
|
|
|
364
282
|
Returns
|
|
365
283
|
-------
|
|
366
|
-
|
|
367
|
-
|
|
284
|
+
metadata : `...TaskMetadata` or `None`
|
|
285
|
+
Task metadata.
|
|
368
286
|
"""
|
|
369
287
|
(predicted_dataset,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
|
|
370
288
|
ref = self.reader.components.make_dataset_ref(predicted_dataset)
|
|
@@ -372,129 +290,28 @@ class Scanner:
|
|
|
372
290
|
# This assumes QBB metadata writes are atomic, which should be the
|
|
373
291
|
# case. If it's not we'll probably get pydantic validation errors
|
|
374
292
|
# here.
|
|
375
|
-
|
|
293
|
+
return self.qbb.get(ref, storageClass="TaskMetadata")
|
|
376
294
|
except FileNotFoundError:
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
result.status = ScanStatus.ABANDONED
|
|
382
|
-
return False
|
|
383
|
-
else:
|
|
384
|
-
result.status = ScanStatus.SUCCESSFUL
|
|
385
|
-
result.outputs[ref.id] = True
|
|
386
|
-
last_attempt.status = QuantumAttemptStatus.SUCCESSFUL
|
|
387
|
-
try:
|
|
388
|
-
# Int conversion guards against spurious conversion to
|
|
389
|
-
# float that can apparently sometimes happen in
|
|
390
|
-
# TaskMetadata.
|
|
391
|
-
last_attempt.caveats = QuantumSuccessCaveats(int(metadata["quantum"]["caveats"]))
|
|
392
|
-
except LookupError:
|
|
393
|
-
pass
|
|
394
|
-
try:
|
|
395
|
-
last_attempt.exception = ExceptionInfo._from_metadata(
|
|
396
|
-
metadata[predicted_quantum.task_label]["failure"]
|
|
397
|
-
)
|
|
398
|
-
except LookupError:
|
|
399
|
-
pass
|
|
400
|
-
try:
|
|
401
|
-
for id_str in ensure_iterable(metadata["quantum"].getArray("outputs")):
|
|
402
|
-
result.outputs[uuid.UUID(id_str)]
|
|
403
|
-
except LookupError:
|
|
404
|
-
pass
|
|
405
|
-
else:
|
|
406
|
-
# If the metadata told us what it wrote, anything not in that
|
|
407
|
-
# list was not written.
|
|
408
|
-
for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
|
|
409
|
-
result.outputs.setdefault(predicted_output.dataset_id, False)
|
|
410
|
-
last_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(metadata)
|
|
411
|
-
result.metadata.attempts.append(metadata)
|
|
412
|
-
return True
|
|
413
|
-
|
|
414
|
-
def _read_log(
|
|
415
|
-
self,
|
|
416
|
-
predicted_quantum: PredictedQuantumDatasetsModel,
|
|
417
|
-
result: InProgressScan,
|
|
418
|
-
last_attempt: ProvenanceQuantumAttemptModel,
|
|
419
|
-
) -> bool:
|
|
420
|
-
"""Attempt to read the log dataset for a quantum to test for the
|
|
421
|
-
quantum's completion (the log is always written last) and aggregate
|
|
422
|
-
the log content in the provenance quantum graph.
|
|
295
|
+
return None
|
|
296
|
+
|
|
297
|
+
def _read_log(self, predicted_quantum: PredictedQuantumDatasetsModel) -> ButlerLogRecords | None:
|
|
298
|
+
"""Attempt to read the log dataset for a quantum.
|
|
423
299
|
|
|
424
300
|
Parameters
|
|
425
301
|
----------
|
|
426
302
|
predicted_quantum : `PredictedQuantumDatasetsModel`
|
|
427
303
|
Information about the predicted quantum.
|
|
428
|
-
result : `InProgressScan`
|
|
429
|
-
Result object to be modified in-place.
|
|
430
|
-
last_attempt : `ScanningProvenanceQuantumAttemptModel`
|
|
431
|
-
Structure to fill in with information about the last attempt to
|
|
432
|
-
run this quantum.
|
|
433
304
|
|
|
434
305
|
Returns
|
|
435
306
|
-------
|
|
436
|
-
|
|
437
|
-
|
|
307
|
+
logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
|
|
308
|
+
Task logs.
|
|
438
309
|
"""
|
|
439
310
|
(predicted_dataset,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
|
|
440
311
|
ref = self.reader.components.make_dataset_ref(predicted_dataset)
|
|
441
312
|
try:
|
|
442
313
|
# This assumes QBB log writes are atomic, which should be the case.
|
|
443
314
|
# If it's not we'll probably get pydantic validation errors here.
|
|
444
|
-
|
|
315
|
+
return self.qbb.get(ref)
|
|
445
316
|
except FileNotFoundError:
|
|
446
|
-
|
|
447
|
-
if self.comms.config.assume_complete:
|
|
448
|
-
result.status = ScanStatus.FAILED
|
|
449
|
-
else:
|
|
450
|
-
result.status = ScanStatus.ABANDONED
|
|
451
|
-
return False
|
|
452
|
-
else:
|
|
453
|
-
# Set the attempt's run status to FAILED, since the default is
|
|
454
|
-
# UNKNOWN (i.e. logs *and* metadata are missing) and we now know
|
|
455
|
-
# the logs exist. This will usually get replaced by SUCCESSFUL
|
|
456
|
-
# when we look for metadata next.
|
|
457
|
-
last_attempt.status = QuantumAttemptStatus.FAILED
|
|
458
|
-
result.outputs[ref.id] = True
|
|
459
|
-
if log_records.extra:
|
|
460
|
-
log_extra = _ExecutionLogRecordsExtra.model_validate(log_records.extra)
|
|
461
|
-
self._extract_from_log_extra(log_extra, result, last_attempt=last_attempt)
|
|
462
|
-
result.logs.attempts.append(list(log_records))
|
|
463
|
-
return True
|
|
464
|
-
|
|
465
|
-
def _extract_from_log_extra(
|
|
466
|
-
self,
|
|
467
|
-
log_extra: _ExecutionLogRecordsExtra,
|
|
468
|
-
result: InProgressScan,
|
|
469
|
-
last_attempt: ProvenanceQuantumAttemptModel | None,
|
|
470
|
-
) -> None:
|
|
471
|
-
for previous_attempt_log_extra in log_extra.previous_attempts:
|
|
472
|
-
self._extract_from_log_extra(previous_attempt_log_extra, result, last_attempt=None)
|
|
473
|
-
quantum_attempt: ProvenanceQuantumAttemptModel
|
|
474
|
-
if last_attempt is None:
|
|
475
|
-
# This is not the last attempt, so it must be a failure.
|
|
476
|
-
quantum_attempt = ProvenanceQuantumAttemptModel(
|
|
477
|
-
attempt=len(result.attempts), status=QuantumAttemptStatus.FAILED
|
|
478
|
-
)
|
|
479
|
-
# We also need to get the logs from this extra provenance, since
|
|
480
|
-
# they won't be the main section of the log records.
|
|
481
|
-
result.logs.attempts.append(log_extra.logs)
|
|
482
|
-
# The special last attempt is only appended after we attempt to
|
|
483
|
-
# read metadata later, but we have to append this one now.
|
|
484
|
-
result.attempts.append(quantum_attempt)
|
|
485
|
-
else:
|
|
486
|
-
assert not log_extra.logs, "Logs for the last attempt should not be stored in the extra JSON."
|
|
487
|
-
quantum_attempt = last_attempt
|
|
488
|
-
if log_extra.exception is not None or log_extra.metadata is not None or last_attempt is None:
|
|
489
|
-
# We won't be getting a separate metadata dataset, so anything we
|
|
490
|
-
# might get from the metadata has to come from this extra
|
|
491
|
-
# provenance in the logs.
|
|
492
|
-
quantum_attempt.exception = log_extra.exception
|
|
493
|
-
if log_extra.metadata is not None:
|
|
494
|
-
quantum_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(log_extra.metadata)
|
|
495
|
-
result.metadata.attempts.append(log_extra.metadata)
|
|
496
|
-
else:
|
|
497
|
-
result.metadata.attempts.append(None)
|
|
498
|
-
# Regardless of whether this is the last attempt or not, we can only
|
|
499
|
-
# get the previous_process_quanta from the log extra.
|
|
500
|
-
quantum_attempt.previous_process_quanta.extend(log_extra.previous_process_quanta)
|
|
317
|
+
return None
|