lsst-pipe-base 30.0.1rc1__py3-none-any.whl → 30.2025.5100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +20 -31
- lsst/pipe/base/_quantumContext.py +3 -3
- lsst/pipe/base/_status.py +10 -43
- lsst/pipe/base/_task_metadata.py +2 -2
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +3 -8
- lsst/pipe/base/automatic_connection_constants.py +1 -20
- lsst/pipe/base/cli/cmd/__init__.py +2 -18
- lsst/pipe/base/cli/cmd/commands.py +4 -149
- lsst/pipe/base/connectionTypes.py +160 -72
- lsst/pipe/base/connections.py +9 -6
- lsst/pipe/base/execution_reports.py +5 -0
- lsst/pipe/base/graph/graph.py +10 -11
- lsst/pipe/base/graph/quantumNode.py +4 -4
- lsst/pipe/base/graph_walker.py +10 -8
- lsst/pipe/base/log_capture.py +80 -40
- lsst/pipe/base/mp_graph_executor.py +15 -51
- lsst/pipe/base/pipeline.py +6 -5
- lsst/pipe/base/pipelineIR.py +8 -2
- lsst/pipe/base/pipelineTask.py +7 -5
- lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
- lsst/pipe/base/pipeline_graph/_edges.py +22 -32
- lsst/pipe/base/pipeline_graph/_mapping_views.py +7 -4
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +7 -14
- lsst/pipe/base/pipeline_graph/expressions.py +2 -2
- lsst/pipe/base/pipeline_graph/io.py +10 -7
- lsst/pipe/base/pipeline_graph/visualization/_dot.py +12 -13
- lsst/pipe/base/pipeline_graph/visualization/_layout.py +18 -16
- lsst/pipe/base/pipeline_graph/visualization/_merge.py +7 -4
- lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
- lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +0 -7
- lsst/pipe/base/prerequisite_helpers.py +1 -2
- lsst/pipe/base/quantum_graph/_common.py +20 -19
- lsst/pipe/base/quantum_graph/_multiblock.py +31 -37
- lsst/pipe/base/quantum_graph/_predicted.py +13 -111
- lsst/pipe/base/quantum_graph/_provenance.py +45 -1136
- lsst/pipe/base/quantum_graph/aggregator/__init__.py +1 -0
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +289 -204
- lsst/pipe/base/quantum_graph/aggregator/_config.py +9 -87
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -13
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +235 -49
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +116 -6
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +39 -29
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +351 -34
- lsst/pipe/base/quantum_graph/visualization.py +1 -5
- lsst/pipe/base/quantum_graph_builder.py +8 -21
- lsst/pipe/base/quantum_graph_executor.py +13 -116
- lsst/pipe/base/quantum_graph_skeleton.py +29 -31
- lsst/pipe/base/quantum_provenance_graph.py +12 -29
- lsst/pipe/base/separable_pipeline_executor.py +3 -19
- lsst/pipe/base/single_quantum_executor.py +42 -67
- lsst/pipe/base/struct.py +0 -4
- lsst/pipe/base/testUtils.py +3 -3
- lsst/pipe/base/tests/mocks/_storage_class.py +1 -2
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/METADATA +3 -3
- lsst_pipe_base-30.2025.5100.dist-info/RECORD +125 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/WHEEL +1 -1
- lsst/pipe/base/log_on_close.py +0 -76
- lsst/pipe/base/quantum_graph/aggregator/_workers.py +0 -303
- lsst/pipe/base/quantum_graph/formatter.py +0 -171
- lsst/pipe/base/quantum_graph/ingest_graph.py +0 -413
- lsst_pipe_base-30.0.1rc1.dist-info/RECORD +0 -129
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-30.0.1rc1.dist-info → lsst_pipe_base-30.2025.5100.dist-info}/zip-safe +0 -0

lsst/pipe/base/quantum_graph/aggregator/_config.py

@@ -29,8 +29,6 @@ from __future__ import annotations
 
 __all__ = ("AggregatorConfig",)
 
-import sys
-from typing import TYPE_CHECKING, Any
 
 import pydantic
 
@@ -62,13 +60,11 @@ class AggregatorConfig(pydantic.BaseModel):
     n_processes: int = 1
     """Number of processes the scanner should use."""
 
-
-    """If `True`,
-
-
-
-    be wasted effort that just complicates a follow-up run with
-    ``incomplete=False`` later.
+    assume_complete: bool = True
+    """If `True`, the aggregator can assume all quanta have run to completion
+    (including any automatic retries). If `False`, only successes can be
+    considered final, and quanta that appear to have failed or to have not been
+    executed are ignored.
     """
 
     defensive_ingest: bool = False
@@ -99,10 +95,11 @@
     """
 
     dry_run: bool = False
-    """If `True`, do not actually perform any central butler
+    """If `True`, do not actually perform any deletions or central butler
+    ingests.
 
-    Most log messages concerning ingests will still be emitted in
-    provide a better emulation of a real run.
+    Most log messages concerning deletions and ingests will still be emitted in
+    order to provide a better emulation of a real run.
     """
 
     interactive_status: bool = False
@@ -140,78 +137,3 @@
     """Enable support for storage classes by created by the
     lsst.pipe.base.tests.mocks package.
     """
-
-    promise_ingest_graph: bool = False
-    """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
-    will be run later to ingest metadata/log/config datasets, and will not
-    ingest them itself. This means that if `~.ingest_graph.ingest_graph` is
-    not run, those files will be abandoned in the butler storage root without
-    being present in the butler database, but it will speed up both processes.
-
-    It is *usually* safe to build a quantum graph for downstream processing
-    before or while running `~.ingest_graph.ingest_graph`, because
-    metadata/log/config datasets are rarely used as inputs. To check, use
-    ``pipetask build ... --show inputs`` to show the overall-inputs to the
-    graph and scan for these dataset types.
-    """
-
-    worker_check_timeout: float = 5.0
-    """Time to wait (s) for reports from subprocesses before running
-    process-alive checks.
-
-    These checks are designed to kill the main aggregator process when a
-    subprocess has been unexpectedly killed (e.g. for for using too much
-    memory).
-    """
-
-    @property
-    def is_writing_provenance(self) -> bool:
-        """Whether the aggregator is configured to write the provenance quantum
-        graph.
-        """
-        return self.output_path is not None and not self.incomplete
-
-    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
-    # when we inherit those docstrings in our public classes.
-    if "sphinx" in sys.modules and not TYPE_CHECKING:
-
-        def copy(self, *args: Any, **kwargs: Any) -> Any:
-            """See `pydantic.BaseModel.copy`."""
-            return super().copy(*args, **kwargs)
-
-        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
-            """See `pydantic.BaseModel.model_dump`."""
-            return super().model_dump(*args, **kwargs)
-
-        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
-            """See `pydantic.BaseModel.model_dump_json`."""
-            return super().model_dump(*args, **kwargs)
-
-        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
-            """See `pydantic.BaseModel.model_copy`."""
-            return super().model_copy(*args, **kwargs)
-
-        @classmethod
-        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
-            """See `pydantic.BaseModel.model_construct`."""
-            return super().model_construct(*args, **kwargs)
-
-        @classmethod
-        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
-            """See `pydantic.BaseModel.model_json_schema`."""
-            return super().model_json_schema(*args, **kwargs)
-
-        @classmethod
-        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
-            """See `pydantic.BaseModel.model_validate`."""
-            return super().model_validate(*args, **kwargs)
-
-        @classmethod
-        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
-            """See `pydantic.BaseModel.model_validate_json`."""
-            return super().model_validate_json(*args, **kwargs)
-
-        @classmethod
-        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
-            """See `pydantic.BaseModel.model_validate_strings`."""
-            return super().model_validate_strings(*args, **kwargs)
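
Taken together, the `_config.py` changes replace the old incomplete-run option with `assume_complete`, extend `dry_run` to cover deletions as well as ingests, and drop `promise_ingest_graph`, `worker_check_timeout`, the `is_writing_provenance` property, and the Sphinx docstring workaround (which is why the `sys` and `typing` imports go away). A minimal sketch of the resulting configuration surface, using only fields visible in this diff; the import path is an assumption, and fields not shown here are presumed to have defaults:

import lsst.pipe.base.quantum_graph.aggregator as aggregator  # import path assumed

config = aggregator.AggregatorConfig(
    n_processes=4,          # scanner worker processes
    assume_complete=False,  # apparently-failed or unexecuted quanta are ignored, not finalized
    dry_run=True,           # emit deletion/ingest log messages without acting
)
# With the Sphinx wrapper methods gone, the inherited pydantic API applies directly:
print(config.model_dump_json())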

lsst/pipe/base/quantum_graph/aggregator/_ingester.py

@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError
 
 from ...pipeline_graph import TaskImportMode
 from .._common import DatastoreName
-from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
 from ._communicators import IngesterCommunicator
 
 
@@ -140,7 +140,7 @@ class Ingester(AbstractContextManager):
         Notes
         -----
         This method is designed to run as the ``target`` in
-        `
+        `WorkerContext.make_worker`.
         """
         with comms, Ingester(predicted_path, butler_path, comms) as ingester:
             ingester.loop()
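
`Ingester.run` (and `Scanner.run` below) follow the same worker-target pattern: the communicator and the worker are both context managers, entered together so each is torn down even when `loop()` raises. A generic sketch of that pattern with invented stand-in classes, not the package's API:

from contextlib import AbstractContextManager

class FakeComms(AbstractContextManager):  # stand-in for IngesterCommunicator
    def __exit__(self, *exc) -> None:
        print("comms closed")  # runs even if loop() raises

class FakeWorker(AbstractContextManager):  # stand-in for Ingester
    def __init__(self, comms: FakeComms) -> None:
        self.comms = comms

    def __exit__(self, *exc) -> None:
        print("worker closed")

    def loop(self) -> None:
        print("working")

def run(comms: FakeComms) -> None:
    # Mirrors the ``with comms, Ingester(...) as ingester`` idiom above.
    with comms, FakeWorker(comms) as worker:
        worker.loop()

run(FakeComms())  # working / worker closed / comms closed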
@@ -170,7 +170,7 @@
         for ingest_request in self.comms.poll():
             self.n_producers_pending += 1
             self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
-            self.
+            self.update_pending(ingest_request.datasets, ingest_request.records)
             if self.n_datasets_pending > self.comms.config.ingest_batch_size:
                 self.ingest()
         self.comms.log.info("All ingest requests received.")
@@ -266,32 +266,31 @@
            else:
                del self.records_pending[datastore_name]
 
-    def 
-        self,
-        refs: list[DatasetRef],
-        records: dict[DatastoreName, DatastoreRecordData],
+    def update_pending(
+        self, datasets: list[PredictedDatasetModel], records: dict[DatastoreName, DatastoreRecordData]
     ) -> None:
         """Add an ingest request to the pending-ingest data structures.
 
         Parameters
         ----------
-
-            Registry information about
+        datasets : `list` [ `PredictedDatasetModel` ]
+            Registry information about the datasets.
         records : `dict` [ `str`, \
             `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
             Datastore information about the datasets.
         """
-        n_given = len(
+        n_given = len(datasets)
         if self.already_ingested is not None:
-
-            kept = {
+            datasets = [d for d in datasets if d.dataset_id not in self.already_ingested]
+            kept = {d.dataset_id for d in datasets}
             self.n_datasets_skipped += n_given - len(kept)
             records = {
                 datastore_name: filtered_records
                 for datastore_name, original_records in records.items()
                 if (filtered_records := original_records.subset(kept)) is not None
             }
-        for 
+        for dataset in datasets:
+            ref = self.predicted.make_dataset_ref(dataset)
            self.refs_pending[ref.datasetType.dimensions].append(ref)
        for datastore_name, datastore_records in records.items():
            if (existing_records := self.records_pending.get(datastore_name)) is not None:
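
`update_pending` now receives lightweight `PredictedDatasetModel` entries rather than pre-built `DatasetRef`s, filters out anything in `already_ingested`, and only then materializes refs with `make_dataset_ref`, so skipped datasets never become refs or datastore records. A toy sketch of that filter-then-materialize ordering, with an invented stand-in type:

import dataclasses
import uuid

@dataclasses.dataclass
class FakePredictedDataset:  # stand-in for PredictedDatasetModel
    dataset_id: uuid.UUID

already_ingested = {uuid.UUID(int=1)}
datasets = [FakePredictedDataset(uuid.UUID(int=n)) for n in (1, 2, 3)]

n_given = len(datasets)
# Filter first; only survivors are turned into refs and records.
datasets = [d for d in datasets if d.dataset_id not in already_ingested]
kept = {d.dataset_id for d in datasets}
n_skipped = n_given - len(kept)
assert n_skipped == 1 and len(datasets) == 2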

lsst/pipe/base/quantum_graph/aggregator/_scanner.py

@@ -38,19 +38,23 @@ from typing import Any, Literal, Self
 import zstandard
 
 from lsst.daf.butler import ButlerLogRecords, DatasetRef, QuantumBackedButler
+from lsst.utils.iteration import ensure_iterable
 
 from ... import automatic_connection_constants as acc
+from ..._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
 from ..._task_metadata import TaskMetadata
+from ...log_capture import _ExecutionLogRecordsExtra
 from ...pipeline_graph import PipelineGraph, TaskImportMode
+from ...resource_usage import QuantumResourceUsage
 from .._multiblock import Compressor
 from .._predicted import (
     PredictedDatasetModel,
     PredictedQuantumDatasetsModel,
     PredictedQuantumGraphReader,
 )
-from .._provenance import 
+from .._provenance import ProvenanceInitQuantumModel, ProvenanceQuantumAttemptModel, ProvenanceQuantumModel
 from ._communicators import ScannerCommunicator
-from ._structs import IngestRequest, ScanReport
+from ._structs import IngestRequest, InProgressScan, ScanReport, ScanStatus, WriteRequest
 
 
 @dataclasses.dataclass
@@ -90,7 +94,7 @@ class Scanner(AbstractContextManager):
         if self.comms.config.mock_storage_classes:
             import lsst.pipe.base.tests.mocks  # noqa: F401
         self.comms.log.verbose("Reading from predicted quantum graph.")
-        self.reader = self.comms.
+        self.reader = self.comms.enter(
            PredictedQuantumGraphReader.open(self.predicted_path, import_mode=TaskImportMode.DO_NOT_IMPORT)
        )
        self.reader.read_dimension_data()
@@ -161,7 +165,7 @@
         Notes
         -----
         This method is designed to run as the ``target`` in
-        `
+        `WorkerContext.make_worker`.
         """
         with comms, Scanner(predicted_path, butler_path, comms) as scanner:
             scanner.loop()
@@ -192,7 +196,7 @@
         ref = self.reader.components.make_dataset_ref(predicted)
         return self.qbb.stored(ref)
 
-    def scan_quantum(self, quantum_id: uuid.UUID) -> 
+    def scan_quantum(self, quantum_id: uuid.UUID) -> InProgressScan:
         """Scan for a quantum's completion and error status, and its output
         datasets' existence.
 
@@ -203,38 +207,76 @@
 
         Returns
         -------
-        result : `
+        result : `InProgressScan`
             Scan result struct.
         """
         if (predicted_quantum := self.init_quanta.get(quantum_id)) is not None:
-            result = 
-                predicted_quantum.quantum_id, status=ProvenanceQuantumScanStatus.INIT
-            )
+            result = InProgressScan(predicted_quantum.quantum_id, status=ScanStatus.INIT)
             self.comms.log.debug("Created init scan for %s (%s)", quantum_id, predicted_quantum.task_label)
         else:
             self.reader.read_quantum_datasets([quantum_id])
-            predicted_quantum = self.reader.components.quantum_datasets
+            predicted_quantum = self.reader.components.quantum_datasets[quantum_id]
             self.comms.log.debug(
                 "Scanning %s (%s@%s)",
                 quantum_id,
                 predicted_quantum.task_label,
                 predicted_quantum.data_coordinate,
             )
-
-
-
-
-
-
-
+            result = InProgressScan(predicted_quantum.quantum_id, ScanStatus.INCOMPLETE)
+            del self.reader.components.quantum_datasets[quantum_id]
+            last_attempt = ProvenanceQuantumAttemptModel()
+            if not self._read_log(predicted_quantum, result, last_attempt):
+                self.comms.log.debug("Abandoning scan for %s; no log dataset.", quantum_id)
+                self.comms.report_scan(ScanReport(result.quantum_id, result.status))
+                return result
+            if not self._read_metadata(predicted_quantum, result, last_attempt):
+                # We found the log dataset, but no metadata; this means the
+                # quantum failed, but a retry might still happen that could
+                # turn it into a success if we can't yet assume the run is
+                # complete.
+                self.comms.log.debug("Abandoning scan for %s.", quantum_id)
                 self.comms.report_scan(ScanReport(result.quantum_id, result.status))
                 return result
+            last_attempt.attempt = len(result.attempts)
+            result.attempts.append(last_attempt)
+            assert result.status is not ScanStatus.INCOMPLETE
+            assert result.status is not ScanStatus.ABANDONED
+
+            if len(result.logs.attempts) < len(result.attempts):
+                # Logs were not found for this attempt; must have been a hard error
+                # that kept the `finally` block from running or otherwise
+                # interrupted the writing of the logs.
+                result.logs.attempts.append(None)
+                if result.status is ScanStatus.SUCCESSFUL:
+                    # But we found the metadata! Either that hard error happened
+                    # at a very unlucky time (in between those two writes), or
+                    # something even weirder happened.
+                    result.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
+                else:
+                    result.attempts[-1].status = QuantumAttemptStatus.FAILED
+            if len(result.metadata.attempts) < len(result.attempts):
+                # Metadata missing usually just means a failure. In any case, the
+                # status will already be correct, either because it was set to a
+                # failure when we read the logs, or left at UNKNOWN if there were
+                # no logs. Note that scanners never process BLOCKED quanta at all.
+                result.metadata.attempts.append(None)
+            assert len(result.logs.attempts) == len(result.attempts) or len(result.metadata.attempts) == len(
+                result.attempts
+            ), (
+                "The only way we can add more than one quantum attempt is by "
+                "extracting info stored with the logs, and that always appends "
+                "a log attempt and a metadata attempt, so this must be a bug in "
+                "the scanner."
+            )
+        # Scan for output dataset existence, skipping any the metadata reported
+        # on as well as and the metadata and logs themselves (since we just
+        # checked those).
         for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
-            if predicted_output.dataset_id not in result.
-                result.
+            if predicted_output.dataset_id not in result.outputs:
+                result.outputs[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
         to_ingest = self._make_ingest_request(predicted_quantum, result)
-        if self.comms.config.
-            to_write = 
+        if self.comms.config.output_path is not None:
+            to_write = self._make_write_request(predicted_quantum, result)
             self.comms.request_write(to_write)
         self.comms.request_ingest(to_ingest)
         self.comms.report_scan(ScanReport(result.quantum_id, result.status))
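
The rewritten `scan_quantum` derives a quantum's fate from which sentinel datasets exist: the log (always written last) proves the attempt finished, the metadata proves it succeeded, and `assume_complete` decides whether a missing sentinel is final. A condensed, hypothetical rendering of that decision table (the real code also accumulates attempts, logs, and resource usage):

from enum import Enum, auto

class ScanStatus(Enum):  # mirrors the names used in the hunks here
    SUCCESSFUL = auto()
    FAILED = auto()
    ABANDONED = auto()

def classify(log_exists: bool, metadata_exists: bool, assume_complete: bool) -> ScanStatus:
    if not log_exists:
        # No log: the quantum never finished writing its outputs.
        return ScanStatus.FAILED if assume_complete else ScanStatus.ABANDONED
    if not metadata_exists:
        # Log but no metadata: the attempt failed; a retry may still flip it.
        return ScanStatus.FAILED if assume_complete else ScanStatus.ABANDONED
    return ScanStatus.SUCCESSFUL

assert classify(True, True, assume_complete=False) is ScanStatus.SUCCESSFUL
assert classify(True, False, assume_complete=False) is ScanStatus.ABANDONED
assert classify(False, False, assume_complete=True) is ScanStatus.FAILED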
@@ -242,7 +284,7 @@
         return result
 
     def _make_ingest_request(
-        self, predicted_quantum: PredictedQuantumDatasetsModel, result: 
+        self, predicted_quantum: PredictedQuantumDatasetsModel, result: InProgressScan
     ) -> IngestRequest:
         """Make an ingest request from a quantum scan.
 
@@ -250,7 +292,7 @@
         ----------
         predicted_quantum : `PredictedQuantumDatasetsModel`
             Information about the predicted quantum.
-        result : `
+        result : `InProgressScan`
             Result of a quantum scan.
 
         Returns
@@ -261,36 +303,79 @@
         predicted_outputs_by_id = {
             d.dataset_id: d for d in itertools.chain.from_iterable(predicted_quantum.outputs.values())
         }
+        to_ingest_predicted: list[PredictedDatasetModel] = []
         to_ingest_refs: list[DatasetRef] = []
-
-
-        if result.status is ProvenanceQuantumScanStatus.INIT:
-            if predicted_quantum.task_label:  # i.e. not the 'packages' producer
-                to_ignore.add(
-                    predicted_quantum.outputs[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME][0].dataset_id
-                )
-        else:
-            to_ignore.add(predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME][0].dataset_id)
-            to_ignore.add(predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME][0].dataset_id)
-        for dataset_id, was_produced in result.output_existence.items():
-            if was_produced and dataset_id not in to_ignore:
+        for dataset_id, was_produced in result.outputs.items():
+            if was_produced:
                 predicted_output = predicted_outputs_by_id[dataset_id]
+                to_ingest_predicted.append(predicted_output)
                 to_ingest_refs.append(self.reader.components.make_dataset_ref(predicted_output))
         to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
-        return IngestRequest(result.quantum_id, 
+        return IngestRequest(result.quantum_id, to_ingest_predicted, to_ingest_records)
 
-    def 
-
+    def _make_write_request(
+        self, predicted_quantum: PredictedQuantumDatasetsModel, result: InProgressScan
+    ) -> WriteRequest:
+        """Make a write request from a quantum scan.
 
         Parameters
         ----------
         predicted_quantum : `PredictedQuantumDatasetsModel`
             Information about the predicted quantum.
+        result : `InProgressScan`
+            Result of a quantum scan.
 
         Returns
         -------
-
-
+        write_request : `WriteRequest`
+            A request to be sent to the writer.
+        """
+        quantum: ProvenanceInitQuantumModel | ProvenanceQuantumModel
+        if result.status is ScanStatus.INIT:
+            quantum = ProvenanceInitQuantumModel.from_predicted(predicted_quantum)
+        else:
+            quantum = ProvenanceQuantumModel.from_predicted(predicted_quantum)
+            quantum.attempts = result.attempts
+        request = WriteRequest(
+            result.quantum_id,
+            result.status,
+            existing_outputs={
+                dataset_id for dataset_id, was_produced in result.outputs.items() if was_produced
+            },
+            quantum=quantum.model_dump_json().encode(),
+            logs=result.logs.model_dump_json().encode() if result.logs.attempts else b"",
+            metadata=result.metadata.model_dump_json().encode() if result.metadata.attempts else b"",
+        )
+        if self.compressor is not None:
+            request.quantum = self.compressor.compress(request.quantum)
+            request.logs = self.compressor.compress(request.logs) if request.logs else b""
+            request.metadata = self.compressor.compress(request.metadata) if request.metadata else b""
+            request.is_compressed = True
+        return request
+
+    def _read_metadata(
+        self,
+        predicted_quantum: PredictedQuantumDatasetsModel,
+        result: InProgressScan,
+        last_attempt: ProvenanceQuantumAttemptModel,
+    ) -> bool:
+        """Attempt to read the metadata dataset for a quantum to extract
+        provenance information from it.
+
+        Parameters
+        ----------
+        predicted_quantum : `PredictedQuantumDatasetsModel`
+            Information about the predicted quantum.
+        result : `InProgressScan`
+            Result object to be modified in-place.
+        last_attempt : `ScanningProvenanceQuantumAttemptModel`
+            Structure to fill in with information about the last attempt to
+            run this quantum.
+
+        Returns
+        -------
+        complete : `bool`
+            Whether the quantum is complete.
         """
         (predicted_dataset,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
         ref = self.reader.components.make_dataset_ref(predicted_dataset)
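
`_make_write_request` serializes the provenance models to JSON bytes and, when a compressor is configured, compresses each non-empty payload and marks the request as compressed. A sketch of the same payload handling using plain `zstandard` (already imported at the top of this module) in place of the package's `Compressor` wrapper, whose API this diff does not show:

import zstandard

compressor = zstandard.ZstdCompressor()

quantum_json = b'{"quantum_id": "..."}'  # stand-in for quantum.model_dump_json().encode()
logs_json = b""                          # empty sections stay empty rather than compressed

quantum_payload = compressor.compress(quantum_json)
logs_payload = compressor.compress(logs_json) if logs_json else b""
is_compressed = True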
@@ -298,28 +383,129 @@
             # This assumes QBB metadata writes are atomic, which should be the
             # case. If it's not we'll probably get pydantic validation errors
             # here.
-
+            metadata: TaskMetadata = self.qbb.get(ref, storageClass="TaskMetadata")
         except FileNotFoundError:
-
-
-
-
+            result.outputs[ref.id] = False
+            if self.comms.config.assume_complete:
+                result.status = ScanStatus.FAILED
+            else:
+                result.status = ScanStatus.ABANDONED
+            return False
+        else:
+            result.status = ScanStatus.SUCCESSFUL
+            result.outputs[ref.id] = True
+            last_attempt.status = QuantumAttemptStatus.SUCCESSFUL
+            try:
+                # Int conversion guards against spurious conversion to
+                # float that can apparently sometimes happen in
+                # TaskMetadata.
+                last_attempt.caveats = QuantumSuccessCaveats(int(metadata["quantum"]["caveats"]))
+            except LookupError:
+                pass
+            try:
+                last_attempt.exception = ExceptionInfo._from_metadata(
+                    metadata[predicted_quantum.task_label]["failure"]
+                )
+            except LookupError:
+                pass
+            try:
+                for id_str in ensure_iterable(metadata["quantum"].getArray("outputs")):
+                    result.outputs[uuid.UUID(id_str)]
+            except LookupError:
+                pass
+            else:
+                # If the metadata told us what it wrote, anything not in that
+                # list was not written.
+                for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
+                    result.outputs.setdefault(predicted_output.dataset_id, False)
+            last_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(metadata)
+            result.metadata.attempts.append(metadata)
+            return True
+
+    def _read_log(
+        self,
+        predicted_quantum: PredictedQuantumDatasetsModel,
+        result: InProgressScan,
+        last_attempt: ProvenanceQuantumAttemptModel,
+    ) -> bool:
+        """Attempt to read the log dataset for a quantum to test for the
+        quantum's completion (the log is always written last) and aggregate
+        the log content in the provenance quantum graph.
 
         Parameters
         ----------
         predicted_quantum : `PredictedQuantumDatasetsModel`
             Information about the predicted quantum.
+        result : `InProgressScan`
+            Result object to be modified in-place.
+        last_attempt : `ScanningProvenanceQuantumAttemptModel`
+            Structure to fill in with information about the last attempt to
+            run this quantum.
 
         Returns
         -------
-
-
+        complete : `bool`
+            Whether the quantum is complete.
         """
         (predicted_dataset,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
         ref = self.reader.components.make_dataset_ref(predicted_dataset)
         try:
             # This assumes QBB log writes are atomic, which should be the case.
             # If it's not we'll probably get pydantic validation errors here.
-
+            log_records: ButlerLogRecords = self.qbb.get(ref)
         except FileNotFoundError:
-
+            result.outputs[ref.id] = False
+            if self.comms.config.assume_complete:
+                result.status = ScanStatus.FAILED
+            else:
+                result.status = ScanStatus.ABANDONED
+            return False
+        else:
+            # Set the attempt's run status to FAILED, since the default is
+            # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
+            # the logs exist. This will usually get replaced by SUCCESSFUL
+            # when we look for metadata next.
+            last_attempt.status = QuantumAttemptStatus.FAILED
+            result.outputs[ref.id] = True
+            if log_records.extra:
+                log_extra = _ExecutionLogRecordsExtra.model_validate(log_records.extra)
+                self._extract_from_log_extra(log_extra, result, last_attempt=last_attempt)
+            result.logs.attempts.append(list(log_records))
+            return True
+
+    def _extract_from_log_extra(
+        self,
+        log_extra: _ExecutionLogRecordsExtra,
+        result: InProgressScan,
+        last_attempt: ProvenanceQuantumAttemptModel | None,
+    ) -> None:
+        for previous_attempt_log_extra in log_extra.previous_attempts:
+            self._extract_from_log_extra(previous_attempt_log_extra, result, last_attempt=None)
+        quantum_attempt: ProvenanceQuantumAttemptModel
+        if last_attempt is None:
+            # This is not the last attempt, so it must be a failure.
+            quantum_attempt = ProvenanceQuantumAttemptModel(
+                attempt=len(result.attempts), status=QuantumAttemptStatus.FAILED
+            )
+            # We also need to get the logs from this extra provenance, since
+            # they won't be the main section of the log records.
+            result.logs.attempts.append(log_extra.logs)
+            # The special last attempt is only appended after we attempt to
+            # read metadata later, but we have to append this one now.
+            result.attempts.append(quantum_attempt)
+        else:
+            assert not log_extra.logs, "Logs for the last attempt should not be stored in the extra JSON."
+            quantum_attempt = last_attempt
+        if log_extra.exception is not None or log_extra.metadata is not None or last_attempt is None:
+            # We won't be getting a separate metadata dataset, so anything we
+            # might get from the metadata has to come from this extra
+            # provenance in the logs.
+            quantum_attempt.exception = log_extra.exception
+            if log_extra.metadata is not None:
+                quantum_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(log_extra.metadata)
+                result.metadata.attempts.append(log_extra.metadata)
+            else:
+                result.metadata.attempts.append(None)
+        # Regardless of whether this is the last attempt or not, we can only
+        # get the previous_process_quanta from the log extra.
+        quantum_attempt.previous_process_quanta.extend(log_extra.previous_process_quanta)