lsst-pipe-base 29.2025.4500-py3-none-any.whl → 29.2025.4700-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +156 -11
- lsst/pipe/base/log_capture.py +98 -7
- lsst/pipe/base/pipeline_graph/expressions.py +3 -3
- lsst/pipe/base/quantum_graph/_common.py +21 -1
- lsst/pipe/base/quantum_graph/_multiblock.py +14 -39
- lsst/pipe/base/quantum_graph/_predicted.py +90 -90
- lsst/pipe/base/quantum_graph/_provenance.py +345 -200
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +19 -19
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +201 -72
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +45 -35
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +15 -17
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +57 -149
- lsst/pipe/base/quantum_graph_builder.py +0 -1
- lsst/pipe/base/quantum_provenance_graph.py +2 -44
- lsst/pipe/base/single_quantum_executor.py +43 -9
- lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/RECORD +29 -29
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/zip-safe +0 -0
@@ -32,10 +32,12 @@ __all__ = (
     "ProvenanceDatasetModel",
     "ProvenanceInitQuantumInfo",
     "ProvenanceInitQuantumModel",
+    "ProvenanceLogRecordsModel",
     "ProvenanceQuantumGraph",
     "ProvenanceQuantumGraphReader",
     "ProvenanceQuantumInfo",
     "ProvenanceQuantumModel",
+    "ProvenanceTaskMetadataModel",
 )


@@ -45,7 +47,7 @@ import uuid
 from collections import Counter
 from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, TypedDict
+from typing import TYPE_CHECKING, Any, TypedDict, TypeVar

 import astropy.table
 import networkx
@@ -53,35 +55,28 @@ import numpy as np
 import pydantic

 from lsst.daf.butler import DataCoordinate
+from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
 from lsst.resources import ResourcePathExpression
 from lsst.utils.packages import Packages

-from .._status import QuantumSuccessCaveats
+from .._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
+from .._task_metadata import TaskMetadata
 from ..pipeline_graph import PipelineGraph, TaskImportMode, TaskInitNode
-from ..quantum_provenance_graph import ExceptionInfo, QuantumRunStatus
 from ..resource_usage import QuantumResourceUsage
 from ._common import (
     BaseQuantumGraph,
     BaseQuantumGraphReader,
     ConnectionName,
     DataCoordinateValues,
-    DatasetIndex,
     DatasetInfo,
     DatasetTypeName,
     HeaderModel,
-    QuantumIndex,
     QuantumInfo,
     TaskLabel,
 )
-from ._multiblock import AddressReader, MultiblockReader
+from ._multiblock import MultiblockReader
 from ._predicted import PredictedDatasetModel, PredictedQuantumDatasetsModel

-if TYPE_CHECKING:
-    from lsst.daf.butler.logging import ButlerLogRecords
-
-    from .._task_metadata import TaskMetadata
-
-
 DATASET_ADDRESS_INDEX = 0
 QUANTUM_ADDRESS_INDEX = 1
 LOG_ADDRESS_INDEX = 2
@@ -92,6 +87,8 @@ QUANTUM_MB_NAME = "quanta"
 LOG_MB_NAME = "logs"
 METADATA_MB_NAME = "metadata"

+_I = TypeVar("_I", bound=uuid.UUID | int)
+

 class ProvenanceDatasetInfo(DatasetInfo):
     """A typed dictionary that annotates the attributes of the NetworkX graph
@@ -108,13 +105,13 @@ class ProvenanceDatasetInfo(DatasetInfo):
     dataset_id: uuid.UUID
     """Unique identifier for the dataset."""

-
-    """Whether this dataset
-    run.
+    produced: bool
+    """Whether this dataset was produced (vs. only predicted).

     This is always `True` for overall input datasets. It is also `True` for
     datasets that were produced and then removed before/during transfer back to
-    the central butler repository
+    the central butler repository, so it may not reflect the continued
+    existence of the dataset.
     """


@@ -131,17 +128,38 @@ class ProvenanceQuantumInfo(QuantumInfo):
     `ProvenanceQuantumGraph.quantum_only_xgraph`
     """

-    status: QuantumRunStatus
-    """Enumerated status for the quantum.
+    status: QuantumAttemptStatus
+    """Enumerated status for the quantum.
+
+    This corresponds to the last attempt to run this quantum, or
+    `QuantumAttemptStatus.BLOCKED` if there were no attempts.
+    """

     caveats: QuantumSuccessCaveats | None
-    """Flags indicating caveats on successful quanta.
+    """Flags indicating caveats on successful quanta.
+
+    This corresponds to the last attempt to run this quantum.
+    """

     exception: ExceptionInfo | None
-    """Information about an exception raised when the quantum was executing.
+    """Information about an exception raised when the quantum was executing.
+
+    This corresponds to the last attempt to run this quantum.
+    """

     resource_usage: QuantumResourceUsage | None
-    """Resource usage information (timing, memory use) for this quantum.
+    """Resource usage information (timing, memory use) for this quantum.
+
+    This corresponds to the last attempt to run this quantum.
+    """
+
+    attempts: list[ProvenanceQuantumAttemptModel]
+    """Information about each attempt to run this quantum.
+
+    An entry is added even if the quantum merely *should* have been attempted;
+    an empty `list` is used only for quanta that were blocked by an upstream
+    failure.
+    """


 class ProvenanceInitQuantumInfo(TypedDict):
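The node attributes above make the full retry history available alongside the last-attempt summary. A minimal consumer sketch, assuming `graph` is a fully-read `ProvenanceQuantumGraph` and `quantum_id` is a known quantum UUID (both names hypothetical):

    info = graph.quantum_only_xgraph.nodes[quantum_id]
    # "status", "caveats", "exception", and "resource_usage" describe only the
    # last attempt; "attempts" holds one entry per attempt, oldest first.
    if info["status"] is QuantumAttemptStatus.BLOCKED:
        assert not info["attempts"]  # blocked quanta were never attempted
    else:
        for attempt in info["attempts"]:
            print(attempt.attempt, attempt.status)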
@@ -173,25 +191,23 @@ class ProvenanceInitQuantumInfo(TypedDict):
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""

-
-    """Whether this dataset
-    run.
+    produced: bool
+    """Whether this dataset was produced (vs. only predicted).

     This is always `True` for overall input datasets. It is also `True` for
     datasets that were produced and then removed before/during transfer back to
-    the central butler repository
+    the central butler repository, so it may not reflect the continued
+    existence of the dataset.
     """

-    producer:
-    """
+    producer: uuid.UUID | None = None
+    """ID of the quantum that produced this dataset.

     This is `None` for overall inputs to the graph.
     """

-    consumers: list[
-    """
-    dataset.
-    """
+    consumers: list[uuid.UUID] = pydantic.Field(default_factory=list)
+    """IDs of quanta that were predicted to consume this dataset."""

     @property
     def node_id(self) -> uuid.UUID:
@@ -202,8 +218,8 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
     def from_predicted(
         cls,
         predicted: PredictedDatasetModel,
-        producer:
-        consumers: Iterable[
+        producer: uuid.UUID | None = None,
+        consumers: Iterable[uuid.UUID] = (),
     ) -> ProvenanceDatasetModel:
         """Construct from a predicted dataset model.

@@ -211,12 +227,10 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
         ----------
         predicted : `PredictedDatasetModel`
             Information about the dataset from the predicted graph.
-        producer : `
-
-
-
-            Internal IDs of the quanta that were predicted to consume this
-            dataset.
+        producer : `uuid.UUID` or `None`, optional
+            ID of the quantum that was predicted to produce this dataset.
+        consumers : `~collections.abc.Iterable` [`uuid.UUID`], optional
+            IDs of the quanta that were predicted to consume this dataset.

         Returns
         -------
@@ -225,7 +239,7 @@ class ProvenanceDatasetModel(PredictedDatasetModel):

         Notes
         -----
-        This initializes `
+        This initializes `produced` to `True` when ``producer is None`` and
         `False` otherwise, on the assumption that it will be updated later.
         """
         return cls.model_construct(
@@ -233,21 +247,18 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
             dataset_type_name=predicted.dataset_type_name,
             data_coordinate=predicted.data_coordinate,
             run=predicted.run,
-
+            produced=(producer is None),  # if it's not produced by this QG, it's an overall input
             producer=producer,
             consumers=list(consumers),
         )

-    def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
+    def _add_to_graph(self, graph: ProvenanceQuantumGraph) -> None:
         """Add this dataset and its edges to quanta to a provenance graph.

         Parameters
         ----------
         graph : `ProvenanceQuantumGraph`
             Graph to update in place.
-        address_reader : `AddressReader`
-            Reader object that can be used to look up UUIDs from integer
-            indexes.

         Notes
         -----
@@ -268,17 +279,14 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
             dataset_type_name=self.dataset_type_name,
             pipeline_node=dataset_type_node,
             run=self.run,
-
+            produced=self.produced,
         )
-        producer_id: uuid.UUID | None = None
         if self.producer is not None:
-
-
-        for consumer_index in self.consumers:
-            consumer_id = address_reader.find(consumer_index).key
+            graph._bipartite_xgraph.add_edge(self.producer, self.dataset_id)
+        for consumer_id in self.consumers:
             graph._bipartite_xgraph.add_edge(self.dataset_id, consumer_id)
-            if
-            graph._quantum_only_xgraph.add_edge(
+            if self.producer is not None:
+                graph._quantum_only_xgraph.add_edge(self.producer, consumer_id)
         graph._datasets_by_type[self.dataset_type_name][data_id] = self.dataset_id

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
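With `producer` and `consumers` now carried as UUIDs, the integer-index lookups through the removed `AddressReader` disappear from edge construction. A hypothetical use of `from_predicted` showing the `produced` initialization described in the Notes above (all field values invented, and `PredictedDatasetModel`'s constructor arguments are assumed from the fields visible in this diff):

    import uuid

    predicted = PredictedDatasetModel(
        dataset_id=uuid.uuid4(),
        dataset_type_name="calexp",
        data_coordinate=[903342, "i"],
        run="demo/run",
    )
    intermediate = ProvenanceDatasetModel.from_predicted(predicted, producer=uuid.uuid4())
    overall_input = ProvenanceDatasetModel.from_predicted(predicted)
    assert intermediate.produced is False  # has a producer; not yet known to be written
    assert overall_input.produced is True  # no producer, so it is an overall input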
@@ -327,24 +335,15 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
         return super().model_validate_strings(*args, **kwargs)


-class ProvenanceQuantumModel(pydantic.BaseModel):
-    """Data model for the quanta in a provenance quantum graph file."""
-
-    quantum_id: uuid.UUID
-    """Unique identifier for the quantum."""
-
-    task_label: TaskLabel
-    """Name of the type of this dataset.
-
-    This is always a parent dataset type name, not a component.
-
-    Note that full dataset type definitions are stored in the pipeline graph.
+class ProvenanceQuantumAttemptModel(pydantic.BaseModel):
+    """Data model for a now-superseded attempt to run a quantum in a
+    provenance quantum graph file.
     """

-
-    """
+    attempt: int = 0
+    """Counter incremented for every attempt to execute this quantum."""

-    status:
+    status: QuantumAttemptStatus = QuantumAttemptStatus.UNKNOWN
     """Enumerated status for the quantum."""

     caveats: QuantumSuccessCaveats | None = None
@@ -353,18 +352,202 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
     exception: ExceptionInfo | None = None
     """Information about an exception raised when the quantum was executing."""

-
-    """
-
+    resource_usage: QuantumResourceUsage | None = None
+    """Resource usage information (timing, memory use) for this quantum."""
+
+    previous_process_quanta: list[uuid.UUID] = pydantic.Field(default_factory=list)
+    """The IDs of other quanta previously executed in the same process as this
+    one.
+    """
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceLogRecordsModel(pydantic.BaseModel):
+    """Data model for storing execution logs in a provenance quantum graph
+    file.
     """

-
-    """
-
+    attempts: list[list[ButlerLogRecord] | None] = pydantic.Field(default_factory=list)
+    """Logs from attempts to run this task, ordered chronologically from first
+    to last.
     """

-
-
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceTaskMetadataModel(pydantic.BaseModel):
+    """Data model for storing task metadata in a provenance quantum graph
+    file.
+    """
+
+    attempts: list[TaskMetadata | None] = pydantic.Field(default_factory=list)
+    """Metadata from attempts to run this task, ordered chronologically from
+    first to last.
+    """
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceQuantumModel(pydantic.BaseModel):
+    """Data model for the quanta in a provenance quantum graph file."""
+
+    quantum_id: uuid.UUID
+    """Unique identifier for the quantum."""
+
+    task_label: TaskLabel
+    """Name of the type of this dataset."""
+
+    data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
+    """The full values (required and implied) of this dataset's data ID."""
+
+    inputs: dict[ConnectionName, list[uuid.UUID]] = pydantic.Field(default_factory=dict)
+    """IDs of the datasets predicted to be consumed by this quantum, grouped by
+    connection name.
+    """
+
+    outputs: dict[ConnectionName, list[uuid.UUID]] = pydantic.Field(default_factory=dict)
+    """IDs of the datasets predicted to be produced by this quantum, grouped by
+    connection name.
+    """
+
+    attempts: list[ProvenanceQuantumAttemptModel] = pydantic.Field(default_factory=list)
+    """Provenance for all attempts to execute this quantum, ordered
+    chronologically from first to last.
+
+    An entry is added even if the quantum merely *should* have been attempted;
+    an empty `list` is used only for quanta that were blocked by an upstream
+    failure.
+    """

     @property
     def node_id(self) -> uuid.UUID:
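The two container models give logs and task metadata the same per-attempt shape, with `None` marking an attempt whose dataset was never written. A small sketch of how the `attempts` lists are meant to be read (values invented):

    # A quantum tried twice: logs from the first attempt were lost, and the
    # second attempt ran but emitted no log records.
    records = ProvenanceLogRecordsModel(attempts=[None, []])
    assert records.attempts[0] is None
    assert records.attempts[1] == []

    # Task metadata follows the same convention: one entry per attempt.
    metadata = ProvenanceTaskMetadataModel(attempts=[None])
    assert len(metadata.attempts) == 1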
@@ -372,17 +555,13 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
         return self.quantum_id

     @classmethod
-    def from_predicted(
-        cls, predicted: PredictedQuantumDatasetsModel, indices: Mapping[uuid.UUID, int]
-    ) -> ProvenanceQuantumModel:
+    def from_predicted(cls, predicted: PredictedQuantumDatasetsModel) -> ProvenanceQuantumModel:
         """Construct from a predicted quantum model.

         Parameters
         ----------
         predicted : `PredictedQuantumDatasetsModel`
             Information about the quantum from the predicted graph.
-        indices : `~collections.abc.Mapping` [`uuid.UUID`, `int`]
-            Mapping from quantum or dataset UUID to internal integer ID.

         Returns
         -------
@@ -390,11 +569,11 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             Provenance quantum model.
         """
         inputs = {
-            connection_name: [
+            connection_name: [d.dataset_id for d in predicted_inputs]
             for connection_name, predicted_inputs in predicted.inputs.items()
         }
         outputs = {
-            connection_name: [
+            connection_name: [d.dataset_id for d in predicted_outputs]
             for connection_name, predicted_outputs in predicted.outputs.items()
         }
         return cls(
@@ -405,16 +584,13 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
             outputs=outputs,
         )

-    def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
+    def _add_to_graph(self, graph: ProvenanceQuantumGraph) -> None:
         """Add this quantum and its edges to datasets to a provenance graph.

         Parameters
         ----------
         graph : `ProvenanceQuantumGraph`
             Graph to update in place.
-        address_reader : `AddressReader`
-            Reader object that can be used to look up UUIDs from integer
-            indexes.

         Notes
         -----
@@ -429,28 +605,32 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
         """
         task_node = graph.pipeline_graph.tasks[self.task_label]
         data_id = DataCoordinate.from_full_values(task_node.dimensions, tuple(self.data_coordinate))
+        last_attempt = (
+            self.attempts[-1]
+            if self.attempts
+            else ProvenanceQuantumAttemptModel(status=QuantumAttemptStatus.BLOCKED)
+        )
         graph._bipartite_xgraph.add_node(
             self.quantum_id,
             data_id=data_id,
             task_label=self.task_label,
             pipeline_node=task_node,
-            status=
-            caveats=
-            exception=
-            resource_usage=
+            status=last_attempt.status,
+            caveats=last_attempt.caveats,
+            exception=last_attempt.exception,
+            resource_usage=last_attempt.resource_usage,
+            attempts=self.attempts,
         )
-        for connection_name,
+        for connection_name, dataset_ids in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
-            for
-                dataset_id = address_reader.find(dataset_index).key
+            for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(dataset_id, self.quantum_id, is_read=True)
                 graph._bipartite_xgraph.edges[dataset_id, self.quantum_id].setdefault(
                     "pipeline_edges", []
                 ).append(read_edge)
-        for connection_name,
+        for connection_name, dataset_ids in self.outputs.items():
             write_edge = task_node.get_output_edge(connection_name)
-            for
-                dataset_id = address_reader.find(dataset_index).key
+            for dataset_id in dataset_ids:
                 graph._bipartite_xgraph.add_edge(
                     self.quantum_id,
                     dataset_id,
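The `last_attempt` fallback above is what keeps the summary node attributes well-defined for quanta that never ran. Restated in isolation, using only names from this hunk:

    # No recorded attempts means the quantum was blocked upstream; a synthetic
    # BLOCKED attempt then supplies the node's status/caveats/exception values.
    attempts: list[ProvenanceQuantumAttemptModel] = []
    last = attempts[-1] if attempts else ProvenanceQuantumAttemptModel(status=QuantumAttemptStatus.BLOCKED)
    assert last.status is QuantumAttemptStatus.BLOCKED and last.caveats is None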
@@ -529,28 +709,24 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
     Note that full dataset type definitions are stored in the pipeline graph.
     """

-    inputs: dict[ConnectionName,
-    """
-
+    inputs: dict[ConnectionName, uuid.UUID] = pydantic.Field(default_factory=dict)
+    """IDs of the datasets predicted to be consumed by this quantum, grouped by
+    connection name.
     """

-    outputs: dict[ConnectionName,
-    """
-
+    outputs: dict[ConnectionName, uuid.UUID] = pydantic.Field(default_factory=dict)
+    """IDs of the datasets predicted to be produced by this quantum, grouped by
+    connection name.
     """

     @classmethod
-    def from_predicted(
-        cls, predicted: PredictedQuantumDatasetsModel, indices: Mapping[uuid.UUID, int]
-    ) -> ProvenanceInitQuantumModel:
+    def from_predicted(cls, predicted: PredictedQuantumDatasetsModel) -> ProvenanceInitQuantumModel:
         """Construct from a predicted quantum model.

         Parameters
         ----------
         predicted : `PredictedQuantumDatasetsModel`
             Information about the quantum from the predicted graph.
-        indices : `~collections.abc.Mapping` [`uuid.UUID`, `int`]
-            Mapping from quantum or dataset UUID to internal integer ID.

         Returns
         -------
@@ -558,11 +734,11 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
             Provenance init quantum model.
         """
         inputs = {
-            connection_name:
+            connection_name: predicted_inputs[0].dataset_id
             for connection_name, predicted_inputs in predicted.inputs.items()
         }
         outputs = {
-            connection_name:
+            connection_name: predicted_outputs[0].dataset_id
             for connection_name, predicted_outputs in predicted.outputs.items()
         }
         return cls(
@@ -572,21 +748,13 @@ class ProvenanceInitQuantumModel(pydantic.BaseModel):
             outputs=outputs,
         )

-    def _add_to_graph(
-        self,
-        graph: ProvenanceQuantumGraph,
-        address_reader: AddressReader,
-        empty_data_id: DataCoordinate,
-    ) -> None:
+    def _add_to_graph(self, graph: ProvenanceQuantumGraph, empty_data_id: DataCoordinate) -> None:
         """Add this quantum and its edges to datasets to a provenance graph.

         Parameters
         ----------
         graph : `ProvenanceQuantumGraph`
             Graph to update in place.
-        address_reader : `AddressReader`
-            Reader object that can be used to look up UUIDs from integer
-            indexes.
         empty_data_id : `lsst.daf.butler.DataCoordinate`
             The empty data ID for the appropriate dimension universe.

|
|
|
602
770
|
graph._bipartite_xgraph.add_node(
|
|
603
771
|
self.quantum_id, data_id=empty_data_id, task_label=self.task_label, pipeline_node=task_init_node
|
|
604
772
|
)
|
|
605
|
-
for connection_name,
|
|
773
|
+
for connection_name, dataset_id in self.inputs.items():
|
|
606
774
|
read_edge = task_init_node.get_input_edge(connection_name)
|
|
607
|
-
dataset_id = address_reader.find(dataset_index).key
|
|
608
775
|
graph._bipartite_xgraph.add_edge(dataset_id, self.quantum_id, is_read=True)
|
|
609
776
|
graph._bipartite_xgraph.edges[dataset_id, self.quantum_id].setdefault(
|
|
610
777
|
"pipeline_edges", []
|
|
611
778
|
).append(read_edge)
|
|
612
|
-
for connection_name,
|
|
779
|
+
for connection_name, dataset_id in self.outputs.items():
|
|
613
780
|
write_edge = task_init_node.get_output_edge(connection_name)
|
|
614
|
-
dataset_id = address_reader.find(dataset_index).key
|
|
615
781
|
graph._bipartite_xgraph.add_edge(
|
|
616
782
|
self.quantum_id,
|
|
617
783
|
dataset_id,
|
|
@@ -673,20 +839,17 @@ class ProvenanceInitQuantaModel(pydantic.RootModel):
     root: list[ProvenanceInitQuantumModel] = pydantic.Field(default_factory=list)
     """List of special "init" quanta, one for each task."""

-    def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
+    def _add_to_graph(self, graph: ProvenanceQuantumGraph) -> None:
         """Add this quantum and its edges to datasets to a provenance graph.

         Parameters
         ----------
         graph : `ProvenanceQuantumGraph`
             Graph to update in place.
-        address_reader : `AddressReader`
-            Reader object that can be used to look up UUIDs from integer
-            indexes.
         """
         empty_data_id = DataCoordinate.make_empty(graph.pipeline_graph.universe)
         for init_quantum in self.root:
-            init_quantum._add_to_graph(graph,
+            init_quantum._add_to_graph(graph, empty_data_id=empty_data_id)

    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
    # when we inherit those docstrings in our public classes.
@@ -881,7 +1044,7 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         for task_label, quanta_for_task in self.quanta_by_task.items():
             if not self.header.n_task_quanta[task_label]:
                 continue
-            status_counts = Counter[QuantumRunStatus](
+            status_counts = Counter[QuantumAttemptStatus](
                 self._quantum_only_xgraph.nodes[q]["status"] for q in quanta_for_task.values()
             )
             caveat_counts = Counter[QuantumSuccessCaveats | None](
@@ -901,11 +1064,11 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
             rows.append(
                 {
                     "Task": task_label,
-                    "Unknown": status_counts.get(
-                    "Successful": status_counts.get(
+                    "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
+                    "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
                     "Caveats": caveats,
-                    "Blocked": status_counts.get(
-                    "Failed": status_counts.get(
+                    "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
+                    "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
                     "TOTAL": len(quanta_for_task),
                     "EXPECTED": self.header.n_task_quanta[task_label],
                 }
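The summary table tallies the new `QuantumAttemptStatus` enum with a `Counter`, using `.get(..., 0)` so that statuses absent for a task report as zero without inserting keys into the counter. A standalone sketch of the pattern:

    from collections import Counter

    counts = Counter(
        [QuantumAttemptStatus.SUCCESSFUL, QuantumAttemptStatus.SUCCESSFUL, QuantumAttemptStatus.FAILED]
    )
    assert counts.get(QuantumAttemptStatus.SUCCESSFUL, 0) == 2
    assert counts.get(QuantumAttemptStatus.BLOCKED, 0) == 0  # absent status -> 0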
@@ -988,7 +1151,7 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
     the `graph` attribute.

     The various ``read_*`` methods in this class update the `graph` attribute
-    in place
+    in place.
     """

     graph: ProvenanceQuantumGraph = dataclasses.field(init=False)
@@ -1037,30 +1200,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
     def __post_init__(self) -> None:
         self.graph = ProvenanceQuantumGraph(self.header, self.pipeline_graph)

-    def read_init_quanta(self) -> ProvenanceQuantumGraphReader:
+    def read_init_quanta(self) -> None:
         """Read the thin graph, with all edge information and categorization of
         quanta by task label.
-
-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
         """
         init_quanta = self._read_single_block("init_quanta", ProvenanceInitQuantaModel)
        for init_quantum in init_quanta.root:
             self.graph._init_quanta[init_quantum.task_label] = init_quantum.quantum_id
-        init_quanta._add_to_graph(self.graph
-        return self
+        init_quanta._add_to_graph(self.graph)

-    def read_full_graph(self) -> ProvenanceQuantumGraphReader:
+    def read_full_graph(self) -> None:
         """Read all bipartite edges and all quantum and dataset node
         attributes, fully populating the `graph` attribute.

-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
-
         Notes
         -----
         This does not read logs, metadata, or packages; those must always be
@@ -1069,49 +1221,37 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
         self.read_init_quanta()
         self.read_datasets()
         self.read_quanta()
-        return self

-    def read_datasets(self, datasets: Iterable[uuid.UUID
+    def read_datasets(self, datasets: Iterable[uuid.UUID] | None = None) -> None:
         """Read information about the given datasets.

         Parameters
         ----------
-        datasets : `~collections.abc.Iterable` [`uuid.UUID`
-            Iterable of dataset IDs
-
-            ignored.
-
-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
+        datasets : `~collections.abc.Iterable` [`uuid.UUID`], optional
+            Iterable of dataset IDs to load. If not provided, all datasets
+            will be loaded. The UUIDs and indices of quanta will be ignored.
         """
-
+        self._read_nodes(datasets, DATASET_ADDRESS_INDEX, DATASET_MB_NAME, ProvenanceDatasetModel)

-    def read_quanta(self, quanta: Iterable[uuid.UUID
+    def read_quanta(self, quanta: Iterable[uuid.UUID] | None = None) -> None:
         """Read information about the given quanta.

         Parameters
         ----------
-        quanta : `~collections.abc.Iterable` [`uuid.UUID`
-            Iterable of quantum IDs
-
-
-
-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
+        quanta : `~collections.abc.Iterable` [`uuid.UUID`], optional
+            Iterable of quantum IDs to load. If not provided, all quanta will
+            be loaded. The UUIDs and indices of datasets and special init
+            quanta will be ignored.
         """
-
+        self._read_nodes(quanta, QUANTUM_ADDRESS_INDEX, QUANTUM_MB_NAME, ProvenanceQuantumModel)

     def _read_nodes(
         self,
-        nodes: Iterable[uuid.UUID
+        nodes: Iterable[uuid.UUID] | None,
         address_index: int,
         mb_name: str,
         model_type: type[ProvenanceDatasetModel] | type[ProvenanceQuantumModel],
-    ) ->
+    ) -> None:
         node: ProvenanceDatasetModel | ProvenanceQuantumModel | None
         if nodes is None:
             self.address_reader.read_all()
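Because the `read_*` methods now return `None`, the method-chaining style documented in the removed docstrings no longer works; calls become sequential statements that mutate `reader.graph` in place. A hypothetical caller (reader construction elided):

    def load_full_graph(reader: ProvenanceQuantumGraphReader) -> ProvenanceQuantumGraph:
        # Formerly chainable via the returned ``self``; now each call just
        # updates ``reader.graph`` in place, as read_full_graph itself does.
        reader.read_init_quanta()
        reader.read_datasets()
        reader.read_quanta()
        return reader.graph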
@@ -1128,7 +1268,8 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # Use the old node to reduce memory usage (since it might
                     # also have other outstanding reference holders).
                     continue
-                node._add_to_graph(self.graph
+                node._add_to_graph(self.graph)
+            return
         with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
@@ -1140,12 +1281,9 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     address_row.addresses[address_index], model_type, self.decompressor
                 )
                 if node is not None:
-                    node._add_to_graph(self.graph
-        return self
+                    node._add_to_graph(self.graph)

-    def fetch_logs(
-        self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
-    ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords]:
+    def fetch_logs(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[ButlerLogRecords | None]]:
         """Fetch log datasets.

         Parameters
@@ -1156,25 +1294,28 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):

         Returns
         -------
-        logs : `dict` [ `uuid.UUID`, `
-
+        logs : `dict` [ `uuid.UUID`, `list` [\
+                `lsst.daf.butler.ButlerLogRecords` or `None`] ]
+            Logs for the given IDs. Each value is a list of
+            `lsst.daf.butler.ButlerLogRecords` instances representing different
+            execution attempts, ordered chronologically from first to last.
+            Attempts where logs were missing will have `None` in this list.
         """
-
-
-        result: dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords] = {}
+        result: dict[uuid.UUID, list[ButlerLogRecords | None]] = {}
         with MultiblockReader.open_in_zip(self.zf, LOG_MB_NAME, int_size=self.header.int_size) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
-
-                    address_row.addresses[LOG_ADDRESS_INDEX],
+                logs_by_attempt = mb_reader.read_model(
+                    address_row.addresses[LOG_ADDRESS_INDEX], ProvenanceLogRecordsModel, self.decompressor
                 )
-                if
-                    result[node_id_or_index] =
+                if logs_by_attempt is not None:
+                    result[node_id_or_index] = [
+                        ButlerLogRecords.from_records(attempt_logs) if attempt_logs is not None else None
+                        for attempt_logs in logs_by_attempt.attempts
+                    ]
         return result

-    def fetch_metadata(
-        self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
-    ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata]:
+    def fetch_metadata(self, nodes: Iterable[uuid.UUID]) -> dict[uuid.UUID, list[TaskMetadata | None]]:
         """Fetch metadata datasets.

         Parameters
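Both fetchers now return one entry per execution attempt rather than a single object per node. A hypothetical consumer of the new shapes, assuming `reader` is an open `ProvenanceQuantumGraphReader` and `quantum_id` is a known UUID:

    for n, records in enumerate(reader.fetch_logs([quantum_id]).get(quantum_id, [])):
        if records is None:
            print(f"attempt {n}: logs missing")
        else:
            print(f"attempt {n}: {len(records)} log records")

    for n, md in enumerate(reader.fetch_metadata([quantum_id]).get(quantum_id, [])):
        print(f"attempt {n}: metadata", "missing" if md is None else "present")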
@@ -1185,22 +1326,26 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):

         Returns
         -------
-        metadata : `dict` [ `uuid.UUID`, `TaskMetadata`]
-            Metadata for the given IDs.
+        metadata : `dict` [ `uuid.UUID`, `list` [`.TaskMetadata`] ]
+            Metadata for the given IDs. Each value is a list of
+            `.TaskMetadata` instances representing different execution
+            attempts, ordered chronologically from first to last. Attempts
+            where metadata was missing (not written even in the fallback extra
+            provenance in the logs) will have `None` in this list.
         """
-
-
-        result: dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata] = {}
+        result: dict[uuid.UUID, list[TaskMetadata | None]] = {}
         with MultiblockReader.open_in_zip(
             self.zf, METADATA_MB_NAME, int_size=self.header.int_size
         ) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
-
-                    address_row.addresses[METADATA_ADDRESS_INDEX],
+                metadata_by_attempt = mb_reader.read_model(
+                    address_row.addresses[METADATA_ADDRESS_INDEX],
+                    ProvenanceTaskMetadataModel,
+                    self.decompressor,
                 )
-                if
-                    result[node_id_or_index] =
+                if metadata_by_attempt is not None:
+                    result[node_id_or_index] = metadata_by_attempt.attempts
         return result

     def fetch_packages(self) -> Packages: