lsst-pipe-base 29.2025.4500-py3-none-any.whl → 29.2025.4600-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +156 -11
- lsst/pipe/base/log_capture.py +98 -7
- lsst/pipe/base/pipeline_graph/expressions.py +3 -3
- lsst/pipe/base/quantum_graph/_common.py +6 -0
- lsst/pipe/base/quantum_graph/_predicted.py +13 -17
- lsst/pipe/base/quantum_graph/_provenance.py +322 -106
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -9
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +154 -53
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +27 -34
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +8 -7
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +5 -8
- lsst/pipe/base/quantum_provenance_graph.py +2 -44
- lsst/pipe/base/single_quantum_executor.py +43 -9
- lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/RECORD +27 -27
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/_provenance.py:

@@ -32,10 +32,12 @@ __all__ = (
     "ProvenanceDatasetModel",
     "ProvenanceInitQuantumInfo",
     "ProvenanceInitQuantumModel",
+    "ProvenanceLogRecordsModel",
     "ProvenanceQuantumGraph",
     "ProvenanceQuantumGraphReader",
     "ProvenanceQuantumInfo",
     "ProvenanceQuantumModel",
+    "ProvenanceTaskMetadataModel",
 )
 
 
@@ -45,7 +47,7 @@ import uuid
 from collections import Counter
 from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, Generic, TypeAlias, TypedDict, TypeVar
 
 import astropy.table
 import networkx
@@ -53,12 +55,13 @@ import numpy as np
 import pydantic
 
 from lsst.daf.butler import DataCoordinate
+from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
 from lsst.resources import ResourcePathExpression
 from lsst.utils.packages import Packages
 
-from .._status import QuantumSuccessCaveats
+from .._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
+from .._task_metadata import TaskMetadata
 from ..pipeline_graph import PipelineGraph, TaskImportMode, TaskInitNode
-from ..quantum_provenance_graph import ExceptionInfo, QuantumRunStatus
 from ..resource_usage import QuantumResourceUsage
 from ._common import (
     BaseQuantumGraph,
@@ -76,12 +79,6 @@ from ._common import (
 from ._multiblock import AddressReader, MultiblockReader
 from ._predicted import PredictedDatasetModel, PredictedQuantumDatasetsModel
 
-if TYPE_CHECKING:
-    from lsst.daf.butler.logging import ButlerLogRecords
-
-    from .._task_metadata import TaskMetadata
-
-
 DATASET_ADDRESS_INDEX = 0
 QUANTUM_ADDRESS_INDEX = 1
 LOG_ADDRESS_INDEX = 2
@@ -92,6 +89,8 @@ QUANTUM_MB_NAME = "quanta"
|
|
|
92
89
|
LOG_MB_NAME = "logs"
|
|
93
90
|
METADATA_MB_NAME = "metadata"
|
|
94
91
|
|
|
92
|
+
_I = TypeVar("_I", bound=uuid.UUID | int)
|
|
93
|
+
|
|
95
94
|
|
|
96
95
|
class ProvenanceDatasetInfo(DatasetInfo):
|
|
97
96
|
"""A typed dictionary that annotates the attributes of the NetworkX graph
|
|
@@ -108,13 +107,13 @@ class ProvenanceDatasetInfo(DatasetInfo):
     dataset_id: uuid.UUID
     """Unique identifier for the dataset."""
 
-
-    """Whether this dataset
-    run.
+    produced: bool
+    """Whether this dataset was produced (vs. only predicted).
 
     This is always `True` for overall input datasets. It is also `True` for
     datasets that were produced and then removed before/during transfer back to
-    the central butler repository
+    the central butler repository, so it may not reflect the continued
+    existence of the dataset.
     """
 
 
@@ -131,17 +130,38 @@ class ProvenanceQuantumInfo(QuantumInfo):
     `ProvenanceQuantumGraph.quantum_only_xgraph`
     """
 
-    status:
-    """Enumerated status for the quantum.
+    status: QuantumAttemptStatus
+    """Enumerated status for the quantum.
+
+    This corresponds to the last attempt to run this quantum, or
+    `QuantumAttemptStatus.BLOCKED` if there were no attempts.
+    """
 
     caveats: QuantumSuccessCaveats | None
-    """Flags indicating caveats on successful quanta.
+    """Flags indicating caveats on successful quanta.
+
+    This corresponds to the last attempt to run this quantum.
+    """
 
     exception: ExceptionInfo | None
-    """Information about an exception raised when the quantum was executing.
+    """Information about an exception raised when the quantum was executing.
+
+    This corresponds to the last attempt to run this quantum.
+    """
 
     resource_usage: QuantumResourceUsage | None
-    """Resource usage information (timing, memory use) for this quantum.
+    """Resource usage information (timing, memory use) for this quantum.
+
+    This corresponds to the last attempt to run this quantum.
+    """
+
+    attempts: list[ProvenanceQuantumAttemptModel]
+    """Information about each attempt to run this quantum.
+
+    An entry is added merely if the quantum *should* have been attempted; an
+    empty `list` is used only for quanta that were blocked by an upstream
+    failure.
+    """
 
 
 class ProvenanceInitQuantumInfo(TypedDict):
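Editor's note: the node-level `status`, `caveats`, `exception`, and `resource_usage` fields above all mirror the most recent entry in `attempts`, with `BLOCKED` standing in when the list is empty (the same rule a later hunk applies in `_add_to_graph`). A minimal sketch of that rule, using stand-in types rather than the real `QuantumAttemptStatus` and `ProvenanceQuantumAttemptModel`:

```python
import enum
from dataclasses import dataclass


class Status(enum.Enum):
    # Stand-in for QuantumAttemptStatus; only the members this diff names.
    UNKNOWN = enum.auto()
    SUCCESSFUL = enum.auto()
    FAILED = enum.auto()
    BLOCKED = enum.auto()


@dataclass
class Attempt:
    # Stand-in for ProvenanceQuantumAttemptModel.
    attempt: int = 0
    status: Status = Status.UNKNOWN


def node_status(attempts: list[Attempt]) -> Status:
    # Node-level fields mirror the last attempt; an empty list means the
    # quantum was blocked by an upstream failure and never attempted.
    return attempts[-1].status if attempts else Status.BLOCKED


assert node_status([]) is Status.BLOCKED
assert node_status([Attempt(0, Status.FAILED), Attempt(1, Status.SUCCESSFUL)]) is Status.SUCCESSFUL
```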
@@ -173,13 +193,13 @@ class ProvenanceInitQuantumInfo(TypedDict):
 class ProvenanceDatasetModel(PredictedDatasetModel):
     """Data model for the datasets in a provenance quantum graph file."""
 
-
-    """Whether this dataset
-    run.
+    produced: bool
+    """Whether this dataset was produced (vs. only predicted).
 
     This is always `True` for overall input datasets. It is also `True` for
     datasets that were produced and then removed before/during transfer back to
-    the central butler repository
+    the central butler repository, so it may not reflect the continued
+    existence of the dataset.
     """
 
     producer: QuantumIndex | None = None
@@ -225,7 +245,7 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
 
         Notes
         -----
-        This initializes `
+        This initializes `produced` to `True` when ``producer is None`` and
         `False` otherwise, on the assumption that it will be updated later.
         """
         return cls.model_construct(
@@ -233,7 +253,7 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
             dataset_type_name=predicted.dataset_type_name,
             data_coordinate=predicted.data_coordinate,
             run=predicted.run,
-
+            produced=(producer is None),  # if it's not produced by this QG, it's an overall input
             producer=producer,
             consumers=list(consumers),
         )
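Editor's note: the `produced=(producer is None)` default implements the rule from the Notes section above: a dataset with no in-graph producer is an overall input and therefore already exists, while everything else starts out not-yet-produced and is flipped later. A plain-Python illustration (not the real model):

```python
def initial_produced(producer: int | None) -> bool:
    # Overall inputs (no producer inside the graph) already exist; datasets
    # the graph itself should produce start False and are updated later.
    return producer is None


assert initial_produced(None) is True   # overall input
assert initial_produced(42) is False    # predicted output of quantum index 42
```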
@@ -268,7 +288,7 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
             dataset_type_name=self.dataset_type_name,
             pipeline_node=dataset_type_node,
             run=self.run,
-
+            produced=self.produced,
         )
         producer_id: uuid.UUID | None = None
         if self.producer is not None:
@@ -327,24 +347,15 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
         return super().model_validate_strings(*args, **kwargs)
 
 
-class
-    """Data model for
-
-    quantum_id: uuid.UUID
-    """Unique identifier for the quantum."""
-
-    task_label: TaskLabel
-    """Name of the type of this dataset.
-
-    This is always a parent dataset type name, not a component.
-
-    Note that full dataset type definitions are stored in the pipeline graph.
+class _GenericProvenanceQuantumAttemptModel(pydantic.BaseModel, Generic[_I]):
+    """Data model for a now-superseded attempt to run a quantum in a
+    provenance quantum graph file.
     """
 
-
-    """
+    attempt: int = 0
+    """Counter incremented for every attempt to execute this quantum."""
 
-    status:
+    status: QuantumAttemptStatus = QuantumAttemptStatus.UNKNOWN
     """Enumerated status for the quantum."""
 
     caveats: QuantumSuccessCaveats | None = None
@@ -353,6 +364,212 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
     exception: ExceptionInfo | None = None
     """Information about an exception raised when the quantum was executing."""
 
+    resource_usage: QuantumResourceUsage | None = None
+    """Resource usage information (timing, memory use) for this quantum."""
+
+    previous_process_quanta: list[_I] = pydantic.Field(default_factory=list)
+    """The IDs of other quanta previously executed in the same process as this
+    one.
+    """
+
+    def remap_uuids(
+        self: ProvenanceQuantumAttemptModel, indices: Mapping[uuid.UUID, QuantumIndex]
+    ) -> StorageProvenanceQuantumAttemptModel:
+        return StorageProvenanceQuantumAttemptModel(
+            attempt=self.attempt,
+            status=self.status,
+            caveats=self.caveats,
+            exception=self.exception,
+            resource_usage=self.resource_usage,
+            previous_process_quanta=[indices[q] for q in self.previous_process_quanta],
+        )
+
+    def remap_indices(
+        self: StorageProvenanceQuantumAttemptModel, address_reader: AddressReader
+    ) -> ProvenanceQuantumAttemptModel:
+        return ProvenanceQuantumAttemptModel(
+            attempt=self.attempt,
+            status=self.status,
+            caveats=self.caveats,
+            exception=self.exception,
+            resource_usage=self.resource_usage,
+            previous_process_quanta=[address_reader.find(q).key for q in self.previous_process_quanta],
+        )
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+StorageProvenanceQuantumAttemptModel: TypeAlias = _GenericProvenanceQuantumAttemptModel[QuantumIndex]
+ProvenanceQuantumAttemptModel: TypeAlias = _GenericProvenanceQuantumAttemptModel[uuid.UUID]
+
+
+class ProvenanceLogRecordsModel(pydantic.BaseModel):
+    """Data model for storing execution logs in a provenance quantum graph
+    file.
+    """
+
+    attempts: list[list[ButlerLogRecord] | None] = pydantic.Field(default_factory=list)
+    """Logs from attempts to run this task, ordered chronologically from first
+    to last.
+    """
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceTaskMetadataModel(pydantic.BaseModel):
+    """Data model for storing task metadata in a provenance quantum graph
+    file.
+    """
+
+    attempts: list[TaskMetadata | None] = pydantic.Field(default_factory=list)
+    """Metadata from attempts to run this task, ordered chronologically from
+    first to last.
+    """
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class ProvenanceQuantumModel(pydantic.BaseModel):
+    """Data model for the quanta in a provenance quantum graph file."""
+
+    quantum_id: uuid.UUID
+    """Unique identifier for the quantum."""
+
+    task_label: TaskLabel
+    """Name of the type of this dataset."""
+
+    data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
+    """The full values (required and implied) of this dataset's data ID."""
+
     inputs: dict[ConnectionName, list[DatasetIndex]] = pydantic.Field(default_factory=dict)
     """Internal integer IDs of the datasets predicted to be consumed by this
     quantum, grouped by connection name.
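Editor's note: the `_GenericProvenanceQuantumAttemptModel`/`TypeAlias` pair above is a standard Pydantic v2 pattern: one generic model parametrized by its ID type, specialized once with compact integer indices for on-disk storage and once with UUIDs for the public API, so `remap_uuids`/`remap_indices` reduce to a field copy plus an ID translation. A self-contained sketch of the pattern with illustrative names (not the real classes):

```python
import uuid
from typing import Generic, TypeAlias, TypeVar

import pydantic

_I = TypeVar("_I", bound=uuid.UUID | int)


class _GenericAttempt(pydantic.BaseModel, Generic[_I]):
    # One model, parametrized by how quanta are identified.
    attempt: int = 0
    previous_process_quanta: list[_I] = pydantic.Field(default_factory=list)


# Compact integer indices on disk; UUIDs in the public API.
StorageAttempt: TypeAlias = _GenericAttempt[int]
PublicAttempt: TypeAlias = _GenericAttempt[uuid.UUID]

# Remapping between the two forms is a field copy plus an ID translation,
# mirroring remap_uuids/remap_indices above.
indices = {uuid.UUID(int=1): 0, uuid.UUID(int=2): 1}
public = PublicAttempt(attempt=1, previous_process_quanta=list(indices))
storage = StorageAttempt(
    attempt=public.attempt,
    previous_process_quanta=[indices[q] for q in public.previous_process_quanta],
)
assert storage.previous_process_quanta == [0, 1]
```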
@@ -363,8 +580,14 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
     quantum, grouped by connection name.
     """
 
-
-    """
+    attempts: list[StorageProvenanceQuantumAttemptModel] = pydantic.Field(default_factory=list)
+    """Provenance for all attempts to execute this quantum, ordered
+    chronologically from first to last.
+
+    An entry is added merely if the quantum *should* have been attempted; an
+    empty `list` is used only for quanta that were blocked by an upstream
+    failure.
+    """
 
     @property
     def node_id(self) -> uuid.UUID:
@@ -429,15 +652,21 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
         """
         task_node = graph.pipeline_graph.tasks[self.task_label]
        data_id = DataCoordinate.from_full_values(task_node.dimensions, tuple(self.data_coordinate))
+        last_attempt = (
+            self.attempts[-1]
+            if self.attempts
+            else StorageProvenanceQuantumAttemptModel(status=QuantumAttemptStatus.BLOCKED)
+        )
         graph._bipartite_xgraph.add_node(
             self.quantum_id,
             data_id=data_id,
             task_label=self.task_label,
             pipeline_node=task_node,
-            status=
-            caveats=
-            exception=
-            resource_usage=
+            status=last_attempt.status,
+            caveats=last_attempt.caveats,
+            exception=last_attempt.exception,
+            resource_usage=last_attempt.resource_usage,
+            attempts=[a.remap_indices(address_reader) for a in self.attempts],
         )
         for connection_name, dataset_indices in self.inputs.items():
             read_edge = task_node.get_input_edge(connection_name)
@@ -881,7 +1110,7 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
         for task_label, quanta_for_task in self.quanta_by_task.items():
             if not self.header.n_task_quanta[task_label]:
                 continue
-            status_counts = Counter[
+            status_counts = Counter[QuantumAttemptStatus](
                 self._quantum_only_xgraph.nodes[q]["status"] for q in quanta_for_task.values()
             )
             caveat_counts = Counter[QuantumSuccessCaveats | None](
@@ -901,11 +1130,11 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
             rows.append(
                 {
                     "Task": task_label,
-                    "Unknown": status_counts.get(
-                    "Successful": status_counts.get(
+                    "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
+                    "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
                     "Caveats": caveats,
-                    "Blocked": status_counts.get(
-                    "Failed": status_counts.get(
+                    "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
+                    "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
                     "TOTAL": len(quanta_for_task),
                     "EXPECTED": self.header.n_task_quanta[task_label],
                 }
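Editor's note: the two summary-table hunks above tally per-task statuses with a `Counter` keyed by enum value and read each bucket back with `.get(..., 0)`, so absent statuses report as zero. The same pattern with a stand-in enum in place of `QuantumAttemptStatus`:

```python
import enum
from collections import Counter


class Status(enum.Enum):  # stand-in for QuantumAttemptStatus
    UNKNOWN = enum.auto()
    SUCCESSFUL = enum.auto()
    BLOCKED = enum.auto()
    FAILED = enum.auto()


statuses = [Status.SUCCESSFUL, Status.SUCCESSFUL, Status.BLOCKED]
status_counts = Counter(statuses)

# Counter is a dict subclass, so .get(..., 0) reports absent buckets as zero.
row = {
    "Unknown": status_counts.get(Status.UNKNOWN, 0),
    "Successful": status_counts.get(Status.SUCCESSFUL, 0),
    "Blocked": status_counts.get(Status.BLOCKED, 0),
    "Failed": status_counts.get(Status.FAILED, 0),
    "TOTAL": len(statuses),
}
assert row == {"Unknown": 0, "Successful": 2, "Blocked": 1, "Failed": 0, "TOTAL": 3}
```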
@@ -988,7 +1217,7 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
     the `graph` attribute.
 
     The various ``read_*`` methods in this class update the `graph` attribute
-    in place
+    in place.
     """
 
     graph: ProvenanceQuantumGraph = dataclasses.field(init=False)
@@ -1037,30 +1266,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
     def __post_init__(self) -> None:
         self.graph = ProvenanceQuantumGraph(self.header, self.pipeline_graph)
 
-    def read_init_quanta(self) ->
+    def read_init_quanta(self) -> None:
         """Read the thin graph, with all edge information and categorization of
         quanta by task label.
-
-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
         """
         init_quanta = self._read_single_block("init_quanta", ProvenanceInitQuantaModel)
         for init_quantum in init_quanta.root:
             self.graph._init_quanta[init_quantum.task_label] = init_quantum.quantum_id
         init_quanta._add_to_graph(self.graph, self.address_reader)
-        return self
 
-    def read_full_graph(self) ->
+    def read_full_graph(self) -> None:
         """Read all bipartite edges and all quantum and dataset node
         attributes, fully populating the `graph` attribute.
 
-        Returns
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
-
         Notes
         -----
         This does not read logs, metadata, or packages; those must always be
@@ -1069,9 +1287,8 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
         self.read_init_quanta()
         self.read_datasets()
         self.read_quanta()
-        return self
 
-    def read_datasets(self, datasets: Iterable[uuid.UUID | DatasetIndex] | None = None) ->
+    def read_datasets(self, datasets: Iterable[uuid.UUID | DatasetIndex] | None = None) -> None:
         """Read information about the given datasets.
 
         Parameters
@@ -1080,15 +1297,10 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
             Iterable of dataset IDs or indices to load. If not provided, all
             datasets will be loaded. The UUIDs and indices of quanta will be
             ignored.
-
-        Return
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
         """
-
+        self._read_nodes(datasets, DATASET_ADDRESS_INDEX, DATASET_MB_NAME, ProvenanceDatasetModel)
 
-    def read_quanta(self, quanta: Iterable[uuid.UUID | QuantumIndex] | None = None) ->
+    def read_quanta(self, quanta: Iterable[uuid.UUID | QuantumIndex] | None = None) -> None:
         """Read information about the given quanta.
 
         Parameters
@@ -1097,13 +1309,8 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
             Iterable of quantum IDs or indices to load. If not provided, all
             quanta will be loaded. The UUIDs and indices of datasets and
             special init quanta will be ignored.
-
-        Return
-        -------
-        self : `ProvenanceQuantumGraphReader`
-            The reader (to permit method-chaining).
         """
-
+        self._read_nodes(quanta, QUANTUM_ADDRESS_INDEX, QUANTUM_MB_NAME, ProvenanceQuantumModel)
 
     def _read_nodes(
         self,
@@ -1111,7 +1318,7 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
         address_index: int,
         mb_name: str,
         model_type: type[ProvenanceDatasetModel] | type[ProvenanceQuantumModel],
-    ) ->
+    ) -> None:
         node: ProvenanceDatasetModel | ProvenanceQuantumModel | None
         if nodes is None:
             self.address_reader.read_all()
@@ -1129,6 +1336,7 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                     # also have other outstanding reference holders).
                     continue
                 node._add_to_graph(self.graph, self.address_reader)
+            return
         with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
@@ -1141,66 +1349,74 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
                 )
                 if node is not None:
                     node._add_to_graph(self.graph, self.address_reader)
-        return self
 
     def fetch_logs(
         self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
-    ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords]:
+    ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, list[ButlerLogRecords | None]]:
         """Fetch log datasets.
 
         Parameters
         ----------
         nodes : `~collections.abc.Iterable` [ `uuid.UUID` ]
-            UUIDs of the log datasets themselves or of
-            correspond to.
+            UUIDs or internal integer IDS of the log datasets themselves or of
+            the quanta they correspond to.
 
         Returns
         -------
-        logs : `dict` [ `uuid.UUID`, `
-
+        logs : `dict` [ `uuid.UUID` or `int`, `list` [\
+                `lsst.daf.butler.ButlerLogRecords` or `None`] ]
+            Logs for the given IDs. Each value is a list of
+            `lsst.daf.butler.ButlerLogRecords` instances representing different
+            execution attempts, ordered chronologically from first to last.
+            Attempts where logs were missing will have `None` in this list.
         """
-
-
-        result: dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords] = {}
+        result: dict[uuid.UUID | DatasetIndex | QuantumIndex, list[ButlerLogRecords | None]] = {}
         with MultiblockReader.open_in_zip(self.zf, LOG_MB_NAME, int_size=self.header.int_size) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
-
-                    address_row.addresses[LOG_ADDRESS_INDEX],
+                logs_by_attempt = mb_reader.read_model(
+                    address_row.addresses[LOG_ADDRESS_INDEX], ProvenanceLogRecordsModel, self.decompressor
                 )
-                if
-                    result[node_id_or_index] =
+                if logs_by_attempt is not None:
+                    result[node_id_or_index] = [
+                        ButlerLogRecords.from_records(attempt_logs) if attempt_logs is not None else None
+                        for attempt_logs in logs_by_attempt.attempts
+                    ]
         return result
 
     def fetch_metadata(
         self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
-    ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata]:
+    ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, list[TaskMetadata | None]]:
         """Fetch metadata datasets.
 
         Parameters
         ----------
         nodes : `~collections.abc.Iterable` [ `uuid.UUID` ]
-            UUIDs of the metadata datasets themselves
-            correspond to.
+            UUIDs or internal integer IDs of the metadata datasets themselves
+            or of the quanta they correspond to.
 
         Returns
         -------
-        metadata : `dict` [ `uuid.UUID`, `TaskMetadata`]
-            Metadata for the given IDs.
+        metadata : `dict` [ `uuid.UUID` or `int`, `list` [`.TaskMetadata`] ]
+            Metadata for the given IDs. Each value is a list of
+            `.TaskMetadata` instances representing different execution
+            attempts, ordered chronologically from first to last. Attempts
+            where metadata was missing (not written even in the fallback extra
+            provenance in the logs) will have `None` in this list.
         """
-
-
-        result: dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata] = {}
+        result: dict[uuid.UUID | DatasetIndex | QuantumIndex, list[TaskMetadata | None]] = {}
         with MultiblockReader.open_in_zip(
             self.zf, METADATA_MB_NAME, int_size=self.header.int_size
         ) as mb_reader:
             for node_id_or_index in nodes:
                 address_row = self.address_reader.find(node_id_or_index)
-
-                    address_row.addresses[METADATA_ADDRESS_INDEX],
+                metadata_by_attempt = mb_reader.read_model(
+                    address_row.addresses[METADATA_ADDRESS_INDEX],
+                    ProvenanceTaskMetadataModel,
+                    self.decompressor,
                 )
-                if
-                    result[node_id_or_index] =
+                if metadata_by_attempt is not None:
+                    result[node_id_or_index] = metadata_by_attempt.attempts
         return result
 
     def fetch_packages(self) -> Packages: