lsst-pipe-base 29.2025.4500-py3-none-any.whl → 29.2025.4600-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. lsst/pipe/base/_status.py +156 -11
  2. lsst/pipe/base/log_capture.py +98 -7
  3. lsst/pipe/base/pipeline_graph/expressions.py +3 -3
  4. lsst/pipe/base/quantum_graph/_common.py +6 -0
  5. lsst/pipe/base/quantum_graph/_predicted.py +13 -17
  6. lsst/pipe/base/quantum_graph/_provenance.py +322 -106
  7. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -9
  8. lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
  9. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +154 -53
  10. lsst/pipe/base/quantum_graph/aggregator/_structs.py +27 -34
  11. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +8 -7
  12. lsst/pipe/base/quantum_graph/aggregator/_writer.py +5 -8
  13. lsst/pipe/base/quantum_provenance_graph.py +2 -44
  14. lsst/pipe/base/single_quantum_executor.py +43 -9
  15. lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
  16. lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
  17. lsst/pipe/base/version.py +1 -1
  18. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/METADATA +1 -1
  19. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/RECORD +27 -27
  20. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/WHEEL +0 -0
  21. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/entry_points.txt +0 -0
  22. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
  23. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
  24. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/bsd_license.txt +0 -0
  25. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/gpl-v3.0.txt +0 -0
  26. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/top_level.txt +0 -0
  27. {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/zip-safe +0 -0
@@ -32,10 +32,12 @@ __all__ = (
  "ProvenanceDatasetModel",
  "ProvenanceInitQuantumInfo",
  "ProvenanceInitQuantumModel",
+ "ProvenanceLogRecordsModel",
  "ProvenanceQuantumGraph",
  "ProvenanceQuantumGraphReader",
  "ProvenanceQuantumInfo",
  "ProvenanceQuantumModel",
+ "ProvenanceTaskMetadataModel",
  )


@@ -45,7 +47,7 @@ import uuid
  from collections import Counter
  from collections.abc import Iterable, Iterator, Mapping
  from contextlib import contextmanager
- from typing import TYPE_CHECKING, Any, Self, TypedDict
+ from typing import TYPE_CHECKING, Any, Generic, TypeAlias, TypedDict, TypeVar

  import astropy.table
  import networkx
@@ -53,12 +55,13 @@ import numpy as np
  import pydantic

  from lsst.daf.butler import DataCoordinate
+ from lsst.daf.butler.logging import ButlerLogRecord, ButlerLogRecords
  from lsst.resources import ResourcePathExpression
  from lsst.utils.packages import Packages

- from .._status import QuantumSuccessCaveats
+ from .._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
+ from .._task_metadata import TaskMetadata
  from ..pipeline_graph import PipelineGraph, TaskImportMode, TaskInitNode
- from ..quantum_provenance_graph import ExceptionInfo, QuantumRunStatus
  from ..resource_usage import QuantumResourceUsage
  from ._common import (
  BaseQuantumGraph,
@@ -76,12 +79,6 @@ from ._common import (
  from ._multiblock import AddressReader, MultiblockReader
  from ._predicted import PredictedDatasetModel, PredictedQuantumDatasetsModel

- if TYPE_CHECKING:
- from lsst.daf.butler.logging import ButlerLogRecords
-
- from .._task_metadata import TaskMetadata
-
-
  DATASET_ADDRESS_INDEX = 0
  QUANTUM_ADDRESS_INDEX = 1
  LOG_ADDRESS_INDEX = 2
@@ -92,6 +89,8 @@ QUANTUM_MB_NAME = "quanta"
  LOG_MB_NAME = "logs"
  METADATA_MB_NAME = "metadata"

+ _I = TypeVar("_I", bound=uuid.UUID | int)
+

  class ProvenanceDatasetInfo(DatasetInfo):
  """A typed dictionary that annotates the attributes of the NetworkX graph
@@ -108,13 +107,13 @@ class ProvenanceDatasetInfo(DatasetInfo):
  dataset_id: uuid.UUID
  """Unique identifier for the dataset."""

- exists: bool
- """Whether this dataset existed immediately after the quantum graph was
- run.
+ produced: bool
+ """Whether this dataset was produced (vs. only predicted).

  This is always `True` for overall input datasets. It is also `True` for
  datasets that were produced and then removed before/during transfer back to
- the central butler repository.
+ the central butler repository, so it may not reflect the continued
+ existence of the dataset.
  """


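A minimal sketch (not part of the diff) of how the renamed flag might be queried, assuming `graph` is a fully read ProvenanceQuantumGraph:

    # Sketch only: collect datasets that were predicted but never produced.
    # `_bipartite_xgraph` is the attribute this diff populates; a public
    # accessor may exist instead.
    predicted_only = [
        node_id
        for node_id, info in graph._bipartite_xgraph.nodes(data=True)
        if "produced" in info and not info["produced"]
    ]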
@@ -131,17 +130,38 @@ class ProvenanceQuantumInfo(QuantumInfo):
  `ProvenanceQuantumGraph.quantum_only_xgraph`
  """

- status: QuantumRunStatus
- """Enumerated status for the quantum."""
+ status: QuantumAttemptStatus
+ """Enumerated status for the quantum.
+
+ This corresponds to the last attempt to run this quantum, or
+ `QuantumAttemptStatus.BLOCKED` if there were no attempts.
+ """

  caveats: QuantumSuccessCaveats | None
- """Flags indicating caveats on successful quanta."""
+ """Flags indicating caveats on successful quanta.
+
+ This corresponds to the last attempt to run this quantum.
+ """

  exception: ExceptionInfo | None
- """Information about an exception raised when the quantum was executing."""
+ """Information about an exception raised when the quantum was executing.
+
+ This corresponds to the last attempt to run this quantum.
+ """

  resource_usage: QuantumResourceUsage | None
- """Resource usage information (timing, memory use) for this quantum."""
+ """Resource usage information (timing, memory use) for this quantum.
+
+ This corresponds to the last attempt to run this quantum.
+ """
+
+ attempts: list[ProvenanceQuantumAttemptModel]
+ """Information about each attempt to run this quantum.
+
+ An entry is added even if the quantum merely *should* have been attempted;
+ an empty `list` is used only for quanta that were blocked by an upstream
+ failure.
+ """


  class ProvenanceInitQuantumInfo(TypedDict):
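A minimal sketch (not from the diff) of how these per-quantum attributes might be consumed, assuming `graph` is a fully read ProvenanceQuantumGraph:

    # The node attributes reflect the last attempt; `attempts` holds the
    # full history and is empty only for blocked quanta.
    for quantum_id, info in graph.quantum_only_xgraph.nodes(data=True):
        if not info["attempts"]:
            assert info["status"] is QuantumAttemptStatus.BLOCKED
        elif len(info["attempts"]) > 1:
            print(f"{quantum_id}: {len(info['attempts'])} attempts, "
                  f"final status {info['status'].name}")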
@@ -173,13 +193,13 @@ class ProvenanceInitQuantumInfo(TypedDict):
  class ProvenanceDatasetModel(PredictedDatasetModel):
  """Data model for the datasets in a provenance quantum graph file."""

- exists: bool
- """Whether this dataset existed immediately after the quantum graph was
- run.
+ produced: bool
+ """Whether this dataset was produced (vs. only predicted).

  This is always `True` for overall input datasets. It is also `True` for
  datasets that were produced and then removed before/during transfer back to
- the central butler repository.
+ the central butler repository, so it may not reflect the continued
+ existence of the dataset.
  """

  producer: QuantumIndex | None = None
@@ -225,7 +245,7 @@ class ProvenanceDatasetModel(PredictedDatasetModel):

  Notes
  -----
- This initializes `exists` to `True` when ``producer is None`` and
+ This initializes `produced` to `True` when ``producer is None`` and
  `False` otherwise, on the assumption that it will be updated later.
  """
  return cls.model_construct(
@@ -233,7 +253,7 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
233
253
  dataset_type_name=predicted.dataset_type_name,
234
254
  data_coordinate=predicted.data_coordinate,
235
255
  run=predicted.run,
236
- exists=(producer is None), # if it's not produced by this QG, it's an overall input
256
+ produced=(producer is None), # if it's not produced by this QG, it's an overall input
237
257
  producer=producer,
238
258
  consumers=list(consumers),
239
259
  )
@@ -268,7 +288,7 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
  dataset_type_name=self.dataset_type_name,
  pipeline_node=dataset_type_node,
  run=self.run,
- exists=self.exists,
+ produced=self.produced,
  )
  producer_id: uuid.UUID | None = None
  if self.producer is not None:
@@ -327,24 +347,15 @@ class ProvenanceDatasetModel(PredictedDatasetModel):
  return super().model_validate_strings(*args, **kwargs)


- class ProvenanceQuantumModel(pydantic.BaseModel):
- """Data model for the quanta in a provenance quantum graph file."""
-
- quantum_id: uuid.UUID
- """Unique identifier for the quantum."""
-
- task_label: TaskLabel
- """Name of the type of this dataset.
-
- This is always a parent dataset type name, not a component.
-
- Note that full dataset type definitions are stored in the pipeline graph.
+ class _GenericProvenanceQuantumAttemptModel(pydantic.BaseModel, Generic[_I]):
+ """Data model for a now-superseded attempt to run a quantum in a
+ provenance quantum graph file.
  """

- data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
- """The full values (required and implied) of this dataset's data ID."""
+ attempt: int = 0
+ """Counter incremented for every attempt to execute this quantum."""

- status: QuantumRunStatus = QuantumRunStatus.METADATA_MISSING
+ status: QuantumAttemptStatus = QuantumAttemptStatus.UNKNOWN
  """Enumerated status for the quantum."""

  caveats: QuantumSuccessCaveats | None = None
@@ -353,6 +364,212 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
  exception: ExceptionInfo | None = None
  """Information about an exception raised when the quantum was executing."""

+ resource_usage: QuantumResourceUsage | None = None
+ """Resource usage information (timing, memory use) for this quantum."""
+
+ previous_process_quanta: list[_I] = pydantic.Field(default_factory=list)
+ """The IDs of other quanta previously executed in the same process as this
+ one.
+ """
+
+ def remap_uuids(
+ self: ProvenanceQuantumAttemptModel, indices: Mapping[uuid.UUID, QuantumIndex]
+ ) -> StorageProvenanceQuantumAttemptModel:
+ return StorageProvenanceQuantumAttemptModel(
+ attempt=self.attempt,
+ status=self.status,
+ caveats=self.caveats,
+ exception=self.exception,
+ resource_usage=self.resource_usage,
+ previous_process_quanta=[indices[q] for q in self.previous_process_quanta],
+ )
+
+ def remap_indices(
+ self: StorageProvenanceQuantumAttemptModel, address_reader: AddressReader
+ ) -> ProvenanceQuantumAttemptModel:
+ return ProvenanceQuantumAttemptModel(
+ attempt=self.attempt,
+ status=self.status,
+ caveats=self.caveats,
+ exception=self.exception,
+ resource_usage=self.resource_usage,
+ previous_process_quanta=[address_reader.find(q).key for q in self.previous_process_quanta],
+ )
+
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+ # when we inherit those docstrings in our public classes.
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.copy`."""
+ return super().copy(*args, **kwargs)
+
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_dump`."""
+ return super().model_dump(*args, **kwargs)
+
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_dump_json`."""
+ return super().model_dump_json(*args, **kwargs)
+
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_copy`."""
+ return super().model_copy(*args, **kwargs)
+
+ @classmethod
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
+ """See `pydantic.BaseModel.model_construct`."""
+ return super().model_construct(*args, **kwargs)
+
+ @classmethod
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_json_schema`."""
+ return super().model_json_schema(*args, **kwargs)
+
+ @classmethod
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate`."""
+ return super().model_validate(*args, **kwargs)
+
+ @classmethod
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate_json`."""
+ return super().model_validate_json(*args, **kwargs)
+
+ @classmethod
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate_strings`."""
+ return super().model_validate_strings(*args, **kwargs)
+
+
+ StorageProvenanceQuantumAttemptModel: TypeAlias = _GenericProvenanceQuantumAttemptModel[QuantumIndex]
+ ProvenanceQuantumAttemptModel: TypeAlias = _GenericProvenanceQuantumAttemptModel[uuid.UUID]
+
+
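The same attempt model is thus used with two key types: UUIDs in memory and compact integer indices on disk. A minimal sketch of the conversion (hypothetical values; `indices` stands in for an assumed UUID-to-index mapping):

    # Sketch only: convert an in-memory attempt record to its storage form.
    q1 = uuid.uuid4()
    attempt = ProvenanceQuantumAttemptModel(
        attempt=1,
        status=QuantumAttemptStatus.SUCCESSFUL,
        previous_process_quanta=[q1],  # ran after q1 in the same process
    )
    stored = attempt.remap_uuids(indices={q1: 42})
    assert stored.previous_process_quanta == [42]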
+ class ProvenanceLogRecordsModel(pydantic.BaseModel):
+ """Data model for storing execution logs in a provenance quantum graph
+ file.
+ """
+
+ attempts: list[list[ButlerLogRecord] | None] = pydantic.Field(default_factory=list)
+ """Logs from attempts to run this task, ordered chronologically from first
+ to last.
+ """
+
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+ # when we inherit those docstrings in our public classes.
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.copy`."""
+ return super().copy(*args, **kwargs)
+
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_dump`."""
+ return super().model_dump(*args, **kwargs)
+
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_dump_json`."""
+ return super().model_dump_json(*args, **kwargs)
+
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_copy`."""
+ return super().model_copy(*args, **kwargs)
+
+ @classmethod
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
+ """See `pydantic.BaseModel.model_construct`."""
+ return super().model_construct(*args, **kwargs)
+
+ @classmethod
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_json_schema`."""
+ return super().model_json_schema(*args, **kwargs)
+
+ @classmethod
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate`."""
+ return super().model_validate(*args, **kwargs)
+
+ @classmethod
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate_json`."""
+ return super().model_validate_json(*args, **kwargs)
+
+ @classmethod
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate_strings`."""
+ return super().model_validate_strings(*args, **kwargs)
+
+
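A hypothetical round trip for this container (sketch only; `second_attempt_records` is an assumed list of ButlerLogRecord instances), mirroring what `fetch_logs` below does on read:

    # Attempt 0 lost its logs (None); attempt 1 has records.
    model = ProvenanceLogRecordsModel(attempts=[None, second_attempt_records])
    rehydrated = [
        ButlerLogRecords.from_records(a) if a is not None else None
        for a in model.attempts
    ]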
+ class ProvenanceTaskMetadataModel(pydantic.BaseModel):
+ """Data model for storing task metadata in a provenance quantum graph
+ file.
+ """
+
+ attempts: list[TaskMetadata | None] = pydantic.Field(default_factory=list)
+ """Metadata from attempts to run this task, ordered chronologically from
+ first to last.
+ """
+
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+ # when we inherit those docstrings in our public classes.
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.copy`."""
+ return super().copy(*args, **kwargs)
+
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_dump`."""
+ return super().model_dump(*args, **kwargs)
+
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_dump_json`."""
+ return super().model_dump_json(*args, **kwargs)
+
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_copy`."""
+ return super().model_copy(*args, **kwargs)
+
+ @classmethod
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
+ """See `pydantic.BaseModel.model_construct`."""
+ return super().model_construct(*args, **kwargs)
+
+ @classmethod
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_json_schema`."""
+ return super().model_json_schema(*args, **kwargs)
+
+ @classmethod
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate`."""
+ return super().model_validate(*args, **kwargs)
+
+ @classmethod
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate_json`."""
+ return super().model_validate_json(*args, **kwargs)
+
+ @classmethod
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+ """See `pydantic.BaseModel.model_validate_strings`."""
+ return super().model_validate_strings(*args, **kwargs)
+
+
+ class ProvenanceQuantumModel(pydantic.BaseModel):
+ """Data model for the quanta in a provenance quantum graph file."""
+
+ quantum_id: uuid.UUID
+ """Unique identifier for the quantum."""
+
+ task_label: TaskLabel
+ """Label of the task for this quantum in the pipeline graph."""
+
+ data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
+ """The full values (required and implied) of this quantum's data ID."""
+
  inputs: dict[ConnectionName, list[DatasetIndex]] = pydantic.Field(default_factory=dict)
  """Internal integer IDs of the datasets predicted to be consumed by this
  quantum, grouped by connection name.
@@ -363,8 +580,14 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
  quantum, grouped by connection name.
  """

- resource_usage: QuantumResourceUsage | None = None
- """Resource usage information (timing, memory use) for this quantum."""
+ attempts: list[StorageProvenanceQuantumAttemptModel] = pydantic.Field(default_factory=list)
+ """Provenance for all attempts to execute this quantum, ordered
+ chronologically from first to last.
+
+ An entry is added even if the quantum merely *should* have been attempted;
+ an empty `list` is used only for quanta that were blocked by an upstream
+ failure.
+ """

  @property
  def node_id(self) -> uuid.UUID:
@@ -429,15 +652,21 @@ class ProvenanceQuantumModel(pydantic.BaseModel):
  """
  task_node = graph.pipeline_graph.tasks[self.task_label]
  data_id = DataCoordinate.from_full_values(task_node.dimensions, tuple(self.data_coordinate))
+ last_attempt = (
+ self.attempts[-1]
+ if self.attempts
+ else StorageProvenanceQuantumAttemptModel(status=QuantumAttemptStatus.BLOCKED)
+ )
  graph._bipartite_xgraph.add_node(
  self.quantum_id,
  data_id=data_id,
  task_label=self.task_label,
  pipeline_node=task_node,
- status=self.status,
- caveats=self.caveats,
- exception=self.exception,
- resource_usage=self.resource_usage,
+ status=last_attempt.status,
+ caveats=last_attempt.caveats,
+ exception=last_attempt.exception,
+ resource_usage=last_attempt.resource_usage,
+ attempts=[a.remap_indices(address_reader) for a in self.attempts],
  )
  for connection_name, dataset_indices in self.inputs.items():
  read_edge = task_node.get_input_edge(connection_name)
@@ -881,7 +1110,7 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
  for task_label, quanta_for_task in self.quanta_by_task.items():
  if not self.header.n_task_quanta[task_label]:
  continue
- status_counts = Counter[QuantumRunStatus](
+ status_counts = Counter[QuantumAttemptStatus](
  self._quantum_only_xgraph.nodes[q]["status"] for q in quanta_for_task.values()
  )
  caveat_counts = Counter[QuantumSuccessCaveats | None](
@@ -901,11 +1130,11 @@ class ProvenanceQuantumGraph(BaseQuantumGraph):
  rows.append(
  {
  "Task": task_label,
- "Unknown": status_counts.get(QuantumRunStatus.METADATA_MISSING, 0),
- "Successful": status_counts.get(QuantumRunStatus.SUCCESSFUL, 0),
+ "Unknown": status_counts.get(QuantumAttemptStatus.UNKNOWN, 0),
+ "Successful": status_counts.get(QuantumAttemptStatus.SUCCESSFUL, 0),
  "Caveats": caveats,
- "Blocked": status_counts.get(QuantumRunStatus.BLOCKED, 0),
- "Failed": status_counts.get(QuantumRunStatus.FAILED, 0),
+ "Blocked": status_counts.get(QuantumAttemptStatus.BLOCKED, 0),
+ "Failed": status_counts.get(QuantumAttemptStatus.FAILED, 0),
  "TOTAL": len(quanta_for_task),
  "EXPECTED": self.header.n_task_quanta[task_label],
  }
@@ -988,7 +1217,7 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
  the `graph` attribute.

  The various ``read_*`` methods in this class update the `graph` attribute
- in place and return ``self``.
+ in place.
  """

  graph: ProvenanceQuantumGraph = dataclasses.field(init=False)
@@ -1037,30 +1266,19 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
  def __post_init__(self) -> None:
  self.graph = ProvenanceQuantumGraph(self.header, self.pipeline_graph)

- def read_init_quanta(self) -> Self:
+ def read_init_quanta(self) -> None:
  """Read the thin graph, with all edge information and categorization of
  quanta by task label.
-
- Returns
- -------
- self : `ProvenanceQuantumGraphReader`
- The reader (to permit method-chaining).
  """
  init_quanta = self._read_single_block("init_quanta", ProvenanceInitQuantaModel)
  for init_quantum in init_quanta.root:
  self.graph._init_quanta[init_quantum.task_label] = init_quantum.quantum_id
  init_quanta._add_to_graph(self.graph, self.address_reader)
- return self

- def read_full_graph(self) -> Self:
+ def read_full_graph(self) -> None:
  """Read all bipartite edges and all quantum and dataset node
  attributes, fully populating the `graph` attribute.

- Returns
- -------
- self : `ProvenanceQuantumGraphReader`
- The reader (to permit method-chaining).
-
  Notes
  -----
  This does not read logs, metadata, or packages; those must always be
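For callers, the practical effect of these signature changes is that method-chaining on the reader no longer works; the calls are now sequenced as statements. A hypothetical before/after, assuming a `reader` instance:

    # Before (29.2025.4500): reader.read_init_quanta().read_datasets()
    # After (29.2025.4600): each read_* method returns None.
    reader.read_init_quanta()
    reader.read_datasets()
    reader.read_quanta()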
@@ -1069,9 +1287,8 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
  self.read_init_quanta()
  self.read_datasets()
  self.read_quanta()
- return self

- def read_datasets(self, datasets: Iterable[uuid.UUID | DatasetIndex] | None = None) -> Self:
+ def read_datasets(self, datasets: Iterable[uuid.UUID | DatasetIndex] | None = None) -> None:
  """Read information about the given datasets.

  Parameters
@@ -1080,15 +1297,10 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
  Iterable of dataset IDs or indices to load. If not provided, all
  datasets will be loaded. The UUIDs and indices of quanta will be
  ignored.
-
- Return
- -------
- self : `ProvenanceQuantumGraphReader`
- The reader (to permit method-chaining).
  """
- return self._read_nodes(datasets, DATASET_ADDRESS_INDEX, DATASET_MB_NAME, ProvenanceDatasetModel)
+ self._read_nodes(datasets, DATASET_ADDRESS_INDEX, DATASET_MB_NAME, ProvenanceDatasetModel)

- def read_quanta(self, quanta: Iterable[uuid.UUID | QuantumIndex] | None = None) -> Self:
+ def read_quanta(self, quanta: Iterable[uuid.UUID | QuantumIndex] | None = None) -> None:
  """Read information about the given quanta.

  Parameters
@@ -1097,13 +1309,8 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
  Iterable of quantum IDs or indices to load. If not provided, all
  quanta will be loaded. The UUIDs and indices of datasets and
  special init quanta will be ignored.
-
- Return
- -------
- self : `ProvenanceQuantumGraphReader`
- The reader (to permit method-chaining).
  """
- return self._read_nodes(quanta, QUANTUM_ADDRESS_INDEX, QUANTUM_MB_NAME, ProvenanceQuantumModel)
+ self._read_nodes(quanta, QUANTUM_ADDRESS_INDEX, QUANTUM_MB_NAME, ProvenanceQuantumModel)

  def _read_nodes(
  self,
@@ -1111,7 +1318,7 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
  address_index: int,
  mb_name: str,
  model_type: type[ProvenanceDatasetModel] | type[ProvenanceQuantumModel],
- ) -> Self:
+ ) -> None:
  node: ProvenanceDatasetModel | ProvenanceQuantumModel | None
  if nodes is None:
  self.address_reader.read_all()
@@ -1129,6 +1336,7 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
  # also have other outstanding reference holders).
  continue
  node._add_to_graph(self.graph, self.address_reader)
+ return
  with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
  for node_id_or_index in nodes:
  address_row = self.address_reader.find(node_id_or_index)
@@ -1141,66 +1349,74 @@ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
  )
  if node is not None:
  node._add_to_graph(self.graph, self.address_reader)
- return self

  def fetch_logs(
  self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
- ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords]:
+ ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, list[ButlerLogRecords | None]]:
  """Fetch log datasets.

  Parameters
  ----------
  nodes : `~collections.abc.Iterable` [ `uuid.UUID` ]
- UUIDs of the log datasets themselves or of the quanta they
- correspond to.
+ UUIDs or internal integer IDs of the log datasets themselves or of
+ the quanta they correspond to.

  Returns
  -------
- logs : `dict` [ `uuid.UUID`, `ButlerLogRecords`]
- Logs for the given IDs.
+ logs : `dict` [ `uuid.UUID` or `int`, `list` [\
+ `lsst.daf.butler.ButlerLogRecords` or `None`] ]
+ Logs for the given IDs. Each value is a list of
+ `lsst.daf.butler.ButlerLogRecords` instances representing different
+ execution attempts, ordered chronologically from first to last.
+ Attempts where logs were missing will have `None` in this list.
  """
- from lsst.daf.butler.logging import ButlerLogRecords
-
- result: dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords] = {}
+ result: dict[uuid.UUID | DatasetIndex | QuantumIndex, list[ButlerLogRecords | None]] = {}
  with MultiblockReader.open_in_zip(self.zf, LOG_MB_NAME, int_size=self.header.int_size) as mb_reader:
  for node_id_or_index in nodes:
  address_row = self.address_reader.find(node_id_or_index)
- log = mb_reader.read_model(
- address_row.addresses[LOG_ADDRESS_INDEX], ButlerLogRecords, self.decompressor
+ logs_by_attempt = mb_reader.read_model(
+ address_row.addresses[LOG_ADDRESS_INDEX], ProvenanceLogRecordsModel, self.decompressor
  )
- if log is not None:
- result[node_id_or_index] = log
+ if logs_by_attempt is not None:
+ result[node_id_or_index] = [
+ ButlerLogRecords.from_records(attempt_logs) if attempt_logs is not None else None
+ for attempt_logs in logs_by_attempt.attempts
+ ]
  return result

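A minimal usage sketch (not from the diff; `reader` and `quantum_id` are assumed) for the new per-attempt return shape:

    # Each value now holds one entry per execution attempt, oldest first.
    logs = reader.fetch_logs([quantum_id])
    for attempt_number, records in enumerate(logs.get(quantum_id, [])):
        if records is None:
            print(f"attempt {attempt_number}: logs missing")
        else:
            print(f"attempt {attempt_number}: {len(records)} log records")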
  def fetch_metadata(
  self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
- ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata]:
+ ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, list[TaskMetadata | None]]:
  """Fetch metadata datasets.

  Parameters
  ----------
  nodes : `~collections.abc.Iterable` [ `uuid.UUID` ]
- UUIDs of the metadata datasets themselves or of the quanta they
- correspond to.
+ UUIDs or internal integer IDs of the metadata datasets themselves
+ or of the quanta they correspond to.

  Returns
  -------
- metadata : `dict` [ `uuid.UUID`, `TaskMetadata`]
- Metadata for the given IDs.
+ metadata : `dict` [ `uuid.UUID` or `int`, `list` [`.TaskMetadata`] ]
+ Metadata for the given IDs. Each value is a list of
+ `.TaskMetadata` instances representing different execution
+ attempts, ordered chronologically from first to last. Attempts
+ where metadata was missing (not written even in the fallback extra
+ provenance in the logs) will have `None` in this list.
  """
- from .._task_metadata import TaskMetadata
-
- result: dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata] = {}
+ result: dict[uuid.UUID | DatasetIndex | QuantumIndex, list[TaskMetadata | None]] = {}
  with MultiblockReader.open_in_zip(
  self.zf, METADATA_MB_NAME, int_size=self.header.int_size
  ) as mb_reader:
  for node_id_or_index in nodes:
  address_row = self.address_reader.find(node_id_or_index)
- metadata = mb_reader.read_model(
- address_row.addresses[METADATA_ADDRESS_INDEX], TaskMetadata, self.decompressor
+ metadata_by_attempt = mb_reader.read_model(
+ address_row.addresses[METADATA_ADDRESS_INDEX],
+ ProvenanceTaskMetadataModel,
+ self.decompressor,
  )
- if metadata is not None:
- result[node_id_or_index] = metadata
+ if metadata_by_attempt is not None:
+ result[node_id_or_index] = metadata_by_attempt.attempts
  return result

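And a matching sketch for metadata, under the same assumptions as the `fetch_logs` example above:

    metadata = reader.fetch_metadata([quantum_id])
    for attempt_number, task_metadata in enumerate(metadata.get(quantum_id, [])):
        status = "missing" if task_metadata is None else "recovered"
        print(f"attempt {attempt_number}: metadata {status}")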
  def fetch_packages(self) -> Packages: