lsst-pipe-base 29.2025.4400-py3-none-any.whl → 29.2025.4600-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. lsst/pipe/base/_status.py +156 -11
  2. lsst/pipe/base/log_capture.py +98 -7
  3. lsst/pipe/base/pipelineIR.py +36 -3
  4. lsst/pipe/base/pipeline_graph/expressions.py +3 -3
  5. lsst/pipe/base/quantum_graph/_common.py +6 -0
  6. lsst/pipe/base/quantum_graph/_predicted.py +13 -17
  7. lsst/pipe/base/quantum_graph/_provenance.py +322 -106
  8. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -9
  9. lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
  10. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +154 -53
  11. lsst/pipe/base/quantum_graph/aggregator/_structs.py +27 -34
  12. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +8 -7
  13. lsst/pipe/base/quantum_graph/aggregator/_writer.py +5 -8
  14. lsst/pipe/base/quantum_provenance_graph.py +2 -44
  15. lsst/pipe/base/single_quantum_executor.py +43 -9
  16. lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
  17. lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
  18. lsst/pipe/base/version.py +1 -1
  19. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/METADATA +1 -1
  20. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/RECORD +28 -28
  21. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/WHEEL +0 -0
  22. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/entry_points.txt +0 -0
  23. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
  24. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
  25. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/bsd_license.txt +0 -0
  26. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/gpl-v3.0.txt +0 -0
  27. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/top_level.txt +0 -0
  28. {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/aggregator/_communicators.py

@@ -59,7 +59,7 @@ from typing import Any, Literal, Self, TypeAlias, TypeVar, Union
  from lsst.utils.logging import VERBOSE, LsstLogAdapter

  from ._config import AggregatorConfig
- from ._progress import Progress, make_worker_log
+ from ._progress import ProgressManager, make_worker_log
  from ._structs import IngestRequest, ScanReport, ScanResult

  _T = TypeVar("_T")
@@ -340,7 +340,7 @@ class SupervisorCommunicator:
          config: AggregatorConfig,
      ) -> None:
          self.config = config
-         self.progress = Progress(log, config)
+         self.progress = ProgressManager(log, config)
          self.n_scanners = n_scanners
          # The supervisor sends scan requests to scanners on this queue.
          # When complete, the supervisor sends n_scanners sentinals and each
@@ -406,13 +406,13 @@ class SupervisorCommunicator:
                  pass
              case _Sentinel.INGESTER_DONE:
                  self._ingester_done = True
-                 self.progress.finish_ingests()
+                 self.progress.quantum_ingests.close()
              case _Sentinel.SCANNER_DONE:
                  self._n_scanners_done += 1
-                 self.progress.finish_scans()
+                 self.progress.scans.close()
              case _Sentinel.WRITER_DONE:
                  self._writer_done = True
-                 self.progress.finish_writes()
+                 self.progress.writes.close()
              case unexpected:
                  raise AssertionError(f"Unexpected message {unexpected!r} to supervisor.")
          self.log.verbose(
@@ -530,9 +530,9 @@ class SupervisorCommunicator:
                  if not already_failing:
                      raise FatalWorkerError()
              case _IngestReport(n_producers=n_producers):
-                 self.progress.report_ingests(n_producers)
+                 self.progress.quantum_ingests.update(n_producers)
              case _Sentinel.WRITE_REPORT:
-                 self.progress.report_write()
+                 self.progress.writes.update(1)
              case _ProgressLog(message=message, level=level):
                  self.progress.log.log(level, "%s [after %0.1fs]", message, self.progress.elapsed_time)
              case _:
@@ -626,10 +626,10 @@ class WorkerCommunicator:

          Parameters
          ----------
-         message : `str`
-             Log message.
          level : `int`
              Log level. Should be ``VERBOSE`` or higher.
+         message : `str`
+             Log message.
          """
          self._reports.put(_ProgressLog(message=message, level=level), block=False)

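Taken together, the hunks above replace the old Progress object's per-aspect report_*/finish_* methods with per-counter update()/close() calls on ProgressManager attributes. A minimal sketch of the call mapping, using only names that appear in these hunks (the surrounding supervisor message loop is elided):

    # 29.2025.4400                          # 29.2025.4600
    progress.report_ingests(n_producers)    # progress.quantum_ingests.update(n_producers)
    progress.report_write()                 # progress.writes.update(1)
    progress.finish_scans()                 # progress.scans.close()
    progress.finish_ingests()               # progress.quantum_ingests.close()
    progress.finish_writes()                # progress.writes.close()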
lsst/pipe/base/quantum_graph/aggregator/_progress.py

@@ -27,20 +27,86 @@

  from __future__ import annotations

- __all__ = ("Progress", "make_worker_log")
+ __all__ = ("ProgressCounter", "ProgressManager", "make_worker_log")

  import logging
  import os
  import time
  from types import TracebackType
- from typing import Self
+ from typing import Any, Self

  from lsst.utils.logging import TRACE, VERBOSE, LsstLogAdapter, PeriodicLogger, getLogger

  from ._config import AggregatorConfig


- class Progress:
+ class ProgressCounter:
+     """A progress tracker for an individual aspect of the aggregation process.
+
+     Parameters
+     ----------
+     parent : `ProgressManager`
+         The parent progress manager object.
+     description : `str`
+         Human-readable description of this aspect.
+     unit : `str`
+         Unit (in plural form) for the items being counted.
+     total : `int`, optional
+         Expected total number of items. May be set later.
+     """
+
+     def __init__(self, parent: ProgressManager, description: str, unit: str, total: int | None = None):
+         self._parent = parent
+         self.total = total
+         self._description = description
+         self._current = 0
+         self._unit = unit
+         self._bar: Any = None
+
+     def update(self, n: int) -> None:
+         """Report that ``n`` new items have been processed.
+
+         Parameters
+         ----------
+         n : `int`
+             Number of new items processed.
+         """
+         self._current += n
+         if self._parent.interactive:
+             if self._bar is None:
+                 if n == self.total:
+                     return
+                 from tqdm import tqdm
+
+                 self._bar = tqdm(desc=self._description, total=self.total, leave=False, unit=f" {self._unit}")
+             else:
+                 self._bar.update(n)
+                 if self._current == self.total:
+                     self._bar.close()
+         self._parent._log_status()
+
+     def close(self) -> None:
+         """Close the counter, guaranteeing that `update` will not be called
+         again.
+         """
+         if self._bar is not None:
+             self._bar.close()
+             self._bar = None
+
+     def append_log_terms(self, msg: list[str]) -> None:
+         """Append a log message for this counter to a list if it is active.
+
+         Parameters
+         ----------
+         msg : `list` [ `str` ]
+             List of messages to concatenate into a single line and log
+             together, to be modified in-place.
+         """
+         if self.total is not None and self._current > 0 and self._current < self.total:
+             msg.append(f"{self._description} ({self._current} of {self.total} {self._unit})")
+
+
+ class ProgressManager:
      """A helper class for the provenance aggregator that handles reporting
      progress to the user.

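The new ProgressCounter owns at most one tqdm bar, created lazily on the first update() in interactive mode and skipped when a single update() covers the whole expected total. A standalone sketch of the non-interactive lifecycle, using a hypothetical stub in place of ProgressManager (only ProgressCounter's interface comes from the diff):

    class _StubParent:
        """Hypothetical stand-in for ProgressManager."""

        interactive = False  # never create a tqdm bar

        def _log_status(self) -> None:
            pass  # the real parent rate-limits via PeriodicLogger

    counter = ProgressCounter(_StubParent(), "scanning", "quanta", total=100)
    counter.update(30)
    terms: list[str] = []
    counter.append_log_terms(terms)  # active: 0 < 30 < 100
    assert terms == ["scanning (30 of 100 quanta)"]
    counter.close()  # no bar was created, so nothing to close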
@@ -66,10 +132,9 @@ class Progress:
          self.log = log
          self.config = config
          self._periodic_log = PeriodicLogger(self.log, config.log_status_interval)
-         self._n_scanned: int = 0
-         self._n_ingested: int = 0
-         self._n_written: int = 0
-         self._n_quanta: int | None = None
+         self.scans = ProgressCounter(self, "scanning", "quanta")
+         self.writes = ProgressCounter(self, "writing", "quanta")
+         self.quantum_ingests = ProgressCounter(self, "ingesting outputs", "quanta")
          self.interactive = config.interactive_status

      def __enter__(self) -> Self:
@@ -90,29 +155,6 @@
          self._logging_redirect.__exit__(exc_type, exc_value, traceback)
          return None

-     def set_n_quanta(self, n_quanta: int) -> None:
-         """Set the total number of quanta.
-
-         Parameters
-         ----------
-         n_quanta : `int`
-             Total number of quanta, including special "init" quanta.
-
-         Notes
-         -----
-         This method must be called before any of the ``report_*`` methods.
-         """
-         self._n_quanta = n_quanta
-         if self.interactive:
-             from tqdm import tqdm
-
-             self._scan_progress = tqdm(desc="Scanning", total=n_quanta, leave=False, unit="quanta")
-             self._ingest_progress = tqdm(
-                 desc="Ingesting", total=n_quanta, leave=False, smoothing=0.1, unit="quanta"
-             )
-             if self.config.output_path is not None:
-                 self._write_progress = tqdm(desc="Writing", total=n_quanta, leave=False, unit="quanta")
-
      @property
      def elapsed_time(self) -> float:
          """The time in seconds since the start of the aggregator."""
@@ -120,60 +162,11 @@

      def _log_status(self) -> None:
          """Invoke the periodic logger with the current status."""
-         self._periodic_log.log(
-             "%s quanta scanned, %s quantum outputs ingested, "
-             "%s provenance quanta written (of %s) after %0.1fs.",
-             self._n_scanned,
-             self._n_ingested,
-             self._n_written,
-             self._n_quanta,
-             self.elapsed_time,
-         )
-
-     def report_scan(self) -> None:
-         """Report that a quantum was scanned."""
-         self._n_scanned += 1
-         if self.interactive:
-             self._scan_progress.update(1)
-         else:
-             self._log_status()
-
-     def finish_scans(self) -> None:
-         """Report that all scanning is done."""
-         if self.interactive:
-             self._scan_progress.close()
-
-     def report_ingests(self, n_quanta: int) -> None:
-         """Report that ingests for multiple quanta were completed.
-
-         Parameters
-         ----------
-         n_quanta : `int`
-             Number of quanta whose outputs were ingested.
-         """
-         self._n_ingested += n_quanta
-         if self.interactive:
-             self._ingest_progress.update(n_quanta)
-         else:
-             self._log_status()
-
-     def finish_ingests(self) -> None:
-         """Report that all ingests are done."""
-         if self.interactive:
-             self._ingest_progress.close()
-
-     def report_write(self) -> None:
-         """Report that a quantum's provenance was written."""
-         self._n_written += 1
-         if self.interactive:
-             self._write_progress.update()
-         else:
-             self._log_status()
-
-     def finish_writes(self) -> None:
-         """Report that all writes are done."""
-         if self.interactive:
-             self._write_progress.close()
+         log_terms: list[str] = []
+         self.scans.append_log_terms(log_terms)
+         self.writes.append_log_terms(log_terms)
+         self.quantum_ingests.append_log_terms(log_terms)
+         self._periodic_log.log("Status after %0.1fs: %s.", self.elapsed_time, "; ".join(log_terms))


  def make_worker_log(name: str, config: AggregatorConfig) -> LsstLogAdapter:
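With this rewrite the periodic status line is assembled from only the counters that are mid-flight (started but not yet at their total). A quick reconstruction of the resulting message, with illustrative values:

    log_terms = ["scanning (950 of 1000 quanta)", "writing (890 of 1000 quanta)"]
    message = "Status after %0.1fs: %s." % (42.3, "; ".join(log_terms))
    # "Status after 42.3s: scanning (950 of 1000 quanta); writing (890 of 1000 quanta)."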
lsst/pipe/base/quantum_graph/aggregator/_scanner.py

@@ -39,10 +39,10 @@ from lsst.daf.butler import ButlerLogRecords, DatasetRef, QuantumBackedButler
  from lsst.utils.iteration import ensure_iterable

  from ... import automatic_connection_constants as acc
- from ..._status import QuantumSuccessCaveats
+ from ..._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
  from ..._task_metadata import TaskMetadata
+ from ...log_capture import _ExecutionLogRecordsExtra
  from ...pipeline_graph import PipelineGraph, TaskImportMode
- from ...quantum_provenance_graph import ExceptionInfo
  from ...resource_usage import QuantumResourceUsage
  from .._multiblock import Compressor
  from .._predicted import (
@@ -50,6 +50,7 @@ from .._predicted import (
      PredictedQuantumDatasetsModel,
      PredictedQuantumGraphReader,
  )
+ from .._provenance import ProvenanceQuantumAttemptModel
  from ._communicators import ScannerCommunicator
  from ._structs import IngestRequest, ScanReport, ScanResult, ScanStatus

@@ -179,7 +180,7 @@ class Scanner:
          Returns
          -------
          exists : `bool``
-             Whether the dataset exists
+             Whether the dataset exists.
          """
          ref = self.reader.components.make_dataset_ref(predicted)
          return self.qbb.stored(ref)
@@ -212,29 +213,67 @@
          )
          result = ScanResult(predicted_quantum.quantum_id, ScanStatus.INCOMPLETE)
          del self.reader.components.quantum_datasets[quantum_id]
-         log_id = self._read_and_compress_log(predicted_quantum, result)
-         if not self.comms.config.assume_complete and not result.log:
+         last_attempt = ProvenanceQuantumAttemptModel()
+         if not self._read_log(predicted_quantum, result, last_attempt):
              self.comms.log.debug("Abandoning scan for %s; no log dataset.", quantum_id)
-             result.status = ScanStatus.ABANDONED
              self.comms.report_scan(ScanReport(result.quantum_id, result.status))
              return result
-         metadata_id = self._read_and_compress_metadata(predicted_quantum, result)
-         if result.metadata:
-             result.status = ScanStatus.SUCCESSFUL
-             result.existing_outputs.add(metadata_id)
-         elif self.comms.config.assume_complete:
-             result.status = ScanStatus.FAILED
-         else:
+         if not self._read_metadata(predicted_quantum, result, last_attempt):
              # We found the log dataset, but no metadata; this means the
              # quantum failed, but a retry might still happen that could
              # turn it into a success if we can't yet assume the run is
              # complete.
              self.comms.log.debug("Abandoning scan for %s.", quantum_id)
-             result.status = ScanStatus.ABANDONED
              self.comms.report_scan(ScanReport(result.quantum_id, result.status))
              return result
-         if result.log:
-             result.existing_outputs.add(log_id)
+         last_attempt.attempt = len(result.attempts)
+         result.attempts.append(last_attempt)
+         assert result.status is not ScanStatus.INCOMPLETE
+         assert result.status is not ScanStatus.ABANDONED
+         assert result.log_model is not None, "Only set to None after converting to JSON."
+         assert result.metadata_model is not None, "Only set to None after converting to JSON."
+
+         if len(result.log_model.attempts) < len(result.attempts):
+             # Logs were not found for this attempt; must have been a hard error
+             # that kept the `finally` block from running or otherwise
+             # interrupted the writing of the logs.
+             result.log_model.attempts.append(None)
+             if result.status is ScanStatus.SUCCESSFUL:
+                 # But we found the metadata! Either that hard error happened
+                 # at a very unlucky time (in between those two writes), or
+                 # something even weirder happened.
+                 result.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
+             else:
+                 result.attempts[-1].status = QuantumAttemptStatus.FAILED
+         if len(result.metadata_model.attempts) < len(result.attempts):
+             # Metadata missing usually just means a failure. In any case, the
+             # status will already be correct, either because it was set to a
+             # failure when we read the logs, or left at UNKNOWN if there were
+             # no logs. Note that scanners never process BLOCKED quanta at all.
+             result.metadata_model.attempts.append(None)
+         assert len(result.log_model.attempts) == len(result.attempts) or len(
+             result.metadata_model.attempts
+         ) == len(result.attempts), (
+             "The only way we can add more than one quantum attempt is by "
+             "extracting info stored with the logs, and that always appends "
+             "a log attempt and a metadata attempt, so this must be a bug in "
+             "the scanner."
+         )
+         # Now that we're done gathering the log and metadata information into
+         # models, dump them to JSON and delete the originals.
+         result.log_content = result.log_model.model_dump_json().encode()
+         result.log_model = None
+         result.metadata_content = result.metadata_model.model_dump_json().encode()
+         result.metadata_model = None
+         if self.compressor is not None:
+             if result.log_content is not None:
+                 result.log_content = self.compressor.compress(result.log_content)
+             if result.metadata_content is not None:
+                 result.metadata_content = self.compressor.compress(result.metadata_content)
+             result.is_compressed = True
+         # Scan for output dataset existence, skipping any that the metadata
+         # reported as having been definitively written, as well as the
+         # metadata and logs themselves (since we just checked those).
          for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
              if predicted_output.dataset_id not in result.existing_outputs and self.scan_dataset(
                  predicted_output
@@ -242,8 +281,6 @@
              result.existing_outputs.add(predicted_output.dataset_id)
          to_ingest = self._make_ingest_request(predicted_quantum, result)
          self.comms.report_scan(ScanReport(result.quantum_id, result.status))
-         assert result.status is not ScanStatus.INCOMPLETE
-         assert result.status is not ScanStatus.ABANDONED
          if self.comms.config.output_path is not None:
              self.comms.request_write(result)
          self.comms.request_ingest(to_ingest)
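The bookkeeping above keeps the three per-attempt sequences aligned: every entry in result.attempts gets exactly one (possibly None) entry in log_model.attempts and one in metadata_model.attempts. A hypothetical distillation of that alignment step (names invented for illustration; the real code also fixes up the attempt status as shown, and its final assertion is looser, using `or` between the two length checks):

    def pad_attempt_records(
        attempts: list[object],
        log_attempts: list[object | None],
        metadata_attempts: list[object | None],
    ) -> None:
        if len(log_attempts) < len(attempts):
            # A hard error interrupted the log write for the last attempt.
            log_attempts.append(None)
        if len(metadata_attempts) < len(attempts):
            # Missing metadata: usually just a failed attempt.
            metadata_attempts.append(None)
        assert len(log_attempts) == len(attempts)
        assert len(metadata_attempts) == len(attempts)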
@@ -279,9 +316,12 @@
          to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
          return IngestRequest(result.quantum_id, to_ingest_predicted, to_ingest_records)

-     def _read_and_compress_metadata(
-         self, predicted_quantum: PredictedQuantumDatasetsModel, result: ScanResult
-     ) -> uuid.UUID:
+     def _read_metadata(
+         self,
+         predicted_quantum: PredictedQuantumDatasetsModel,
+         result: ScanResult,
+         last_attempt: ProvenanceQuantumAttemptModel,
+     ) -> bool:
          """Attempt to read the metadata dataset for a quantum to extract
          provenance information from it.

@@ -291,53 +331,62 @@
              Information about the predicted quantum.
          result : `ScanResult`
              Result object to be modified in-place.
+         last_attempt : `ProvenanceQuantumAttemptModel`
+             Structure to fill in with information about the last attempt to
+             run this quantum.

          Returns
          -------
-         dataset_id : `uuid.UUID`
-             UUID of the metadata dataset.
+         complete : `bool`
+             Whether the quantum is complete.
          """
-         assert not result.metadata, "We shouldn't be scanning again if we already read the metadata."
          (predicted_dataset,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
          ref = self.reader.components.make_dataset_ref(predicted_dataset)
          try:
              # This assumes QBB metadata writes are atomic, which should be the
              # case. If it's not we'll probably get pydantic validation errors
              # here.
-             content: TaskMetadata = self.qbb.get(ref, storageClass="TaskMetadata")
+             metadata: TaskMetadata = self.qbb.get(ref, storageClass="TaskMetadata")
          except FileNotFoundError:
-             if not self.comms.config.assume_complete:
-                 return ref.id
+             if self.comms.config.assume_complete:
+                 result.status = ScanStatus.FAILED
+             else:
+                 result.status = ScanStatus.ABANDONED
+             return False
          else:
+             result.status = ScanStatus.SUCCESSFUL
+             result.existing_outputs.add(ref.id)
+             last_attempt.status = QuantumAttemptStatus.SUCCESSFUL
              try:
                  # Int conversion guards against spurious conversion to
                  # float that can apparently sometimes happen in
                  # TaskMetadata.
-                 result.caveats = QuantumSuccessCaveats(int(content["quantum"]["caveats"]))
+                 last_attempt.caveats = QuantumSuccessCaveats(int(metadata["quantum"]["caveats"]))
              except LookupError:
                  pass
              try:
-                 result.exception = ExceptionInfo._from_metadata(
-                     content[predicted_quantum.task_label]["failure"]
+                 last_attempt.exception = ExceptionInfo._from_metadata(
+                     metadata[predicted_quantum.task_label]["failure"]
                  )
              except LookupError:
                  pass
              try:
-                 result.existing_outputs = {
-                     uuid.UUID(id_str) for id_str in ensure_iterable(content["quantum"].getArray("outputs"))
-                 }
+                 result.existing_outputs.update(
+                     uuid.UUID(id_str) for id_str in ensure_iterable(metadata["quantum"].getArray("outputs"))
+                 )
              except LookupError:
                  pass
-             result.resource_usage = QuantumResourceUsage.from_task_metadata(content)
-             result.metadata = content.model_dump_json().encode()
-             if self.compressor is not None:
-                 result.metadata = self.compressor.compress(result.metadata)
-                 result.is_compressed = True
-             return ref.id
-
-     def _read_and_compress_log(
-         self, predicted_quantum: PredictedQuantumDatasetsModel, result: ScanResult
-     ) -> uuid.UUID:
+             last_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(metadata)
+             assert result.metadata_model is not None, "Only set to None after converting to JSON."
+             result.metadata_model.attempts.append(metadata)
+             return True
+
+     def _read_log(
+         self,
+         predicted_quantum: PredictedQuantumDatasetsModel,
+         result: ScanResult,
+         last_attempt: ProvenanceQuantumAttemptModel,
+     ) -> bool:
          """Attempt to read the log dataset for a quantum to test for the
          quantum's completion (the log is always written last) and aggregate
          the log content in the provenance quantum graph.
@@ -348,24 +397,76 @@
              Information about the predicted quantum.
          result : `ScanResult`
              Result object to be modified in-place.
+         last_attempt : `ProvenanceQuantumAttemptModel`
+             Structure to fill in with information about the last attempt to
+             run this quantum.

          Returns
          -------
-         dataset_id : `uuid.UUID`
-             UUID of the log dataset.
+         complete : `bool`
+             Whether the quantum is complete.
          """
          (predicted_dataset,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
          ref = self.reader.components.make_dataset_ref(predicted_dataset)
          try:
              # This assumes QBB log writes are atomic, which should be the case.
              # If it's not we'll probably get pydantic validation errors here.
-             content: ButlerLogRecords = self.qbb.get(ref)
+             log_records: ButlerLogRecords = self.qbb.get(ref)
          except FileNotFoundError:
-             if not self.comms.config.assume_complete:
-                 return ref.id
+             if self.comms.config.assume_complete:
+                 result.status = ScanStatus.FAILED
+             else:
+                 result.status = ScanStatus.ABANDONED
+             return False
          else:
-             result.log = content.model_dump_json().encode()
-             if self.compressor is not None:
-                 result.log = self.compressor.compress(result.log)
-                 result.is_compressed = True
-             return ref.id
+             # Set the attempt's run status to FAILED, since the default is
+             # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
+             # the logs exist. This will usually get replaced by SUCCESSFUL
+             # when we look for metadata next.
+             last_attempt.status = QuantumAttemptStatus.FAILED
+             result.existing_outputs.add(ref.id)
+             if log_records.extra:
+                 log_extra = _ExecutionLogRecordsExtra.model_validate(log_records.extra)
+                 self._extract_from_log_extra(log_extra, result, last_attempt=last_attempt)
+             assert result.log_model is not None, "Only set to None after converting to JSON."
+             result.log_model.attempts.append(list(log_records))
+             return True
+
+     def _extract_from_log_extra(
+         self,
+         log_extra: _ExecutionLogRecordsExtra,
+         result: ScanResult,
+         last_attempt: ProvenanceQuantumAttemptModel | None,
+     ) -> None:
+         for previous_attempt_log_extra in log_extra.previous_attempts:
+             self._extract_from_log_extra(previous_attempt_log_extra, result, last_attempt=None)
+         quantum_attempt: ProvenanceQuantumAttemptModel
+         if last_attempt is None:
+             # This is not the last attempt, so it must be a failure.
+             quantum_attempt = ProvenanceQuantumAttemptModel(
+                 attempt=len(result.attempts), status=QuantumAttemptStatus.FAILED
+             )
+             # We also need to get the logs from this extra provenance, since
+             # they won't be in the main section of the log records.
+             assert result.log_model is not None, "Only set to None after converting to JSON."
+             result.log_model.attempts.append(log_extra.logs)
+             # The special last attempt is only appended after we attempt to
+             # read metadata later, but we have to append this one now.
+             result.attempts.append(quantum_attempt)
+         else:
+             assert not log_extra.logs, "Logs for the last attempt should not be stored in the extra JSON."
+             quantum_attempt = last_attempt
+         if log_extra.exception is not None or log_extra.metadata is not None or last_attempt is None:
+             # We won't be getting a separate metadata dataset, so anything we
+             # might get from the metadata has to come from this extra
+             # provenance in the logs.
+             quantum_attempt.exception = log_extra.exception
+             assert result.metadata_model is not None, "Only set to None after converting to JSON."
+             if log_extra.metadata is not None:
+                 quantum_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(log_extra.metadata)
+                 result.metadata_model.attempts.append(log_extra.metadata)
+             else:
+                 result.metadata_model.attempts.append(None)
+         # Regardless of whether this is the last attempt or not, we can only
+         # get the previous_process_quanta from the log extra.
+         quantum_attempt.previous_process_quanta.extend(log_extra.previous_process_quanta)
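Dataset presence now maps onto a per-attempt status roughly as follows. This is a rough distillation of _read_log, _read_metadata, and the fix-ups in scan_quantum; it ignores the assume_complete early exits (which abandon or fail the scan before an attempt is recorded) and the retry bookkeeping:

    def infer_attempt_status(log_found: bool, metadata_found: bool) -> QuantumAttemptStatus:
        # Status starts at UNKNOWN (neither dataset present), becomes FAILED
        # once the logs are found, is upgraded to SUCCESSFUL when metadata is
        # also found, and is LOGS_MISSING when metadata exists without logs.
        if metadata_found:
            return QuantumAttemptStatus.SUCCESSFUL if log_found else QuantumAttemptStatus.LOGS_MISSING
        if log_found:
            return QuantumAttemptStatus.FAILED
        return QuantumAttemptStatus.UNKNOWN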
lsst/pipe/base/quantum_graph/aggregator/_structs.py

@@ -40,11 +40,13 @@ import uuid

  from lsst.daf.butler.datastore.record_data import DatastoreRecordData

- from ..._status import QuantumSuccessCaveats
- from ...quantum_provenance_graph import ExceptionInfo, QuantumRunStatus
- from ...resource_usage import QuantumResourceUsage
  from .._common import DatastoreName
  from .._predicted import PredictedDatasetModel
+ from .._provenance import (
+     ProvenanceLogRecordsModel,
+     ProvenanceQuantumAttemptModel,
+     ProvenanceTaskMetadataModel,
+ )


  class ScanStatus(enum.Enum):
@@ -126,42 +128,33 @@ class ScanResult:
      status: ScanStatus
      """Combined status for the scan and the execution of the quantum."""

-     caveats: QuantumSuccessCaveats | None = None
-     """Flags indicating caveats on successful quanta."""
-
-     exception: ExceptionInfo | None = None
-     """Information about an exception raised when the quantum was executing."""
-
-     resource_usage: QuantumResourceUsage | None = None
-     """Resource usage information (timing, memory use) for this quantum."""
+     attempts: list[ProvenanceQuantumAttemptModel] = dataclasses.field(default_factory=list)
+     """Provenance information about each attempt to run the quantum."""

      existing_outputs: set[uuid.UUID] = dataclasses.field(default_factory=set)
      """Unique IDs of the output datasets that were actually written."""

-     metadata: bytes = b""
-     """Raw content of the metadata dataset."""
+     metadata_model: ProvenanceTaskMetadataModel | None = dataclasses.field(
+         default_factory=ProvenanceTaskMetadataModel
+     )
+     """Task metadata information for each attempt.
+
+     This is set to `None` to keep the pickle size small after it is saved
+     to `metadata_content`.
+     """
+
+     metadata_content: bytes = b""
+     """Serialized form of `metadata_model`."""
+
+     log_model: ProvenanceLogRecordsModel | None = dataclasses.field(default_factory=ProvenanceLogRecordsModel)
+     """Log records for each attempt.

-     log: bytes = b""
-     """Raw content of the log dataset."""
+     This is set to `None` to keep the pickle size small after it is saved
+     to `log_content`.
+     """
+
+     log_content: bytes = b""
+     """Serialized form of `log_model`."""

      is_compressed: bool = False
      """Whether the `metadata` and `log` attributes are compressed."""
-
-     def get_run_status(self) -> QuantumRunStatus:
-         """Translate the scan status and metadata/log presence into a run
-         status.
-         """
-         if self.status is ScanStatus.BLOCKED:
-             return QuantumRunStatus.BLOCKED
-         if self.status is ScanStatus.INIT:
-             return QuantumRunStatus.SUCCESSFUL
-         if self.log:
-             if self.metadata:
-                 return QuantumRunStatus.SUCCESSFUL
-             else:
-                 return QuantumRunStatus.FAILED
-         else:
-             if self.metadata:
-                 return QuantumRunStatus.LOGS_MISSING
-             else:
-                 return QuantumRunStatus.METADATA_MISSING
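Downstream of the scanner, the *_content bytes have to be inverted back into models. A hypothetical reader-side sketch, assuming the Provenance*Model classes are pydantic models (model_dump_json appears in the scanner hunks) and that the Compressor used for writing exposes a matching decompress method (an assumption; the actual reader API is not part of this diff):

    raw = result.log_content
    if result.is_compressed:
        raw = compressor.decompress(raw)  # assumed inverse of Compressor.compress
    log_model = ProvenanceLogRecordsModel.model_validate_json(raw)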