lsst-pipe-base 30.0.0__py3-none-any.whl → 30.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. lsst/pipe/base/_instrument.py +5 -6
  2. lsst/pipe/base/log_capture.py +79 -39
  3. lsst/pipe/base/mp_graph_executor.py +15 -51
  4. lsst/pipe/base/quantum_graph/_common.py +3 -4
  5. lsst/pipe/base/quantum_graph/_multiblock.py +16 -6
  6. lsst/pipe/base/quantum_graph/_predicted.py +12 -106
  7. lsst/pipe/base/quantum_graph/_provenance.py +6 -657
  8. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +50 -18
  9. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +229 -35
  10. lsst/pipe/base/quantum_graph/aggregator/_structs.py +113 -3
  11. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +5 -10
  12. lsst/pipe/base/quantum_graph/aggregator/_writer.py +348 -31
  13. lsst/pipe/base/quantum_graph_builder.py +1 -12
  14. lsst/pipe/base/quantum_graph_executor.py +13 -116
  15. lsst/pipe/base/quantum_graph_skeleton.py +7 -1
  16. lsst/pipe/base/separable_pipeline_executor.py +2 -18
  17. lsst/pipe/base/single_quantum_executor.py +35 -53
  18. lsst/pipe/base/version.py +1 -1
  19. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/METADATA +1 -1
  20. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/RECORD +28 -30
  21. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/WHEEL +1 -1
  22. lsst/pipe/base/log_on_close.py +0 -79
  23. lsst/pipe/base/quantum_graph/formatter.py +0 -101
  24. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/entry_points.txt +0 -0
  25. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/licenses/COPYRIGHT +0 -0
  26. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/licenses/LICENSE +0 -0
  27. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/licenses/bsd_license.txt +0 -0
  28. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  29. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/top_level.txt +0 -0
  30. {lsst_pipe_base-30.0.0.dist-info → lsst_pipe_base-30.0.0rc1.dist-info}/zip-safe +0 -0
@@ -51,17 +51,16 @@ import time
51
51
  import uuid
52
52
  from abc import ABC, abstractmethod
53
53
  from collections.abc import Callable, Iterable, Iterator
54
- from contextlib import ExitStack
54
+ from contextlib import AbstractContextManager, ExitStack, contextmanager
55
55
  from traceback import format_exception
56
56
  from types import TracebackType
57
57
  from typing import Any, Literal, Self, TypeAlias, TypeVar, Union
58
58
 
59
- from lsst.utils.logging import LsstLogAdapter
59
+ from lsst.utils.logging import VERBOSE, LsstLogAdapter
60
60
 
61
- from .._provenance import ProvenanceQuantumScanData
62
61
  from ._config import AggregatorConfig
63
62
  from ._progress import ProgressManager, make_worker_log
64
- from ._structs import IngestRequest, ScanReport
63
+ from ._structs import IngestRequest, ScanReport, WriteRequest
65
64
 
66
65
  _T = TypeVar("_T")
67
66
 
@@ -362,9 +361,9 @@ class SupervisorCommunicator:
362
361
  # scanner and the supervisor send one sentinel when done, and the
363
362
  # writer waits for (n_scanners + 1) sentinels to arrive before it
364
363
  # starts its shutdown.
365
- self._write_requests: (
366
- Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
367
- ) = context.make_queue() if config.output_path is not None else None
364
+ self._write_requests: Queue[WriteRequest | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None = (
365
+ context.make_queue() if config.output_path is not None else None
366
+ )
368
367
  # All other workers use this queue to send many different kinds of
369
368
  # reports to the supervisor. The supervisor waits for a _DONE sentinel
370
369
  # from each worker before it finishes its shutdown.
@@ -462,12 +461,12 @@ class SupervisorCommunicator:
462
461
  """
463
462
  self._scan_requests.put(_ScanRequest(quantum_id), block=False)
464
463
 
465
- def request_write(self, request: ProvenanceQuantumScanData) -> None:
464
+ def request_write(self, request: WriteRequest) -> None:
466
465
  """Send a request to the writer to write provenance for the given scan.
467
466
 
468
467
  Parameters
469
468
  ----------
470
- request : `ProvenanceQuantumScanData`
469
+ request : `WriteRequest`
471
470
  Information from scanning a quantum (or knowing you don't have to,
472
471
  in the case of blocked quanta).
473
472
  """
@@ -622,11 +621,6 @@ class WorkerCommunicator:
622
621
  self._exit_stack.__exit__(exc_type, exc_value, traceback)
623
622
  return True
624
623
 
625
- @property
626
- def exit_stack(self) -> ExitStack:
627
- """A `contextlib.ExitStack` tied to the communicator."""
628
- return self._exit_stack
629
-
630
624
  def log_progress(self, level: int, message: str) -> None:
631
625
  """Send a high-level log message to the supervisor.
632
626
 
@@ -639,6 +633,44 @@ class WorkerCommunicator:
639
633
  """
640
634
  self._reports.put(_ProgressLog(message=message, level=level), block=False)
641
635
 
636
+ def enter(
637
+ self,
638
+ cm: AbstractContextManager[_T],
639
+ on_close: str | None = None,
640
+ level: int = VERBOSE,
641
+ is_progress_log: bool = False,
642
+ ) -> _T:
643
+ """Enter a context manager that will be exited when the communicator's
644
+ context is exited.
645
+
646
+ Parameters
647
+ ----------
648
+ cm : `contextlib.AbstractContextManager`
649
+ A context manager to enter.
650
+ on_close : `str`, optional
651
+ A log message to emit (on the worker's logger) just before the
652
+ given context manager is exited. This can be used to indicate
653
+ what's going on when an ``__exit__`` implementation has a lot of
654
+ work to do (e.g. moving a large file into a zip archive).
655
+ level : `int`, optional
656
+ Level for the ``on_close`` log message.
657
+ is_progress_log : `bool`, optional
658
+ If `True`, send the ``on_close`` message to the supervisor via
659
+ `log_progress` as well as the worker's logger.
660
+ """
661
+ if on_close is None:
662
+ return self._exit_stack.enter_context(cm)
663
+
664
+ @contextmanager
665
+ def wrapper() -> Iterator[_T]:
666
+ with cm as result:
667
+ yield result
668
+ self.log.log(level, on_close)
669
+ if is_progress_log:
670
+ self.log_progress(level, on_close)
671
+
672
+ return self._exit_stack.enter_context(wrapper())
673
+
642
674
  def check_for_cancel(self) -> None:
643
675
  """Check for a cancel signal from the supervisor and raise
644
676
  `FatalWorkerError` if it is present.
@@ -696,12 +728,12 @@ class ScannerCommunicator(WorkerCommunicator):
696
728
  else:
697
729
  self._reports.put(_IngestReport(1), block=False)
698
730
 
699
- def request_write(self, request: ProvenanceQuantumScanData) -> None:
731
+ def request_write(self, request: WriteRequest) -> None:
700
732
  """Ask the writer to write provenance for a quantum.
701
733
 
702
734
  Parameters
703
735
  ----------
704
- request : `ProvenanceQuantumScanData`
736
+ request : `WriteRequest`
705
737
  Result of scanning a quantum.
706
738
  """
707
739
  assert self._write_requests is not None, "Writer should not be used if writing is disabled."
@@ -881,12 +913,12 @@ class WriterCommunicator(WorkerCommunicator):
881
913
  self._reports.put(_Sentinel.WRITER_DONE, block=False)
882
914
  return result
883
915
 
884
- def poll(self) -> Iterator[ProvenanceQuantumScanData]:
916
+ def poll(self) -> Iterator[WriteRequest]:
885
917
  """Poll for writer requests from the scanner workers and supervisor.
886
918
 
887
919
  Yields
888
920
  ------
889
- request : `ProvenanceQuantumScanData`
921
+ request : `WriteRequest`
890
922
  The result of a quantum scan.
891
923
 
892
924
  Notes
@@ -38,19 +38,23 @@ from typing import Any, Literal, Self
38
38
  import zstandard
39
39
 
40
40
  from lsst.daf.butler import ButlerLogRecords, DatasetRef, QuantumBackedButler
41
+ from lsst.utils.iteration import ensure_iterable
41
42
 
42
43
  from ... import automatic_connection_constants as acc
44
+ from ..._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
43
45
  from ..._task_metadata import TaskMetadata
46
+ from ...log_capture import _ExecutionLogRecordsExtra
44
47
  from ...pipeline_graph import PipelineGraph, TaskImportMode
48
+ from ...resource_usage import QuantumResourceUsage
45
49
  from .._multiblock import Compressor
46
50
  from .._predicted import (
47
51
  PredictedDatasetModel,
48
52
  PredictedQuantumDatasetsModel,
49
53
  PredictedQuantumGraphReader,
50
54
  )
51
- from .._provenance import ProvenanceQuantumScanModels, ProvenanceQuantumScanStatus
55
+ from .._provenance import ProvenanceInitQuantumModel, ProvenanceQuantumAttemptModel, ProvenanceQuantumModel
52
56
  from ._communicators import ScannerCommunicator
53
- from ._structs import IngestRequest, ScanReport
57
+ from ._structs import IngestRequest, InProgressScan, ScanReport, ScanStatus, WriteRequest
54
58
 
55
59
 
56
60
  @dataclasses.dataclass
@@ -90,7 +94,7 @@ class Scanner(AbstractContextManager):
90
94
  if self.comms.config.mock_storage_classes:
91
95
  import lsst.pipe.base.tests.mocks # noqa: F401
92
96
  self.comms.log.verbose("Reading from predicted quantum graph.")
93
- self.reader = self.comms.exit_stack.enter_context(
97
+ self.reader = self.comms.enter(
94
98
  PredictedQuantumGraphReader.open(self.predicted_path, import_mode=TaskImportMode.DO_NOT_IMPORT)
95
99
  )
96
100
  self.reader.read_dimension_data()
@@ -192,7 +196,7 @@ class Scanner(AbstractContextManager):
192
196
  ref = self.reader.components.make_dataset_ref(predicted)
193
197
  return self.qbb.stored(ref)
194
198
 
195
- def scan_quantum(self, quantum_id: uuid.UUID) -> ProvenanceQuantumScanModels:
199
+ def scan_quantum(self, quantum_id: uuid.UUID) -> InProgressScan:
196
200
  """Scan for a quantum's completion and error status, and its output
197
201
  datasets' existence.
198
202
 
@@ -203,38 +207,76 @@ class Scanner(AbstractContextManager):
203
207
 
204
208
  Returns
205
209
  -------
206
- result : `ProvenanceQuantumScanModels`
210
+ result : `InProgressScan`
207
211
  Scan result struct.
208
212
  """
209
213
  if (predicted_quantum := self.init_quanta.get(quantum_id)) is not None:
210
- result = ProvenanceQuantumScanModels(
211
- predicted_quantum.quantum_id, status=ProvenanceQuantumScanStatus.INIT
212
- )
214
+ result = InProgressScan(predicted_quantum.quantum_id, status=ScanStatus.INIT)
213
215
  self.comms.log.debug("Created init scan for %s (%s)", quantum_id, predicted_quantum.task_label)
214
216
  else:
215
217
  self.reader.read_quantum_datasets([quantum_id])
216
- predicted_quantum = self.reader.components.quantum_datasets.pop(quantum_id)
218
+ predicted_quantum = self.reader.components.quantum_datasets[quantum_id]
217
219
  self.comms.log.debug(
218
220
  "Scanning %s (%s@%s)",
219
221
  quantum_id,
220
222
  predicted_quantum.task_label,
221
223
  predicted_quantum.data_coordinate,
222
224
  )
223
- logs = self._read_log(predicted_quantum)
224
- metadata = self._read_metadata(predicted_quantum)
225
- result = ProvenanceQuantumScanModels.from_metadata_and_logs(
226
- predicted_quantum, metadata, logs, assume_complete=self.comms.config.assume_complete
227
- )
228
- if result.status is ProvenanceQuantumScanStatus.ABANDONED:
229
- self.comms.log.debug("Abandoning scan for failed quantum %s.", quantum_id)
225
+ result = InProgressScan(predicted_quantum.quantum_id, ScanStatus.INCOMPLETE)
226
+ del self.reader.components.quantum_datasets[quantum_id]
227
+ last_attempt = ProvenanceQuantumAttemptModel()
228
+ if not self._read_log(predicted_quantum, result, last_attempt):
229
+ self.comms.log.debug("Abandoning scan for %s; no log dataset.", quantum_id)
230
+ self.comms.report_scan(ScanReport(result.quantum_id, result.status))
231
+ return result
232
+ if not self._read_metadata(predicted_quantum, result, last_attempt):
233
+ # We found the log dataset, but no metadata; this means the
234
+ # quantum failed, but a retry might still happen that could
235
+ # turn it into a success if we can't yet assume the run is
236
+ # complete.
237
+ self.comms.log.debug("Abandoning scan for %s.", quantum_id)
230
238
  self.comms.report_scan(ScanReport(result.quantum_id, result.status))
231
239
  return result
240
+ last_attempt.attempt = len(result.attempts)
241
+ result.attempts.append(last_attempt)
242
+ assert result.status is not ScanStatus.INCOMPLETE
243
+ assert result.status is not ScanStatus.ABANDONED
244
+
245
+ if len(result.logs.attempts) < len(result.attempts):
246
+ # Logs were not found for this attempt; must have been a hard error
247
+ # that kept the `finally` block from running or otherwise
248
+ # interrupted the writing of the logs.
249
+ result.logs.attempts.append(None)
250
+ if result.status is ScanStatus.SUCCESSFUL:
251
+ # But we found the metadata! Either that hard error happened
252
+ # at a very unlucky time (in between those two writes), or
253
+ # something even weirder happened.
254
+ result.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
255
+ else:
256
+ result.attempts[-1].status = QuantumAttemptStatus.FAILED
257
+ if len(result.metadata.attempts) < len(result.attempts):
258
+ # Metadata missing usually just means a failure. In any case, the
259
+ # status will already be correct, either because it was set to a
260
+ # failure when we read the logs, or left at UNKNOWN if there were
261
+ # no logs. Note that scanners never process BLOCKED quanta at all.
262
+ result.metadata.attempts.append(None)
263
+ assert len(result.logs.attempts) == len(result.attempts) or len(result.metadata.attempts) == len(
264
+ result.attempts
265
+ ), (
266
+ "The only way we can add more than one quantum attempt is by "
267
+ "extracting info stored with the logs, and that always appends "
268
+ "a log attempt and a metadata attempt, so this must be a bug in "
269
+ "the scanner."
270
+ )
271
+ # Scan for output dataset existence, skipping any the metadata reported
272
+ # on, as well as the metadata and logs themselves (since we just
273
+ # checked those).
232
274
  for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
233
- if predicted_output.dataset_id not in result.output_existence:
234
- result.output_existence[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
275
+ if predicted_output.dataset_id not in result.outputs:
276
+ result.outputs[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
235
277
  to_ingest = self._make_ingest_request(predicted_quantum, result)
236
278
  if self.comms.config.output_path is not None:
237
- to_write = result.to_scan_data(predicted_quantum, compressor=self.compressor)
279
+ to_write = self._make_write_request(predicted_quantum, result)
238
280
  self.comms.request_write(to_write)
239
281
  self.comms.request_ingest(to_ingest)
240
282
  self.comms.report_scan(ScanReport(result.quantum_id, result.status))
@@ -242,7 +284,7 @@ class Scanner(AbstractContextManager):
242
284
  return result
243
285
 
244
286
  def _make_ingest_request(
245
- self, predicted_quantum: PredictedQuantumDatasetsModel, result: ProvenanceQuantumScanModels
287
+ self, predicted_quantum: PredictedQuantumDatasetsModel, result: InProgressScan
246
288
  ) -> IngestRequest:
247
289
  """Make an ingest request from a quantum scan.
248
290
 
@@ -250,7 +292,7 @@ class Scanner(AbstractContextManager):
250
292
  ----------
251
293
  predicted_quantum : `PredictedQuantumDatasetsModel`
252
294
  Information about the predicted quantum.
253
- result : `ProvenanceQuantumScanModels`
295
+ result : `InProgressScan`
254
296
  Result of a quantum scan.
255
297
 
256
298
  Returns
@@ -263,7 +305,7 @@ class Scanner(AbstractContextManager):
263
305
  }
264
306
  to_ingest_predicted: list[PredictedDatasetModel] = []
265
307
  to_ingest_refs: list[DatasetRef] = []
266
- for dataset_id, was_produced in result.output_existence.items():
308
+ for dataset_id, was_produced in result.outputs.items():
267
309
  if was_produced:
268
310
  predicted_output = predicted_outputs_by_id[dataset_id]
269
311
  to_ingest_predicted.append(predicted_output)
@@ -271,18 +313,69 @@ class Scanner(AbstractContextManager):
271
313
  to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
272
314
  return IngestRequest(result.quantum_id, to_ingest_predicted, to_ingest_records)
273
315
 
274
- def _read_metadata(self, predicted_quantum: PredictedQuantumDatasetsModel) -> TaskMetadata | None:
275
- """Attempt to read the metadata dataset for a quantum.
316
+ def _make_write_request(
317
+ self, predicted_quantum: PredictedQuantumDatasetsModel, result: InProgressScan
318
+ ) -> WriteRequest:
319
+ """Make a write request from a quantum scan.
276
320
 
277
321
  Parameters
278
322
  ----------
279
323
  predicted_quantum : `PredictedQuantumDatasetsModel`
280
324
  Information about the predicted quantum.
325
+ result : `InProgressScan`
326
+ Result of a quantum scan.
281
327
 
282
328
  Returns
283
329
  -------
284
- metadata : `...TaskMetadata` or `None`
285
- Task metadata.
330
+ write_request : `WriteRequest`
331
+ A request to be sent to the writer.
332
+ """
333
+ quantum: ProvenanceInitQuantumModel | ProvenanceQuantumModel
334
+ if result.status is ScanStatus.INIT:
335
+ quantum = ProvenanceInitQuantumModel.from_predicted(predicted_quantum)
336
+ else:
337
+ quantum = ProvenanceQuantumModel.from_predicted(predicted_quantum)
338
+ quantum.attempts = result.attempts
339
+ request = WriteRequest(
340
+ result.quantum_id,
341
+ result.status,
342
+ existing_outputs={
343
+ dataset_id for dataset_id, was_produced in result.outputs.items() if was_produced
344
+ },
345
+ quantum=quantum.model_dump_json().encode(),
346
+ logs=result.logs.model_dump_json().encode() if result.logs.attempts else b"",
347
+ metadata=result.metadata.model_dump_json().encode() if result.metadata.attempts else b"",
348
+ )
349
+ if self.compressor is not None:
350
+ request.quantum = self.compressor.compress(request.quantum)
351
+ request.logs = self.compressor.compress(request.logs) if request.logs else b""
352
+ request.metadata = self.compressor.compress(request.metadata) if request.metadata else b""
353
+ request.is_compressed = True
354
+ return request
355
+
356
+ def _read_metadata(
357
+ self,
358
+ predicted_quantum: PredictedQuantumDatasetsModel,
359
+ result: InProgressScan,
360
+ last_attempt: ProvenanceQuantumAttemptModel,
361
+ ) -> bool:
362
+ """Attempt to read the metadata dataset for a quantum to extract
363
+ provenance information from it.
364
+
365
+ Parameters
366
+ ----------
367
+ predicted_quantum : `PredictedQuantumDatasetsModel`
368
+ Information about the predicted quantum.
369
+ result : `InProgressScan`
370
+ Result object to be modified in-place.
371
+ last_attempt : `ProvenanceQuantumAttemptModel`
372
+ Structure to fill in with information about the last attempt to
373
+ run this quantum.
374
+
375
+ Returns
376
+ -------
377
+ complete : `bool`
378
+ Whether the quantum is complete.
286
379
  """
287
380
  (predicted_dataset,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
288
381
  ref = self.reader.components.make_dataset_ref(predicted_dataset)
@@ -290,28 +383,129 @@ class Scanner(AbstractContextManager):
290
383
  # This assumes QBB metadata writes are atomic, which should be the
291
384
  # case. If it's not we'll probably get pydantic validation errors
292
385
  # here.
293
- return self.qbb.get(ref, storageClass="TaskMetadata")
386
+ metadata: TaskMetadata = self.qbb.get(ref, storageClass="TaskMetadata")
294
387
  except FileNotFoundError:
295
- return None
296
-
297
- def _read_log(self, predicted_quantum: PredictedQuantumDatasetsModel) -> ButlerLogRecords | None:
298
- """Attempt to read the log dataset for a quantum.
388
+ result.outputs[ref.id] = False
389
+ if self.comms.config.assume_complete:
390
+ result.status = ScanStatus.FAILED
391
+ else:
392
+ result.status = ScanStatus.ABANDONED
393
+ return False
394
+ else:
395
+ result.status = ScanStatus.SUCCESSFUL
396
+ result.outputs[ref.id] = True
397
+ last_attempt.status = QuantumAttemptStatus.SUCCESSFUL
398
+ try:
399
+ # Int conversion guards against spurious conversion to
400
+ # float that can apparently sometimes happen in
401
+ # TaskMetadata.
402
+ last_attempt.caveats = QuantumSuccessCaveats(int(metadata["quantum"]["caveats"]))
403
+ except LookupError:
404
+ pass
405
+ try:
406
+ last_attempt.exception = ExceptionInfo._from_metadata(
407
+ metadata[predicted_quantum.task_label]["failure"]
408
+ )
409
+ except LookupError:
410
+ pass
411
+ try:
412
+ for id_str in ensure_iterable(metadata["quantum"].getArray("outputs")):
413
+ result.outputs[uuid.UUID(id_str)]
414
+ except LookupError:
415
+ pass
416
+ else:
417
+ # If the metadata told us what it wrote, anything not in that
418
+ # list was not written.
419
+ for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
420
+ result.outputs.setdefault(predicted_output.dataset_id, False)
421
+ last_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(metadata)
422
+ result.metadata.attempts.append(metadata)
423
+ return True
424
+
425
+ def _read_log(
426
+ self,
427
+ predicted_quantum: PredictedQuantumDatasetsModel,
428
+ result: InProgressScan,
429
+ last_attempt: ProvenanceQuantumAttemptModel,
430
+ ) -> bool:
431
+ """Attempt to read the log dataset for a quantum to test for the
432
+ quantum's completion (the log is always written last) and aggregate
433
+ the log content in the provenance quantum graph.
299
434
 
300
435
  Parameters
301
436
  ----------
302
437
  predicted_quantum : `PredictedQuantumDatasetsModel`
303
438
  Information about the predicted quantum.
439
+ result : `InProgressScan`
440
+ Result object to be modified in-place.
441
+ last_attempt : `ProvenanceQuantumAttemptModel`
442
+ Structure to fill in with information about the last attempt to
443
+ run this quantum.
304
444
 
305
445
  Returns
306
446
  -------
307
- logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
308
- Task logs.
447
+ complete : `bool`
448
+ Whether the quantum is complete.
309
449
  """
310
450
  (predicted_dataset,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
311
451
  ref = self.reader.components.make_dataset_ref(predicted_dataset)
312
452
  try:
313
453
  # This assumes QBB log writes are atomic, which should be the case.
314
454
  # If it's not we'll probably get pydantic validation errors here.
315
- return self.qbb.get(ref)
455
+ log_records: ButlerLogRecords = self.qbb.get(ref)
316
456
  except FileNotFoundError:
317
- return None
457
+ result.outputs[ref.id] = False
458
+ if self.comms.config.assume_complete:
459
+ result.status = ScanStatus.FAILED
460
+ else:
461
+ result.status = ScanStatus.ABANDONED
462
+ return False
463
+ else:
464
+ # Set the attempt's run status to FAILED, since the default is
465
+ # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
466
+ # the logs exist. This will usually get replaced by SUCCESSFUL
467
+ # when we look for metadata next.
468
+ last_attempt.status = QuantumAttemptStatus.FAILED
469
+ result.outputs[ref.id] = True
470
+ if log_records.extra:
471
+ log_extra = _ExecutionLogRecordsExtra.model_validate(log_records.extra)
472
+ self._extract_from_log_extra(log_extra, result, last_attempt=last_attempt)
473
+ result.logs.attempts.append(list(log_records))
474
+ return True
475
+
476
+ def _extract_from_log_extra(
477
+ self,
478
+ log_extra: _ExecutionLogRecordsExtra,
479
+ result: InProgressScan,
480
+ last_attempt: ProvenanceQuantumAttemptModel | None,
481
+ ) -> None:
482
+ for previous_attempt_log_extra in log_extra.previous_attempts:
483
+ self._extract_from_log_extra(previous_attempt_log_extra, result, last_attempt=None)
484
+ quantum_attempt: ProvenanceQuantumAttemptModel
485
+ if last_attempt is None:
486
+ # This is not the last attempt, so it must be a failure.
487
+ quantum_attempt = ProvenanceQuantumAttemptModel(
488
+ attempt=len(result.attempts), status=QuantumAttemptStatus.FAILED
489
+ )
490
+ # We also need to get the logs from this extra provenance, since
491
+ # they won't be the main section of the log records.
492
+ result.logs.attempts.append(log_extra.logs)
493
+ # The special last attempt is only appended after we attempt to
494
+ # read metadata later, but we have to append this one now.
495
+ result.attempts.append(quantum_attempt)
496
+ else:
497
+ assert not log_extra.logs, "Logs for the last attempt should not be stored in the extra JSON."
498
+ quantum_attempt = last_attempt
499
+ if log_extra.exception is not None or log_extra.metadata is not None or last_attempt is None:
500
+ # We won't be getting a separate metadata dataset, so anything we
501
+ # might get from the metadata has to come from this extra
502
+ # provenance in the logs.
503
+ quantum_attempt.exception = log_extra.exception
504
+ if log_extra.metadata is not None:
505
+ quantum_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(log_extra.metadata)
506
+ result.metadata.attempts.append(log_extra.metadata)
507
+ else:
508
+ result.metadata.attempts.append(None)
509
+ # Regardless of whether this is the last attempt or not, we can only
510
+ # get the previous_process_quanta from the log extra.
511
+ quantum_attempt.previous_process_quanta.extend(log_extra.previous_process_quanta)
@@ -27,16 +27,68 @@
27
27
 
28
28
  from __future__ import annotations
29
29
 
30
- __all__ = ("IngestRequest", "ScanReport")
30
+ __all__ = (
31
+ "InProgressScan",
32
+ "IngestRequest",
33
+ "ScanReport",
34
+ "ScanStatus",
35
+ "WriteRequest",
36
+ )
31
37
 
32
38
  import dataclasses
39
+ import enum
33
40
  import uuid
34
41
 
35
42
  from lsst.daf.butler.datastore.record_data import DatastoreRecordData
36
43
 
37
44
  from .._common import DatastoreName
38
45
  from .._predicted import PredictedDatasetModel
39
- from .._provenance import ProvenanceQuantumScanStatus
46
+ from .._provenance import (
47
+ ProvenanceLogRecordsModel,
48
+ ProvenanceQuantumAttemptModel,
49
+ ProvenanceTaskMetadataModel,
50
+ )
51
+
52
+
53
+ class ScanStatus(enum.Enum):
54
+ """Status enum for quantum scanning.
55
+
56
+ Note that this records the status for the *scanning* which is distinct
57
+ from the status of the quantum's execution.
58
+ """
59
+
60
+ INCOMPLETE = enum.auto()
61
+ """The quantum is not necessarily done running, and cannot be scanned
62
+ conclusively yet.
63
+ """
64
+
65
+ ABANDONED = enum.auto()
66
+ """The quantum's execution appears to have failed but we cannot rule out
67
+ the possibility that it could be recovered, but we've also waited long
68
+ enough (according to `ScannerTimeConfigDict.retry_timeout`) that it's time
69
+ to stop trying for now.
70
+
71
+ This state means a later run with `ScannerConfig.assume_complete` is
72
+ required.
73
+ """
74
+
75
+ SUCCESSFUL = enum.auto()
76
+ """The quantum was conclusively scanned and was executed successfully,
77
+ unblocking scans for downstream quanta.
78
+ """
79
+
80
+ FAILED = enum.auto()
81
+ """The quantum was conclusively scanned and failed execution, blocking
82
+ scans for downstream quanta.
83
+ """
84
+
85
+ BLOCKED = enum.auto()
86
+ """A quantum upstream of this one failed."""
87
+
88
+ INIT = enum.auto()
89
+ """Init quanta need special handling, because they don't have logs and
90
+ metadata.
91
+ """
40
92
 
41
93
 
42
94
  @dataclasses.dataclass
@@ -46,7 +98,7 @@ class ScanReport:
46
98
  quantum_id: uuid.UUID
47
99
  """Unique ID of the quantum."""
48
100
 
49
- status: ProvenanceQuantumScanStatus
101
+ status: ScanStatus
50
102
  """Combined status of the scan and the execution of the quantum."""
51
103
 
52
104
 
@@ -65,3 +117,61 @@ class IngestRequest:
65
117
 
66
118
  def __bool__(self) -> bool:
67
119
  return bool(self.datasets or self.records)
120
+
121
+
122
+ @dataclasses.dataclass
123
+ class InProgressScan:
124
+ """A struct that represents a quantum that is being scanned."""
125
+
126
+ quantum_id: uuid.UUID
127
+ """Unique ID for the quantum."""
128
+
129
+ status: ScanStatus
130
+ """Combined status for the scan and the execution of the quantum."""
131
+
132
+ attempts: list[ProvenanceQuantumAttemptModel] = dataclasses.field(default_factory=list)
133
+ """Provenance information about each attempt to run the quantum."""
134
+
135
+ outputs: dict[uuid.UUID, bool] = dataclasses.field(default_factory=dict)
136
+ """Unique IDs of the output datasets mapped to whether they were actually
137
+ produced.
138
+ """
139
+
140
+ metadata: ProvenanceTaskMetadataModel = dataclasses.field(default_factory=ProvenanceTaskMetadataModel)
141
+ """Task metadata information for each attempt.
142
+ """
143
+
144
+ logs: ProvenanceLogRecordsModel = dataclasses.field(default_factory=ProvenanceLogRecordsModel)
145
+ """Log records for each attempt.
146
+ """
147
+
148
+
149
+ @dataclasses.dataclass
150
+ class WriteRequest:
151
+ """A struct that represents a request to write provenance for a quantum."""
152
+
153
+ quantum_id: uuid.UUID
154
+ """Unique ID for the quantum."""
155
+
156
+ status: ScanStatus
157
+ """Combined status for the scan and the execution of the quantum."""
158
+
159
+ existing_outputs: set[uuid.UUID] = dataclasses.field(default_factory=set)
160
+ """Unique IDs of the output datasets that were actually written."""
161
+
162
+ quantum: bytes = b""
163
+ """Serialized quantum provenance model.
164
+
165
+ This may be empty for quanta that had no attempts.
166
+ """
167
+
168
+ metadata: bytes = b""
169
+ """Serialized task metadata."""
170
+
171
+ logs: bytes = b""
172
+ """Serialized logs."""
173
+
174
+ is_compressed: bool = False
175
+ """Whether the `quantum`, `metadata`, and `logs` attributes are
176
+ compressed.
177
+ """