lsst-pipe-base 30.0.0rc2__py3-none-any.whl → 30.0.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. lsst/pipe/base/_instrument.py +31 -20
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/_task_metadata.py +2 -2
  5. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
  6. lsst/pipe/base/automatic_connection_constants.py +20 -1
  7. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  8. lsst/pipe/base/cli/cmd/commands.py +149 -4
  9. lsst/pipe/base/connectionTypes.py +72 -160
  10. lsst/pipe/base/connections.py +6 -9
  11. lsst/pipe/base/execution_reports.py +0 -5
  12. lsst/pipe/base/graph/graph.py +11 -10
  13. lsst/pipe/base/graph/quantumNode.py +4 -4
  14. lsst/pipe/base/graph_walker.py +8 -10
  15. lsst/pipe/base/log_capture.py +40 -80
  16. lsst/pipe/base/log_on_close.py +76 -0
  17. lsst/pipe/base/mp_graph_executor.py +51 -15
  18. lsst/pipe/base/pipeline.py +5 -6
  19. lsst/pipe/base/pipelineIR.py +2 -8
  20. lsst/pipe/base/pipelineTask.py +5 -7
  21. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  22. lsst/pipe/base/pipeline_graph/_edges.py +32 -22
  23. lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
  24. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
  25. lsst/pipe/base/pipeline_graph/expressions.py +2 -2
  26. lsst/pipe/base/pipeline_graph/io.py +7 -10
  27. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  28. lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
  29. lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
  30. lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
  31. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  32. lsst/pipe/base/prerequisite_helpers.py +2 -1
  33. lsst/pipe/base/quantum_graph/_common.py +19 -20
  34. lsst/pipe/base/quantum_graph/_multiblock.py +37 -31
  35. lsst/pipe/base/quantum_graph/_predicted.py +113 -15
  36. lsst/pipe/base/quantum_graph/_provenance.py +1136 -45
  37. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  38. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +204 -289
  39. lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
  40. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
  41. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -235
  42. lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
  43. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +29 -39
  44. lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
  45. lsst/pipe/base/quantum_graph/aggregator/_writer.py +34 -351
  46. lsst/pipe/base/quantum_graph/formatter.py +171 -0
  47. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  48. lsst/pipe/base/quantum_graph/visualization.py +5 -1
  49. lsst/pipe/base/quantum_graph_builder.py +33 -9
  50. lsst/pipe/base/quantum_graph_executor.py +116 -13
  51. lsst/pipe/base/quantum_graph_skeleton.py +31 -35
  52. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  53. lsst/pipe/base/separable_pipeline_executor.py +19 -3
  54. lsst/pipe/base/single_quantum_executor.py +67 -42
  55. lsst/pipe/base/struct.py +4 -0
  56. lsst/pipe/base/testUtils.py +3 -3
  57. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  58. lsst/pipe/base/version.py +1 -1
  59. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/METADATA +3 -3
  60. lsst_pipe_base-30.0.1rc1.dist-info/RECORD +129 -0
  61. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/WHEEL +1 -1
  62. lsst_pipe_base-30.0.0rc2.dist-info/RECORD +0 -125
  63. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/entry_points.txt +0 -0
  64. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/COPYRIGHT +0 -0
  65. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/LICENSE +0 -0
  66. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/bsd_license.txt +0 -0
  67. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  68. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/top_level.txt +0 -0
  69. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/aggregator/_config.py

@@ -29,6 +29,8 @@ from __future__ import annotations
 
 __all__ = ("AggregatorConfig",)
 
+import sys
+from typing import TYPE_CHECKING, Any
 
 import pydantic
 
@@ -60,11 +62,13 @@ class AggregatorConfig(pydantic.BaseModel):
     n_processes: int = 1
     """Number of processes the scanner should use."""
 
-    assume_complete: bool = True
-    """If `True`, the aggregator can assume all quanta have run to completion
-    (including any automatic retries). If `False`, only successes can be
-    considered final, and quanta that appear to have failed or to have not been
-    executed are ignored.
+    incomplete: bool = False
+    """If `True`, do not expect the graph to have been executed to completion
+    yet, and only ingest the outputs of successful quanta.
+
+    This disables writing the provenance quantum graph, since this is likely to
+    be wasted effort that just complicates a follow-up run with
+    ``incomplete=False`` later.
     """
 
     defensive_ingest: bool = False
@@ -95,11 +99,10 @@
     """
 
     dry_run: bool = False
-    """If `True`, do not actually perform any deletions or central butler
-    ingests.
+    """If `True`, do not actually perform any central butler ingests.
 
-    Most log messages concerning deletions and ingests will still be emitted in
-    order to provide a better emulation of a real run.
+    Most log messages concerning ingests will still be emitted in order to
+    provide a better emulation of a real run.
     """
 
     interactive_status: bool = False
@@ -137,3 +140,78 @@
     """Enable support for storage classes created by the
    lsst.pipe.base.tests.mocks package.
    """
+
+    promise_ingest_graph: bool = False
+    """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
+    will be run later to ingest metadata/log/config datasets, and will not
+    ingest them itself. This means that if `~.ingest_graph.ingest_graph` is
+    not run, those files will be abandoned in the butler storage root without
+    being present in the butler database, but it will speed up both processes.
+
+    It is *usually* safe to build a quantum graph for downstream processing
+    before or while running `~.ingest_graph.ingest_graph`, because
+    metadata/log/config datasets are rarely used as inputs. To check, use
+    ``pipetask build ... --show inputs`` to show the overall-inputs to the
+    graph and scan for these dataset types.
+    """
+
+    worker_check_timeout: float = 5.0
+    """Time to wait (s) for reports from subprocesses before running
+    process-alive checks.
+
+    These checks are designed to kill the main aggregator process when a
+    subprocess has been unexpectedly killed (e.g. for using too much
+    memory).
+    """
+
+    @property
+    def is_writing_provenance(self) -> bool:
+        """Whether the aggregator is configured to write the provenance
+        quantum graph.
+        """
+        return self.output_path is not None and not self.incomplete
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring
+    # formatting when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
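
Taken together, the new options trade provenance completeness for turnaround time. A minimal sketch of how they combine (hypothetical values; this assumes `AggregatorConfig` is importable from the aggregator subpackage and that `output_path` is the existing field referenced by `is_writing_provenance`):

    from lsst.pipe.base.quantum_graph.aggregator import AggregatorConfig

    # Hypothetical values; every field name appears in the diff above.
    config = AggregatorConfig(
        n_processes=4,
        incomplete=True,  # graph still executing: ingest successes only
        dry_run=True,  # log what would be ingested without doing it
        promise_ingest_graph=True,  # defer metadata/log/config ingest
        worker_check_timeout=10.0,  # wait longer before liveness checks
    )

    # incomplete=True disables provenance writing regardless of whether an
    # output path was configured.
    assert not config.is_writing_provenance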
lsst/pipe/base/quantum_graph/aggregator/_ingester.py

@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError
 
 from ...pipeline_graph import TaskImportMode
 from .._common import DatastoreName
-from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
 from ._communicators import IngesterCommunicator
 
 
@@ -140,7 +140,7 @@
         Notes
         -----
         This method is designed to run as the ``target`` in
-        `WorkerContext.make_worker`.
+        `WorkerFactory.make_worker`.
         """
         with comms, Ingester(predicted_path, butler_path, comms) as ingester:
             ingester.loop()
@@ -170,7 +170,7 @@
         for ingest_request in self.comms.poll():
             self.n_producers_pending += 1
             self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
-            self.update_pending(ingest_request.datasets, ingest_request.records)
+            self.update_outputs_pending(refs=ingest_request.refs, records=ingest_request.records)
             if self.n_datasets_pending > self.comms.config.ingest_batch_size:
                 self.ingest()
         self.comms.log.info("All ingest requests received.")
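
The polling loop above is a size-triggered flush: requests accumulate until the pending count passes `ingest_batch_size`, at which point a batch ingest runs. Stripped of the butler machinery, the pattern looks like this (hypothetical names, for illustration only):

    from collections.abc import Callable, Iterable

    def drain(
        requests: Iterable[object],
        batch_size: int,
        flush: Callable[[list[object]], None],
    ) -> None:
        # Accumulate requests; flush whenever the batch outgrows the
        # configured size, then once more for any remainder.
        batch: list[object] = []
        for request in requests:
            batch.append(request)
            if len(batch) > batch_size:
                flush(batch)
                batch = []
        if batch:
            flush(batch)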
@@ -266,31 +266,32 @@
         else:
             del self.records_pending[datastore_name]
 
-    def update_pending(
-        self, datasets: list[PredictedDatasetModel], records: dict[DatastoreName, DatastoreRecordData]
+    def update_outputs_pending(
+        self,
+        refs: list[DatasetRef],
+        records: dict[DatastoreName, DatastoreRecordData],
     ) -> None:
         """Add an ingest request to the pending-ingest data structures.
 
         Parameters
         ----------
-        datasets : `list` [ `PredictedDatasetModel` ]
-            Registry information about the datasets.
+        refs : `list` [ `lsst.daf.butler.DatasetRef` ]
+            Registry information about regular quantum-output datasets.
         records : `dict` [ `str`, \
             `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
             Datastore information about the datasets.
         """
-        n_given = len(datasets)
+        n_given = len(refs)
         if self.already_ingested is not None:
-            datasets = [d for d in datasets if d.dataset_id not in self.already_ingested]
-            kept = {d.dataset_id for d in datasets}
+            refs = [ref for ref in refs if ref.id not in self.already_ingested]
+            kept = {ref.id for ref in refs}
             self.n_datasets_skipped += n_given - len(kept)
             records = {
                 datastore_name: filtered_records
                 for datastore_name, original_records in records.items()
                 if (filtered_records := original_records.subset(kept)) is not None
             }
-        for dataset in datasets:
-            ref = self.predicted.make_dataset_ref(dataset)
+        for ref in refs:
             self.refs_pending[ref.datasetType.dimensions].append(ref)
         for datastore_name, datastore_records in records.items():
             if (existing_records := self.records_pending.get(datastore_name)) is not None:
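
The rewritten method now consumes `DatasetRef` objects directly and applies a two-step filter: drop refs whose IDs are already ingested, then subset each datastore's records to the surviving IDs, discarding datastores whose subset comes back empty (the real `DatastoreRecordData.subset` returns `None` in that case, per the walrus test above). A self-contained sketch of the same filter with stand-in types:

    import uuid
    from dataclasses import dataclass

    @dataclass
    class StubRef:
        # Stand-in for lsst.daf.butler.DatasetRef; only `id` matters here.
        id: uuid.UUID

    @dataclass
    class StubRecords:
        # Stand-in for DatastoreRecordData: subset() yields None when
        # nothing survives, mirroring the real API used above.
        ids: set[uuid.UUID]

        def subset(self, keep: set[uuid.UUID]) -> "StubRecords | None":
            kept = self.ids & keep
            return StubRecords(kept) if kept else None

    def filter_pending(
        refs: list[StubRef],
        records: dict[str, StubRecords],
        already_ingested: set[uuid.UUID],
    ) -> tuple[list[StubRef], dict[str, StubRecords]]:
        refs = [ref for ref in refs if ref.id not in already_ingested]
        kept = {ref.id for ref in refs}
        records = {
            name: filtered
            for name, original in records.items()
            if (filtered := original.subset(kept)) is not None
        }
        return refs, records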
lsst/pipe/base/quantum_graph/aggregator/_scanner.py

@@ -38,23 +38,19 @@ from typing import Any, Literal, Self
 import zstandard
 
 from lsst.daf.butler import ButlerLogRecords, DatasetRef, QuantumBackedButler
-from lsst.utils.iteration import ensure_iterable
 
 from ... import automatic_connection_constants as acc
-from ..._status import ExceptionInfo, QuantumAttemptStatus, QuantumSuccessCaveats
 from ..._task_metadata import TaskMetadata
-from ...log_capture import _ExecutionLogRecordsExtra
 from ...pipeline_graph import PipelineGraph, TaskImportMode
-from ...resource_usage import QuantumResourceUsage
 from .._multiblock import Compressor
 from .._predicted import (
     PredictedDatasetModel,
     PredictedQuantumDatasetsModel,
     PredictedQuantumGraphReader,
 )
-from .._provenance import ProvenanceInitQuantumModel, ProvenanceQuantumAttemptModel, ProvenanceQuantumModel
+from .._provenance import ProvenanceQuantumScanModels, ProvenanceQuantumScanStatus
 from ._communicators import ScannerCommunicator
-from ._structs import IngestRequest, InProgressScan, ScanReport, ScanStatus, WriteRequest
+from ._structs import IngestRequest, ScanReport
 
 
 @dataclasses.dataclass
@@ -94,7 +90,7 @@
         if self.comms.config.mock_storage_classes:
             import lsst.pipe.base.tests.mocks  # noqa: F401
         self.comms.log.verbose("Reading from predicted quantum graph.")
-        self.reader = self.comms.enter(
+        self.reader = self.comms.exit_stack.enter_context(
             PredictedQuantumGraphReader.open(self.predicted_path, import_mode=TaskImportMode.DO_NOT_IMPORT)
         )
         self.reader.read_dimension_data()
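
The change from `self.comms.enter(...)` to `self.comms.exit_stack.enter_context(...)` suggests the communicator now exposes a `contextlib.ExitStack` attribute rather than wrapping it behind a helper method. A sketch of that pattern (a hypothetical `Communicator`, not the real class):

    import contextlib
    from typing import Self

    class Communicator:
        # Owns an ExitStack so that anything a worker registers on it is
        # closed when the communicator itself exits.
        def __init__(self) -> None:
            self.exit_stack = contextlib.ExitStack()

        def __enter__(self) -> Self:
            self.exit_stack.__enter__()
            return self

        def __exit__(self, *exc_info: object) -> bool:
            return self.exit_stack.__exit__(*exc_info)

    with Communicator() as comms:
        f = comms.exit_stack.enter_context(open("example.txt", "w"))
        f.write("closed when comms exits\n")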
@@ -165,7 +161,7 @@
         Notes
         -----
         This method is designed to run as the ``target`` in
-        `WorkerContext.make_worker`.
+        `WorkerFactory.make_worker`.
         """
         with comms, Scanner(predicted_path, butler_path, comms) as scanner:
             scanner.loop()
@@ -196,7 +192,7 @@
         ref = self.reader.components.make_dataset_ref(predicted)
         return self.qbb.stored(ref)
 
-    def scan_quantum(self, quantum_id: uuid.UUID) -> InProgressScan:
+    def scan_quantum(self, quantum_id: uuid.UUID) -> ProvenanceQuantumScanModels:
         """Scan for a quantum's completion and error status, and its output
         datasets' existence.
 
@@ -207,76 +203,38 @@
 
         Returns
         -------
-        result : `InProgressScan`
+        result : `ProvenanceQuantumScanModels`
             Scan result struct.
         """
         if (predicted_quantum := self.init_quanta.get(quantum_id)) is not None:
-            result = InProgressScan(predicted_quantum.quantum_id, status=ScanStatus.INIT)
+            result = ProvenanceQuantumScanModels(
+                predicted_quantum.quantum_id, status=ProvenanceQuantumScanStatus.INIT
+            )
             self.comms.log.debug("Created init scan for %s (%s)", quantum_id, predicted_quantum.task_label)
         else:
             self.reader.read_quantum_datasets([quantum_id])
-            predicted_quantum = self.reader.components.quantum_datasets[quantum_id]
+            predicted_quantum = self.reader.components.quantum_datasets.pop(quantum_id)
             self.comms.log.debug(
                 "Scanning %s (%s@%s)",
                 quantum_id,
                 predicted_quantum.task_label,
                 predicted_quantum.data_coordinate,
             )
-            result = InProgressScan(predicted_quantum.quantum_id, ScanStatus.INCOMPLETE)
-            del self.reader.components.quantum_datasets[quantum_id]
-            last_attempt = ProvenanceQuantumAttemptModel()
-            if not self._read_log(predicted_quantum, result, last_attempt):
-                self.comms.log.debug("Abandoning scan for %s; no log dataset.", quantum_id)
-                self.comms.report_scan(ScanReport(result.quantum_id, result.status))
-                return result
-            if not self._read_metadata(predicted_quantum, result, last_attempt):
-                # We found the log dataset, but no metadata; this means the
-                # quantum failed, but a retry might still happen that could
-                # turn it into a success if we can't yet assume the run is
-                # complete.
-                self.comms.log.debug("Abandoning scan for %s.", quantum_id)
+            logs = self._read_log(predicted_quantum)
+            metadata = self._read_metadata(predicted_quantum)
+            result = ProvenanceQuantumScanModels.from_metadata_and_logs(
+                predicted_quantum, metadata, logs, incomplete=self.comms.config.incomplete
+            )
+            if result.status is ProvenanceQuantumScanStatus.ABANDONED:
+                self.comms.log.debug("Abandoning scan for failed quantum %s.", quantum_id)
                 self.comms.report_scan(ScanReport(result.quantum_id, result.status))
                 return result
-            last_attempt.attempt = len(result.attempts)
-            result.attempts.append(last_attempt)
-            assert result.status is not ScanStatus.INCOMPLETE
-            assert result.status is not ScanStatus.ABANDONED
-
-            if len(result.logs.attempts) < len(result.attempts):
-                # Logs were not found for this attempt; must have been a hard
-                # error that kept the `finally` block from running or otherwise
-                # interrupted the writing of the logs.
-                result.logs.attempts.append(None)
-                if result.status is ScanStatus.SUCCESSFUL:
-                    # But we found the metadata! Either that hard error
-                    # happened at a very unlucky time (in between those two
-                    # writes), or something even weirder happened.
-                    result.attempts[-1].status = QuantumAttemptStatus.LOGS_MISSING
-                else:
-                    result.attempts[-1].status = QuantumAttemptStatus.FAILED
-            if len(result.metadata.attempts) < len(result.attempts):
-                # Metadata missing usually just means a failure. In any case,
-                # the status will already be correct, either because it was set
-                # to a failure when we read the logs, or left at UNKNOWN if
-                # there were no logs. Note that scanners never process BLOCKED
-                # quanta at all.
-                result.metadata.attempts.append(None)
-            assert len(result.logs.attempts) == len(result.attempts) or len(result.metadata.attempts) == len(
-                result.attempts
-            ), (
-                "The only way we can add more than one quantum attempt is by "
-                "extracting info stored with the logs, and that always appends "
-                "a log attempt and a metadata attempt, so this must be a bug in "
-                "the scanner."
-            )
-            # Scan for output dataset existence, skipping any that the metadata
-            # reported on, as well as the metadata and logs themselves (since
-            # we just checked those).
             for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
-                if predicted_output.dataset_id not in result.outputs:
-                    result.outputs[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
+                if predicted_output.dataset_id not in result.output_existence:
+                    result.output_existence[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
         to_ingest = self._make_ingest_request(predicted_quantum, result)
-        if self.comms.config.output_path is not None:
-            to_write = self._make_write_request(predicted_quantum, result)
+        if self.comms.config.is_writing_provenance:
+            to_write = result.to_scan_data(predicted_quantum, compressor=self.compressor)
             self.comms.request_write(to_write)
         self.comms.request_ingest(to_ingest)
         self.comms.report_scan(ScanReport(result.quantum_id, result.status))
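
The scattered status bookkeeping that used to live here has moved into `ProvenanceQuantumScanModels.from_metadata_and_logs`. The core classification rule, reconstructed from the removed code (a sketch; the real model also tracks per-attempt history, caveats, and resource usage):

    from enum import Enum, auto

    class Outcome(Enum):
        SUCCESSFUL = auto()
        FAILED = auto()
        ABANDONED = auto()  # not final; a retry may still succeed

    def classify(metadata_found: bool, incomplete: bool) -> Outcome:
        # Metadata is only written on success, so its presence decides the
        # outcome; while the run may still be in progress (incomplete=True),
        # anything short of success is abandoned rather than failed.
        if metadata_found:
            return Outcome.SUCCESSFUL
        return Outcome.ABANDONED if incomplete else Outcome.FAILED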
@@ -284,7 +242,7 @@
         return result
 
     def _make_ingest_request(
-        self, predicted_quantum: PredictedQuantumDatasetsModel, result: InProgressScan
+        self, predicted_quantum: PredictedQuantumDatasetsModel, result: ProvenanceQuantumScanModels
     ) -> IngestRequest:
         """Make an ingest request from a quantum scan.
 
292
250
  ----------
293
251
  predicted_quantum : `PredictedQuantumDatasetsModel`
294
252
  Information about the predicted quantum.
295
- result : `InProgressScan`
253
+ result : `ProvenanceQuantumScanModels`
296
254
  Result of a quantum scan.
297
255
 
298
256
  Returns
@@ -303,79 +261,36 @@ class Scanner(AbstractContextManager):
303
261
  predicted_outputs_by_id = {
304
262
  d.dataset_id: d for d in itertools.chain.from_iterable(predicted_quantum.outputs.values())
305
263
  }
306
- to_ingest_predicted: list[PredictedDatasetModel] = []
307
264
  to_ingest_refs: list[DatasetRef] = []
308
- for dataset_id, was_produced in result.outputs.items():
309
- if was_produced:
265
+ to_ignore: set[uuid.UUID] = set()
266
+ if self.comms.config.promise_ingest_graph:
267
+ if result.status is ProvenanceQuantumScanStatus.INIT:
268
+ if predicted_quantum.task_label: # i.e. not the 'packages' producer
269
+ to_ignore.add(
270
+ predicted_quantum.outputs[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME][0].dataset_id
271
+ )
272
+ else:
273
+ to_ignore.add(predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME][0].dataset_id)
274
+ to_ignore.add(predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME][0].dataset_id)
275
+ for dataset_id, was_produced in result.output_existence.items():
276
+ if was_produced and dataset_id not in to_ignore:
310
277
  predicted_output = predicted_outputs_by_id[dataset_id]
311
- to_ingest_predicted.append(predicted_output)
312
278
  to_ingest_refs.append(self.reader.components.make_dataset_ref(predicted_output))
313
279
  to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
314
- return IngestRequest(result.quantum_id, to_ingest_predicted, to_ingest_records)
280
+ return IngestRequest(result.quantum_id, to_ingest_refs, to_ingest_records)
315
281
 
316
- def _make_write_request(
317
- self, predicted_quantum: PredictedQuantumDatasetsModel, result: InProgressScan
318
- ) -> WriteRequest:
319
- """Make a write request from a quantum scan.
282
+ def _read_metadata(self, predicted_quantum: PredictedQuantumDatasetsModel) -> TaskMetadata | None:
283
+ """Attempt to read the metadata dataset for a quantum.
320
284
 
321
285
  Parameters
322
286
  ----------
323
287
  predicted_quantum : `PredictedQuantumDatasetsModel`
324
288
  Information about the predicted quantum.
325
- result : `InProgressScan`
326
- Result of a quantum scan.
327
289
 
328
290
  Returns
329
291
  -------
330
- write_request : `WriteRequest`
331
- A request to be sent to the writer.
332
- """
333
- quantum: ProvenanceInitQuantumModel | ProvenanceQuantumModel
334
- if result.status is ScanStatus.INIT:
335
- quantum = ProvenanceInitQuantumModel.from_predicted(predicted_quantum)
336
- else:
337
- quantum = ProvenanceQuantumModel.from_predicted(predicted_quantum)
338
- quantum.attempts = result.attempts
339
- request = WriteRequest(
340
- result.quantum_id,
341
- result.status,
342
- existing_outputs={
343
- dataset_id for dataset_id, was_produced in result.outputs.items() if was_produced
344
- },
345
- quantum=quantum.model_dump_json().encode(),
346
- logs=result.logs.model_dump_json().encode() if result.logs.attempts else b"",
347
- metadata=result.metadata.model_dump_json().encode() if result.metadata.attempts else b"",
348
- )
349
- if self.compressor is not None:
350
- request.quantum = self.compressor.compress(request.quantum)
351
- request.logs = self.compressor.compress(request.logs) if request.logs else b""
352
- request.metadata = self.compressor.compress(request.metadata) if request.metadata else b""
353
- request.is_compressed = True
354
- return request
355
-
356
- def _read_metadata(
357
- self,
358
- predicted_quantum: PredictedQuantumDatasetsModel,
359
- result: InProgressScan,
360
- last_attempt: ProvenanceQuantumAttemptModel,
361
- ) -> bool:
362
- """Attempt to read the metadata dataset for a quantum to extract
363
- provenance information from it.
364
-
365
- Parameters
366
- ----------
367
- predicted_quantum : `PredictedQuantumDatasetsModel`
368
- Information about the predicted quantum.
369
- result : `InProgressScan`
370
- Result object to be modified in-place.
371
- last_attempt : `ScanningProvenanceQuantumAttemptModel`
372
- Structure to fill in with information about the last attempt to
373
- run this quantum.
374
-
375
- Returns
376
- -------
377
- complete : `bool`
378
- Whether the quantum is complete.
292
+ metadata : `...TaskMetadata` or `None`
293
+ Task metadata.
379
294
  """
380
295
  (predicted_dataset,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
381
296
  ref = self.reader.components.make_dataset_ref(predicted_dataset)
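
The `to_ignore` block encodes exactly which outputs `ingest_graph` has promised to handle later: the config dataset of a real task's init quantum (the 'packages' producer has none) and the metadata and log datasets of a regular quantum. The same rule as a standalone sketch (a hypothetical helper; the constants are the real ones from `automatic_connection_constants`):

    from lsst.pipe.base import automatic_connection_constants as acc

    def deferred_connection_names(is_init: bool, task_label: str) -> list[str]:
        # Output connections the scanner skips when promise_ingest_graph=True,
        # mirroring _make_ingest_request above.
        if is_init:
            return [acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME] if task_label else []
        return [acc.METADATA_OUTPUT_CONNECTION_NAME, acc.LOG_OUTPUT_CONNECTION_NAME]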
@@ -383,129 +298,28 @@
             # This assumes QBB metadata writes are atomic, which should be the
             # case. If it's not we'll probably get pydantic validation errors
             # here.
-            metadata: TaskMetadata = self.qbb.get(ref, storageClass="TaskMetadata")
+            return self.qbb.get(ref, storageClass="TaskMetadata")
         except FileNotFoundError:
-            result.outputs[ref.id] = False
-            if self.comms.config.assume_complete:
-                result.status = ScanStatus.FAILED
-            else:
-                result.status = ScanStatus.ABANDONED
-            return False
-        else:
-            result.status = ScanStatus.SUCCESSFUL
-            result.outputs[ref.id] = True
-            last_attempt.status = QuantumAttemptStatus.SUCCESSFUL
-            try:
-                # Int conversion guards against spurious conversion to
-                # float that can apparently sometimes happen in
-                # TaskMetadata.
-                last_attempt.caveats = QuantumSuccessCaveats(int(metadata["quantum"]["caveats"]))
-            except LookupError:
-                pass
-            try:
-                last_attempt.exception = ExceptionInfo._from_metadata(
-                    metadata[predicted_quantum.task_label]["failure"]
-                )
-            except LookupError:
-                pass
-            try:
-                for id_str in ensure_iterable(metadata["quantum"].getArray("outputs")):
-                    result.outputs[uuid.UUID(id_str)]
-            except LookupError:
-                pass
-            else:
-                # If the metadata told us what it wrote, anything not in that
-                # list was not written.
-                for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
-                    result.outputs.setdefault(predicted_output.dataset_id, False)
-            last_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(metadata)
-            result.metadata.attempts.append(metadata)
-            return True
-
-    def _read_log(
-        self,
-        predicted_quantum: PredictedQuantumDatasetsModel,
-        result: InProgressScan,
-        last_attempt: ProvenanceQuantumAttemptModel,
-    ) -> bool:
-        """Attempt to read the log dataset for a quantum to test for the
-        quantum's completion (the log is always written last) and aggregate
-        the log content in the provenance quantum graph.
+            return None
+
+    def _read_log(self, predicted_quantum: PredictedQuantumDatasetsModel) -> ButlerLogRecords | None:
+        """Attempt to read the log dataset for a quantum.
 
         Parameters
         ----------
         predicted_quantum : `PredictedQuantumDatasetsModel`
             Information about the predicted quantum.
-        result : `InProgressScan`
-            Result object to be modified in-place.
-        last_attempt : `ScanningProvenanceQuantumAttemptModel`
-            Structure to fill in with information about the last attempt to
-            run this quantum.
 
         Returns
         -------
-        complete : `bool`
-            Whether the quantum is complete.
+        logs : `lsst.daf.butler.logging.ButlerLogRecords` or `None`
+            Task logs.
         """
         (predicted_dataset,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
         ref = self.reader.components.make_dataset_ref(predicted_dataset)
         try:
             # This assumes QBB log writes are atomic, which should be the case.
             # If it's not we'll probably get pydantic validation errors here.
-            log_records: ButlerLogRecords = self.qbb.get(ref)
+            return self.qbb.get(ref)
         except FileNotFoundError:
-            result.outputs[ref.id] = False
-            if self.comms.config.assume_complete:
-                result.status = ScanStatus.FAILED
-            else:
-                result.status = ScanStatus.ABANDONED
-            return False
-        else:
-            # Set the attempt's run status to FAILED, since the default is
-            # UNKNOWN (i.e. logs *and* metadata are missing) and we now know
-            # the logs exist. This will usually get replaced by SUCCESSFUL
-            # when we look for metadata next.
-            last_attempt.status = QuantumAttemptStatus.FAILED
-            result.outputs[ref.id] = True
-            if log_records.extra:
-                log_extra = _ExecutionLogRecordsExtra.model_validate(log_records.extra)
-                self._extract_from_log_extra(log_extra, result, last_attempt=last_attempt)
-            result.logs.attempts.append(list(log_records))
-            return True
-
-    def _extract_from_log_extra(
-        self,
-        log_extra: _ExecutionLogRecordsExtra,
-        result: InProgressScan,
-        last_attempt: ProvenanceQuantumAttemptModel | None,
-    ) -> None:
-        for previous_attempt_log_extra in log_extra.previous_attempts:
-            self._extract_from_log_extra(previous_attempt_log_extra, result, last_attempt=None)
-        quantum_attempt: ProvenanceQuantumAttemptModel
-        if last_attempt is None:
-            # This is not the last attempt, so it must be a failure.
-            quantum_attempt = ProvenanceQuantumAttemptModel(
-                attempt=len(result.attempts), status=QuantumAttemptStatus.FAILED
-            )
-            # We also need to get the logs from this extra provenance, since
-            # they won't be the main section of the log records.
-            result.logs.attempts.append(log_extra.logs)
-            # The special last attempt is only appended after we attempt to
-            # read metadata later, but we have to append this one now.
-            result.attempts.append(quantum_attempt)
-        else:
-            assert not log_extra.logs, "Logs for the last attempt should not be stored in the extra JSON."
-            quantum_attempt = last_attempt
-        if log_extra.exception is not None or log_extra.metadata is not None or last_attempt is None:
-            # We won't be getting a separate metadata dataset, so anything we
-            # might get from the metadata has to come from this extra
-            # provenance in the logs.
-            quantum_attempt.exception = log_extra.exception
-            if log_extra.metadata is not None:
-                quantum_attempt.resource_usage = QuantumResourceUsage.from_task_metadata(log_extra.metadata)
-                result.metadata.attempts.append(log_extra.metadata)
-            else:
-                result.metadata.attempts.append(None)
-        # Regardless of whether this is the last attempt or not, we can only
-        # get the previous_process_quanta from the log extra.
-        quantum_attempt.previous_process_quanta.extend(log_extra.previous_process_quanta)
+            return None
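
With the provenance assembly moved out, both readers collapse to the same read-or-`None` shape: a missing dataset is an expected outcome for a quantum that failed or never ran, so `FileNotFoundError` maps to `None` while any other error propagates. The generic pattern (a sketch with an untyped butler stand-in):

    from typing import Any

    def get_or_none(butler: Any, ref: Any, **kwargs: Any) -> Any | None:
        # Mirrors _read_metadata/_read_log: absence is data, not an error.
        try:
            return butler.get(ref, **kwargs)
        except FileNotFoundError:
            return None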