lsst-pipe-base 30.0.0rc2__py3-none-any.whl → 30.0.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. lsst/pipe/base/_instrument.py +31 -20
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/_task_metadata.py +2 -2
  5. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +8 -3
  6. lsst/pipe/base/automatic_connection_constants.py +20 -1
  7. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  8. lsst/pipe/base/cli/cmd/commands.py +149 -4
  9. lsst/pipe/base/connectionTypes.py +72 -160
  10. lsst/pipe/base/connections.py +6 -9
  11. lsst/pipe/base/execution_reports.py +0 -5
  12. lsst/pipe/base/graph/graph.py +11 -10
  13. lsst/pipe/base/graph/quantumNode.py +4 -4
  14. lsst/pipe/base/graph_walker.py +8 -10
  15. lsst/pipe/base/log_capture.py +40 -80
  16. lsst/pipe/base/log_on_close.py +76 -0
  17. lsst/pipe/base/mp_graph_executor.py +51 -15
  18. lsst/pipe/base/pipeline.py +5 -6
  19. lsst/pipe/base/pipelineIR.py +2 -8
  20. lsst/pipe/base/pipelineTask.py +5 -7
  21. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  22. lsst/pipe/base/pipeline_graph/_edges.py +32 -22
  23. lsst/pipe/base/pipeline_graph/_mapping_views.py +4 -7
  24. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +14 -7
  25. lsst/pipe/base/pipeline_graph/expressions.py +2 -2
  26. lsst/pipe/base/pipeline_graph/io.py +7 -10
  27. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  28. lsst/pipe/base/pipeline_graph/visualization/_layout.py +16 -18
  29. lsst/pipe/base/pipeline_graph/visualization/_merge.py +4 -7
  30. lsst/pipe/base/pipeline_graph/visualization/_printer.py +10 -10
  31. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  32. lsst/pipe/base/prerequisite_helpers.py +2 -1
  33. lsst/pipe/base/quantum_graph/_common.py +19 -20
  34. lsst/pipe/base/quantum_graph/_multiblock.py +37 -31
  35. lsst/pipe/base/quantum_graph/_predicted.py +113 -15
  36. lsst/pipe/base/quantum_graph/_provenance.py +1136 -45
  37. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  38. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +204 -289
  39. lsst/pipe/base/quantum_graph/aggregator/_config.py +87 -9
  40. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +13 -12
  41. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -235
  42. lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
  43. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +29 -39
  44. lsst/pipe/base/quantum_graph/aggregator/_workers.py +303 -0
  45. lsst/pipe/base/quantum_graph/aggregator/_writer.py +34 -351
  46. lsst/pipe/base/quantum_graph/formatter.py +171 -0
  47. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  48. lsst/pipe/base/quantum_graph/visualization.py +5 -1
  49. lsst/pipe/base/quantum_graph_builder.py +33 -9
  50. lsst/pipe/base/quantum_graph_executor.py +116 -13
  51. lsst/pipe/base/quantum_graph_skeleton.py +31 -35
  52. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  53. lsst/pipe/base/separable_pipeline_executor.py +19 -3
  54. lsst/pipe/base/single_quantum_executor.py +67 -42
  55. lsst/pipe/base/struct.py +4 -0
  56. lsst/pipe/base/testUtils.py +3 -3
  57. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  58. lsst/pipe/base/version.py +1 -1
  59. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/METADATA +3 -3
  60. lsst_pipe_base-30.0.1rc1.dist-info/RECORD +129 -0
  61. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/WHEEL +1 -1
  62. lsst_pipe_base-30.0.0rc2.dist-info/RECORD +0 -125
  63. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/entry_points.txt +0 -0
  64. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/COPYRIGHT +0 -0
  65. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/LICENSE +0 -0
  66. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/bsd_license.txt +0 -0
  67. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/licenses/gpl-v3.0.txt +0 -0
  68. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/top_level.txt +0 -0
  69. {lsst_pipe_base-30.0.0rc2.dist-info → lsst_pipe_base-30.0.1rc1.dist-info}/zip-safe +0 -0
--- a/lsst/pipe/base/quantum_graph/aggregator/_writer.py
+++ b/lsst/pipe/base/quantum_graph/aggregator/_writer.py
@@ -30,130 +30,14 @@ from __future__ import annotations
 __all__ = ("Writer",)
 
 import dataclasses
-import itertools
-import logging
-import operator
-import uuid
-from typing import TypeVar
 
-import networkx
 import zstandard
 
-from lsst.utils.packages import Packages
-
-from ... import automatic_connection_constants as acc
+from ...log_on_close import LogOnClose
 from ...pipeline_graph import TaskImportMode
-from .._common import BaseQuantumGraphWriter
-from .._multiblock import Compressor, MultiblockWriter
-from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
-from .._provenance import (
-    DATASET_ADDRESS_INDEX,
-    DATASET_MB_NAME,
-    LOG_ADDRESS_INDEX,
-    LOG_MB_NAME,
-    METADATA_ADDRESS_INDEX,
-    METADATA_MB_NAME,
-    QUANTUM_ADDRESS_INDEX,
-    QUANTUM_MB_NAME,
-    ProvenanceDatasetModel,
-    ProvenanceInitQuantaModel,
-    ProvenanceInitQuantumModel,
-    ProvenanceQuantumModel,
-)
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+from .._provenance import ProvenanceQuantumGraphWriter, ProvenanceQuantumScanData
 from ._communicators import WriterCommunicator
-from ._structs import WriteRequest
-
-
-@dataclasses.dataclass
-class _DataWriters:
-    """A struct of low-level writer objects for the main components of a
-    provenance quantum graph.
-
-    Parameters
-    ----------
-    comms : `WriterCommunicator`
-        Communicator helper object for the writer.
-    predicted : `.PredictedQuantumGraphComponents`
-        Components of the predicted graph.
-    indices : `dict` [ `uuid.UUID`, `int` ]
-        Mapping from UUID to internal integer ID, including both quanta and
-        datasets.
-    compressor : `Compressor`
-        Object that can compress `bytes`.
-    cdict_data : `bytes` or `None`, optional
-        Bytes representation of the compression dictionary used by the
-        compressor.
-    """
-
-    def __init__(
-        self,
-        comms: WriterCommunicator,
-        predicted: PredictedQuantumGraphComponents,
-        indices: dict[uuid.UUID, int],
-        compressor: Compressor,
-        cdict_data: bytes | None = None,
-    ) -> None:
-        assert comms.config.output_path is not None
-        header = predicted.header.model_copy()
-        header.graph_type = "provenance"
-        self.graph = comms.enter(
-            BaseQuantumGraphWriter.open(
-                comms.config.output_path,
-                header,
-                predicted.pipeline_graph,
-                indices,
-                address_filename="nodes",
-                compressor=compressor,
-                cdict_data=cdict_data,
-            ),
-            on_close="Finishing writing provenance quantum graph.",
-            is_progress_log=True,
-        )
-        self.graph.address_writer.addresses = [{}, {}, {}, {}]
-        self.logs = comms.enter(
-            MultiblockWriter.open_in_zip(self.graph.zf, LOG_MB_NAME, header.int_size, use_tempfile=True),
-            on_close="Copying logs into zip archive.",
-            is_progress_log=True,
-        )
-        self.graph.address_writer.addresses[LOG_ADDRESS_INDEX] = self.logs.addresses
-        self.metadata = comms.enter(
-            MultiblockWriter.open_in_zip(self.graph.zf, METADATA_MB_NAME, header.int_size, use_tempfile=True),
-            on_close="Copying metadata into zip archive.",
-            is_progress_log=True,
-        )
-        self.graph.address_writer.addresses[METADATA_ADDRESS_INDEX] = self.metadata.addresses
-        self.datasets = comms.enter(
-            MultiblockWriter.open_in_zip(self.graph.zf, DATASET_MB_NAME, header.int_size, use_tempfile=True),
-            on_close="Copying dataset provenance into zip archive.",
-            is_progress_log=True,
-        )
-        self.graph.address_writer.addresses[DATASET_ADDRESS_INDEX] = self.datasets.addresses
-        self.quanta = comms.enter(
-            MultiblockWriter.open_in_zip(self.graph.zf, QUANTUM_MB_NAME, header.int_size, use_tempfile=True),
-            on_close="Copying quantum provenance into zip archive.",
-            is_progress_log=True,
-        )
-        self.graph.address_writer.addresses[QUANTUM_ADDRESS_INDEX] = self.quanta.addresses
-
-    graph: BaseQuantumGraphWriter
-    """The parent graph writer."""
-
-    datasets: MultiblockWriter
-    """A writer for dataset provenance."""
-
-    quanta: MultiblockWriter
-    """A writer for quantum provenance."""
-
-    metadata: MultiblockWriter
-    """A writer for metadata content."""
-
-    logs: MultiblockWriter
-    """A writer for log content."""
-
-    @property
-    def compressor(self) -> Compressor:
-        """Object that should be used to compress all JSON blocks."""
-        return self.graph.compressor
 
 
 @dataclasses.dataclass
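
The deleted `_DataWriters` struct managed four `MultiblockWriter` sections (quanta, datasets, metadata, logs) plus a shared address table inside one zip archive; that bookkeeping now lives in `ProvenanceQuantumGraphWriter` (see the large `_provenance.py` addition above). As a minimal stdlib-only sketch of the general pattern it implemented, several independently written members in one archive, each with a byte-offset index, might look like this; every name here is illustrative, not the pipe_base API:

```python
import io
import json
import zipfile

# Hypothetical miniature of the "multiblock inside a zip" layout: each
# section is one zip member holding concatenated records, plus one address
# table mapping record IDs to (offset, length) within that member.
def write_sections(path: str, sections: dict[str, dict[str, bytes]]) -> None:
    addresses: dict[str, dict[str, tuple[int, int]]] = {}
    with zipfile.ZipFile(path, "w") as zf:
        for name, records in sections.items():
            buffer = io.BytesIO()
            index: dict[str, tuple[int, int]] = {}
            for record_id, payload in records.items():
                # Record where this payload starts and how long it is, so a
                # reader can seek to one record without reading the rest.
                index[record_id] = (buffer.tell(), len(payload))
                buffer.write(payload)
            zf.writestr(name, buffer.getvalue())
            addresses[name] = index
        # One combined address file, analogous to the "nodes" address table.
        zf.writestr("nodes.json", json.dumps(addresses))


write_sections("example.zip", {"quanta": {"q-1": b"{}"}, "logs": {"q-1": b"..."}})
```

Keeping one `(offset, length)` index per member is what lets readers fetch a single quantum's record without decompressing a whole section.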
@@ -171,46 +55,13 @@ class Writer:
     predicted: PredictedQuantumGraphComponents = dataclasses.field(init=False)
     """Components of the predicted quantum graph."""
 
-    existing_init_outputs: dict[uuid.UUID, set[uuid.UUID]] = dataclasses.field(default_factory=dict)
-    """Mapping that tracks which init-outputs exist.
-
-    This mapping is updated as scanners inform the writer about init-output
-    existence, since we want to write that provenance information out only at
-    the end.
-    """
-
-    indices: dict[uuid.UUID, int] = dataclasses.field(default_factory=dict)
-    """Mapping from UUID to internal integer ID, including both quanta and
-    datasets.
-
-    This is fully initialized at construction.
-    """
-
-    output_dataset_ids: set[uuid.UUID] = dataclasses.field(default_factory=set)
-    """The IDs of all datasets that are produced by this graph.
-
-    This is fully initialized at construction.
-    """
-
-    overall_inputs: dict[uuid.UUID, PredictedDatasetModel] = dataclasses.field(default_factory=dict)
-    """All datasets that are not produced by any quantum in this graph."""
-
-    xgraph: networkx.DiGraph = dataclasses.field(default_factory=networkx.DiGraph)
-    """A bipartite NetworkX graph linking datasets to quanta and quanta to
-    datasets.
-
-    This is fully initialized at construction. There are no node or edge
-    attributes in this graph; we only need it to store adjacency information
-    with datasets as well as with quanta.
-    """
-
-    pending_compression_training: list[WriteRequest] = dataclasses.field(default_factory=list)
+    pending_compression_training: list[ProvenanceQuantumScanData] = dataclasses.field(default_factory=list)
     """Unprocessed quantum scans that are being accumulated in order to
     build a compression dictionary.
     """
 
     def __post_init__(self) -> None:
-        assert self.comms.config.output_path is not None, "Writer should not be used if writing is disabled."
+        assert self.comms.config.is_writing_provenance, "Writer should not be used if writing is disabled."
         self.comms.log.info("Reading predicted quantum graph.")
         with PredictedQuantumGraphReader.open(
            self.predicted_path, import_mode=TaskImportMode.DO_NOT_IMPORT
@@ -220,58 +71,6 @@ class Writer:
             self.comms.check_for_cancel()
             reader.read_quantum_datasets()
         self.predicted = reader.components
-        for predicted_init_quantum in self.predicted.init_quanta.root:
-            self.existing_init_outputs[predicted_init_quantum.quantum_id] = set()
-        self.comms.check_for_cancel()
-        self.comms.log.info("Generating integer indexes and identifying outputs.")
-        self._populate_indices_and_outputs()
-        self.comms.check_for_cancel()
-        self._populate_xgraph_and_inputs()
-        self.comms.check_for_cancel()
-        self.comms.log_progress(
-            # We add one here for 'packages', which we do ingest but don't
-            # record provenance for.
-            logging.INFO,
-            f"Graph has {len(self.output_dataset_ids) + 1} predicted output dataset(s).",
-        )
-
-    def _populate_indices_and_outputs(self) -> None:
-        all_uuids = set(self.predicted.quantum_datasets.keys())
-        for quantum in self.comms.periodically_check_for_cancel(
-            itertools.chain(
-                self.predicted.init_quanta.root,
-                self.predicted.quantum_datasets.values(),
-            )
-        ):
-            if not quantum.task_label:
-                # Skip the 'packages' producer quantum.
-                continue
-            all_uuids.update(quantum.iter_input_dataset_ids())
-            self.output_dataset_ids.update(quantum.iter_output_dataset_ids())
-        all_uuids.update(self.output_dataset_ids)
-        self.indices = {
-            node_id: node_index
-            for node_index, node_id in self.comms.periodically_check_for_cancel(
-                enumerate(sorted(all_uuids, key=operator.attrgetter("int")))
-            )
-        }
-
-    def _populate_xgraph_and_inputs(self) -> None:
-        for predicted_quantum in self.comms.periodically_check_for_cancel(
-            itertools.chain(
-                self.predicted.init_quanta.root,
-                self.predicted.quantum_datasets.values(),
-            )
-        ):
-            if not predicted_quantum.task_label:
-                # Skip the 'packages' producer quantum.
-                continue
-            for predicted_input in itertools.chain.from_iterable(predicted_quantum.inputs.values()):
-                self.xgraph.add_edge(predicted_input.dataset_id, predicted_quantum.quantum_id)
-                if predicted_input.dataset_id not in self.output_dataset_ids:
-                    self.overall_inputs.setdefault(predicted_input.dataset_id, predicted_input)
-            for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
-                self.xgraph.add_edge(predicted_quantum.quantum_id, predicted_output.dataset_id)
 
     @staticmethod
     def run(predicted_path: str, comms: WriterCommunicator) -> None:
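
The removed `_populate_xgraph_and_inputs` used `networkx.DiGraph` purely as adjacency storage: one edge per dataset-to-consuming-quantum relation and one per quantum-to-output-dataset relation, so that `successors(dataset_id)` yields a dataset's consumers and `predecessors` its producer. A self-contained sketch of that bookkeeping (the IDs are made up):

```python
import uuid

import networkx

# Stand-in IDs for one quantum, one of its inputs, and one of its outputs.
input_ds, output_ds, quantum = uuid.uuid4(), uuid.uuid4(), uuid.uuid4()

# No node or edge attributes: the graph exists only for adjacency lookups.
xgraph = networkx.DiGraph()
xgraph.add_edge(input_ds, quantum)   # dataset -> consuming quantum
xgraph.add_edge(quantum, output_ds)  # quantum -> produced dataset

# Consumers of a dataset are its successors; its producer is a predecessor.
assert list(xgraph.successors(input_ds)) == [quantum]
assert list(xgraph.predecessors(output_ds)) == [quantum]
```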
@@ -287,7 +86,7 @@ class Writer:
         Notes
         -----
         This method is designed to run as the ``target`` in
-        `WorkerContext.make_worker`.
+        `WorkerFactory.make_worker`.
         """
         with comms:
             writer = Writer(predicted_path, comms)
@@ -295,52 +94,59 @@
 
     def loop(self) -> None:
         """Run the main loop for the writer."""
-        data_writers: _DataWriters | None = None
+        qg_writer: ProvenanceQuantumGraphWriter | None = None
         if not self.comms.config.zstd_dict_size:
-            data_writers = self.make_data_writers()
+            qg_writer = self.make_qg_writer()
         self.comms.log.info("Polling for write requests from scanners.")
         for request in self.comms.poll():
-            if data_writers is None:
+            if qg_writer is None:
                 self.pending_compression_training.append(request)
                 if len(self.pending_compression_training) >= self.comms.config.zstd_dict_n_inputs:
-                    data_writers = self.make_data_writers()
+                    qg_writer = self.make_qg_writer()
             else:
-                self.process_request(request, data_writers)
-        if data_writers is None:
-            data_writers = self.make_data_writers()
-        self.write_init_outputs(data_writers)
+                qg_writer.write_scan_data(request)
+                self.comms.report_write()
+        if qg_writer is None:
+            qg_writer = self.make_qg_writer()
+        self.comms.log.info("Writing init outputs.")
+        qg_writer.write_init_outputs(assume_existence=False)
 
-    def make_data_writers(self) -> _DataWriters:
+    def make_qg_writer(self) -> ProvenanceQuantumGraphWriter:
         """Make a compression dictionary, open the low-level writers, and
         write any accumulated scans that were needed to make the compression
         dictionary.
 
         Returns
         -------
-        data_writers : `_DataWriters`
+        qg_writer : `ProvenanceQuantumGraphWriter`
             Low-level writers struct.
         """
         cdict = self.make_compression_dictionary()
         self.comms.send_compression_dict(cdict.as_bytes())
-        assert self.comms.config.output_path is not None
-        self.comms.log.info("Opening output files.")
-        data_writers = _DataWriters(
-            self.comms,
-            self.predicted,
-            self.indices,
-            compressor=zstandard.ZstdCompressor(self.comms.config.zstd_level, cdict),
+        assert self.comms.config.is_writing_provenance and self.comms.config.output_path is not None
+        self.comms.log.info("Opening output files and processing predicted graph.")
+        qg_writer = ProvenanceQuantumGraphWriter(
+            self.comms.config.output_path,
+            exit_stack=self.comms.exit_stack,
+            log_on_close=LogOnClose(self.comms.log_progress),
+            predicted=self.predicted,
+            zstd_level=self.comms.config.zstd_level,
             cdict_data=cdict.as_bytes(),
+            loop_wrapper=self.comms.periodically_check_for_cancel,
+            log=self.comms.log,
         )
         self.comms.check_for_cancel()
         self.comms.log.info("Compressing and writing queued scan requests.")
         for request in self.pending_compression_training:
-            self.process_request(request, data_writers)
+            qg_writer.write_scan_data(request)
+            self.comms.report_write()
         del self.pending_compression_training
         self.comms.check_for_cancel()
-        self.write_overall_inputs(data_writers)
-        self.write_packages(data_writers)
+        self.comms.log.info("Writing overall inputs.")
+        qg_writer.write_overall_inputs(self.comms.periodically_check_for_cancel)
+        qg_writer.write_packages()
         self.comms.log.info("Returning to write request loop.")
-        return data_writers
+        return qg_writer
 
     def make_compression_dictionary(self) -> zstandard.ZstdCompressionDict:
         """Make the compression dictionary.
@@ -376,126 +182,3 @@ class Writer:
             training_inputs.append(write_request.metadata)
             training_inputs.append(write_request.logs)
         return zstandard.train_dictionary(self.comms.config.zstd_dict_size, training_inputs)
-
-    def write_init_outputs(self, data_writers: _DataWriters) -> None:
-        """Write provenance for init-output datasets and init-quanta.
-
-        Parameters
-        ----------
-        data_writers : `_DataWriters`
-            Low-level writers struct.
-        """
-        self.comms.log.info("Writing init outputs.")
-        init_quanta = ProvenanceInitQuantaModel()
-        for predicted_init_quantum in self.predicted.init_quanta.root:
-            if not predicted_init_quantum.task_label:
-                # Skip the 'packages' producer quantum.
-                continue
-            existing_outputs = self.existing_init_outputs[predicted_init_quantum.quantum_id]
-            for predicted_output in itertools.chain.from_iterable(predicted_init_quantum.outputs.values()):
-                provenance_output = ProvenanceDatasetModel.from_predicted(
-                    predicted_output,
-                    producer=predicted_init_quantum.quantum_id,
-                    consumers=self.xgraph.successors(predicted_output.dataset_id),
-                )
-                provenance_output.produced = predicted_output.dataset_id in existing_outputs
-                data_writers.datasets.write_model(
-                    provenance_output.dataset_id, provenance_output, data_writers.compressor
-                )
-            init_quanta.root.append(ProvenanceInitQuantumModel.from_predicted(predicted_init_quantum))
-        data_writers.graph.write_single_model("init_quanta", init_quanta)
-
-    def write_overall_inputs(self, data_writers: _DataWriters) -> None:
-        """Write provenance for overall-input datasets.
-
-        Parameters
-        ----------
-        data_writers : `_DataWriters`
-            Low-level writers struct.
-        """
-        self.comms.log.info("Writing overall inputs.")
-        for predicted_input in self.comms.periodically_check_for_cancel(self.overall_inputs.values()):
-            if predicted_input.dataset_id not in data_writers.datasets.addresses:
-                data_writers.datasets.write_model(
-                    predicted_input.dataset_id,
-                    ProvenanceDatasetModel.from_predicted(
-                        predicted_input,
-                        producer=None,
-                        consumers=self.xgraph.successors(predicted_input.dataset_id),
-                    ),
-                    data_writers.compressor,
-                )
-        del self.overall_inputs
-
-    @staticmethod
-    def write_packages(data_writers: _DataWriters) -> None:
-        """Write package version information to the provenance graph.
-
-        Parameters
-        ----------
-        data_writers : `_DataWriters`
-            Low-level writers struct.
-        """
-        packages = Packages.fromSystem(include_all=True)
-        data = packages.toBytes("json")
-        data_writers.graph.write_single_block("packages", data)
-
-    def process_request(self, request: WriteRequest, data_writers: _DataWriters) -> None:
-        """Process a `WriteRequest` into `_ScanData`.
-
-        Parameters
-        ----------
-        request : `WriteRequest`
-            Result of a quantum scan.
-        data_writers : `_DataWriters`
-            Low-level writers struct.
-        """
-        if (existing_init_outputs := self.existing_init_outputs.get(request.quantum_id)) is not None:
-            self.comms.log.debug("Handling init-output scan for %s.", request.quantum_id)
-            existing_init_outputs.update(request.existing_outputs)
-            self.comms.report_write()
-            return
-        self.comms.log.debug("Handling quantum scan for %s.", request.quantum_id)
-        predicted_quantum = self.predicted.quantum_datasets[request.quantum_id]
-        outputs: dict[uuid.UUID, bytes] = {}
-        for predicted_output in itertools.chain.from_iterable(predicted_quantum.outputs.values()):
-            provenance_output = ProvenanceDatasetModel.from_predicted(
-                predicted_output,
-                producer=predicted_quantum.quantum_id,
-                consumers=self.xgraph.successors(predicted_output.dataset_id),
-            )
-            provenance_output.produced = provenance_output.dataset_id in request.existing_outputs
-            outputs[provenance_output.dataset_id] = data_writers.compressor.compress(
-                provenance_output.model_dump_json().encode()
-            )
-        if not request.quantum:
-            request.quantum = (
-                ProvenanceQuantumModel.from_predicted(predicted_quantum).model_dump_json().encode()
-            )
-        if request.is_compressed:
-            request.quantum = data_writers.compressor.compress(request.quantum)
-        if not request.is_compressed:
-            request.quantum = data_writers.compressor.compress(request.quantum)
-        if request.metadata:
-            request.metadata = data_writers.compressor.compress(request.metadata)
-        if request.logs:
-            request.logs = data_writers.compressor.compress(request.logs)
-        self.comms.log.debug("Writing quantum %s.", request.quantum_id)
-        data_writers.quanta.write_bytes(request.quantum_id, request.quantum)
-        for dataset_id, dataset_data in outputs.items():
-            data_writers.datasets.write_bytes(dataset_id, dataset_data)
-        if request.metadata:
-            (metadata_output,) = predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME]
-            address = data_writers.metadata.write_bytes(request.quantum_id, request.metadata)
-            data_writers.metadata.addresses[metadata_output.dataset_id] = address
-        if request.logs:
-            (log_output,) = predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME]
-            address = data_writers.logs.write_bytes(request.quantum_id, request.logs)
-            data_writers.logs.addresses[log_output.dataset_id] = address
-        # We shouldn't need this predicted quantum anymore; delete it in the
-        # hopes that'll free up some memory.
-        del self.predicted.quantum_datasets[request.quantum_id]
-        self.comms.report_write()
-
-
-_T = TypeVar("_T")
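
The removed `process_request` also shows the per-record encoding convention that `write_scan_data` presumably inherits: each provenance model is dumped to JSON, encoded, and compressed as its own frame, so every record stays independently addressable. A tiny illustration of that convention with a stand-in pydantic model (not the real `ProvenanceDatasetModel`):

```python
import uuid

import pydantic
import zstandard


class StandInDatasetModel(pydantic.BaseModel):
    # Stand-in for ProvenanceDatasetModel; the real model has more fields.
    dataset_id: uuid.UUID
    produced: bool = False


compressor = zstandard.ZstdCompressor(level=10)
model = StandInDatasetModel(dataset_id=uuid.uuid4(), produced=True)

# One compressed frame per record keeps every record independently
# readable via its (offset, length) entry in the address table.
frame = compressor.compress(model.model_dump_json().encode())

restored = zstandard.ZstdDecompressor().decompress(frame)
assert StandInDatasetModel.model_validate_json(restored) == model
```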
--- /dev/null
+++ b/lsst/pipe/base/quantum_graph/formatter.py
@@ -0,0 +1,171 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("ProvenanceFormatter",)
+
+import uuid
+from typing import Any, ClassVar
+
+import pydantic
+
+from lsst.daf.butler import FormatterV2
+from lsst.daf.butler.logging import ButlerLogRecords
+from lsst.pex.config import Config
+from lsst.resources import ResourcePath
+from lsst.utils.logging import getLogger
+from lsst.utils.packages import Packages
+
+from .._task_metadata import TaskMetadata
+from ..pipeline_graph import TaskImportMode
+from ._provenance import ProvenanceQuantumGraphReader
+
+_LOG = getLogger(__file__)
+
+
+class _ProvenanceFormatterParameters(pydantic.BaseModel):
+    """A Pydantic model for validating and applying defaults to the
+    read parameters of `ProvenanceFormatter`.
+    """
+
+    import_mode: TaskImportMode = TaskImportMode.DO_NOT_IMPORT
+    quanta: list[uuid.UUID] | None = None
+    datasets: list[uuid.UUID] | None = None
+    read_init_quanta: bool = True
+
+    @pydantic.field_validator("quanta", mode="before")
+    @classmethod
+    def quanta_to_list(cls, v: Any) -> list[uuid.UUID] | None:
+        return list(v) if v is not None else None
+
+    @pydantic.field_validator("datasets", mode="before")
+    @classmethod
+    def datasets_to_list(cls, v: Any) -> list[uuid.UUID] | None:
+        return list(v) if v is not None else None
+
+    @property
+    def nodes(self) -> list[uuid.UUID]:
+        if self.quanta is not None:
+            if self.datasets is not None:
+                return self.quanta + self.datasets
+            else:
+                return self.quanta
+        elif self.datasets is not None:
+            return self.datasets
+        raise ValueError("'datasets' and/or 'quanta' parameters are required for this component")
+
+
+class ProvenanceFormatter(FormatterV2):
+    """Butler interface for reading `ProvenanceQuantumGraph` objects."""
+
+    default_extension: ClassVar[str] = ".qg"
+    can_read_from_uri: ClassVar[bool] = True
+
+    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
+        match self._dataset_ref.datasetType.storageClass_name:
+            case "TaskMetadata" | "PropertySet":
+                return self._read_metadata(uri)
+            case "ButlerLogRecords":
+                return self._read_log(uri)
+            case "Config":
+                return self._read_config(uri)
+            case "ProvenanceQuantumGraph":
+                pass
+            case unexpected:
+                raise ValueError(f"Unsupported storage class {unexpected!r} for ProvenanceFormatter.")
+        parameters = _ProvenanceFormatterParameters.model_validate(self.file_descriptor.parameters or {})
+        with ProvenanceQuantumGraphReader.open(uri, import_mode=parameters.import_mode) as reader:
+            match component:
+                case None:
+                    if parameters.read_init_quanta:
+                        reader.read_init_quanta()
+                    reader.read_quanta(parameters.quanta)
+                    reader.read_datasets(parameters.datasets)
+                    return reader.graph
+                case "metadata":
+                    return reader.fetch_metadata(parameters.nodes)
+                case "logs":
+                    return reader.fetch_logs(parameters.nodes)
+                case "packages":
+                    return reader.fetch_packages()
+        raise AssertionError(f"Unexpected component {component!r}.")
+
+    def _read_metadata(self, uri: ResourcePath) -> TaskMetadata:
+        with ProvenanceQuantumGraphReader.open(uri, import_mode=TaskImportMode.DO_NOT_IMPORT) as reader:
+            try:
+                attempts = reader.fetch_metadata([self._dataset_ref.id])[self._dataset_ref.id]
+            except LookupError:
+                raise FileNotFoundError(
+                    f"No dataset with ID {self._dataset_ref.id} present in this graph."
+                ) from None
+            if not attempts:
+                raise FileNotFoundError(
+                    f"No metadata dataset {self._dataset_ref} stored in this graph "
+                    "(no attempts for this quantum)."
+                )
+            if attempts[-1] is None:
+                raise FileNotFoundError(
+                    f"No metadata dataset {self._dataset_ref} stored in this graph "
+                    "(most recent attempt failed and did not write metadata)."
+                )
+            return attempts[-1]
+
+    def _read_log(self, uri: ResourcePath) -> ButlerLogRecords:
+        with ProvenanceQuantumGraphReader.open(uri, import_mode=TaskImportMode.DO_NOT_IMPORT) as reader:
+            try:
+                attempts = reader.fetch_logs([self._dataset_ref.id])[self._dataset_ref.id]
+            except LookupError:
+                raise FileNotFoundError(
+                    f"No dataset with ID {self._dataset_ref.id} present in this graph."
+                ) from None
+            if not attempts:
+                raise FileNotFoundError(
+                    f"No log dataset {self._dataset_ref} stored in this graph (no attempts for this quantum)."
+                )
+            if attempts[-1] is None:
+                raise FileNotFoundError(
+                    f"No log dataset {self._dataset_ref} stored in this graph "
+                    "(most recent attempt failed and did not write logs)."
+                )
+            return attempts[-1]
+
+    def _read_packages(self, uri: ResourcePath) -> Packages:
+        with ProvenanceQuantumGraphReader.open(uri, import_mode=TaskImportMode.DO_NOT_IMPORT) as reader:
+            return reader.fetch_packages()
+
+    def _read_config(self, uri: ResourcePath) -> Config:
+        task_label = self._dataset_ref.datasetType.name.removesuffix("_config")
+        with ProvenanceQuantumGraphReader.open(
+            uri, import_mode=TaskImportMode.ASSUME_CONSISTENT_EDGES
+        ) as reader:
+            try:
+                return reader.pipeline_graph.tasks[task_label].config.copy()
+            except KeyError:
+                raise FileNotFoundError(
+                    f"No task with label {task_label!r} found in the pipeline graph."
+                ) from None
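
Because `ProvenanceFormatter` is a `FormatterV2`, reads go through the normal Butler parameter path, and the `parameters` mapping is validated by `_ProvenanceFormatterParameters`, so `quanta`, `datasets`, `import_mode`, and `read_init_quanta` can all be passed to `butler.get`. A hedged usage sketch: the repo path, dataset type name, and UUID below are placeholders, not values defined by this package:

```python
import uuid

from lsst.daf.butler import Butler

butler = Butler("/path/to/repo")  # placeholder repo path

# Placeholder UUID; real values come from the predicted quantum graph.
quantum_id = uuid.UUID("00000000-0000-0000-0000-000000000000")

# Full-graph read restricted to selected quanta.  The dataset type name is
# a placeholder for whatever type is registered with this formatter and
# the ProvenanceQuantumGraph storage class.
graph = butler.get(
    "provenance_quantum_graph",
    parameters={"quanta": [quantum_id], "read_init_quanta": False},
)

# Component reads dispatch to fetch_metadata / fetch_logs / fetch_packages.
logs = butler.get(
    "provenance_quantum_graph.logs",
    parameters={"quanta": [quantum_id]},
)
```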