lsst-pipe-base 30.2026.300__py3-none-any.whl → 30.2026.500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. lsst/pipe/base/_instrument.py +21 -12
  2. lsst/pipe/base/_quantumContext.py +3 -3
  3. lsst/pipe/base/_status.py +43 -10
  4. lsst/pipe/base/automatic_connection_constants.py +20 -1
  5. lsst/pipe/base/cli/cmd/__init__.py +18 -2
  6. lsst/pipe/base/cli/cmd/commands.py +149 -4
  7. lsst/pipe/base/connectionTypes.py +72 -160
  8. lsst/pipe/base/connections.py +3 -6
  9. lsst/pipe/base/execution_reports.py +0 -5
  10. lsst/pipe/base/graph/graph.py +9 -8
  11. lsst/pipe/base/log_capture.py +1 -1
  12. lsst/pipe/base/pipeline.py +5 -6
  13. lsst/pipe/base/pipelineIR.py +1 -7
  14. lsst/pipe/base/pipelineTask.py +5 -7
  15. lsst/pipe/base/pipeline_graph/_dataset_types.py +2 -2
  16. lsst/pipe/base/pipeline_graph/_edges.py +30 -18
  17. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +10 -2
  18. lsst/pipe/base/pipeline_graph/visualization/_dot.py +13 -12
  19. lsst/pipe/base/pipeline_graph/visualization/_status_annotator.py +7 -0
  20. lsst/pipe/base/prerequisite_helpers.py +2 -1
  21. lsst/pipe/base/quantum_graph/_common.py +3 -1
  22. lsst/pipe/base/quantum_graph/_multiblock.py +29 -13
  23. lsst/pipe/base/quantum_graph/_predicted.py +7 -0
  24. lsst/pipe/base/quantum_graph/_provenance.py +498 -56
  25. lsst/pipe/base/quantum_graph/aggregator/__init__.py +0 -1
  26. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -1
  27. lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
  28. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
  29. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +14 -6
  30. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -3
  31. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +14 -13
  32. lsst/pipe/base/quantum_graph/aggregator/_writer.py +2 -2
  33. lsst/pipe/base/quantum_graph/formatter.py +74 -4
  34. lsst/pipe/base/quantum_graph/ingest_graph.py +413 -0
  35. lsst/pipe/base/quantum_graph_builder.py +1 -8
  36. lsst/pipe/base/quantum_graph_skeleton.py +29 -27
  37. lsst/pipe/base/quantum_provenance_graph.py +29 -12
  38. lsst/pipe/base/separable_pipeline_executor.py +6 -7
  39. lsst/pipe/base/single_quantum_executor.py +7 -7
  40. lsst/pipe/base/struct.py +4 -0
  41. lsst/pipe/base/tests/mocks/_storage_class.py +2 -1
  42. lsst/pipe/base/version.py +1 -1
  43. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/METADATA +2 -1
  44. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/RECORD +52 -51
  45. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/WHEEL +1 -1
  46. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/entry_points.txt +0 -0
  47. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/COPYRIGHT +0 -0
  48. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/LICENSE +0 -0
  49. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/bsd_license.txt +0 -0
  50. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/licenses/gpl-v3.0.txt +0 -0
  51. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/top_level.txt +0 -0
  52. {lsst_pipe_base-30.2026.300.dist-info → lsst_pipe_base-30.2026.500.dist-info}/zip-safe +0 -0

lsst/pipe/base/quantum_graph/aggregator/__init__.py
@@ -140,4 +140,3 @@ from ._communicators import FatalWorkerError
 # - having the worker logs go to separate files is actually very nice, and it's
 # more efficient if they just do that themselves, and that's not something
 # our logging CLI can actually do, AFAICT.
-

lsst/pipe/base/quantum_graph/aggregator/_communicators.py
@@ -318,6 +318,12 @@ Report: TypeAlias = (
 )
 
 
+def _disable_resources_parallelism() -> None:
+    os.environ["LSST_RESOURCES_NUM_WORKERS"] = "1"
+    os.environ.pop("LSST_RESOURCES_EXECUTOR", None)
+    os.environ["LSST_S3_USE_THREADS"] = "False"
+
+
 class SupervisorCommunicator:
     """A helper object that lets the supervisor direct the other workers.
 
@@ -364,7 +370,7 @@ class SupervisorCommunicator:
         # starts its shutdown.
         self._write_requests: (
             Queue[ProvenanceQuantumScanData | Literal[_Sentinel.NO_MORE_WRITE_REQUESTS]] | None
-        ) = context.make_queue() if config.output_path is not None else None
+        ) = context.make_queue() if config.is_writing_provenance else None
         # All other workers use this queue to send many different kinds of
         # reports the supervisor. The supervisor waits for a _DONE sentinal
         # from each worker before it finishes its shutdown.
@@ -433,6 +439,7 @@ class SupervisorCommunicator:
         self._expect_empty_queue(self._compression_dict)
 
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.progress.__enter__()
         # We make the low-level logger in __enter__ instead of __init__ only
         # because that's the pattern used by true workers (where it matters).
@@ -581,6 +588,7 @@ class WorkerCommunicator:
         self._cancel_event = supervisor._cancel_event
 
     def __enter__(self) -> Self:
+        _disable_resources_parallelism()
         self.log = make_worker_log(self.name, self.config)
         self.log.verbose("%s has PID %s (parent is %s).", self.name, os.getpid(), os.getppid())
         self._exit_stack = ExitStack().__enter__()
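
Both SupervisorCommunicator.__enter__ and WorkerCommunicator.__enter__ now call _disable_resources_parallelism() before doing anything else, so every aggregator process pins lsst.resources to serial operation through environment variables rather than letting it spin up its own thread or process pools alongside the aggregator's workers. A minimal sketch of a test-style check mirroring what the helper enforces (the function name below is illustrative, not part of the package):

    import os

    def assert_resources_parallelism_disabled() -> None:
        # Mirrors the three settings applied by _disable_resources_parallelism().
        assert os.environ.get("LSST_RESOURCES_NUM_WORKERS") == "1"
        assert "LSST_RESOURCES_EXECUTOR" not in os.environ
        assert os.environ.get("LSST_S3_USE_THREADS") == "False"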

lsst/pipe/base/quantum_graph/aggregator/_config.py
@@ -29,6 +29,8 @@ from __future__ import annotations
 
 __all__ = ("AggregatorConfig",)
 
+import sys
+from typing import TYPE_CHECKING, Any
 
 import pydantic
 
@@ -60,11 +62,13 @@ class AggregatorConfig(pydantic.BaseModel):
     n_processes: int = 1
     """Number of processes the scanner should use."""
 
-    assume_complete: bool = True
-    """If `True`, the aggregator can assume all quanta have run to completion
-    (including any automatic retries). If `False`, only successes can be
-    considered final, and quanta that appear to have failed or to have not been
-    executed are ignored.
+    incomplete: bool = False
+    """If `True`, do not expect the graph to have been executed to completion
+    yet, and only ingest the outputs of successful quanta.
+
+    This disables writing the provenance quantum graph, since this is likely to
+    be wasted effort that just complicates a follow-up run with
+    ``incomplete=False`` later.
     """
 
     defensive_ingest: bool = False
@@ -95,11 +99,10 @@ class AggregatorConfig(pydantic.BaseModel):
     """
 
     dry_run: bool = False
-    """If `True`, do not actually perform any deletions or central butler
-    ingests.
+    """If `True`, do not actually perform any central butler ingests.
 
-    Most log messages concerning deletions and ingests will still be emitted in
-    order to provide a better emulation of a real run.
+    Most log messages concerning ingests will still be emitted in order to
+    provide a better emulation of a real run.
     """
 
     interactive_status: bool = False
@@ -137,3 +140,69 @@ class AggregatorConfig(pydantic.BaseModel):
     """Enable support for storage classes by created by the
     lsst.pipe.base.tests.mocks package.
     """
+
+    promise_ingest_graph: bool = False
+    """If `True`, the aggregator will assume that `~.ingest_graph.ingest_graph`
+    will be run later to ingest metadata/log/config datasets, and will not
+    ingest them itself. This means that if `~.ingest_graph.ingest_graph` is
+    not run, those files will be abandoned in the butler storage root without
+    being present in the butler database, but it will speed up both processes.
+
+    It is *usually* safe to build a quantum graph for downstream processing
+    before or while running `~.ingest_graph.ingest_graph`, because
+    metadata/log/config datasets are rarely used as inputs. To check, use
+    ``pipetask build ... --show inputs`` to show the overall-inputs to the
+    graph and scan for these dataset types.
+    """
+
+    @property
+    def is_writing_provenance(self) -> bool:
+        """Whether the aggregator is configured to write the provenance quantum
+        graph.
+        """
+        return self.output_path is not None and not self.incomplete
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
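
The assume_complete option (default True) is replaced by the inverted incomplete option (default False), and callers now consult the new is_writing_provenance property instead of checking output_path directly. A hedged sketch of how the two interact, using only fields visible in this diff (the import path is the private module from the file list above, and output_path is assumed to accept a plain string):

    from lsst.pipe.base.quantum_graph.aggregator._config import AggregatorConfig

    # First pass over a run that may still be executing or retrying:
    # only successful quanta are ingested and provenance writing is skipped.
    cfg = AggregatorConfig(incomplete=True, output_path="provenance.qg")
    assert not cfg.is_writing_provenance

    # Follow-up pass once everything has finished: provenance writing is back on.
    final_cfg = cfg.model_copy(update={"incomplete": False})
    assert final_cfg.is_writing_provenance

model_copy here is the standard pydantic v2 copy-with-update call; the block guarded by "sphinx" in sys.modules above only re-documents such methods so Sphinx can render them cleanly.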

lsst/pipe/base/quantum_graph/aggregator/_ingester.py
@@ -43,7 +43,7 @@ from lsst.daf.butler.registry import ConflictingDefinitionError
 
 from ...pipeline_graph import TaskImportMode
 from .._common import DatastoreName
-from .._predicted import PredictedDatasetModel, PredictedQuantumGraphComponents, PredictedQuantumGraphReader
+from .._predicted import PredictedQuantumGraphComponents, PredictedQuantumGraphReader
 from ._communicators import IngesterCommunicator
 
 
@@ -170,7 +170,7 @@ class Ingester(AbstractContextManager):
         for ingest_request in self.comms.poll():
             self.n_producers_pending += 1
             self.comms.log.debug(f"Got ingest request for producer {ingest_request.producer_id}.")
-            self.update_pending(ingest_request.datasets, ingest_request.records)
+            self.update_outputs_pending(refs=ingest_request.refs, records=ingest_request.records)
             if self.n_datasets_pending > self.comms.config.ingest_batch_size:
                 self.ingest()
         self.comms.log.info("All ingest requests received.")
@@ -266,31 +266,32 @@ class Ingester(AbstractContextManager):
             else:
                 del self.records_pending[datastore_name]
 
-    def update_pending(
-        self, datasets: list[PredictedDatasetModel], records: dict[DatastoreName, DatastoreRecordData]
+    def update_outputs_pending(
+        self,
+        refs: list[DatasetRef],
+        records: dict[DatastoreName, DatastoreRecordData],
     ) -> None:
         """Add an ingest request to the pending-ingest data structures.
 
         Parameters
         ----------
-        datasets : `list` [ `PredictedDatasetModel` ]
-            Registry information about the datasets.
+        refs : `list` [ `lsst.daf.butler.DatasetRef` ]
+            Registry information about regular quantum-output datasets.
         records : `dict` [ `str`, \
             `lsst.daf.butler.datastore.record_data.DatastoreRecordData` ]
             Datastore information about the datasets.
         """
-        n_given = len(datasets)
+        n_given = len(refs)
         if self.already_ingested is not None:
-            datasets = [d for d in datasets if d.dataset_id not in self.already_ingested]
-            kept = {d.dataset_id for d in datasets}
+            refs = [ref for ref in refs if ref.id not in self.already_ingested]
+            kept = {ref.id for ref in refs}
             self.n_datasets_skipped += n_given - len(kept)
             records = {
                 datastore_name: filtered_records
                 for datastore_name, original_records in records.items()
                 if (filtered_records := original_records.subset(kept)) is not None
             }
-        for dataset in datasets:
-            ref = self.predicted.make_dataset_ref(dataset)
+        for ref in refs:
            self.refs_pending[ref.datasetType.dimensions].append(ref)
        for datastore_name, datastore_records in records.items():
            if (existing_records := self.records_pending.get(datastore_name)) is not None:

lsst/pipe/base/quantum_graph/aggregator/_scanner.py
@@ -223,7 +223,7 @@ class Scanner(AbstractContextManager):
         logs = self._read_log(predicted_quantum)
         metadata = self._read_metadata(predicted_quantum)
         result = ProvenanceQuantumScanModels.from_metadata_and_logs(
-            predicted_quantum, metadata, logs, assume_complete=self.comms.config.assume_complete
+            predicted_quantum, metadata, logs, incomplete=self.comms.config.incomplete
         )
         if result.status is ProvenanceQuantumScanStatus.ABANDONED:
             self.comms.log.debug("Abandoning scan for failed quantum %s.", quantum_id)
@@ -233,7 +233,7 @@ class Scanner(AbstractContextManager):
             if predicted_output.dataset_id not in result.output_existence:
                 result.output_existence[predicted_output.dataset_id] = self.scan_dataset(predicted_output)
         to_ingest = self._make_ingest_request(predicted_quantum, result)
-        if self.comms.config.output_path is not None:
+        if self.comms.config.is_writing_provenance:
             to_write = result.to_scan_data(predicted_quantum, compressor=self.compressor)
             self.comms.request_write(to_write)
         self.comms.request_ingest(to_ingest)
@@ -261,15 +261,23 @@ class Scanner(AbstractContextManager):
         predicted_outputs_by_id = {
             d.dataset_id: d for d in itertools.chain.from_iterable(predicted_quantum.outputs.values())
         }
-        to_ingest_predicted: list[PredictedDatasetModel] = []
         to_ingest_refs: list[DatasetRef] = []
+        to_ignore: set[uuid.UUID] = set()
+        if self.comms.config.promise_ingest_graph:
+            if result.status is ProvenanceQuantumScanStatus.INIT:
+                if predicted_quantum.task_label:  # i.e. not the 'packages' producer
+                    to_ignore.add(
+                        predicted_quantum.outputs[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME][0].dataset_id
+                    )
+            else:
+                to_ignore.add(predicted_quantum.outputs[acc.METADATA_OUTPUT_CONNECTION_NAME][0].dataset_id)
+                to_ignore.add(predicted_quantum.outputs[acc.LOG_OUTPUT_CONNECTION_NAME][0].dataset_id)
         for dataset_id, was_produced in result.output_existence.items():
-            if was_produced:
+            if was_produced and dataset_id not in to_ignore:
                 predicted_output = predicted_outputs_by_id[dataset_id]
-                to_ingest_predicted.append(predicted_output)
                 to_ingest_refs.append(self.reader.components.make_dataset_ref(predicted_output))
         to_ingest_records = self.qbb._datastore.export_predicted_records(to_ingest_refs)
-        return IngestRequest(result.quantum_id, to_ingest_predicted, to_ingest_records)
+        return IngestRequest(result.quantum_id, to_ingest_refs, to_ingest_records)
 
     def _read_metadata(self, predicted_quantum: PredictedQuantumDatasetsModel) -> TaskMetadata | None:
         """Attempt to read the metadata dataset for a quantum.

lsst/pipe/base/quantum_graph/aggregator/_structs.py
@@ -32,10 +32,10 @@ __all__ = ("IngestRequest", "ScanReport")
 import dataclasses
 import uuid
 
+from lsst.daf.butler import DatasetRef
 from lsst.daf.butler.datastore.record_data import DatastoreRecordData
 
 from .._common import DatastoreName
-from .._predicted import PredictedDatasetModel
 from .._provenance import ProvenanceQuantumScanStatus
 
 
@@ -57,11 +57,11 @@ class IngestRequest:
     producer_id: uuid.UUID
     """ID of the quantum that produced these datasets."""
 
-    datasets: list[PredictedDatasetModel]
+    refs: list[DatasetRef]
     """Registry information about the datasets."""
 
     records: dict[DatastoreName, DatastoreRecordData]
     """Datastore information about the datasets."""
 
     def __bool__(self) -> bool:
-        return bool(self.datasets or self.records)
+        return bool(self.refs or self.records)
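
IngestRequest now carries fully expanded lsst.daf.butler.DatasetRef objects built by the scanner, so the ingester no longer needs the predicted graph to reconstruct them, and an empty request stays falsy and can be skipped cheaply. A self-contained stand-in illustrating that payload shape (the real dataclass is the one above; field types are simplified here):

    import dataclasses
    import uuid
    from typing import Any

    @dataclasses.dataclass
    class IngestRequestSketch:
        producer_id: uuid.UUID
        refs: list[Any]  # DatasetRef objects in the real class
        records: dict[str, Any]  # DatastoreName -> DatastoreRecordData in the real class

        def __bool__(self) -> bool:
            # Same truthiness rule as IngestRequest after this change.
            return bool(self.refs or self.records)

    assert not IngestRequestSketch(uuid.uuid4(), [], {})  # nothing to ingest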

lsst/pipe/base/quantum_graph/aggregator/_supervisor.py
@@ -117,6 +117,18 @@ class Supervisor:
                self.comms.request_scan(ready_set.pop())
            for scan_return in self.comms.poll():
                self.handle_report(scan_return)
+        if self.comms.config.incomplete:
+            quantum_or_quanta = "quanta" if self.n_abandoned != 1 else "quantum"
+            self.comms.progress.log.info(
+                "%d %s incomplete/failed abandoned; re-run with incomplete=False to finish.",
+                self.n_abandoned,
+                quantum_or_quanta,
+            )
+        self.comms.progress.log.info(
+            "Scanning complete after %0.1fs; waiting for workers to finish.",
+            self.comms.progress.elapsed_time,
+        )
+        self.comms.wait_for_workers_to_finish()
 
     def handle_report(self, scan_report: ScanReport) -> None:
         """Handle a report from a scanner.
@@ -134,7 +146,7 @@ class Supervisor:
            self.comms.log.debug("Scan complete for %s: quantum failed.", scan_report.quantum_id)
            blocked_quanta = self.walker.fail(scan_report.quantum_id)
            for blocked_quantum_id in blocked_quanta:
-                if self.comms.config.output_path is not None:
+                if self.comms.config.is_writing_provenance:
                    self.comms.request_write(
                        ProvenanceQuantumScanData(
                            blocked_quantum_id, status=ProvenanceQuantumScanStatus.BLOCKED
@@ -172,7 +184,7 @@ def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorCon
     writer: Worker | None = None
     with SupervisorCommunicator(log, config.n_processes, ctx, config) as comms:
         comms.progress.log.verbose("Starting workers.")
-        if config.output_path is not None:
+        if config.is_writing_provenance:
             writer_comms = WriterCommunicator(comms)
             writer = ctx.make_worker(
                 target=Writer.run,
@@ -198,17 +210,6 @@ def aggregate_graph(predicted_path: str, butler_path: str, config: AggregatorCon
         ingester.start()
         supervisor = Supervisor(predicted_path, comms)
         supervisor.loop()
-        log.info(
-            "Scanning complete after %0.1fs; waiting for workers to finish.",
-            comms.progress.elapsed_time,
-        )
-        comms.wait_for_workers_to_finish()
-        if supervisor.n_abandoned:
-            raise RuntimeError(
-                f"{supervisor.n_abandoned} {'quanta' if supervisor.n_abandoned > 1 else 'quantum'} "
-                "abandoned because they did not succeed. Re-run with assume_complete=True after all retry "
-                "attempts have been exhausted."
-            )
         for w in scanners:
             w.join()
         ingester.join()
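
aggregate_graph no longer raises RuntimeError for abandoned quanta; in incomplete mode the supervisor simply reports how many were skipped and points at a follow-up run. A hedged sketch of the resulting two-pass workflow (the paths, the import locations, and leaving every other config field at its default are all assumptions):

    from lsst.pipe.base.quantum_graph.aggregator._config import AggregatorConfig
    from lsst.pipe.base.quantum_graph.aggregator._supervisor import aggregate_graph

    predicted_path = "predicted.qg"  # hypothetical predicted quantum graph
    butler_path = "/repo/butler.yaml"  # hypothetical central butler config

    # Pass 1, while quanta may still be running or retrying: ingest successes only.
    aggregate_graph(predicted_path, butler_path, AggregatorConfig(incomplete=True))

    # Pass 2, after all retries are exhausted: pick up the remaining quanta and
    # write the provenance quantum graph.
    aggregate_graph(
        predicted_path,
        butler_path,
        AggregatorConfig(incomplete=False, output_path="provenance.qg"),
    )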

lsst/pipe/base/quantum_graph/aggregator/_writer.py
@@ -61,7 +61,7 @@ class Writer:
    """
 
    def __post_init__(self) -> None:
-        assert self.comms.config.output_path is not None, "Writer should not be used if writing is disabled."
+        assert self.comms.config.is_writing_provenance, "Writer should not be used if writing is disabled."
        self.comms.log.info("Reading predicted quantum graph.")
        with PredictedQuantumGraphReader.open(
            self.predicted_path, import_mode=TaskImportMode.DO_NOT_IMPORT
@@ -123,7 +123,7 @@ class Writer:
        """
        cdict = self.make_compression_dictionary()
        self.comms.send_compression_dict(cdict.as_bytes())
-        assert self.comms.config.output_path is not None
+        assert self.comms.config.is_writing_provenance and self.comms.config.output_path is not None
        self.comms.log.info("Opening output files and processing predicted graph.")
        qg_writer = ProvenanceQuantumGraphWriter(
            self.comms.config.output_path,

lsst/pipe/base/quantum_graph/formatter.py
@@ -35,9 +35,13 @@ from typing import Any, ClassVar
 import pydantic
 
 from lsst.daf.butler import FormatterV2
+from lsst.daf.butler.logging import ButlerLogRecords
+from lsst.pex.config import Config
 from lsst.resources import ResourcePath
 from lsst.utils.logging import getLogger
+from lsst.utils.packages import Packages
 
+from .._task_metadata import TaskMetadata
 from ..pipeline_graph import TaskImportMode
 from ._provenance import ProvenanceQuantumGraphReader
 
@@ -56,13 +60,13 @@ class _ProvenanceFormatterParameters(pydantic.BaseModel):
 
     @pydantic.field_validator("quanta", mode="before")
     @classmethod
-    def quanta_to_list(cls, v: Any) -> list[uuid.UUID]:
-        return list(v)
+    def quanta_to_list(cls, v: Any) -> list[uuid.UUID] | None:
+        return list(v) if v is not None else None
 
     @pydantic.field_validator("datasets", mode="before")
     @classmethod
-    def datasets_to_list(cls, v: Any) -> list[uuid.UUID]:
-        return list(v)
+    def datasets_to_list(cls, v: Any) -> list[uuid.UUID] | None:
+        return list(v) if v is not None else None
 
     @property
     def nodes(self) -> list[uuid.UUID]:
@@ -83,6 +87,17 @@ class ProvenanceFormatter(FormatterV2):
     can_read_from_uri: ClassVar[bool] = True
 
     def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
+        match self._dataset_ref.datasetType.storageClass_name:
+            case "TaskMetadata" | "PropertySet":
+                return self._read_metadata(uri)
+            case "ButlerLogRecords":
+                return self._read_log(uri)
+            case "Config":
+                return self._read_config(uri)
+            case "ProvenanceQuantumGraph":
+                pass
+            case unexpected:
+                raise ValueError(f"Unsupported storage class {unexpected!r} for ProvenanceFormatter.")
         parameters = _ProvenanceFormatterParameters.model_validate(self.file_descriptor.parameters or {})
         with ProvenanceQuantumGraphReader.open(uri, import_mode=parameters.import_mode) as reader:
             match component:
@@ -99,3 +114,58 @@
                 case "packages":
                     return reader.fetch_packages()
         raise AssertionError(f"Unexpected component {component!r}.")
+
+    def _read_metadata(self, uri: ResourcePath) -> TaskMetadata:
+        with ProvenanceQuantumGraphReader.open(uri, import_mode=TaskImportMode.DO_NOT_IMPORT) as reader:
+            try:
+                attempts = reader.fetch_metadata([self._dataset_ref.id])[self._dataset_ref.id]
+            except LookupError:
+                raise FileNotFoundError(
+                    f"No dataset with ID {self._dataset_ref.id} present in this graph."
+                ) from None
+            if not attempts:
+                raise FileNotFoundError(
+                    f"No metadata dataset {self._dataset_ref} stored in this graph "
+                    "(no attempts for this quantum)."
+                )
+            if attempts[-1] is None:
+                raise FileNotFoundError(
+                    f"No metadata dataset {self._dataset_ref} stored in this graph "
+                    "(most recent attempt failed and did not write metadata)."
+                )
+            return attempts[-1]
+
+    def _read_log(self, uri: ResourcePath) -> ButlerLogRecords:
+        with ProvenanceQuantumGraphReader.open(uri, import_mode=TaskImportMode.DO_NOT_IMPORT) as reader:
+            try:
+                attempts = reader.fetch_logs([self._dataset_ref.id])[self._dataset_ref.id]
+            except LookupError:
+                raise FileNotFoundError(
+                    f"No dataset with ID {self._dataset_ref.id} present in this graph."
+                ) from None
+            if not attempts:
+                raise FileNotFoundError(
+                    f"No log dataset {self._dataset_ref} stored in this graph (no attempts for this quantum)."
+                )
+            if attempts[-1] is None:
+                raise FileNotFoundError(
+                    f"No log dataset {self._dataset_ref} stored in this graph "
+                    "(most recent attempt failed and did not write logs)."
+                )
+            return attempts[-1]
+
+    def _read_packages(self, uri: ResourcePath) -> Packages:
+        with ProvenanceQuantumGraphReader.open(uri, import_mode=TaskImportMode.DO_NOT_IMPORT) as reader:
+            return reader.fetch_packages()
+
+    def _read_config(self, uri: ResourcePath) -> Config:
+        task_label = self._dataset_ref.datasetType.name.removesuffix("_config")
+        with ProvenanceQuantumGraphReader.open(
+            uri, import_mode=TaskImportMode.ASSUME_CONSISTENT_EDGES
+        ) as reader:
+            try:
+                return reader.pipeline_graph.tasks[task_label].config.copy()
+            except KeyError:
+                raise FileNotFoundError(
+                    f"No task with label {task_label!r} found in the pipeline graph."
+                ) from None
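
ProvenanceFormatter now dispatches on the dataset type's storage class, so metadata, log, and config datasets can be served directly out of a provenance quantum graph file, and the _ProvenanceFormatterParameters validators accept None so the quanta/datasets parameters can be omitted entirely. A self-contained sketch of that before-mode validator pattern (the field name is copied from the diff; the model name and default here are assumptions):

    import uuid
    from typing import Any

    import pydantic

    class ParametersSketch(pydantic.BaseModel):
        quanta: list[uuid.UUID] | None = None

        @pydantic.field_validator("quanta", mode="before")
        @classmethod
        def quanta_to_list(cls, v: Any) -> list[uuid.UUID] | None:
            # Normalize any iterable (e.g. a set of IDs) to a list, but let
            # None pass through so the parameter can be left unset.
            return list(v) if v is not None else None

    assert ParametersSketch.model_validate({"quanta": None}).quanta is None
    assert ParametersSketch.model_validate({"quanta": [uuid.uuid4()]}).quanta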