lsst-pipe-base 29.2025.4100-py3-none-any.whl → 29.2025.4300-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. lsst/pipe/base/_status.py +1 -1
  2. lsst/pipe/base/cli/cmd/__init__.py +2 -2
  3. lsst/pipe/base/cli/cmd/commands.py +116 -1
  4. lsst/pipe/base/graph_walker.py +8 -4
  5. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +30 -5
  6. lsst/pipe/base/quantum_graph/__init__.py +1 -0
  7. lsst/pipe/base/quantum_graph/_common.py +2 -1
  8. lsst/pipe/base/quantum_graph/_multiblock.py +41 -7
  9. lsst/pipe/base/quantum_graph/_predicted.py +62 -5
  10. lsst/pipe/base/quantum_graph/_provenance.py +1209 -0
  11. lsst/pipe/base/quantum_graph/aggregator/__init__.py +143 -0
  12. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +981 -0
  13. lsst/pipe/base/quantum_graph/aggregator/_config.py +139 -0
  14. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +312 -0
  15. lsst/pipe/base/quantum_graph/aggregator/_progress.py +208 -0
  16. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +371 -0
  17. lsst/pipe/base/quantum_graph/aggregator/_structs.py +167 -0
  18. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +225 -0
  19. lsst/pipe/base/quantum_graph/aggregator/_writer.py +593 -0
  20. lsst/pipe/base/resource_usage.py +183 -0
  21. lsst/pipe/base/simple_pipeline_executor.py +4 -1
  22. lsst/pipe/base/tests/util.py +31 -0
  23. lsst/pipe/base/version.py +1 -1
  24. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/METADATA +1 -1
  25. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/RECORD +33 -22
  26. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/WHEEL +0 -0
  27. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/entry_points.txt +0 -0
  28. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/COPYRIGHT +0 -0
  29. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/LICENSE +0 -0
  30. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/bsd_license.txt +0 -0
  31. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/licenses/gpl-v3.0.txt +0 -0
  32. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/top_level.txt +0 -0
  33. {lsst_pipe_base-29.2025.4100.dist-info → lsst_pipe_base-29.2025.4300.dist-info}/zip-safe +0 -0
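
The hunk below is the new file lsst/pipe/base/quantum_graph/_provenance.py (item 10, +1209 lines), which defines the data models and reader for "provenance" quantum graphs. A minimal sketch of how the new reader API fits together, based only on the docstrings in this hunk; the ".qg" path is a placeholder, and the import from lsst.pipe.base.quantum_graph assumes the re-export implied by the one-line __init__.py change (item 6):

    from lsst.pipe.base.quantum_graph import ProvenanceQuantumGraphReader

    # Open the provenance graph file and populate the graph in place.
    # Per the reader's docstring, reader.graph remains usable after the
    # context manager exits (everything else does not).
    with ProvenanceQuantumGraphReader.open("run_provenance.qg") as reader:
        reader.read_full_graph()  # init quanta + datasets + quanta
        graph = reader.graph

    # Per-task summary table of quantum statuses and success caveats.
    print(graph.make_quantum_table())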
@@ -0,0 +1,1209 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ __all__ = (
+     "ProvenanceDatasetInfo",
+     "ProvenanceDatasetModel",
+     "ProvenanceInitQuantumInfo",
+     "ProvenanceInitQuantumModel",
+     "ProvenanceQuantumGraph",
+     "ProvenanceQuantumGraphReader",
+     "ProvenanceQuantumInfo",
+     "ProvenanceQuantumModel",
+ )
+
+
+ import dataclasses
+ import sys
+ import uuid
+ from collections import Counter
+ from collections.abc import Iterable, Iterator, Mapping
+ from contextlib import contextmanager
+ from typing import TYPE_CHECKING, Any, Self, TypedDict
+
+ import astropy.table
+ import networkx
+ import numpy as np
+ import pydantic
+
+ from lsst.daf.butler import DataCoordinate
+ from lsst.resources import ResourcePathExpression
+ from lsst.utils.packages import Packages
+
+ from .._status import QuantumSuccessCaveats
+ from ..pipeline_graph import PipelineGraph, TaskImportMode, TaskInitNode
+ from ..quantum_provenance_graph import ExceptionInfo, QuantumRunStatus
+ from ..resource_usage import QuantumResourceUsage
+ from ._common import (
+     BaseQuantumGraph,
+     BaseQuantumGraphReader,
+     ConnectionName,
+     DataCoordinateValues,
+     DatasetIndex,
+     DatasetInfo,
+     DatasetTypeName,
+     HeaderModel,
+     QuantumIndex,
+     QuantumInfo,
+     TaskLabel,
+ )
+ from ._multiblock import AddressReader, MultiblockReader
+ from ._predicted import PredictedDatasetModel, PredictedQuantumDatasetsModel
+
+ if TYPE_CHECKING:
+     from lsst.daf.butler.logging import ButlerLogRecords
+
+     from .._task_metadata import TaskMetadata
+
+
+ DATASET_ADDRESS_INDEX = 0
+ QUANTUM_ADDRESS_INDEX = 1
+ LOG_ADDRESS_INDEX = 2
+ METADATA_ADDRESS_INDEX = 3
+
+ DATASET_MB_NAME = "datasets"
+ QUANTUM_MB_NAME = "quanta"
+ LOG_MB_NAME = "logs"
+ METADATA_MB_NAME = "metadata"
+
+
+ class ProvenanceDatasetInfo(DatasetInfo):
+     """A typed dictionary that annotates the attributes of the NetworkX graph
+     node data for a provenance dataset.
+
+     Since NetworkX types are not generic over their node mapping type, this has
+     to be used explicitly, e.g.::
+
+         node_data: ProvenanceDatasetInfo = xgraph.nodes[dataset_id]
+
+     where ``xgraph`` is `ProvenanceQuantumGraph.bipartite_xgraph`.
+     """
+
+     dataset_id: uuid.UUID
+     """Unique identifier for the dataset."""
+
+     exists: bool
+     """Whether this dataset existed immediately after the quantum graph was
+     run.
+
+     This is always `True` for overall input datasets. It is also `True` for
+     datasets that were produced and then removed before/during transfer back to
+     the central butler repository.
+     """
+
+
+ class ProvenanceQuantumInfo(QuantumInfo):
+     """A typed dictionary that annotates the attributes of the NetworkX graph
+     node data for a provenance quantum.
+
+     Since NetworkX types are not generic over their node mapping type, this has
+     to be used explicitly, e.g.::
+
+         node_data: ProvenanceQuantumInfo = xgraph.nodes[quantum_id]
+
+     where ``xgraph`` is `ProvenanceQuantumGraph.bipartite_xgraph` or
+     `ProvenanceQuantumGraph.quantum_only_xgraph`.
+     """
+
+     status: QuantumRunStatus
+     """Enumerated status for the quantum."""
+
+     caveats: QuantumSuccessCaveats | None
+     """Flags indicating caveats on successful quanta."""
+
+     exception: ExceptionInfo | None
+     """Information about an exception raised when the quantum was executing."""
+
+     resource_usage: QuantumResourceUsage | None
+     """Resource usage information (timing, memory use) for this quantum."""
+
+
+ class ProvenanceInitQuantumInfo(TypedDict):
+     """A typed dictionary that annotates the attributes of the NetworkX graph
+     node data for a provenance init quantum.
+
+     Since NetworkX types are not generic over their node mapping type, this has
+     to be used explicitly, e.g.::
+
+         node_data: ProvenanceInitQuantumInfo = xgraph.nodes[quantum_id]
+
+     where ``xgraph`` is `ProvenanceQuantumGraph.bipartite_xgraph`.
+     """
+
+     data_id: DataCoordinate
+     """Data ID of the quantum.
+
+     This is always an empty ID; this key exists to allow init-quanta and
+     regular quanta to be treated more similarly.
+     """
+
+     task_label: str
+     """Label of the task for this quantum."""
+
+     pipeline_node: TaskInitNode
+     """Node in the pipeline graph for this task's init-only step."""
+
+
+ class ProvenanceDatasetModel(PredictedDatasetModel):
+     """Data model for the datasets in a provenance quantum graph file."""
+
+     exists: bool
+     """Whether this dataset existed immediately after the quantum graph was
+     run.
+
+     This is always `True` for overall input datasets. It is also `True` for
+     datasets that were produced and then removed before/during transfer back to
+     the central butler repository.
+     """
+
+     producer: QuantumIndex | None = None
+     """Internal integer ID of the quantum that produced this dataset.
+
+     This is `None` for overall inputs to the graph.
+     """
+
+     consumers: list[QuantumIndex] = pydantic.Field(default_factory=list)
+     """Internal integer IDs of quanta that were predicted to consume this
+     dataset.
+     """
+
+     @property
+     def node_id(self) -> uuid.UUID:
+         """Alias for the dataset ID."""
+         return self.dataset_id
+
+     @classmethod
+     def from_predicted(
+         cls,
+         predicted: PredictedDatasetModel,
+         producer: QuantumIndex | None = None,
+         consumers: Iterable[QuantumIndex] = (),
+     ) -> ProvenanceDatasetModel:
+         """Construct from a predicted dataset model.
+
+         Parameters
+         ----------
+         predicted : `PredictedDatasetModel`
+             Information about the dataset from the predicted graph.
+         producer : `int` or `None`, optional
+             Internal ID of the quantum that was predicted to produce this
+             dataset.
+         consumers : `~collections.abc.Iterable` [`int`], optional
+             Internal IDs of the quanta that were predicted to consume this
+             dataset.
+
+         Returns
+         -------
+         provenance : `ProvenanceDatasetModel`
+             Provenance dataset model.
+
+         Notes
+         -----
+         This initializes `exists` to `True` when ``producer is None`` and
+         `False` otherwise, on the assumption that it will be updated later.
+         """
+         return cls.model_construct(
+             dataset_id=predicted.dataset_id,
+             dataset_type_name=predicted.dataset_type_name,
+             data_coordinate=predicted.data_coordinate,
+             run=predicted.run,
+             exists=(producer is None),  # if it's not produced by this QG, it's an overall input
+             producer=producer,
+             consumers=list(consumers),
+         )
+
+     def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
+         """Add this dataset, and its edges to quanta, to a provenance graph.
+
+         Parameters
+         ----------
+         graph : `ProvenanceQuantumGraph`
+             Graph to update in place.
+         address_reader : `AddressReader`
+             Reader object that can be used to look up UUIDs from integer
+             indexes.
+
+         Notes
+         -----
+         This method adds:
+
+         - a ``bipartite_xgraph`` dataset node with full attributes;
+         - ``bipartite_xgraph`` edges to adjacent quanta (which adds quantum
+           nodes with no attributes), without populating edge attributes;
+         - ``quantum_only_xgraph`` edges for each pair of quanta in which one
+           produces this dataset and another consumes it (this also adds quantum
+           nodes with no attributes).
+         """
+         dataset_type_node = graph.pipeline_graph.dataset_types[self.dataset_type_name]
+         data_id = DataCoordinate.from_full_values(dataset_type_node.dimensions, tuple(self.data_coordinate))
+         graph._bipartite_xgraph.add_node(
+             self.dataset_id,
+             data_id=data_id,
+             dataset_type_name=self.dataset_type_name,
+             pipeline_node=dataset_type_node,
+             run=self.run,
+             exists=self.exists,
+         )
+         producer_id: uuid.UUID | None = None
+         if self.producer is not None:
+             producer_id = address_reader.find(self.producer).key
+             graph._bipartite_xgraph.add_edge(producer_id, self.dataset_id)
+         for consumer_index in self.consumers:
+             consumer_id = address_reader.find(consumer_index).key
+             graph._bipartite_xgraph.add_edge(self.dataset_id, consumer_id)
+             if producer_id is not None:
+                 graph._quantum_only_xgraph.add_edge(producer_id, consumer_id)
+         graph._datasets_by_type[self.dataset_type_name][data_id] = self.dataset_id
+
+     # Work around the fact that Sphinx chokes on Pydantic docstring formatting
+     # when we inherit those docstrings in our public classes.
+     if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+         def copy(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.copy`."""
+             return super().copy(*args, **kwargs)
+
+         def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_dump`."""
+             return super().model_dump(*args, **kwargs)
+
+         def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_dump_json`."""
+             return super().model_dump_json(*args, **kwargs)
+
+         def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_copy`."""
+             return super().model_copy(*args, **kwargs)
+
+         @classmethod
+         def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+             """See `pydantic.BaseModel.model_construct`."""
+             return super().model_construct(*args, **kwargs)
+
+         @classmethod
+         def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_json_schema`."""
+             return super().model_json_schema(*args, **kwargs)
+
+         @classmethod
+         def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate`."""
+             return super().model_validate(*args, **kwargs)
+
+         @classmethod
+         def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate_json`."""
+             return super().model_validate_json(*args, **kwargs)
+
+         @classmethod
+         def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate_strings`."""
+             return super().model_validate_strings(*args, **kwargs)
+
+
+ class ProvenanceQuantumModel(pydantic.BaseModel):
+     """Data model for the quanta in a provenance quantum graph file."""
+
+     quantum_id: uuid.UUID
+     """Unique identifier for the quantum."""
+
+     task_label: TaskLabel
+     """Label of the task for this quantum.
+
+     Note that full task definitions are stored in the pipeline graph.
+     """
+
+     data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
+     """The full values (required and implied) of this quantum's data ID."""
+
+     status: QuantumRunStatus = QuantumRunStatus.METADATA_MISSING
+     """Enumerated status for the quantum."""
+
+     caveats: QuantumSuccessCaveats | None = None
+     """Flags indicating caveats on successful quanta."""
+
+     exception: ExceptionInfo | None = None
+     """Information about an exception raised when the quantum was executing."""
+
+     inputs: dict[ConnectionName, list[DatasetIndex]] = pydantic.Field(default_factory=dict)
+     """Internal integer IDs of the datasets predicted to be consumed by this
+     quantum, grouped by connection name.
+     """
+
+     outputs: dict[ConnectionName, list[DatasetIndex]] = pydantic.Field(default_factory=dict)
+     """Internal integer IDs of the datasets predicted to be produced by this
+     quantum, grouped by connection name.
+     """
+
+     resource_usage: QuantumResourceUsage | None = None
+     """Resource usage information (timing, memory use) for this quantum."""
+
+     @property
+     def node_id(self) -> uuid.UUID:
+         """Alias for the quantum ID."""
+         return self.quantum_id
+
+     @classmethod
+     def from_predicted(
+         cls, predicted: PredictedQuantumDatasetsModel, indices: Mapping[uuid.UUID, int]
+     ) -> ProvenanceQuantumModel:
+         """Construct from a predicted quantum model.
+
+         Parameters
+         ----------
+         predicted : `PredictedQuantumDatasetsModel`
+             Information about the quantum from the predicted graph.
+         indices : `~collections.abc.Mapping` [`uuid.UUID`, `int`]
+             Mapping from quantum or dataset UUID to internal integer ID.
+
+         Returns
+         -------
+         provenance : `ProvenanceQuantumModel`
+             Provenance quantum model.
+         """
+         inputs = {
+             connection_name: [indices[d.dataset_id] for d in predicted_inputs]
+             for connection_name, predicted_inputs in predicted.inputs.items()
+         }
+         outputs = {
+             connection_name: [indices[d.dataset_id] for d in predicted_outputs]
+             for connection_name, predicted_outputs in predicted.outputs.items()
+         }
+         return cls(
+             quantum_id=predicted.quantum_id,
+             task_label=predicted.task_label,
+             data_coordinate=predicted.data_coordinate,
+             inputs=inputs,
+             outputs=outputs,
+         )
+
+     def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
+         """Add this quantum, and its edges to datasets, to a provenance graph.
+
+         Parameters
+         ----------
+         graph : `ProvenanceQuantumGraph`
+             Graph to update in place.
+         address_reader : `AddressReader`
+             Reader object that can be used to look up UUIDs from integer
+             indexes.
+
+         Notes
+         -----
+         This method adds:
+
+         - a ``bipartite_xgraph`` quantum node with full attributes;
+         - a ``quantum_only_xgraph`` quantum node with full attributes;
+         - ``bipartite_xgraph`` edges to adjacent datasets (which adds dataset
+           nodes with no attributes), while populating those edge attributes;
+         - ``quantum_only_xgraph`` edges to any adjacent quantum that has also
+           already been loaded.
+         """
+         task_node = graph.pipeline_graph.tasks[self.task_label]
+         data_id = DataCoordinate.from_full_values(task_node.dimensions, tuple(self.data_coordinate))
+         graph._bipartite_xgraph.add_node(
+             self.quantum_id,
+             data_id=data_id,
+             task_label=self.task_label,
+             pipeline_node=task_node,
+             status=self.status,
+             caveats=self.caveats,
+             exception=self.exception,
+             resource_usage=self.resource_usage,
+         )
+         for connection_name, dataset_indices in self.inputs.items():
+             read_edge = task_node.get_input_edge(connection_name)
+             for dataset_index in dataset_indices:
+                 dataset_id = address_reader.find(dataset_index).key
+                 graph._bipartite_xgraph.add_edge(dataset_id, self.quantum_id, is_read=True)
+                 graph._bipartite_xgraph.edges[dataset_id, self.quantum_id].setdefault(
+                     "pipeline_edges", []
+                 ).append(read_edge)
+         for connection_name, dataset_indices in self.outputs.items():
+             write_edge = task_node.get_output_edge(connection_name)
+             for dataset_index in dataset_indices:
+                 dataset_id = address_reader.find(dataset_index).key
+                 graph._bipartite_xgraph.add_edge(
+                     self.quantum_id,
+                     dataset_id,
+                     is_read=False,
+                     # There can only be one pipeline edge for an output.
+                     pipeline_edges=[write_edge],
+                 )
+         graph._quanta_by_task_label[self.task_label][data_id] = self.quantum_id
+         graph._quantum_only_xgraph.add_node(self.quantum_id, **graph._bipartite_xgraph.nodes[self.quantum_id])
+         for dataset_id in graph._bipartite_xgraph.predecessors(self.quantum_id):
+             for upstream_quantum_id in graph._bipartite_xgraph.predecessors(dataset_id):
+                 graph._quantum_only_xgraph.add_edge(upstream_quantum_id, self.quantum_id)
+         for dataset_id in graph._bipartite_xgraph.successors(self.quantum_id):
+             for downstream_quantum_id in graph._bipartite_xgraph.successors(dataset_id):
+                 graph._quantum_only_xgraph.add_edge(self.quantum_id, downstream_quantum_id)
+
+     # Work around the fact that Sphinx chokes on Pydantic docstring formatting
+     # when we inherit those docstrings in our public classes.
+     if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+         def copy(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.copy`."""
+             return super().copy(*args, **kwargs)
+
+         def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_dump`."""
+             return super().model_dump(*args, **kwargs)
+
+         def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_dump_json`."""
+             return super().model_dump_json(*args, **kwargs)
+
+         def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_copy`."""
+             return super().model_copy(*args, **kwargs)
+
+         @classmethod
+         def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+             """See `pydantic.BaseModel.model_construct`."""
+             return super().model_construct(*args, **kwargs)
+
+         @classmethod
+         def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_json_schema`."""
+             return super().model_json_schema(*args, **kwargs)
+
+         @classmethod
+         def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate`."""
+             return super().model_validate(*args, **kwargs)
+
+         @classmethod
+         def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate_json`."""
+             return super().model_validate_json(*args, **kwargs)
+
+         @classmethod
+         def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate_strings`."""
+             return super().model_validate_strings(*args, **kwargs)
+
+
+ class ProvenanceInitQuantumModel(pydantic.BaseModel):
+     """Data model for the special "init" quanta in a provenance quantum graph
+     file.
+     """
+
+     quantum_id: uuid.UUID
+     """Unique identifier for the quantum."""
+
+     task_label: TaskLabel
+     """Label of the task for this quantum.
+
+     Note that full task definitions are stored in the pipeline graph.
+     """
+
+     inputs: dict[ConnectionName, DatasetIndex] = pydantic.Field(default_factory=dict)
+     """Internal integer ID of the dataset predicted to be consumed by this
+     quantum, keyed by connection name.
+     """
+
+     outputs: dict[ConnectionName, DatasetIndex] = pydantic.Field(default_factory=dict)
+     """Internal integer ID of the dataset predicted to be produced by this
+     quantum, keyed by connection name.
+     """
+
+     @classmethod
+     def from_predicted(
+         cls, predicted: PredictedQuantumDatasetsModel, indices: Mapping[uuid.UUID, int]
+     ) -> ProvenanceInitQuantumModel:
+         """Construct from a predicted quantum model.
+
+         Parameters
+         ----------
+         predicted : `PredictedQuantumDatasetsModel`
+             Information about the quantum from the predicted graph.
+         indices : `~collections.abc.Mapping` [`uuid.UUID`, `int`]
+             Mapping from quantum or dataset UUID to internal integer ID.
+
+         Returns
+         -------
+         provenance : `ProvenanceInitQuantumModel`
+             Provenance init quantum model.
+         """
+         inputs = {
+             connection_name: indices[predicted_inputs[0].dataset_id]
+             for connection_name, predicted_inputs in predicted.inputs.items()
+         }
+         outputs = {
+             connection_name: indices[predicted_outputs[0].dataset_id]
+             for connection_name, predicted_outputs in predicted.outputs.items()
+         }
+         return cls(
+             quantum_id=predicted.quantum_id,
+             task_label=predicted.task_label,
+             inputs=inputs,
+             outputs=outputs,
+         )
+
+     def _add_to_graph(
+         self,
+         graph: ProvenanceQuantumGraph,
+         address_reader: AddressReader,
+         empty_data_id: DataCoordinate,
+     ) -> None:
+         """Add this quantum, and its edges to datasets, to a provenance graph.
+
+         Parameters
+         ----------
+         graph : `ProvenanceQuantumGraph`
+             Graph to update in place.
+         address_reader : `AddressReader`
+             Reader object that can be used to look up UUIDs from integer
+             indexes.
+         empty_data_id : `lsst.daf.butler.DataCoordinate`
+             The empty data ID for the appropriate dimension universe.
+
+         Notes
+         -----
+         This method adds:
+
+         - a ``bipartite_xgraph`` quantum node with full attributes;
+         - ``bipartite_xgraph`` edges to adjacent datasets (which adds dataset
+           nodes with no attributes), while populating those edge attributes.
+         """
+         task_init_node = graph.pipeline_graph.tasks[self.task_label].init
+         graph._bipartite_xgraph.add_node(
+             self.quantum_id, data_id=empty_data_id, task_label=self.task_label, pipeline_node=task_init_node
+         )
+         for connection_name, dataset_index in self.inputs.items():
+             read_edge = task_init_node.get_input_edge(connection_name)
+             dataset_id = address_reader.find(dataset_index).key
+             graph._bipartite_xgraph.add_edge(dataset_id, self.quantum_id, is_read=True)
+             graph._bipartite_xgraph.edges[dataset_id, self.quantum_id].setdefault(
+                 "pipeline_edges", []
+             ).append(read_edge)
+         for connection_name, dataset_index in self.outputs.items():
+             write_edge = task_init_node.get_output_edge(connection_name)
+             dataset_id = address_reader.find(dataset_index).key
+             graph._bipartite_xgraph.add_edge(
+                 self.quantum_id,
+                 dataset_id,
+                 is_read=False,
+                 # There can only be one pipeline edge for an output.
+                 pipeline_edges=[write_edge],
+             )
+         graph._init_quanta[self.task_label] = self.quantum_id
+
+     # Work around the fact that Sphinx chokes on Pydantic docstring formatting
+     # when we inherit those docstrings in our public classes.
+     if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+         def copy(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.copy`."""
+             return super().copy(*args, **kwargs)
+
+         def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_dump`."""
+             return super().model_dump(*args, **kwargs)
+
+         def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_dump_json`."""
+             return super().model_dump_json(*args, **kwargs)
+
+         def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_copy`."""
+             return super().model_copy(*args, **kwargs)
+
+         @classmethod
+         def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+             """See `pydantic.BaseModel.model_construct`."""
+             return super().model_construct(*args, **kwargs)
+
+         @classmethod
+         def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_json_schema`."""
+             return super().model_json_schema(*args, **kwargs)
+
+         @classmethod
+         def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate`."""
+             return super().model_validate(*args, **kwargs)
+
+         @classmethod
+         def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate_json`."""
+             return super().model_validate_json(*args, **kwargs)
+
+         @classmethod
+         def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_validate_strings`."""
+             return super().model_validate_strings(*args, **kwargs)
+
+
670
+ class ProvenanceInitQuantaModel(pydantic.RootModel):
671
+ """Data model for the init quanta in a provenance graph."""
672
+
673
+ root: list[ProvenanceInitQuantumModel] = pydantic.Field(default_factory=list)
674
+ """List of special "init" quanta, one for each task."""
675
+
676
+ def _add_to_graph(self, graph: ProvenanceQuantumGraph, address_reader: AddressReader) -> None:
677
+ """Add this quantum and its edges to datasets to a provenance graph.
678
+
679
+ Parameters
680
+ ----------
681
+ graph : `ProvenanceQuantumGraph`
682
+ Graph to update in place.
683
+ address_reader : `AddressReader`
684
+ Reader object that can be used to look up UUIDs from integer
685
+ indexes.
686
+ """
687
+ empty_data_id = DataCoordinate.make_empty(graph.pipeline_graph.universe)
688
+ for init_quantum in self.root:
689
+ init_quantum._add_to_graph(graph, address_reader, empty_data_id=empty_data_id)
690
+
691
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
692
+ # when we inherit those docstrings in our public classes.
693
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
694
+
695
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
696
+ """See `pydantic.BaseModel.copy`."""
697
+ return super().copy(*args, **kwargs)
698
+
699
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
700
+ """See `pydantic.BaseModel.model_dump`."""
701
+ return super().model_dump(*args, **kwargs)
702
+
703
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
704
+ """See `pydantic.BaseModel.model_dump_json`."""
705
+ return super().model_dump(*args, **kwargs)
706
+
707
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
708
+ """See `pydantic.BaseModel.model_copy`."""
709
+ return super().model_copy(*args, **kwargs)
710
+
711
+ @classmethod
712
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
713
+ """See `pydantic.BaseModel.model_construct`."""
714
+ return super().model_construct(*args, **kwargs)
715
+
716
+ @classmethod
717
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
718
+ """See `pydantic.BaseModel.model_json_schema`."""
719
+ return super().model_json_schema(*args, **kwargs)
720
+
721
+ @classmethod
722
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
723
+ """See `pydantic.BaseModel.model_validate`."""
724
+ return super().model_validate(*args, **kwargs)
725
+
726
+ @classmethod
727
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
728
+ """See `pydantic.BaseModel.model_validate_json`."""
729
+ return super().model_validate_json(*args, **kwargs)
730
+
731
+ @classmethod
732
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
733
+ """See `pydantic.BaseModel.model_validate_strings`."""
734
+ return super().model_validate_strings(*args, **kwargs)
735
+
736
+
737
+ class ProvenanceQuantumGraph(BaseQuantumGraph):
738
+ """A quantum graph that represents processing that has already been
739
+ executed.
740
+
741
+ Parameters
742
+ ----------
743
+ header : `HeaderModel`
744
+ General metadata shared with other quantum graph types.
745
+ pipeline_graph : `.pipeline_graph.PipelineGraph`
746
+ Graph of tasks and dataset types. May contain a superset of the tasks
747
+ and dataset types that actually have quanta and datasets in the quantum
748
+ graph.
749
+
750
+ Notes
751
+ -----
752
+ A provenance quantum graph is generally obtained via the
753
+ `ProvenanceQuantumGraphReader.graph` attribute, which is updated in-place
754
+ as information is read from disk.
755
+ """
756
+
757
+ def __init__(self, header: HeaderModel, pipeline_graph: PipelineGraph) -> None:
758
+ super().__init__(header, pipeline_graph)
759
+ self._init_quanta: dict[TaskLabel, uuid.UUID] = {}
760
+ self._quantum_only_xgraph = networkx.DiGraph()
761
+ self._bipartite_xgraph = networkx.DiGraph()
762
+ self._quanta_by_task_label: dict[str, dict[DataCoordinate, uuid.UUID]] = {
763
+ task_label: {} for task_label in self.pipeline_graph.tasks.keys()
764
+ }
765
+ self._datasets_by_type: dict[str, dict[DataCoordinate, uuid.UUID]] = {
766
+ dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
767
+ }
768
+
769
+ @property
770
+ def init_quanta(self) -> Mapping[TaskLabel, uuid.UUID]:
771
+ """A mapping from task label to the ID of the special init quantum for
772
+ that task.
773
+
774
+ This is populated by the ``init_quanta`` component. Additional
775
+ information about each init quantum can be found by using the ID to
776
+ look up node attributes in the `bipartite_xgraph`, i.e.::
777
+
778
+ info: ProvenanceInitQuantumInfo = qg.bipartite_xgraph.nodes[id]
779
+ """
780
+ return self._init_quanta
781
+
782
+ @property
783
+ def quanta_by_task(self) -> Mapping[TaskLabel, Mapping[DataCoordinate, uuid.UUID]]:
784
+ """A nested mapping of all quanta, keyed first by task name and then by
785
+ data ID.
786
+
787
+ Notes
788
+ -----
789
+ This is populated one quantum at a time as they are read. All tasks in
790
+ the pipeline graph are included, even if none of their quanta were
791
+ loaded (i.e. nested mappings may be empty).
792
+
793
+ The returned object may be an internal dictionary; as the type
794
+ annotation indicates, it should not be modified in place.
795
+ """
796
+ return self._quanta_by_task_label
797
+
798
+ @property
799
+ def datasets_by_type(self) -> Mapping[DatasetTypeName, Mapping[DataCoordinate, uuid.UUID]]:
800
+ """A nested mapping of all datasets, keyed first by dataset type name
801
+ and then by data ID.
802
+
803
+ Notes
804
+ -----
805
+ This is populated one dataset at a time as they are read. All dataset
806
+ types in the pipeline graph are included, even if none of their
807
+ datasets were loaded (i.e. nested mappings may be empty).
808
+
809
+ The returned object may be an internal dictionary; as the type
810
+ annotation indicates, it should not be modified in place.
811
+ """
812
+ return self._datasets_by_type
813
+
814
+ @property
815
+ def quantum_only_xgraph(self) -> networkx.DiGraph:
816
+ """A directed acyclic graph with quanta as nodes (and datasets elided).
817
+
818
+ Notes
819
+ -----
820
+ Node keys are quantum UUIDs, and are populated one quantum at a time as
821
+ they are loaded. Loading quanta (via
822
+ `ProvenanceQuantumGraphReader.read_quanta`) will add the loaded nodes
823
+ with full attributes and add edges to adjacent nodes with no
824
+ attributes. Loading datasets (via
825
+ `ProvenanceQuantumGraphReader.read_datasets`) will also add edges and
826
+ nodes with no attributes.
827
+
828
+ Node attributes are described by the `ProvenanceQuantumInfo` types.
829
+
830
+ This graph does not include special "init" quanta.
831
+
832
+ The returned object is a read-only view of an internal one.
833
+ """
834
+ return self._quantum_only_xgraph.copy(as_view=True)
835
+
836
+ @property
837
+ def bipartite_xgraph(self) -> networkx.DiGraph:
838
+ """A directed acyclic graph with quantum and dataset nodes.
839
+
840
+ Notes
841
+ -----
842
+ Node keys are quantum or dataset UUIDs, and are populated one quantum
843
+ or dataset at a time as they are loaded. Loading quanta (via
844
+ `ProvenanceQuantumGraphReader.read_quanta`) or datasets (via
845
+ `ProvenanceQuantumGraphReader.read_datasets`) will load those nodes
846
+ with full attributes and edges to adjacent nodes with no attributes.
847
+ Loading quanta necessary to populate edge attributes.
848
+
849
+ Node attributes are described by the
850
+ `ProvenanceQuantumInfo`, `ProvenanceInitQuantumInfo`, and
851
+ `ProvenanceDatasetInfo` types.
852
+
853
+ This graph includes init-input and init-output datasets, but it does
854
+ *not* reflect the dependency between each task's special "init" quantum
855
+ and its runtime quanta (as this would require edges between quanta, and
856
+ that would break the "bipartite" property).
857
+
858
+ The returned object is a read-only view of an internal one.
859
+ """
860
+ return self._bipartite_xgraph.copy(as_view=True)
861
+
862
+ def make_quantum_table(self) -> astropy.table.Table:
863
+ """Construct an `astropy.table.Table` with a tabular summary of the
864
+ quanta.
865
+
866
+ Returns
867
+ -------
868
+ table : `astropy.table.Table`
869
+ A table view of the quantum information. This only includes
870
+ counts of status categories and caveats, not any per-data-ID
871
+ detail.
872
+
873
+ Notes
874
+ -----
875
+ Success caveats in the table are represented by their
876
+ `~QuantumSuccessCaveats.concise` form, so when pretty-printing this
877
+ table for users, the `~QuantumSuccessCaveats.legend` should generally
878
+ be printed as well.
879
+ """
880
+ rows = []
881
+ for task_label, quanta_for_task in self.quanta_by_task.items():
882
+ if not self.header.n_task_quanta[task_label]:
883
+ continue
884
+ status_counts = Counter[QuantumRunStatus](
885
+ self._quantum_only_xgraph.nodes[q]["status"] for q in quanta_for_task.values()
886
+ )
887
+ caveat_counts = Counter[QuantumSuccessCaveats | None](
888
+ self._quantum_only_xgraph.nodes[q]["caveats"] for q in quanta_for_task.values()
889
+ )
890
+ caveat_counts.pop(QuantumSuccessCaveats.NO_CAVEATS, None)
891
+ caveat_counts.pop(None, None)
892
+ if len(caveat_counts) > 1:
893
+ caveats = "(multiple)"
894
+ elif len(caveat_counts) == 1:
895
+ ((code, count),) = caveat_counts.items()
896
+ # MyPy can't tell that the pop(None, None) above makes None
897
+ # impossible here.
898
+ caveats = f"{code.concise()}({count})" # type: ignore[union-attr]
899
+ else:
900
+ caveats = ""
901
+ rows.append(
902
+ {
903
+ "Task": task_label,
904
+ "Unknown": status_counts.get(QuantumRunStatus.METADATA_MISSING, 0),
905
+ "Successful": status_counts.get(QuantumRunStatus.SUCCESSFUL, 0),
906
+ "Caveats": caveats,
907
+ "Blocked": status_counts.get(QuantumRunStatus.BLOCKED, 0),
908
+ "Failed": status_counts.get(QuantumRunStatus.FAILED, 0),
909
+ "TOTAL": len(quanta_for_task),
910
+ "EXPECTED": self.header.n_task_quanta[task_label],
911
+ }
912
+ )
913
+ return astropy.table.Table(rows)
914
+
915
+ def make_exception_table(self) -> astropy.table.Table:
916
+ """Construct an `astropy.table.Table` with counts for each exception
917
+ type raised by each task.
918
+
919
+ At present this only includes information from partial-outputs-error
920
+ successes, since exception information for failures is not tracked.
921
+ This may change in the future.
922
+
923
+ Returns
924
+ -------
925
+ table : `astropy.table.Table`
926
+ A table with columns for task label, exception type, and counts.
927
+ """
928
+ rows = []
929
+ for task_label, quanta_for_task in self.quanta_by_task.items():
930
+ counts_by_type = Counter(
931
+ exc_info.type_name
932
+ for q in quanta_for_task.values()
933
+ if (exc_info := self._quantum_only_xgraph.nodes[q]["exception"]) is not None
934
+ )
935
+ for type_name, count in counts_by_type.items():
936
+ rows.append({"Task": task_label, "Exception": type_name, "Count": count})
937
+ return astropy.table.Table(rows)
938
+
939
+ def make_task_resource_usage_table(
940
+ self, task_label: TaskLabel, include_data_ids: bool = False
941
+ ) -> astropy.table.Table:
942
+ """Make a table of resource usage for a single task.
943
+
944
+ Parameters
945
+ ----------
946
+ task_label : `str`
947
+ Label of the task to extract resource usage for.
948
+ include_data_ids : `bool`, optional
949
+ Whether to also include data ID columns.
950
+
951
+ Returns
952
+ -------
953
+ table : `astropy.table.Table`
954
+ A table with columns for quantum ID and all fields in
955
+ `QuantumResourceUsage`.
956
+ """
957
+ quanta_for_task = self.quanta_by_task[task_label]
958
+ dtype_terms: list[tuple[str, np.dtype]] = [("quantum_id", np.dtype((np.void, 16)))]
959
+ if include_data_ids:
960
+ dimensions = self.pipeline_graph.tasks[task_label].dimensions
961
+ for dimension_name in dimensions.data_coordinate_keys:
962
+ dtype = np.dtype(self.pipeline_graph.universe.dimensions[dimension_name].primary_key.pytype)
963
+ dtype_terms.append((dimension_name, dtype))
964
+ fields = QuantumResourceUsage.get_numpy_fields()
965
+ dtype_terms.extend(fields.items())
966
+ row_dtype = np.dtype(dtype_terms)
967
+ rows: list[object] = []
968
+ for data_id, quantum_id in quanta_for_task.items():
969
+ info: ProvenanceQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
970
+ if (resource_usage := info["resource_usage"]) is not None:
971
+ row: tuple[object, ...] = (quantum_id.bytes,)
972
+ if include_data_ids:
973
+ row += data_id.full_values
974
+ row += resource_usage.get_numpy_row()
975
+ rows.append(row)
976
+ array = np.array(rows, dtype=row_dtype)
977
+ return astropy.table.Table(array, units=QuantumResourceUsage.get_units())
978
+
979
+
+ @dataclasses.dataclass
+ class ProvenanceQuantumGraphReader(BaseQuantumGraphReader):
+     """A helper class for reading provenance quantum graphs.
+
+     Notes
+     -----
+     The `open` context manager should be used to construct new instances.
+     Instances cannot be used after the context manager exits, except to access
+     the `graph` attribute.
+
+     The various ``read_*`` methods in this class update the `graph` attribute
+     in place and return ``self``.
+     """
+
+     graph: ProvenanceQuantumGraph = dataclasses.field(init=False)
+     """Loaded provenance graph, populated in place as components are read."""
+
+     @classmethod
+     @contextmanager
+     def open(
+         cls,
+         uri: ResourcePathExpression,
+         *,
+         page_size: int | None = None,
+         import_mode: TaskImportMode = TaskImportMode.DO_NOT_IMPORT,
+     ) -> Iterator[ProvenanceQuantumGraphReader]:
+         """Construct a reader from a URI.
+
+         Parameters
+         ----------
+         uri : convertible to `lsst.resources.ResourcePath`
+             URI to open. Should have a ``.qg`` extension.
+         page_size : `int`, optional
+             Approximate number of bytes to read at once from address files and
+             multi-block files. Note that this does not set a page size for
+             *all* reads, but it does affect the smallest, most numerous reads.
+             Can also be set via the ``LSST_QG_PAGE_SIZE`` environment variable.
+         import_mode : `.pipeline_graph.TaskImportMode`, optional
+             How to handle importing the task classes referenced in the pipeline
+             graph.
+
+         Returns
+         -------
+         reader : `contextlib.AbstractContextManager` [ \
+                 `ProvenanceQuantumGraphReader` ]
+             A context manager that returns the reader when entered.
+         """
+         with cls._open(
+             uri,
+             graph_type="provenance",
+             address_filename="nodes",
+             page_size=page_size,
+             import_mode=import_mode,
+             n_addresses=4,
+         ) as self:
+             yield self
+
+     def __post_init__(self) -> None:
+         self.graph = ProvenanceQuantumGraph(self.header, self.pipeline_graph)
+
+     def read_init_quanta(self) -> Self:
+         """Read the special per-task "init" quanta, including their edges to
+         init-input and init-output datasets.
+
+         Returns
+         -------
+         self : `ProvenanceQuantumGraphReader`
+             The reader (to permit method-chaining).
+         """
+         init_quanta = self._read_single_block("init_quanta", ProvenanceInitQuantaModel)
+         for init_quantum in init_quanta.root:
+             self.graph._init_quanta[init_quantum.task_label] = init_quantum.quantum_id
+         init_quanta._add_to_graph(self.graph, self.address_reader)
+         return self
+
+     def read_full_graph(self) -> Self:
+         """Read all bipartite edges and all quantum and dataset node
+         attributes, fully populating the `graph` attribute.
+
+         Returns
+         -------
+         self : `ProvenanceQuantumGraphReader`
+             The reader (to permit method-chaining).
+
+         Notes
+         -----
+         This does not read logs, metadata, or packages; those must always be
+         fetched explicitly.
+         """
+         self.read_init_quanta()
+         self.read_datasets()
+         self.read_quanta()
+         return self
+
+     def read_datasets(self, datasets: Iterable[uuid.UUID | DatasetIndex] | None = None) -> Self:
+         """Read information about the given datasets.
+
+         Parameters
+         ----------
+         datasets : `~collections.abc.Iterable` [`uuid.UUID` or `int`], optional
+             Iterable of dataset IDs or indices to load. If not provided, all
+             datasets will be loaded. The UUIDs and indices of quanta will be
+             ignored.
+
+         Returns
+         -------
+         self : `ProvenanceQuantumGraphReader`
+             The reader (to permit method-chaining).
+         """
+         return self._read_nodes(datasets, DATASET_ADDRESS_INDEX, DATASET_MB_NAME, ProvenanceDatasetModel)
+
+     def read_quanta(self, quanta: Iterable[uuid.UUID | QuantumIndex] | None = None) -> Self:
+         """Read information about the given quanta.
+
+         Parameters
+         ----------
+         quanta : `~collections.abc.Iterable` [`uuid.UUID` or `int`], optional
+             Iterable of quantum IDs or indices to load. If not provided, all
+             quanta will be loaded. The UUIDs and indices of datasets and
+             special init quanta will be ignored.
+
+         Returns
+         -------
+         self : `ProvenanceQuantumGraphReader`
+             The reader (to permit method-chaining).
+         """
+         return self._read_nodes(quanta, QUANTUM_ADDRESS_INDEX, QUANTUM_MB_NAME, ProvenanceQuantumModel)
+
+     def _read_nodes(
+         self,
+         nodes: Iterable[uuid.UUID | int] | None,
+         address_index: int,
+         mb_name: str,
+         model_type: type[ProvenanceDatasetModel] | type[ProvenanceQuantumModel],
+     ) -> Self:
+         node: ProvenanceDatasetModel | ProvenanceQuantumModel | None
+         if nodes is None:
+             self.address_reader.read_all()
+             nodes = self.address_reader.rows.keys()
+             for node in MultiblockReader.read_all_models_in_zip(
+                 self.zf,
+                 mb_name,
+                 model_type,
+                 self.decompressor,
+                 int_size=self.header.int_size,
+                 page_size=self.page_size,
+             ):
+                 if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(node.node_id, {}):
+                     # Use the old node to reduce memory usage (since it might
+                     # also have other outstanding reference holders).
+                     continue
+                 node._add_to_graph(self.graph, self.address_reader)
+         with MultiblockReader.open_in_zip(self.zf, mb_name, int_size=self.header.int_size) as mb_reader:
+             for node_id_or_index in nodes:
+                 address_row = self.address_reader.find(node_id_or_index)
+                 if "pipeline_node" in self.graph._bipartite_xgraph.nodes.get(address_row.key, {}):
+                     # Use the old node to reduce memory usage (since it might
+                     # also have other outstanding reference holders).
+                     continue
+                 node = mb_reader.read_model(
+                     address_row.addresses[address_index], model_type, self.decompressor
+                 )
+                 if node is not None:
+                     node._add_to_graph(self.graph, self.address_reader)
+         return self
+
+     def fetch_logs(
+         self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
+     ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords]:
+         """Fetch log datasets.
+
+         Parameters
+         ----------
+         nodes : `~collections.abc.Iterable` [`uuid.UUID` or `int`]
+             UUIDs or internal integer indices of the log datasets themselves
+             or of the quanta they correspond to.
+
+         Returns
+         -------
+         logs : `dict` [`uuid.UUID` or `int`, `ButlerLogRecords`]
+             Logs for the given IDs.
+         """
+         from lsst.daf.butler.logging import ButlerLogRecords
+
+         result: dict[uuid.UUID | DatasetIndex | QuantumIndex, ButlerLogRecords] = {}
+         with MultiblockReader.open_in_zip(self.zf, LOG_MB_NAME, int_size=self.header.int_size) as mb_reader:
+             for node_id_or_index in nodes:
+                 address_row = self.address_reader.find(node_id_or_index)
+                 log = mb_reader.read_model(
+                     address_row.addresses[LOG_ADDRESS_INDEX], ButlerLogRecords, self.decompressor
+                 )
+                 if log is not None:
+                     result[node_id_or_index] = log
+         return result
+
+     def fetch_metadata(
+         self, nodes: Iterable[uuid.UUID | DatasetIndex | QuantumIndex]
+     ) -> dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata]:
+         """Fetch metadata datasets.
+
+         Parameters
+         ----------
+         nodes : `~collections.abc.Iterable` [`uuid.UUID` or `int`]
+             UUIDs or internal integer indices of the metadata datasets
+             themselves or of the quanta they correspond to.
+
+         Returns
+         -------
+         metadata : `dict` [`uuid.UUID` or `int`, `TaskMetadata`]
+             Metadata for the given IDs.
+         """
+         from .._task_metadata import TaskMetadata
+
+         result: dict[uuid.UUID | DatasetIndex | QuantumIndex, TaskMetadata] = {}
+         with MultiblockReader.open_in_zip(
+             self.zf, METADATA_MB_NAME, int_size=self.header.int_size
+         ) as mb_reader:
+             for node_id_or_index in nodes:
+                 address_row = self.address_reader.find(node_id_or_index)
+                 metadata = mb_reader.read_model(
+                     address_row.addresses[METADATA_ADDRESS_INDEX], TaskMetadata, self.decompressor
+                 )
+                 if metadata is not None:
+                     result[node_id_or_index] = metadata
+         return result
+
+     def fetch_packages(self) -> Packages:
+         """Fetch package version information."""
+         data = self._read_single_block_raw("packages")
+         return Packages.fromBytes(data, format="json")
+ return Packages.fromBytes(data, format="json")