lsst-pipe-base 29.2025.3900-py3-none-any.whl → 29.2025.4000-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. lsst/pipe/base/dot_tools.py +14 -152
  2. lsst/pipe/base/exec_fixup_data_id.py +17 -44
  3. lsst/pipe/base/execution_graph_fixup.py +49 -18
  4. lsst/pipe/base/graph/graph.py +28 -9
  5. lsst/pipe/base/graph_walker.py +119 -0
  6. lsst/pipe/base/log_capture.py +5 -2
  7. lsst/pipe/base/mermaid_tools.py +11 -64
  8. lsst/pipe/base/mp_graph_executor.py +298 -236
  9. lsst/pipe/base/quantum_graph/__init__.py +32 -0
  10. lsst/pipe/base/quantum_graph/_common.py +610 -0
  11. lsst/pipe/base/quantum_graph/_multiblock.py +737 -0
  12. lsst/pipe/base/quantum_graph/_predicted.py +1874 -0
  13. lsst/pipe/base/quantum_graph/visualization.py +302 -0
  14. lsst/pipe/base/quantum_graph_builder.py +292 -34
  15. lsst/pipe/base/quantum_graph_executor.py +2 -1
  16. lsst/pipe/base/quantum_provenance_graph.py +16 -7
  17. lsst/pipe/base/separable_pipeline_executor.py +126 -15
  18. lsst/pipe/base/simple_pipeline_executor.py +44 -43
  19. lsst/pipe/base/single_quantum_executor.py +1 -40
  20. lsst/pipe/base/tests/mocks/__init__.py +1 -1
  21. lsst/pipe/base/tests/mocks/_pipeline_task.py +16 -1
  22. lsst/pipe/base/tests/mocks/{_in_memory_repo.py → _repo.py} +324 -45
  23. lsst/pipe/base/tests/mocks/_storage_class.py +6 -0
  24. lsst/pipe/base/tests/simpleQGraph.py +11 -5
  25. lsst/pipe/base/version.py +1 -1
  26. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/METADATA +2 -1
  27. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/RECORD +35 -29
  28. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/WHEEL +0 -0
  29. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/entry_points.txt +0 -0
  30. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/COPYRIGHT +0 -0
  31. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/LICENSE +0 -0
  32. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/bsd_license.txt +0 -0
  33. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/gpl-v3.0.txt +0 -0
  34. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/top_level.txt +0 -0
  35. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/zip-safe +0 -0
@@ -0,0 +1,1874 @@
1
+ # This file is part of pipe_base.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (http://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # This software is dual licensed under the GNU General Public License and also
10
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
11
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12
+ # respectively. If you choose the GPL option then the following text applies
13
+ # (but note that there is still no warranty even if you opt for BSD instead):
14
+ #
15
+ # This program is free software: you can redistribute it and/or modify
16
+ # it under the terms of the GNU General Public License as published by
17
+ # the Free Software Foundation, either version 3 of the License, or
18
+ # (at your option) any later version.
19
+ #
20
+ # This program is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ # GNU General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU General Public License
26
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
+
28
+ from __future__ import annotations
29
+
30
+ __all__ = (
31
+ "PredictedDatasetInfo",
32
+ "PredictedDatasetModel",
33
+ "PredictedInitQuantaModel",
34
+ "PredictedQuantumDatasetsModel",
35
+ "PredictedQuantumGraph",
36
+ "PredictedQuantumGraphComponents",
37
+ "PredictedQuantumGraphReader",
38
+ "PredictedQuantumInfo",
39
+ "PredictedThinGraphModel",
40
+ "PredictedThinQuantumModel",
41
+ )
42
+
43
+ import dataclasses
44
+ import itertools
45
+ import logging
46
+ import operator
47
+ import sys
48
+ import uuid
49
+ import warnings
50
+ from collections import defaultdict
51
+ from collections.abc import Iterable, Iterator, Mapping, Sequence
52
+ from contextlib import AbstractContextManager, contextmanager
53
+ from typing import TYPE_CHECKING, Any, TypeVar, cast
54
+
55
+ import networkx
56
+ import networkx.algorithms.bipartite
57
+ import pydantic
58
+ import zstandard
59
+
60
+ from lsst.daf.butler import (
61
+ Config,
62
+ DataCoordinate,
63
+ DataIdValue,
64
+ DatasetRef,
65
+ DatasetType,
66
+ DimensionDataAttacher,
67
+ DimensionDataExtractor,
68
+ DimensionGroup,
69
+ DimensionRecordSetDeserializer,
70
+ LimitedButler,
71
+ Quantum,
72
+ QuantumBackedButler,
73
+ SerializableDimensionData,
74
+ )
75
+ from lsst.daf.butler.datastore.record_data import DatastoreRecordData, SerializedDatastoreRecordData
76
+ from lsst.daf.butler.registry import ConflictingDefinitionError
77
+ from lsst.resources import ResourcePath, ResourcePathExpression
78
+ from lsst.utils.packages import Packages
79
+
80
+ from .. import automatic_connection_constants as acc
81
+ from ..pipeline import TaskDef
82
+ from ..pipeline_graph import (
83
+ PipelineGraph,
84
+ TaskImportMode,
85
+ TaskInitNode,
86
+ TaskNode,
87
+ compare_packages,
88
+ log_config_mismatch,
89
+ )
90
+ from ._common import (
91
+ BaseQuantumGraph,
92
+ BaseQuantumGraphReader,
93
+ BaseQuantumGraphWriter,
94
+ ConnectionName,
95
+ DataCoordinateValues,
96
+ DatasetInfo,
97
+ DatasetTypeName,
98
+ DatastoreName,
99
+ HeaderModel,
100
+ IncompleteQuantumGraphError,
101
+ QuantumIndex,
102
+ QuantumInfo,
103
+ TaskLabel,
104
+ )
105
+ from ._multiblock import DEFAULT_PAGE_SIZE, MultiblockReader, MultiblockWriter
106
+
107
+ if TYPE_CHECKING:
108
+ from ..config import PipelineTaskConfig
109
+ from ..graph import QgraphSummary, QuantumGraph
110
+
111
+ _LOG = logging.getLogger(__name__)
112
+
113
+
114
+ _T = TypeVar("_T", bound=pydantic.BaseModel)
115
+
116
+
117
+ class PredictedThinQuantumModel(pydantic.BaseModel):
118
+ """Data model for a quantum data ID and internal integer ID in a predicted
119
+ quantum graph.
120
+ """
121
+
122
+ quantum_index: QuantumIndex
123
+ """Internal integer ID for this quantum."""
124
+
125
+ data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
126
+ """Full (required and implied) data coordinate values for this quantum."""
127
+
128
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting
129
+ # when we inherit those docstrings in our public classes.
130
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
131
+
132
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
133
+ """See `pydantic.BaseModel.copy`."""
134
+ return super().copy(*args, **kwargs)
135
+
136
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
137
+ """See `pydantic.BaseModel.model_dump`."""
138
+ return super().model_dump(*args, **kwargs)
139
+
140
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
141
+ """See `pydantic.BaseModel.model_dump_json`."""
142
+ return super().model_dump_json(*args, **kwargs)
143
+
144
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
145
+ """See `pydantic.BaseModel.model_copy`."""
146
+ return super().model_copy(*args, **kwargs)
147
+
148
+ @classmethod
149
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
150
+ """See `pydantic.BaseModel.model_construct`."""
151
+ return super().model_construct(*args, **kwargs)
152
+
153
+ @classmethod
154
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
155
+ """See `pydantic.BaseModel.model_json_schema`."""
156
+ return super().model_json_schema(*args, **kwargs)
157
+
158
+
159
+ class PredictedThinGraphModel(pydantic.BaseModel):
160
+ """Data model for the predicted quantum graph component that maps each
161
+ task label to the data IDs and internal integer IDs of its quanta.
162
+ """
163
+
164
+ quanta: dict[TaskLabel, list[PredictedThinQuantumModel]] = pydantic.Field(default_factory=dict)
165
+ """Minimal descriptions of all quanta, grouped by task label."""
166
+
167
+ edges: list[tuple[QuantumIndex, QuantumIndex]] = pydantic.Field(default_factory=list)
168
+ """Pairs of (predecessor, successor) internal integer quantum IDs."""
169
+
170
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting
171
+ # when we inherit those docstrings in our public classes.
172
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
173
+
174
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
175
+ """See `pydantic.BaseModel.copy`."""
176
+ return super().copy(*args, **kwargs)
177
+
178
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
179
+ """See `pydantic.BaseModel.model_dump`."""
180
+ return super().model_dump(*args, **kwargs)
181
+
182
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
183
+ """See `pydantic.BaseModel.model_dump_json`."""
184
+ return super().model_dump_json(*args, **kwargs)
185
+
186
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
187
+ """See `pydantic.BaseModel.model_copy`."""
188
+ return super().model_copy(*args, **kwargs)
189
+
190
+ @classmethod
191
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
192
+ """See `pydantic.BaseModel.model_construct`."""
193
+ return super().model_construct(*args, **kwargs)
194
+
195
+ @classmethod
196
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
197
+ """See `pydantic.BaseModel.model_json_schema`."""
198
+ return super().model_json_schema(*args, **kwargs)
199
+
200
+
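Taken together, ``quanta`` and ``edges`` are enough to rebuild the quantum-only graph without loading any dataset information. A minimal sketch of that reconstruction, assuming an ``indices`` mapping from quantum UUID to internal integer index (the same mapping `PredictedQuantumGraph._add_thin_graph` consumes later in this file); the helper name is hypothetical::

    import uuid

    import networkx

    def thin_graph_to_xgraph(
        thin_graph: PredictedThinGraphModel,
        indices: dict[uuid.UUID, int],
    ) -> networkx.DiGraph:
        # Invert the UUID -> index mapping so nodes are keyed by UUID.
        uuid_by_index = {index: quantum_id for quantum_id, index in indices.items()}
        xgraph = networkx.DiGraph()
        for task_label, thin_quanta in thin_graph.quanta.items():
            for thin_quantum in thin_quanta:
                xgraph.add_node(uuid_by_index[thin_quantum.quantum_index], task_label=task_label)
        for predecessor, successor in thin_graph.edges:
            xgraph.add_edge(uuid_by_index[predecessor], uuid_by_index[successor])
        return xgraph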
201
+ class PredictedDatasetModel(pydantic.BaseModel):
202
+ """Data model for the datasets in a predicted quantum graph file."""
203
+
204
+ dataset_id: uuid.UUID
205
+ """Universally unique ID for the dataset."""
206
+
207
+ dataset_type_name: DatasetTypeName
208
+ """Name of the type of this dataset.
209
+
210
+ This is always a parent dataset type name, not a component.
211
+
212
+ Note that full dataset type definitions are stored in the pipeline graph.
213
+ """
214
+
215
+ data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
216
+ """The full values (required and implied) of this dataset's data ID."""
217
+
218
+ run: str
219
+ """This dataset's RUN collection name."""
220
+
221
+ @classmethod
222
+ def from_dataset_ref(cls, ref: DatasetRef) -> PredictedDatasetModel:
223
+ """Construct from a butler `~lsst.daf.butler.DatasetRef`.
224
+
225
+ Parameters
226
+ ----------
227
+ ref : `lsst.daf.butler.DatasetRef`
228
+ Dataset reference.
229
+
230
+ Returns
231
+ -------
232
+ model : `PredictedDatasetModel`
233
+ Model for the dataset.
234
+ """
235
+ dataset_type_name, _ = DatasetType.splitDatasetTypeName(ref.datasetType.name)
236
+ return cls.model_construct(
237
+ dataset_id=ref.id,
238
+ dataset_type_name=dataset_type_name,
239
+ data_coordinate=list(ref.dataId.full_values),
240
+ run=ref.run,
241
+ )
242
+
243
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting
244
+ # when we inherit those docstrings in our public classes.
245
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
246
+
247
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
248
+ """See `pydantic.BaseModel.copy`."""
249
+ return super().copy(*args, **kwargs)
250
+
251
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
252
+ """See `pydantic.BaseModel.model_dump`."""
253
+ return super().model_dump(*args, **kwargs)
254
+
255
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
256
+ """See `pydantic.BaseModel.model_dump_json`."""
257
+ return super().model_dump_json(*args, **kwargs)
258
+
259
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
260
+ """See `pydantic.BaseModel.model_copy`."""
261
+ return super().model_copy(*args, **kwargs)
262
+
263
+ @classmethod
264
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
265
+ """See `pydantic.BaseModel.model_construct`."""
266
+ return super().model_construct(*args, **kwargs)
267
+
268
+ @classmethod
269
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
270
+ """See `pydantic.BaseModel.model_json_schema`."""
271
+ return super().model_json_schema(*args, **kwargs)
272
+
273
+
274
+ class PredictedQuantumDatasetsModel(pydantic.BaseModel):
275
+ """Data model for a description of a single predicted quantum that includes
276
+ its inputs and outputs.
277
+ """
278
+
279
+ quantum_id: uuid.UUID
280
+ """Universally unique ID for the quantum."""
281
+
282
+ task_label: TaskLabel
283
+ """Label of the task.
284
+
285
+ Note that task label definitions are stored in the pipeline graph.
286
+ """
287
+
288
+ data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
289
+ """The full values (required and implied) of this quantum's data ID."""
290
+
291
+ inputs: dict[ConnectionName, list[PredictedDatasetModel]] = pydantic.Field(default_factory=dict)
292
+ """The input datasets to this quantum, grouped by connection name."""
293
+
294
+ outputs: dict[ConnectionName, list[PredictedDatasetModel]] = pydantic.Field(default_factory=dict)
295
+ """The datasets output by this quantum, grouped by connection name."""
296
+
297
+ datastore_records: dict[DatastoreName, SerializedDatastoreRecordData] = pydantic.Field(
298
+ default_factory=dict
299
+ )
300
+ """Datastore records for inputs to this quantum that are already present in
301
+ the data repository.
302
+ """
303
+
304
+ def iter_dataset_ids(self) -> Iterator[uuid.UUID]:
305
+ """Return an iterator over the UUIDs of all datasets referenced by this
306
+ quantum.
307
+
308
+ Returns
309
+ -------
310
+ iter : `~collections.abc.Iterator` [ `uuid.UUID` ]
311
+ Iterator over dataset IDs.
312
+ """
313
+ for datasets in itertools.chain(self.inputs.values(), self.outputs.values()):
314
+ for dataset in datasets:
315
+ yield dataset.dataset_id
316
+
317
+ def deserialize_datastore_records(self) -> dict[DatastoreName, DatastoreRecordData]:
318
+ """Deserialize the mapping of datastore records."""
319
+ return {
320
+ datastore_name: DatastoreRecordData.from_simple(serialized_records)
321
+ for datastore_name, serialized_records in self.datastore_records.items()
322
+ }
323
+
324
+ @classmethod
325
+ def from_execution_quantum(
326
+ cls, task_node: TaskNode, quantum: Quantum, quantum_id: uuid.UUID
327
+ ) -> PredictedQuantumDatasetsModel:
328
+ """Construct from an `lsst.daf.butler.Quantum` instance.
329
+
330
+ Parameters
331
+ ----------
332
+ task_node : `.pipeline_graph.TaskNode`
333
+ Task node from the pipeline graph.
334
+ quantum : `lsst.daf.butler.Quantum`
335
+ Quantum object.
336
+ quantum_id : `uuid.UUID`
337
+ ID for this quantum.
338
+
339
+ Returns
340
+ -------
341
+ model : `PredictedQuantumDatasetsModel`
342
+ Model for this quantum.
343
+ """
344
+ result: PredictedQuantumDatasetsModel = cls.model_construct(
345
+ quantum_id=quantum_id,
346
+ task_label=task_node.label,
347
+ data_coordinate=list(cast(DataCoordinate, quantum.dataId).full_values),
348
+ )
349
+ for read_edge in task_node.iter_all_inputs():
350
+ refs = sorted(quantum.inputs[read_edge.dataset_type_name], key=lambda ref: ref.dataId)
351
+ result.inputs[read_edge.connection_name] = [
352
+ PredictedDatasetModel.from_dataset_ref(ref) for ref in refs
353
+ ]
354
+ for write_edge in task_node.iter_all_outputs():
355
+ refs = sorted(quantum.outputs[write_edge.dataset_type_name], key=lambda ref: ref.dataId)
356
+ result.outputs[write_edge.connection_name] = [
357
+ PredictedDatasetModel.from_dataset_ref(ref) for ref in refs
358
+ ]
359
+ result.datastore_records = {
360
+ store_name: records.to_simple() for store_name, records in quantum.datastore_records.items()
361
+ }
362
+ return result
363
+
364
+ @classmethod
365
+ def from_old_quantum_graph_init(
366
+ cls, task_init_node: TaskInitNode, old_quantum_graph: QuantumGraph
367
+ ) -> PredictedQuantumDatasetsModel:
368
+ """Construct from the init-input and init-output dataset types of a
369
+ task in an old `QuantumGraph` instance.
370
+
371
+ Parameters
372
+ ----------
373
+ task_init_node : `.pipeline_graph.TaskInitNode`
374
+ Task init node from the pipeline graph.
375
+ old_quantum_graph : `QuantumGraph`
376
+ Quantum graph.
377
+
378
+ Returns
379
+ -------
380
+ model : `PredictedQuantumDatasetsModel`
381
+ Model for this "init" quantum.
382
+ """
383
+ task_def = old_quantum_graph.findTaskDefByLabel(task_init_node.label)
384
+ assert task_def is not None
385
+ init_input_refs = {
386
+ ref.datasetType.name: ref for ref in (old_quantum_graph.initInputRefs(task_def) or [])
387
+ }
388
+ init_output_refs = {
389
+ ref.datasetType.name: ref for ref in (old_quantum_graph.initOutputRefs(task_def) or [])
390
+ }
391
+ init_input_ids = {ref.id for ref in init_input_refs.values()}
392
+ result: PredictedQuantumDatasetsModel = cls.model_construct(
393
+ quantum_id=uuid.uuid4(), task_label=task_init_node.label
394
+ )
395
+ for read_edge in task_init_node.iter_all_inputs():
396
+ ref = init_input_refs[read_edge.dataset_type_name]
397
+ result.inputs[read_edge.connection_name] = [PredictedDatasetModel.from_dataset_ref(ref)]
398
+ for write_edge in task_init_node.iter_all_outputs():
399
+ ref = init_output_refs[write_edge.dataset_type_name]
400
+ result.outputs[write_edge.connection_name] = [PredictedDatasetModel.from_dataset_ref(ref)]
401
+ datastore_records: dict[str, DatastoreRecordData] = {}
402
+ for quantum in old_quantum_graph.get_task_quanta(task_init_node.label).values():
403
+ for store_name, records in quantum.datastore_records.items():
404
+ subset = records.subset(init_input_ids)
405
+ if subset is not None:
406
+ datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)
407
+ break # All quanta have same init-inputs, so we only need one.
408
+ result.datastore_records = {
409
+ store_name: records.to_simple() for store_name, records in datastore_records.items()
410
+ }
411
+ return result
412
+
413
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting
414
+ # when we inherit those docstrings in our public classes.
415
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
416
+
417
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
418
+ """See `pydantic.BaseModel.copy`."""
419
+ return super().copy(*args, **kwargs)
420
+
421
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
422
+ """See `pydantic.BaseModel.model_dump`."""
423
+ return super().model_dump(*args, **kwargs)
424
+
425
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
426
+ """See `pydantic.BaseModel.model_dump_json`."""
427
+ return super().model_dump_json(*args, **kwargs)
428
+
429
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
430
+ """See `pydantic.BaseModel.model_copy`."""
431
+ return super().model_copy(*args, **kwargs)
432
+
433
+ @classmethod
434
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
435
+ """See `pydantic.BaseModel.model_construct`."""
436
+ return super().model_construct(*args, **kwargs)
437
+
438
+ @classmethod
439
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
440
+ """See `pydantic.BaseModel.model_json_schema`."""
441
+ return super().model_json_schema(*args, **kwargs)
442
+
443
+
444
+ class PredictedInitQuantaModel(pydantic.RootModel):
445
+ """Data model for the init-inputs and init-outputs of a predicted quantum
446
+ graph.
447
+ """
448
+
449
+ root: list[PredictedQuantumDatasetsModel] = pydantic.Field(default_factory=list)
450
+ """List of special "init" quanta: one for each task, and another for global
451
+ init-outputs.
452
+ """
453
+
454
+ def update_from_old_quantum_graph(self, old_quantum_graph: QuantumGraph) -> None:
455
+ """Update this model in-place by extracting from an old `QuantumGraph`
456
+ instance.
457
+
458
+ Parameters
459
+ ----------
460
+ old_quantum_graph : `QuantumGraph`
461
+ Quantum graph.
462
+ """
463
+ global_init_quantum = PredictedQuantumDatasetsModel.model_construct(
464
+ quantum_id=uuid.uuid4(), task_label=""
465
+ )
466
+ for ref in old_quantum_graph.globalInitOutputRefs():
467
+ global_init_quantum.outputs[ref.datasetType.name] = [PredictedDatasetModel.from_dataset_ref(ref)]
468
+ self.root.append(global_init_quantum)
469
+ for task_node in old_quantum_graph.pipeline_graph.tasks.values():
470
+ self.root.append(
471
+ PredictedQuantumDatasetsModel.from_old_quantum_graph_init(task_node.init, old_quantum_graph)
472
+ )
473
+
474
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting
475
+ # when we inherit those docstrings in our public classes.
476
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
477
+
478
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
479
+ """See `pydantic.BaseModel.copy`."""
480
+ return super().copy(*args, **kwargs)
481
+
482
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
483
+ """See `pydantic.BaseModel.model_dump`."""
484
+ return super().model_dump(*args, **kwargs)
485
+
486
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
487
+ """See `pydantic.BaseModel.model_dump_json`."""
488
+ return super().model_dump_json(*args, **kwargs)
489
+
490
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
491
+ """See `pydantic.BaseModel.model_copy`."""
492
+ return super().model_copy(*args, **kwargs)
493
+
494
+ @classmethod
495
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
496
+ """See `pydantic.BaseModel.model_construct`."""
497
+ return super().model_construct(*args, **kwargs)
498
+
499
+ @classmethod
500
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
501
+ """See `pydantic.BaseModel.model_json_schema`."""
502
+ return super().model_json_schema(*args, **kwargs)
503
+
504
+
505
+ class PredictedQuantumInfo(QuantumInfo):
506
+ """A typed dictionary that annotates the attributes of the NetworkX graph
507
+ node data for a predicted quantum.
508
+
509
+ Since NetworkX types are not generic over their node mapping type, this has
510
+ to be used explicitly, e.g.::
511
+
512
+ node_data: PredictedQuantumInfo = xgraph.nodes[quantum_id]
513
+
514
+ where ``xgraph`` can be either `PredictedQuantumGraph.quantum_only_xgraph`
515
+ or `PredictedQuantumGraph.bipartite_xgraph`.
516
+ """
517
+
518
+ quantum: Quantum
519
+ """Quantum object that can be passed directly to an executor.
520
+
521
+ This attribute is only present if
522
+ `PredictedQuantumGraph.build_execution_quanta` has been run on this node's
523
+ quantum ID already.
524
+ """
525
+
526
+
527
+ class PredictedDatasetInfo(DatasetInfo):
528
+ """A typed dictionary that annotates the attributes of the NetworkX graph
529
+ node data for a dataset.
530
+
531
+ Since NetworkX types are not generic over their node mapping type, this has
532
+ to be used explicitly, e.g.::
533
+
534
+ node_data: PredictedDatasetInfo = xgraph.nodes[dataset_id]
535
+
536
+ where ``xgraph`` is from the `PredictedQuantumGraph.bipartite_xgraph`
537
+ property.
538
+ """
539
+
540
+
541
+ class PredictedQuantumGraph(BaseQuantumGraph):
542
+ """A directed acyclic graph that predicts a processing run and supports it
543
+ during execution.
544
+
545
+ Parameters
546
+ ----------
547
+ components : `PredictedQuantumGraphComponents`
548
+ A struct of components used to construct the graph.
549
+
550
+ Notes
551
+ -----
552
+ Iteration over a `PredictedQuantumGraph` yields loaded quantum IDs in
553
+ deterministic topological order (but the tiebreaker is unspecified). The
554
+ `len` of a `PredictedQuantumGraph` is the number of loaded non-init quanta,
555
+ i.e. the same as the number of quanta iterated over.
556
+ """
557
+
558
+ def __init__(self, components: PredictedQuantumGraphComponents):
559
+ if components.header.graph_type != "predicted":
560
+ raise TypeError(f"Header is for a {components.header.graph_type!r} graph, not 'predicted'.")
561
+ super().__init__(components.header, components.pipeline_graph)
562
+ self._quantum_only_xgraph = networkx.DiGraph()
563
+ self._bipartite_xgraph = networkx.DiGraph()
564
+ self._quanta_by_task_label: dict[str, dict[DataCoordinate, uuid.UUID]] = {
565
+ task_label: {} for task_label in self.pipeline_graph.tasks.keys()
566
+ }
567
+ self._datasets_by_type: dict[str, dict[DataCoordinate, uuid.UUID]] = {
568
+ dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
569
+ }
570
+ self._datasets_by_type[self.pipeline_graph.packages_dataset_type.name] = {}
571
+ self._dimension_data = components.dimension_data
572
+ self._add_init_quanta(components.init_quanta)
573
+ self._quantum_datasets: dict[uuid.UUID, PredictedQuantumDatasetsModel] = {}
574
+ self._expanded_data_ids: dict[DataCoordinate, DataCoordinate] = {}
575
+ self._add_thin_graph(components.thin_graph, components.quantum_indices)
576
+ for quantum_datasets in components.quantum_datasets.values():
577
+ self._add_quantum_datasets(quantum_datasets)
578
+ if not components.thin_graph.edges:
579
+ # If we loaded the thin_graph, we've already populated this graph.
580
+ self._quantum_only_xgraph.update(
581
+ networkx.algorithms.bipartite.projected_graph(
582
+ networkx.DiGraph(self._bipartite_xgraph),
583
+ self._quantum_only_xgraph.nodes.keys(),
584
+ )
585
+ )
586
+ if _LOG.isEnabledFor(logging.DEBUG):
587
+ for quantum_id in self:
588
+ _LOG.debug(
589
+ "%s: %s @ %s",
590
+ quantum_id,
591
+ self._quantum_only_xgraph.nodes[quantum_id]["task_label"],
592
+ self._quantum_only_xgraph.nodes[quantum_id]["data_id"].required,
593
+ )
594
+
595
+ def _add_init_quanta(self, component: PredictedInitQuantaModel) -> None:
596
+ self._init_quanta = {q.task_label: q for q in component.root}
597
+ empty_data_id = DataCoordinate.make_empty(self.pipeline_graph.universe)
598
+ for quantum_datasets in self._init_quanta.values():
599
+ for init_datasets in itertools.chain(
600
+ quantum_datasets.inputs.values(), quantum_datasets.outputs.values()
601
+ ):
602
+ for init_dataset in init_datasets:
603
+ self._datasets_by_type[init_dataset.dataset_type_name][empty_data_id] = (
604
+ init_dataset.dataset_id
605
+ )
606
+ _LOG.debug(
607
+ "%s: %s @ init",
608
+ quantum_datasets.quantum_id,
609
+ quantum_datasets.task_label,
610
+ )
611
+
612
+ def _add_thin_graph(
613
+ self, component: PredictedThinGraphModel, indices: Mapping[uuid.UUID, QuantumIndex]
614
+ ) -> None:
615
+ uuid_by_index = {v: k for k, v in indices.items()}
616
+ for index1, index2 in component.edges:
617
+ self._quantum_only_xgraph.add_edge(uuid_by_index[index1], uuid_by_index[index2])
618
+ for task_label, thin_quanta_for_task in component.quanta.items():
619
+ for thin_quantum in thin_quanta_for_task:
620
+ self._add_quantum(
621
+ uuid_by_index[thin_quantum.quantum_index],
622
+ task_label,
623
+ thin_quantum.data_coordinate,
624
+ )
625
+
626
+ def _add_quantum_datasets(self, quantum_datasets: PredictedQuantumDatasetsModel) -> None:
627
+ self._quantum_datasets[quantum_datasets.quantum_id] = quantum_datasets
628
+ self._add_quantum(
629
+ quantum_datasets.quantum_id, quantum_datasets.task_label, quantum_datasets.data_coordinate
630
+ )
631
+ task_node = self.pipeline_graph.tasks[quantum_datasets.task_label]
632
+ for connection_name, input_datasets in quantum_datasets.inputs.items():
633
+ pipeline_edge = task_node.get_input_edge(connection_name)
634
+ for input_dataset in input_datasets:
635
+ self._add_dataset(input_dataset)
636
+ self._bipartite_xgraph.add_edge(
637
+ input_dataset.dataset_id,
638
+ quantum_datasets.quantum_id,
639
+ key=connection_name,
640
+ is_read=True,
641
+ )
642
+ # There might be multiple input connections for the same
643
+ # dataset type.
644
+ self._bipartite_xgraph.edges[
645
+ input_dataset.dataset_id, quantum_datasets.quantum_id
646
+ ].setdefault("pipeline_edges", []).append(pipeline_edge)
647
+ for connection_name, output_datasets in quantum_datasets.outputs.items():
648
+ pipeline_edges = [task_node.get_output_edge(connection_name)]
649
+ for output_dataset in output_datasets:
650
+ self._add_dataset(output_dataset)
651
+ self._bipartite_xgraph.add_edge(
652
+ quantum_datasets.quantum_id,
653
+ output_dataset.dataset_id,
654
+ key=connection_name,
655
+ is_read=False,
656
+ pipeline_edges=pipeline_edges,
657
+ )
658
+
659
+ def _add_quantum(
660
+ self, quantum_id: uuid.UUID, task_label: str, data_coordinate_values: Sequence[DataIdValue]
661
+ ) -> None:
662
+ task_node = self.pipeline_graph.tasks[task_label]
663
+ self._quantum_only_xgraph.add_node(quantum_id, task_label=task_label, pipeline_node=task_node)
664
+ self._bipartite_xgraph.add_node(quantum_id, task_label=task_label, pipeline_node=task_node)
665
+ data_coordinate_values = tuple(data_coordinate_values)
666
+ dimensions = self.pipeline_graph.tasks[task_label].dimensions
667
+ data_id = DataCoordinate.from_full_values(dimensions, tuple(data_coordinate_values))
668
+ self._quantum_only_xgraph.nodes[quantum_id].setdefault("data_id", data_id)
669
+ self._bipartite_xgraph.nodes[quantum_id].setdefault("data_id", data_id)
670
+ self._quanta_by_task_label[task_label][data_id] = quantum_id
671
+
672
+ def _add_dataset(self, model: PredictedDatasetModel) -> None:
673
+ dataset_type_node = self.pipeline_graph.dataset_types[model.dataset_type_name]
674
+ data_id = DataCoordinate.from_full_values(dataset_type_node.dimensions, tuple(model.data_coordinate))
675
+ self._bipartite_xgraph.add_node(
676
+ model.dataset_id,
677
+ dataset_type_name=dataset_type_node.name,
678
+ pipeline_node=dataset_type_node,
679
+ run=model.run,
680
+ )
681
+ self._bipartite_xgraph.nodes[model.dataset_id].setdefault("data_id", data_id)
682
+ self._datasets_by_type[model.dataset_type_name][data_id] = model.dataset_id
683
+
684
+ @classmethod
685
+ def open(
686
+ cls,
687
+ uri: ResourcePathExpression,
688
+ page_size: int = DEFAULT_PAGE_SIZE,
689
+ import_mode: TaskImportMode = TaskImportMode.ASSUME_CONSISTENT_EDGES,
690
+ ) -> AbstractContextManager[PredictedQuantumGraphReader]:
691
+ """Open a quantum graph and return a reader to load from it.
692
+
693
+ Parameters
694
+ ----------
695
+ uri : convertible to `lsst.resources.ResourcePath`
696
+ URI to open. Should have a ``.qg`` extension.
697
+ page_size : `int`, optional
698
+ Approximate number of bytes to read at once from address files.
699
+ Note that this does not set a page size for *all* reads, but it
700
+ does affect the smallest, most numerous reads.
701
+ import_mode : `.pipeline_graph.TaskImportMode`, optional
702
+ How to handle importing the task classes referenced in the pipeline
703
+ graph.
704
+
705
+ Returns
706
+ -------
707
+ reader : `contextlib.AbstractContextManager` [ \
708
+ `PredictedQuantumGraphReader` ]
709
+ A context manager that returns the reader when entered.
710
+ """
711
+ return PredictedQuantumGraphReader.open(uri, page_size=page_size, import_mode=import_mode)
712
+
713
+ @classmethod
714
+ def read_execution_quanta(
715
+ cls,
716
+ uri: ResourcePathExpression,
717
+ quantum_ids: Iterable[uuid.UUID] | None = None,
718
+ page_size: int = DEFAULT_PAGE_SIZE,
719
+ ) -> PredictedQuantumGraph:
720
+ """Read one or more executable quanta from a quantum graph file.
721
+
722
+ Parameters
723
+ ----------
724
+ uri : convertible to `lsst.resources.ResourcePath`
725
+ URI to open. Should have a ``.qg`` extension for new quantum graph
726
+ files, or ``.qgraph`` for the old format.
727
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
728
+ Iterable of quantum IDs to load. If not provided, all quanta will
729
+ be loaded. The UUIDs of special init quanta will be ignored.
730
+ page_size : `int`, optional
731
+ Approximate number of bytes to read at once from address files.
732
+ Note that this does not set a page size for *all* reads, but it
733
+ does affect the smallest, most numerous reads.
734
+
735
+ Returns
736
+ -------
737
+ quantum_graph : `PredictedQuantumGraph`
738
+ A quantum graph that can build execution quanta for all of the
739
+ given IDs.
740
+ """
741
+ return PredictedQuantumGraphComponents.read_execution_quanta(
742
+ uri,
743
+ quantum_ids,
744
+ page_size=page_size,
745
+ ).assemble()
746
+
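A minimal usage sketch for the two entry points above; the ``graph.qg`` path is hypothetical, and the import assumes `PredictedQuantumGraph` is re-exported from the ``lsst.pipe.base.quantum_graph`` package as its new ``__init__.py`` suggests::

    from lsst.pipe.base.quantum_graph import PredictedQuantumGraph

    # One-shot: load everything needed to execute all quanta.
    qg = PredictedQuantumGraph.read_execution_quanta("graph.qg")

    # Finer-grained: open a reader and choose which components or quanta
    # to load before assembling (reader methods are defined elsewhere).
    with PredictedQuantumGraph.open("graph.qg") as reader:
        ...  # select components/quanta via the reader, then assemble

`read_execution_quanta` is the convenience path; `open` is preferable when only a subset of quanta or components is needed.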
747
+ @property
748
+ def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
749
+ """A nested mapping of all quanta, keyed first by task name and then by
750
+ data ID.
751
+
752
+ Notes
753
+ -----
754
+ This is populated by the ``thin_graph`` component (all quanta are
755
+ added) and the ``quantum_datasets`` component (only loaded quanta are
756
+ added). All tasks in the pipeline graph are included, even if none of
757
+ their quanta were loaded (i.e. nested mappings may be empty).
758
+
759
+ The returned object may be an internal dictionary; as the type
760
+ annotation indicates, it should not be modified in place.
761
+ """
762
+ return self._quanta_by_task_label
763
+
764
+ @property
765
+ def datasets_by_type(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
766
+ """A nested mapping of all datasets, keyed first by dataset type name
767
+ and then by data ID.
768
+
769
+ Notes
770
+ -----
771
+ This is populated only by the ``quantum_datasets`` and ``init_quanta``
772
+ components, and only datasets referenced by loaded quanta are present.
773
+ All dataset types in the pipeline graph are included, even if none of
774
+ their datasets were loaded (i.e. nested mappings may be empty).
775
+
776
+ The returned object may be an internal dictionary; as the type
777
+ annotation indicates, it should not be modified in place.
778
+ """
779
+ return self._datasets_by_type
780
+
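A minimal lookup sketch using the two mappings above; the task label, dataset type name, and ``qg`` instance (a `PredictedQuantumGraph` as in the earlier sketch) are hypothetical::

    # UUIDs of all loaded quanta for one task, keyed by data ID.
    for data_id, quantum_id in qg.quanta_by_task["isr"].items():
        print(quantum_id, data_id)

    # UUIDs of datasets of one type that are referenced by loaded quanta.
    raw_dataset_ids = qg.datasets_by_type["raw"]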
781
+ @property
782
+ def quantum_only_xgraph(self) -> networkx.DiGraph:
783
+ """A directed acyclic graph with quanta as nodes and datasets elided.
784
+
785
+ Notes
786
+ -----
787
+ Node keys are quantum UUIDs, and are populated by the ``thin_graph``
788
+ component (all nodes and edges) and ``quantum_datasets`` component
789
+ (only those that were loaded).
790
+
791
+ Node state dictionaries are described by the
792
+ `PredictedQuantumInfo` type.
793
+
794
+ The returned object is a read-only view of an internal one.
795
+ """
796
+ return self._quantum_only_xgraph.copy(as_view=True)
797
+
798
+ @property
799
+ def bipartite_xgraph(self) -> networkx.DiGraph:
800
+ """A directed acyclic graph with quantum and dataset nodes.
801
+
802
+ This graph never includes init-input and init-output datasets.
803
+
804
+ Notes
805
+ -----
806
+ Node keys are quantum or dataset UUIDs. Nodes for quanta are present
807
+ if the ``thin_graph`` component is loaded (all nodes) or if the
808
+ ``quantum_datasets`` component is loaded (just loaded quanta). Edges
809
+ and dataset nodes are only present for quanta whose
810
+ ``quantum_datasets`` were loaded.
811
+
812
+ Node state dictionaries are described by the
813
+ `PredictedQuantumInfo` and `PredictedDatasetInfo` types.
814
+
815
+ The returned object is a read-only view of an internal one.
816
+ """
817
+ return self._bipartite_xgraph.copy(as_view=True)
818
+
819
+ @property
820
+ def dimension_data(self) -> DimensionDataAttacher | None:
821
+ """All dimension records needed to expand the data IDS in the graph.
822
+
823
+ This may be `None` if the dimension data was not loaded. If all
824
+ execution quanta have been built, all records are guaranteed to have
825
+ been deserialized and the ``records`` attribute is complete. In other
826
+ cases some records may still only be present in the ``deserializers``
827
+ attribute.
828
+ """
829
+ return self._dimension_data
830
+
831
+ def __iter__(self) -> Iterator[uuid.UUID]:
832
+ for quanta_for_task in self.quanta_by_task.values():
833
+ for data_id in sorted(quanta_for_task.keys()):
834
+ yield quanta_for_task[data_id]
835
+
836
+ def __len__(self) -> int:
837
+ return len(self._quantum_only_xgraph)
838
+
839
+ def get_init_inputs(self, task_label: str) -> dict[ConnectionName, DatasetRef]:
840
+ """Return the init-input datasets for the given task.
841
+
842
+ Parameters
843
+ ----------
844
+ task_label : `str`
845
+ Label of the task.
846
+
847
+ Returns
848
+ -------
849
+ init_inputs : `dict` [ `str`, `lsst.daf.butler.DatasetRef` ]
850
+ Dataset references for init-input datasets, keyed by connection
851
+ name. Dataset types storage classes match the task connection
852
+ declarations, not necessarily the data repository, and may be
853
+ components.
854
+ """
855
+ if self._init_quanta is None:
856
+ raise IncompleteQuantumGraphError("The init_quanta component was not loaded.")
857
+ task_init_node = self.pipeline_graph.tasks[task_label].init
858
+ return {
859
+ connection_name: task_init_node.inputs[connection_name].adapt_dataset_ref(
860
+ self._make_init_ref(datasets[0])
861
+ )
862
+ for connection_name, datasets in self._init_quanta[task_label].inputs.items()
863
+ }
864
+
865
+ def get_init_outputs(self, task_label: str) -> dict[ConnectionName, DatasetRef]:
866
+ """Return the init-output datasets for the given task.
867
+
868
+ Parameters
869
+ ----------
870
+ task_label : `str`
871
+ Label of the task. ``""`` may be used to get global init-outputs.
872
+
873
+ Returns
874
+ -------
875
+ init_outputs : `dict` [ `str`, `lsst.daf.butler.DatasetRef` ]
876
+ Dataset references for init-output datasets, keyed by connection
877
+ name. Dataset type storage classes match the task connection
878
+ declarations, not necessarily the data repository.
879
+ """
880
+ if self._init_quanta is None:
881
+ raise IncompleteQuantumGraphError("The init_quanta component was not loaded.")
882
+ if not task_label:
883
+ (datasets,) = self._init_quanta[""].outputs.values()
884
+ return {
885
+ acc.PACKAGES_INIT_OUTPUT_NAME: DatasetRef(
886
+ self.pipeline_graph.packages_dataset_type,
887
+ DataCoordinate.make_empty(self.pipeline_graph.universe),
888
+ run=datasets[0].run,
889
+ id=datasets[0].dataset_id,
890
+ conform=False,
891
+ )
892
+ }
893
+ task_init_node = self.pipeline_graph.tasks[task_label].init
894
+ result: dict[ConnectionName, DatasetRef] = {}
895
+ for connection_name, datasets in self._init_quanta[task_label].outputs.items():
896
+ if connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
897
+ edge = task_init_node.config_output
898
+ else:
899
+ edge = task_init_node.outputs[connection_name]
900
+ result[connection_name] = edge.adapt_dataset_ref(self._make_init_ref(datasets[0]))
901
+ return result
902
+
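A sketch of the init-dataset accessors above; the task label and ``qg`` instance are hypothetical. Note that the empty label selects the global init-outputs, which hold only the packages dataset::

    init_inputs = qg.get_init_inputs("isr")    # keyed by connection name
    init_outputs = qg.get_init_outputs("isr")  # includes the config dataset
    (packages_ref,) = qg.get_init_outputs("").values()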
903
+ def _make_init_ref(self, dataset: PredictedDatasetModel) -> DatasetRef:
904
+ dataset_type = self.pipeline_graph.dataset_types[dataset.dataset_type_name].dataset_type
905
+ return DatasetRef(
906
+ dataset_type,
907
+ DataCoordinate.make_empty(self.pipeline_graph.universe),
908
+ run=dataset.run,
909
+ id=dataset.dataset_id,
910
+ conform=False,
911
+ )
912
+
913
+ def build_execution_quanta(
914
+ self,
915
+ quantum_ids: Iterable[uuid.UUID] | None = None,
916
+ task_label: str | None = None,
917
+ ) -> dict[uuid.UUID, Quantum]:
918
+ """Build `lsst.daf.butler.Quantum` objects suitable for executing
919
+ tasks.
920
+
921
+ In addition to returning the quantum objects directly, this also causes
922
+ the `quantum_only_xgraph` and `bipartite_xgraph` graphs to include a
923
+ ``quantum`` attribute for the affected quanta.
924
+
925
+ Parameters
926
+ ----------
927
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
928
+ IDs of all quanta to return. If not provided, all quanta for the
929
+ given task label (if given) or graph are returned.
930
+ task_label : `str`, optional
931
+ Task label whose quanta should be generated. Ignored if
932
+ ``quantum_ids`` is not `None`.
933
+
934
+ Returns
935
+ -------
936
+ quanta : `dict` [ `uuid.UUID`, `lsst.daf.butler.Quantum` ]
937
+ Mapping of quanta, keyed by UUID. All dataset types are adapted to
938
+ the task's storage class declarations and inputs may be components.
939
+ All data IDs have dimension records attached.
940
+ """
941
+ if not self._init_quanta:
942
+ raise IncompleteQuantumGraphError(
943
+ "Cannot build execution quanta without loading the ``init_quanta`` component."
944
+ )
945
+ if quantum_ids is None:
946
+ if task_label is not None:
947
+ quantum_ids = self._quanta_by_task_label[task_label].values()
948
+ else:
949
+ quantum_ids = self._quantum_only_xgraph.nodes.keys()
950
+ else:
951
+ # Guard against single-pass iterators.
952
+ quantum_ids = list(quantum_ids)
953
+ del task_label # make sure we don't accidentally use this.
954
+ result: dict[uuid.UUID, Quantum] = {}
955
+ self._expand_execution_quantum_data_ids(quantum_ids)
956
+ task_init_datastore_records: dict[TaskLabel, dict[DatastoreName, DatastoreRecordData]] = {}
957
+ for quantum_id in quantum_ids:
958
+ quantum_node_dict: PredictedQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
959
+ if "quantum" in quantum_node_dict:
960
+ result[quantum_id] = quantum_node_dict["quantum"]
961
+ continue
962
+ # We've declared the info dict keys to all be required because that
963
+ # saves a lot of casting, but the reality is that they can either
964
+ # be fully populated or totally unpopulated. But that makes mypy
965
+ # think the check above always succeeds.
966
+ try: # type:ignore [unreachable]
967
+ quantum_datasets = self._quantum_datasets[quantum_id]
968
+ except KeyError:
969
+ raise IncompleteQuantumGraphError(
970
+ f"Full quantum information for {quantum_id} was not loaded."
971
+ ) from None
972
+ task_node = self.pipeline_graph.tasks[quantum_datasets.task_label]
973
+ quantum_data_id = self._expanded_data_ids[self._bipartite_xgraph.nodes[quantum_id]["data_id"]]
974
+ inputs = self._build_execution_quantum_refs(task_node, quantum_datasets.inputs)
975
+ outputs = self._build_execution_quantum_refs(task_node, quantum_datasets.outputs)
976
+ if task_node.label not in task_init_datastore_records:
977
+ task_init_datastore_records[task_node.label] = self._init_quanta[
978
+ task_node.label
979
+ ].deserialize_datastore_records()
980
+ quantum = Quantum(
981
+ taskName=task_node.task_class_name,
982
+ taskClass=task_node.task_class,
983
+ dataId=quantum_data_id,
984
+ initInputs={
985
+ ref.datasetType: ref for ref in self.get_init_inputs(quantum_datasets.task_label).values()
986
+ },
987
+ inputs=inputs,
988
+ outputs=outputs,
989
+ datastore_records=DatastoreRecordData.merge_mappings(
990
+ quantum_datasets.deserialize_datastore_records(),
991
+ task_init_datastore_records[task_node.label],
992
+ ),
993
+ )
994
+ self._quantum_only_xgraph.nodes[quantum_id]["quantum"] = quantum
995
+ self._bipartite_xgraph.nodes[quantum_id]["quantum"] = quantum
996
+ result[quantum_id] = quantum
997
+ return result
998
+
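A minimal sketch of building executable quanta for a single task; the task label and ``qg`` instance are hypothetical, and the ``init_quanta``, ``dimension_data``, and relevant ``quantum_datasets`` components must all have been loaded::

    quanta = qg.build_execution_quanta(task_label="isr")
    for quantum_id, quantum in quanta.items():
        print(quantum_id, quantum.dataId)

    # The built quanta are also cached on the graph nodes.
    node_data = qg.quantum_only_xgraph.nodes[quantum_id]
    assert node_data["quantum"] is quantum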
999
+ def _expand_execution_quantum_data_ids(self, quantum_ids: Iterable[uuid.UUID]) -> None:
1000
+ if self._dimension_data is None:
1001
+ raise IncompleteQuantumGraphError(
1002
+ "Cannot build execution quanta without loading the ``dimension_data`` component."
1003
+ )
1004
+ data_ids_to_expand: dict[DimensionGroup, set[DataCoordinate]] = defaultdict(set)
1005
+ for quantum_id in quantum_ids:
1006
+ data_id: DataCoordinate = self._bipartite_xgraph.nodes[quantum_id]["data_id"]
1007
+ if data_id.hasRecords():
1008
+ self._expanded_data_ids[data_id] = data_id
1009
+ else:
1010
+ data_ids_to_expand[data_id.dimensions].add(data_id)
1011
+ for dataset_id in itertools.chain(
1012
+ self._bipartite_xgraph.predecessors(quantum_id),
1013
+ self._bipartite_xgraph.successors(quantum_id),
1014
+ ):
1015
+ data_id = self._bipartite_xgraph.nodes[dataset_id]["data_id"]
1016
+ if data_id.hasRecords():
1017
+ self._expanded_data_ids[data_id] = data_id
1018
+ else:
1019
+ data_ids_to_expand[data_id.dimensions].add(data_id)
1020
+ for dimensions, data_ids_for_dimensions in data_ids_to_expand.items():
1021
+ self._expanded_data_ids.update(
1022
+ (d, d) for d in self._dimension_data.attach(dimensions, data_ids_for_dimensions)
1023
+ )
1024
+
1025
+ def _build_execution_quantum_refs(
1026
+ self, task_node: TaskNode, model_mapping: dict[ConnectionName, list[PredictedDatasetModel]]
1027
+ ) -> dict[DatasetType, list[DatasetRef]]:
1028
+ results: dict[DatasetType, list[DatasetRef]] = {}
1029
+ for connection_name, datasets in model_mapping.items():
1030
+ edge = task_node.get_edge(connection_name)
1031
+ dataset_type = edge.adapt_dataset_type(
1032
+ self.pipeline_graph.dataset_types[edge.parent_dataset_type_name].dataset_type
1033
+ )
1034
+ results[dataset_type] = [self._make_general_ref(dataset_type, d.dataset_id) for d in datasets]
1035
+ return results
1036
+
1037
+ def _make_general_ref(self, dataset_type: DatasetType, dataset_id: uuid.UUID) -> DatasetRef:
1038
+ node_state = self._bipartite_xgraph.nodes[dataset_id]
1039
+ data_id = self._expanded_data_ids[node_state["data_id"]]
1040
+ return DatasetRef(dataset_type, data_id, run=node_state["run"], id=dataset_id)
1041
+
1042
+ def make_init_qbb(
1043
+ self,
1044
+ butler_config: Config | ResourcePathExpression,
1045
+ *,
1046
+ config_search_paths: Iterable[str] | None = None,
1047
+ ) -> QuantumBackedButler:
1048
+ """Construct an quantum-backed butler suitable for reading and writing
1049
+ init input and init output datasets, respectively.
1050
+
1051
+ This only requires the ``init_quanta`` component to have been loaded.
1052
+
1053
+ Parameters
1054
+ ----------
1055
+ butler_config : `~lsst.daf.butler.Config` or \
1056
+ `~lsst.resources.ResourcePathExpression`
1057
+ A butler repository root, configuration filename, or configuration
1058
+ instance.
1059
+ config_search_paths : `~collections.abc.Iterable` [ `str` ], optional
1060
+ Additional search paths for butler configuration.
1061
+
1062
+ Returns
1063
+ -------
1064
+ qbb : `~lsst.daf.butler.QuantumBackedButler`
1065
+ A limited butler that can ``get`` init-input datasets and ``put``
1066
+ init-output datasets.
1067
+ """
1068
+ # Collect all init input/output dataset IDs.
1069
+ predicted_inputs: set[uuid.UUID] = set()
1070
+ predicted_outputs: set[uuid.UUID] = set()
1071
+ datastore_record_maps: list[dict[DatastoreName, DatastoreRecordData]] = []
1072
+ for init_quantum_datasets in self._init_quanta.values():
1073
+ predicted_inputs.update(
1074
+ d.dataset_id for d in itertools.chain.from_iterable(init_quantum_datasets.inputs.values())
1075
+ )
1076
+ predicted_outputs.update(
1077
+ d.dataset_id for d in itertools.chain.from_iterable(init_quantum_datasets.outputs.values())
1078
+ )
1079
+ datastore_record_maps.append(
1080
+ {
1081
+ datastore_name: DatastoreRecordData.from_simple(serialized_records)
1082
+ for datastore_name, serialized_records in init_quantum_datasets.datastore_records.items()
1083
+ }
1084
+ )
1085
+ # Remove intermediates from inputs.
1086
+ predicted_inputs -= predicted_outputs
1087
+ dataset_types = {d.name: d.dataset_type for d in self.pipeline_graph.dataset_types.values()}
1088
+ # Make butler from everything.
1089
+ return QuantumBackedButler.from_predicted(
1090
+ config=butler_config,
1091
+ predicted_inputs=predicted_inputs,
1092
+ predicted_outputs=predicted_outputs,
1093
+ dimensions=self.pipeline_graph.universe,
1094
+ datastore_records=DatastoreRecordData.merge_mappings(*datastore_record_maps),
1095
+ search_paths=list(config_search_paths) if config_search_paths is not None else None,
1096
+ dataset_types=dataset_types,
1097
+ )
1098
+
1099
+ def write_init_outputs(self, butler: LimitedButler, skip_existing: bool = True) -> None:
1100
+ """Write the init-output datasets for all tasks in the quantum graph.
1101
+
1102
+ This only requires the ``init_quanta`` component to have been loaded.
1103
+
1104
+ Parameters
1105
+ ----------
1106
+ butler : `lsst.daf.butler.LimitedButler`
1107
+ A limited butler data repository client.
1108
+ skip_existing : `bool`, optional
1109
+ If `True` (default) ignore init-outputs that already exist. If
1110
+ `False`, raise.
1111
+
1112
+ Raises
1113
+ ------
1114
+ lsst.daf.butler.registry.ConflictingDefinitionError
1115
+ Raised if an init-output dataset already exists and
1116
+ ``skip_existing=False``.
1117
+ """
1118
+ # Extract init-input and init-output refs from the QG.
1119
+ input_refs: dict[str, DatasetRef] = {}
1120
+ output_refs: dict[str, DatasetRef] = {}
1121
+ for task_node in self.pipeline_graph.tasks.values():
1122
+ if task_node.label not in self._init_quanta:
1123
+ continue
1124
+ input_refs.update(
1125
+ {ref.datasetType.name: ref for ref in self.get_init_inputs(task_node.label).values()}
1126
+ )
1127
+ output_refs.update(
1128
+ {
1129
+ ref.datasetType.name: ref
1130
+ for ref in self.get_init_outputs(task_node.label).values()
1131
+ if ref.datasetType.name != task_node.init.config_output.dataset_type_name
1132
+ }
1133
+ )
1134
+ for ref, is_stored in butler.stored_many(output_refs.values()).items():
1135
+ if is_stored:
1136
+ if not skip_existing:
1137
+ raise ConflictingDefinitionError(f"Init-output dataset {ref} already exists.")
1138
+ # We'll `put` whatever's left in output_refs at the end.
1139
+ del output_refs[ref.datasetType.name]
1140
+ # Instantiate tasks, reading overall init-inputs and gathering
1141
+ # init-output in-memory objects.
1142
+ init_outputs: list[tuple[Any, DatasetType]] = []
1143
+ self.pipeline_graph.instantiate_tasks(
1144
+ get_init_input=lambda dataset_type: butler.get(
1145
+ input_refs[dataset_type.name].overrideStorageClass(dataset_type.storageClass)
1146
+ ),
1147
+ init_outputs=init_outputs,
1148
+ # A task can be in the pipeline graph without having an init
1149
+ # quantum if it doesn't have any regular quanta either (e.g. they
1150
+ # were all skipped), and the _init_quanta mapping has a "" entry for global
1151
+ # init-outputs that we don't want to pass here.
1152
+ labels=self.pipeline_graph.tasks.keys() & self._init_quanta.keys(),
1153
+ )
1154
+ # Write init-outputs that weren't already present.
1155
+ for obj, dataset_type in init_outputs:
1156
+ if new_ref := output_refs.get(dataset_type.name):
1157
+ assert new_ref.datasetType.storageClass_name == dataset_type.storageClass_name, (
1158
+ "QG init refs should use task connection storage classes."
1159
+ )
1160
+ butler.put(obj, new_ref)
1161
+
1162
+ def write_configs(self, butler: LimitedButler, compare_existing: bool = True) -> None:
1163
+ """Write the config datasets for all tasks in the quantum graph.
1164
+
1165
+ Parameters
1166
+ ----------
1167
+ butler : `lsst.daf.butler.LimitedButler`
1168
+ A limited butler data repository client.
1169
+ compare_existing : `bool`, optional
1170
+ If `True` check configs that already exist for consistency. If
1171
+ `False`, always raise if configs already exist.
1172
+
1173
+ Raises
1174
+ ------
1175
+ lsst.daf.butler.registry.ConflictingDefinitionError
1176
+ Raised if a config dataset already exists and
1177
+ ``compare_existing=False``, or if the existing config is not
1178
+ consistent with the config in the quantum graph.
1179
+ """
1180
+ to_put: list[tuple[PipelineTaskConfig, DatasetRef]] = []
1181
+ for task_node in self.pipeline_graph.tasks.values():
1182
+ if task_node.label not in self._init_quanta:
1183
+ continue
1184
+ dataset_type_name = task_node.init.config_output.dataset_type_name
1185
+ ref = self.get_init_outputs(task_node.label)[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME]
1186
+ try:
1187
+ old_config = butler.get(ref)
1188
+ except (LookupError, FileNotFoundError):
1189
+ old_config = None
1190
+ if old_config is not None:
1191
+ if not compare_existing:
1192
+ raise ConflictingDefinitionError(f"Config dataset {ref} already exists.")
1193
+ if not task_node.config.compare(old_config, shortcut=False, output=log_config_mismatch):
1194
+ raise ConflictingDefinitionError(
1195
+ f"Config does not match existing task config {dataset_type_name!r} in "
1196
+ "butler; tasks configurations must be consistent within the same run collection."
1197
+ )
1198
+ else:
1199
+ to_put.append((task_node.config, ref))
1200
+ # We do writes at the end to minimize the mess we leave behind when we
1201
+ # raise an exception.
1202
+ for config, ref in to_put:
1203
+ butler.put(config, ref)
1204
+
1205
+ def write_packages(self, butler: LimitedButler, compare_existing: bool = True) -> None:
1206
+ """Write the 'packages' dataset for the currently-active software
1207
+ versions.
1208
+
1209
+ Parameters
1210
+ ----------
1211
+ butler : `lsst.daf.butler.LimitedButler`
1212
+ A limited butler data repository client.
1213
+ compare_existing : `bool`, optional
1214
+ If `True` check packages that already exist for consistency. If
1215
+ `False`, always raise if the packages dataset already exists.
1216
+
1217
+ Raises
1218
+ ------
1219
+ lsst.daf.butler.registry.ConflictingDefinitionError
1220
+ Raised if the packages dataset already exists and is not consistent
1221
+ with the current packages.
1222
+ """
1223
+ new_packages = Packages.fromSystem()
1224
+ (ref,) = self.get_init_outputs("").values()
1225
+ try:
1226
+ packages = butler.get(ref)
1227
+ except (LookupError, FileNotFoundError):
1228
+ packages = None
1229
+ if packages is not None:
1230
+ if not compare_existing:
1231
+ raise ConflictingDefinitionError(f"Packages dataset {ref} already exists.")
1232
+ if compare_packages(packages, new_packages):
1233
+ # have to remove existing dataset first; butler has no
1234
+ # replace option.
1235
+ butler.pruneDatasets([ref], unstore=True, purge=True)
1236
+ butler.put(packages, ref)
1237
+ else:
1238
+ butler.put(new_packages, ref)
1239
+
1240
+ def init_output_run(self, butler: LimitedButler, existing: bool = True) -> None:
1241
+ """Initialize a new output RUN collection by writing init-output
1242
+ datasets (including configs and packages).
1243
+
1244
+ Parameters
1245
+ ----------
1246
+ butler : `lsst.daf.butler.LimitedButler`
1247
+ A limited butler data repository client.
1248
+ existing : `bool`, optional
1249
+ If `True` check or ignore outputs that already exist. If
1250
+ `False`, always raise if an output dataset already exists.
1251
+
1252
+ Raises
1253
+ ------
1254
+ lsst.daf.butler.registry.ConflictingDefinitionError
1255
+ Raised if there are existing init output datasets, and either
1256
+ ``existing=False`` or their contents are not compatible with this
1257
+ graph.
1258
+ """
1259
+ self.write_configs(butler, compare_existing=existing)
1260
+ self.write_packages(butler, compare_existing=existing)
1261
+ self.write_init_outputs(butler, skip_existing=existing)
1262
+
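A sketch of initializing an output run with a quantum-backed butler, tying `make_init_qbb` to `init_output_run`; the repository root and ``qg`` instance are hypothetical, and only the ``init_quanta`` component needs to have been loaded::

    qbb = qg.make_init_qbb("/path/to/repo")
    # Writes configs, packages, and other init-outputs, checking or
    # skipping any that already exist.
    qg.init_output_run(qbb)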
1263
+ @classmethod
1264
+ def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> PredictedQuantumGraph:
1265
+ """Construct from an old `QuantumGraph` instance.
1266
+
1267
+ Parameters
1268
+ ----------
1269
+ old_quantum_graph : `QuantumGraph`
1270
+ Quantum graph to transform.
1271
+
1272
+ Returns
1273
+ -------
1274
+ predicted_quantum_graph : `PredictedQuantumGraph`
1275
+ A new predicted quantum graph.
1276
+ """
1277
+ return PredictedQuantumGraphComponents.from_old_quantum_graph(old_quantum_graph).assemble()
1278
+
1279
+ def to_old_quantum_graph(self) -> QuantumGraph:
1280
+ """Transform into an old `QuantumGraph` instance.
1281
+
1282
+ Returns
1283
+ -------
1284
+ old_quantum_graph : `QuantumGraph`
1285
+ Old quantum graph.
1286
+
1287
+ Notes
1288
+ -----
1289
+ This can only be called on graphs that have loaded all quantum
1290
+ datasets, init datasets, and dimension records.
1291
+ """
1292
+ from ..graph import QuantumGraph
1293
+
1294
+ quanta: dict[TaskDef, set[Quantum]] = {}
1295
+ quantum_to_quantum_id: dict[Quantum, uuid.UUID] = {}
1296
+ init_inputs: dict[TaskDef, list[DatasetRef]] = {}
1297
+ init_outputs: dict[TaskDef, list[DatasetRef]] = {}
1298
+ for task_def in self.pipeline_graph._iter_task_defs():
1299
+ if not self._quanta_by_task_label.get(task_def.label):
1300
+ continue
1301
+ quanta_for_task: set[Quantum] = set()
1302
+ for quantum_id, quantum in self.build_execution_quanta(task_label=task_def.label).items():
1303
+ quanta_for_task.add(quantum)
1304
+ quantum_to_quantum_id[quantum] = quantum_id
1305
+ quanta[task_def] = quanta_for_task
1306
+ init_inputs[task_def] = list(self.get_init_inputs(task_def.label).values())
1307
+ init_outputs[task_def] = list(self.get_init_outputs(task_def.label).values())
1308
+ global_init_outputs = list(self.get_init_outputs("").values())
1309
+ registry_dataset_types = [d.dataset_type for d in self.pipeline_graph.dataset_types.values()]
1310
+ result = object.__new__(QuantumGraph)
1311
+ result._buildGraphs(
1312
+ quanta,
1313
+ _quantumToNodeId=quantum_to_quantum_id,
1314
+ metadata=self.header.to_old_metadata(),
1315
+ universe=self.pipeline_graph.universe,
1316
+ initInputs=init_inputs,
1317
+ initOutputs=init_outputs,
1318
+ globalInitOutputs=global_init_outputs,
1319
+ registryDatasetTypes=registry_dataset_types,
1320
+ )
1321
+ return result
1322
+
1323
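A sketch of round-tripping between the two formats (file name hypothetical); conversion from the old format loads everything, which is what `to_old_quantum_graph` requires per the notes above:

```python
from lsst.pipe.base.graph import QuantumGraph
from lsst.pipe.base.quantum_graph import PredictedQuantumGraph

# Old format in, new representation out.
old_qg = QuantumGraph.loadUri("old_graph.qgraph")
new_qg = PredictedQuantumGraph.from_old_quantum_graph(old_qg)

# And back: valid here because all quanta, init datasets, and dimension
# records were loaded by the conversion.
round_tripped = new_qg.to_old_quantum_graph()
```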
+ def _make_summary(self) -> QgraphSummary:
1324
+ from ..graph import QgraphSummary, QgraphTaskSummary
1325
+
1326
+ summary = QgraphSummary(
1327
+ cmdLine=self.header.command or None,
1328
+ creationUTC=str(self.header.timestamp) if self.header.timestamp is not None else None,
1329
+ inputCollection=self.header.inputs or None,
1330
+ outputCollection=self.header.output,
1331
+ outputRun=self.header.output_run,
1332
+ )
1333
+ for task_label, quanta_for_task in self.quanta_by_task.items():
1334
+ task_summary = QgraphTaskSummary(taskLabel=task_label, numQuanta=len(quanta_for_task))
1335
+ task_node = self.pipeline_graph.tasks[task_label]
1336
+ for quantum_id in quanta_for_task.values():
1337
+ quantum_datasets = self._quantum_datasets[quantum_id]
1338
+ for connection_name, input_datasets in quantum_datasets.inputs.items():
1339
+ task_summary.numInputs[
1340
+ task_node.get_input_edge(connection_name).parent_dataset_type_name
1341
+ ] += len(input_datasets)
1342
+ for connection_name, output_datasets in quantum_datasets.outputs.items():
1343
+ task_summary.numOutputs[
1344
+ task_node.get_output_edge(connection_name).parent_dataset_type_name
1345
+ ] += len(output_datasets)
1346
+ summary.qgraphTaskSummaries[task_label] = task_summary
1347
+ return summary
1348
+
1349
+
1350
+ @dataclasses.dataclass(kw_only=True)
1351
+ class PredictedQuantumGraphComponents:
1352
+ """A helper class for building and writing predicted quantum graphs.
1353
+
1354
+ Notes
1355
+ -----
1356
+ This class is a simple struct of model classes to allow different tools
1357
+ that build predicted quantum graphs to assemble them in whatever order they
1358
+ prefer. It does not enforce any internal invariants (e.g. header quantum
1359
+ and dataset counts, consistency between quantum representations, internal
1360
+ ID sorting), but it does provide methods that can satisfy them.
1361
+ """
1362
+
1363
+ def __post_init__(self) -> None:
1364
+ self.header.graph_type = "predicted"
1365
+
1366
+ header: HeaderModel = dataclasses.field(default_factory=HeaderModel)
1367
+ """Basic metadata about the graph."""
1368
+
1369
+ pipeline_graph: PipelineGraph
1370
+ """Description of the pipeline this graph runs, including all task label
1371
+ and dataset type definitions.
1372
+
1373
+ This may include tasks that do not have any quanta (e.g. due to skipping
1374
+ already-executed tasks).
1375
+
1376
+ This also includes the dimension universe used to construct the graph.
1377
+ """
1378
+
1379
+ dimension_data: DimensionDataAttacher | None = None
1380
+ """Object that can attach dimension records to data IDs.
1381
+ """
1382
+
1383
+ init_quanta: PredictedInitQuantaModel = dataclasses.field(default_factory=PredictedInitQuantaModel)
1384
+ """A list of special quanta that describe the init-inputs and init-outputs
1385
+ of the graph.
1386
+
1387
+ Tasks that are included in the pipeline graph but do not have any quanta
1388
+ may or may not have an init quantum, but tasks that do have regular quanta
1389
+ always have an init quantum as well.
1390
+
1391
+ When used to construct a `PredictedQuantumGraph`, this must have either
1392
+ zero entries or an entry for every task in the pipeline.
1393
+ """
1394
+
1395
+ thin_graph: PredictedThinGraphModel = dataclasses.field(default_factory=PredictedThinGraphModel)
1396
+ """A lightweight quantum-quantum DAG with task labels and data IDs only.
1397
+
1398
+ This uses internal integer IDs ("indexes") for node IDs.
1399
+
1400
+ This does not include the special "init" quanta.
1401
+ """
1402
+
1403
+ quantum_datasets: dict[uuid.UUID, PredictedQuantumDatasetsModel] = dataclasses.field(default_factory=dict)
1404
+ """The full descriptions of all quanta, including input and output
1405
+ datasets, keyed by UUID.
1406
+
1407
+ When used to construct a `PredictedQuantumGraph`, this need not contain
1408
+ an entry for every quantum.
1409
+
1410
+ This does not include special "init" quanta.
1411
+ """
1412
+
1413
+ quantum_indices: dict[uuid.UUID, QuantumIndex] = dataclasses.field(default_factory=dict)
1414
+ """A mapping from external universal quantum ID to internal integer ID.
1415
+
1416
+ While this `dict` does not need to be sorted, the internal integer IDs do
1417
+ need to correspond exactly to ``enumerate(sorted(uuids))``.
1418
+
1419
+ When used to construct a `PredictedQuantumGraph`, this must be fully
1420
+ populated if `thin_graph` is. It can be empty otherwise.
1421
+
1422
+ This does include special "init" quanta.
1423
+ """
1424
+
1425
+ def set_quantum_indices(self) -> None:
1426
+ """Populate the `quantum_indices` component by sorting the UUIDs in the
1427
+ `init_quanta` and `quantum_datasets` components (which must both be
1428
+ complete).
1429
+ """
1430
+ all_quantum_ids = [q.quantum_id for q in self.init_quanta.root]
1431
+ all_quantum_ids.extend(self.quantum_datasets.keys())
1432
+ all_quantum_ids.sort(key=operator.attrgetter("int"))
1433
+ self.quantum_indices = {quantum_id: index for index, quantum_id in enumerate(all_quantum_ids)}
1434
+
1435
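The invariant this maintains (indexes equal to `enumerate(sorted(uuids))`, with UUIDs compared by their integer value) can be seen in a standalone sketch:

```python
import operator
import uuid

quantum_ids = [uuid.uuid4() for _ in range(3)]
quantum_ids.sort(key=operator.attrgetter("int"))

# Internal integer IDs are just positions in the sorted UUID sequence.
quantum_indices = {quantum_id: index for index, quantum_id in enumerate(quantum_ids)}
assert list(quantum_indices.values()) == [0, 1, 2]
```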
+ def set_thin_graph(self) -> None:
1436
+ """Populate the `thin_graph` component from the `pipeline_graph`,
1437
+ `quantum_datasets` and `quantum_indices` components (which must all be
1438
+ complete).
1439
+ """
1440
+ bipartite_xgraph = networkx.DiGraph()
1441
+ self.thin_graph.quanta = {task_label: [] for task_label in self.pipeline_graph.tasks}
1442
+ graph_quantum_indices = []
1443
+ for quantum_datasets in self.quantum_datasets.values():
1444
+ quantum_index = self.quantum_indices[quantum_datasets.quantum_id]
1445
+ self.thin_graph.quanta[quantum_datasets.task_label].append(
1446
+ PredictedThinQuantumModel.model_construct(
1447
+ quantum_index=quantum_index,
1448
+ data_coordinate=quantum_datasets.data_coordinate,
1449
+ )
1450
+ )
1451
+ for dataset in itertools.chain.from_iterable(quantum_datasets.inputs.values()):
1452
+ bipartite_xgraph.add_edge(dataset.dataset_id, quantum_index)
1453
+ for dataset in itertools.chain.from_iterable(quantum_datasets.outputs.values()):
1454
+ bipartite_xgraph.add_edge(quantum_index, dataset.dataset_id)
1455
+ graph_quantum_indices.append(quantum_index)
1456
+ quantum_only_xgraph: networkx.DiGraph = networkx.bipartite.projected_graph(
1457
+ bipartite_xgraph, graph_quantum_indices
1458
+ )
1459
+ self.thin_graph.edges = list(quantum_only_xgraph.edges)
1460
+
1461
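The projection step is what collapses quantum-dataset-quantum paths into direct quantum-to-quantum edges; a toy version of the same `networkx` call:

```python
import networkx

# Quantum 0 produces dataset "d"; quantum 1 consumes it.
bipartite_xgraph = networkx.DiGraph()
bipartite_xgraph.add_edge(0, "d")
bipartite_xgraph.add_edge("d", 1)

# Projecting onto the quantum nodes yields the direct edge 0 -> 1.
quantum_only = networkx.bipartite.projected_graph(bipartite_xgraph, [0, 1])
assert list(quantum_only.edges) == [(0, 1)]
```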
+ def set_header_counts(self) -> None:
1462
+ """Populate the quantum and dataset counts in the header from the
1463
+ `quantum_indices`, `thin_graph`, `init_quanta`, and `quantum_datasets`
1464
+ components.
1465
+ """
1466
+ self.header.n_quanta = len(self.quantum_indices) - len(self.init_quanta.root)
1467
+ self.header.n_task_quanta = {
1468
+ task_label: len(thin_quanta) for task_label, thin_quanta in self.thin_graph.quanta.items()
1469
+ }
1470
+ all_dataset_ids: set[uuid.UUID] = set()
1471
+ for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
1472
+ all_dataset_ids.update(quantum_datasets.iter_dataset_ids())
1473
+ self.header.n_datasets = len(all_dataset_ids)
1474
+
1475
+ def update_output_run(self, output_run: str) -> None:
1476
+ """Update the output `~lsst.daf.butler.CollectionType.RUN` collection
1477
+ name in all datasets and regenerate all output dataset and quantum
1478
+ UUIDs.
1479
+
1480
+ Parameters
1481
+ ----------
1482
+ output_run : `str`
1483
+ New output `~lsst.daf.butler.CollectionType.RUN` collection name.
1484
+ """
1485
+ uuid_map: dict[uuid.UUID, uuid.UUID] = {}
1486
+ # Do all outputs and then all inputs in separate passes so we don't
1487
+ # need to rely on topological ordering of anything.
1488
+ for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
1489
+ new_quantum_id = uuid.uuid4()
1490
+ uuid_map[quantum_datasets.quantum_id] = new_quantum_id
1491
+ quantum_datasets.quantum_id = new_quantum_id
1492
+ for output_dataset in itertools.chain.from_iterable(quantum_datasets.outputs.values()):
1493
+ assert output_dataset.run == self.header.output_run, (
1494
+ f"Incorrect run {output_dataset.run} for output dataset {output_dataset.dataset_id}."
1495
+ )
1496
+ new_dataset_id = uuid.uuid4()
1497
+ uuid_map[output_dataset.dataset_id] = new_dataset_id
1498
+ output_dataset.dataset_id = new_dataset_id
1499
+ output_dataset.run = output_run
1500
+ for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
1501
+ for input_dataset in itertools.chain.from_iterable(quantum_datasets.inputs.values()):
1502
+ if input_dataset.run == self.header.output_run:
1503
+ input_dataset.run = output_run
1504
+ input_dataset.dataset_id = uuid_map.get(
1505
+ input_dataset.dataset_id,
1506
+ # This dataset isn't necessarily an output of the graph
1507
+ # just because it's in the output run; the graph could
1508
+ # have been built with extend_run=True.
1509
+ input_dataset.dataset_id,
1510
+ )
1511
+ # Update the keys of the quantum_datasets dict.
1512
+ self.quantum_datasets = {qd.quantum_id: qd for qd in self.quantum_datasets.values()}
1513
+ # Since the UUIDs have changed, the indices need to change, too.
1514
+ self.set_quantum_indices()
1515
+ self.set_thin_graph()
1516
+ # Update the header last, since we use it above to get the old run.
1517
+ self.header.output_run = output_run
1518
+
1519
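A sketch of retargeting an existing graph at a fresh output run (collection and file names hypothetical); because every output dataset and quantum UUID is regenerated, the rewritten file describes a logically new execution:

```python
from lsst.pipe.base.quantum_graph import PredictedQuantumGraphComponents

components = PredictedQuantumGraphComponents.read_execution_quanta("graph.qg")
components.update_output_run("u/example/run2")
components.write("graph_run2.qg")
```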
+ def assemble(self) -> PredictedQuantumGraph:
1520
+ """Construct a `PredictedQuantumGraph` from these components."""
1521
+ return PredictedQuantumGraph(self)
1522
+
1523
+ @classmethod
1524
+ def read_execution_quanta(
1525
+ cls,
1526
+ uri: ResourcePathExpression,
1527
+ quantum_ids: Iterable[uuid.UUID] | None = None,
1528
+ page_size: int = DEFAULT_PAGE_SIZE,
1529
+ ) -> PredictedQuantumGraphComponents:
1530
+ """Read one or more executable quanta from a quantum graph file.
1531
+
1532
+ Parameters
1533
+ ----------
1534
+ uri : convertible to `lsst.resources.ResourcePath`
1535
+ URI to open. Should have a ``.qg`` extension for new quantum graph
1536
+ files, or ``.qgraph`` for the old format.
1537
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
1538
+ Iterable of quantum IDs to load. If not provided, all quanta will
1539
+ be loaded. The UUIDs of special init quanta will be ignored.
1540
+ page_size : `int`, optional
1541
+ Approximate number of bytes to read at once from address files.
1542
+ Note that this does not set a page size for *all* reads, but it
1543
+ does affect the smallest, most numerous reads.
1544
+
1545
+ Returns
1546
+ -------
1547
+ components : `PredictedQuantumGraphComponents`
1548
+ Components for a quantum graph that can build execution quanta for
1549
+ all of the given IDs.
1550
+ """
1551
+ uri = ResourcePath(uri)
1552
+ if uri.getExtension() == ".qgraph":
1553
+ _LOG.warning(
1554
+ f"Reading and converting old quantum graph {uri}. "
1555
+ "Use the '.qg' extension to write in the new format."
1556
+ )
1557
+ from ..graph import QuantumGraph
1558
+
1559
+ old_qg = QuantumGraph.loadUri(uri, nodes=quantum_ids)
1560
+ return PredictedQuantumGraphComponents.from_old_quantum_graph(old_qg)
1561
+
1562
+ with PredictedQuantumGraph.open(uri, page_size=page_size) as reader:
1563
+ reader.read_execution_quanta(quantum_ids)
1564
+ return reader.components
1565
+
1566
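For example, a worker that has been assigned a subset of quanta by a workflow system might load only those (the UUID shown is a placeholder):

```python
import uuid

from lsst.pipe.base.quantum_graph import PredictedQuantumGraphComponents

assigned = [uuid.UUID("00000000-0000-0000-0000-000000000001")]
components = PredictedQuantumGraphComponents.read_execution_quanta(
    "graph.qg", quantum_ids=assigned
)
qg = components.assemble()
```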
+ @classmethod
1567
+ def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> PredictedQuantumGraphComponents:
1568
+ """Construct from an old `QuantumGraph` instance.
1569
+
1570
+ Parameters
1571
+ ----------
1572
+ old_quantum_graph : `QuantumGraph`
1573
+ Quantum graph to transform.
1574
+
1575
+ Returns
1576
+ -------
1577
+ components : `PredictedQuantumGraphComponents`
1578
+ Components for a new predicted quantum graph.
1579
+ """
1580
+ header = HeaderModel.from_old_quantum_graph(old_quantum_graph)
1581
+ result = cls(header=header, pipeline_graph=old_quantum_graph.pipeline_graph)
1582
+ result.init_quanta.update_from_old_quantum_graph(old_quantum_graph)
1583
+ dimension_data_extractor = DimensionDataExtractor.from_dimension_group(
1584
+ old_quantum_graph.pipeline_graph.get_all_dimensions()
1585
+ )
1586
+ for task_node in old_quantum_graph.pipeline_graph.tasks.values():
1587
+ task_quanta = old_quantum_graph.get_task_quanta(task_node.label)
1588
+ for quantum_id, quantum in task_quanta.items():
1589
+ result.quantum_datasets[quantum_id] = PredictedQuantumDatasetsModel.from_execution_quantum(
1590
+ task_node, quantum, quantum_id
1591
+ )
1592
+ dimension_data_extractor.update([cast(DataCoordinate, quantum.dataId)])
1593
+ for refs in itertools.chain(quantum.inputs.values(), quantum.outputs.values()):
1594
+ dimension_data_extractor.update(ref.dataId for ref in refs)
1595
+ result.dimension_data = DimensionDataAttacher(
1596
+ records=dimension_data_extractor.records.values(),
1597
+ dimensions=result.pipeline_graph.get_all_dimensions(),
1598
+ )
1599
+ result.set_quantum_indices()
1600
+ result.set_thin_graph()
1601
+ result.set_header_counts()
1602
+ return result
1603
+
1604
+ def write(
1605
+ self,
1606
+ uri: ResourcePathExpression,
1607
+ *,
1608
+ zstd_level: int = 10,
1609
+ zstd_dict_size: int = 32768,
1610
+ zstd_dict_n_inputs: int = 512,
1611
+ ) -> None:
1612
+ """Write the graph to a file.
1613
+
1614
+ Parameters
1615
+ ----------
1616
+ uri : convertible to `lsst.resources.ResourcePath`
1617
+ Path to write to. Should have a ``.qg`` extension, or ``.qgraph``
1618
+ to force writing the old format.
1619
+ zstd_level : `int`, optional
1620
+ ZStandard compression level to use on JSON blocks.
1621
+ zstd_dict_size : `int`, optional
1622
+ Size of a ZStandard dictionary that shares compression information
1623
+ across components. Set to zero to disable the dictionary.
1624
+ Dictionary compression is automatically disabled if the number of
1625
+ quanta is smaller than ``zstd_dict_n_inputs``.
1626
+ zstd_dict_n_inputs : `int`, optional
1627
+ Maximum number of `PredictedQuantumDatasetsModel` JSON
1628
+ representations to feed the ZStandard dictionary training routine.
1629
+
1630
+ Notes
1631
+ -----
1632
+ Only a complete predicted quantum graph with all components fully
1633
+ populated should be written.
1634
+ """
1635
+ if self.header.n_quanta + len(self.init_quanta.root) != len(self.quantum_indices):
1636
+ raise RuntimeError(
1637
+ f"Cannot save graph after partial read of quanta: expected {self.header.n_quanta}, "
1638
+ f"got {len(self.quantum_indices)}."
1639
+ )
1640
+ uri = ResourcePath(uri)
1641
+ match uri.getExtension():
1642
+ case ".qg":
1643
+ pass
1644
+ case ".qgraph":
1645
+ _LOG.warning(
1646
+ "Converting to an old-format quantum graph.. "
1647
+ "Use '.qg' instead of '.qgraph' to save in the new format."
1648
+ )
1649
+ old_qg = self.assemble().to_old_quantum_graph()
1650
+ old_qg.saveUri(uri)
1651
+ return
1652
+ case ext:
1653
+ raise ValueError(
1654
+ f"Unsupported extension {ext!r} for quantum graph; "
1655
+ "expected '.qg' (or '.qgraph' to force the old format)."
1656
+ )
1657
+ cdict: zstandard.ZstdCompressionDict | None = None
1658
+ cdict_data: bytes | None = None
1659
+ quantum_datasets_json: dict[uuid.UUID, bytes] = {}
1660
+ if len(self.quantum_datasets) < zstd_dict_n_inputs:
1661
+ # ZStandard will fail if we ask to use a compression dict without
1662
+ # giving it enough data, and it only helps if we have a lot of
1663
+ # quanta.
1664
+ zstd_dict_size = 0
1665
+ if zstd_dict_size:
1666
+ quantum_datasets_json = {
1667
+ quantum_model.quantum_id: quantum_model.model_dump_json().encode()
1668
+ for quantum_model in itertools.islice(self.quantum_datasets.values(), zstd_dict_n_inputs)
1669
+ }
1670
+ try:
1671
+ cdict = zstandard.train_dictionary(
1672
+ zstd_dict_size,
1673
+ list(quantum_datasets_json.values()),
1674
+ level=zstd_level,
1675
+ )
1676
+ except zstandard.ZstdError as err:
1677
+ warnings.warn(f"Not using a compression dictionary: {err}.")
1678
+ cdict = None
1679
+ else:
1680
+ cdict_data = cdict.as_bytes()
1681
+ compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
1682
+ with BaseQuantumGraphWriter.open(
1683
+ uri,
1684
+ header=self.header,
1685
+ pipeline_graph=self.pipeline_graph,
1686
+ indices=self.quantum_indices,
1687
+ address_filename="quanta",
1688
+ compressor=compressor,
1689
+ cdict_data=cdict_data,
1690
+ ) as writer:
1691
+ writer.write_single_model("thin_graph", self.thin_graph)
1692
+ if self.dimension_data is None:
1693
+ raise IncompleteQuantumGraphError(
1694
+ "Cannot save predicted quantum graph with no dimension data."
1695
+ )
1696
+ serialized_dimension_data = self.dimension_data.serialized()
1697
+ writer.write_single_model("dimension_data", serialized_dimension_data)
1698
+ del serialized_dimension_data
1699
+ writer.write_single_model("init_quanta", self.init_quanta)
1700
+ with MultiblockWriter.open_in_zip(
1701
+ writer.zf, "quantum_datasets", writer.int_size
1702
+ ) as quantum_datasets_mb:
1703
+ for quantum_model in self.quantum_datasets.values():
1704
+ if json_data := quantum_datasets_json.get(quantum_model.quantum_id):
1705
+ quantum_datasets_mb.write_bytes(
1706
+ quantum_model.quantum_id, writer.compressor.compress(json_data)
1707
+ )
1708
+ else:
1709
+ quantum_datasets_mb.write_model(
1710
+ quantum_model.quantum_id, quantum_model, writer.compressor
1711
+ )
1712
+ writer.address_writer.addresses.append(quantum_datasets_mb.addresses)
1713
+
1714
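As a usage note, the compression parameters trade file size against write time; a sketch with heavier settings for a very large graph (the values are illustrative, not recommendations):

```python
from lsst.pipe.base.quantum_graph import PredictedQuantumGraphComponents

components = PredictedQuantumGraphComponents.read_execution_quanta("graph.qg")
# Stronger compression and a larger shared dictionary; the dictionary is
# still skipped automatically when there are fewer quanta than
# zstd_dict_n_inputs.
components.write("graph_recompressed.qg", zstd_level=19, zstd_dict_size=65536)
```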
+
1715
+ @dataclasses.dataclass
1716
+ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
1717
+ """A helper class for reading predicted quantum graphs."""
1718
+
1719
+ components: PredictedQuantumGraphComponents = dataclasses.field(init=False)
1720
+ """Quantum graph components populated by this reader's methods."""
1721
+
1722
+ @classmethod
1723
+ @contextmanager
1724
+ def open(
1725
+ cls,
1726
+ uri: ResourcePathExpression,
1727
+ *,
1728
+ page_size: int = DEFAULT_PAGE_SIZE,
1729
+ import_mode: TaskImportMode = TaskImportMode.ASSUME_CONSISTENT_EDGES,
1730
+ ) -> Iterator[PredictedQuantumGraphReader]:
1731
+ """Construct a reader from a URI.
1732
+
1733
+ Parameters
1734
+ ----------
1735
+ uri : convertible to `lsst.resources.ResourcePath`
1736
+ URI to open. Should have a ``.qg`` extension.
1737
+ page_size : `int`, optional
1738
+ Approximate number of bytes to read at once from address files.
1739
+ Note that this does not set a page size for *all* reads, but it
1740
+ does affect the smallest, most numerous reads.
1741
+ import_mode : `..pipeline_graph.TaskImportMode`, optional
1742
+ How to handle importing the task classes referenced in the pipeline
1743
+ graph.
1744
+
1745
+ Returns
1746
+ -------
1747
+ reader : `contextlib.AbstractContextManager` [ \
1748
+ `PredictedQuantumGraphReader` ]
1749
+ A context manager that returns the reader when entered.
1750
+ """
1751
+ with cls._open(
1752
+ uri,
1753
+ graph_type="predicted",
1754
+ address_filename="quanta",
1755
+ page_size=page_size,
1756
+ import_mode=import_mode,
1757
+ ) as self:
1758
+ yield self
1759
+
1760
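Because each `read_*` method returns the reader, callers can chain exactly the reads they need; a sketch that loads only the lightweight pieces, e.g. for visualization (file name hypothetical, and assuming the reader is re-exported from `lsst.pipe.base.quantum_graph`):

```python
from lsst.pipe.base.quantum_graph import PredictedQuantumGraphReader

with PredictedQuantumGraphReader.open("graph.qg") as reader:
    # Skip per-quantum dataset records and dimension data entirely.
    qg = reader.read_thin_graph().read_init_quanta().finish()
```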
+ def __post_init__(self) -> None:
1761
+ self.components = PredictedQuantumGraphComponents(
1762
+ header=self.header, pipeline_graph=self.pipeline_graph
1763
+ )
1764
+
1765
+ def finish(self) -> PredictedQuantumGraph:
1766
+ """Construct a `PredictedQuantumGraph` instance from this reader."""
1767
+ return self.components.assemble()
1768
+
1769
+ def read_all(self) -> PredictedQuantumGraphReader:
1770
+ """Read all components in full."""
1771
+ return self.read_thin_graph().read_execution_quanta()
1772
+
1773
+ def read_thin_graph(self) -> PredictedQuantumGraphReader:
1774
+ """Read the thin graph.
1775
+
1776
+ The thin graph is a quantum-quantum DAG with internal integer IDs for
1777
+ nodes and just task labels and data IDs as node attributes. It always
1778
+ includes all regular quanta, and does not include init-input or
1779
+ init-output information.
1780
+ """
1781
+ if not self.components.thin_graph.quanta:
1782
+ self.components.thin_graph = self._read_single_block("thin_graph", PredictedThinGraphModel)
1783
+ if len(self.components.quantum_indices) != self.components.header.n_quanta:
1784
+ self.address_reader.read_all()
1785
+ self.components.quantum_indices.update(
1786
+ {row.key: row.index for row in self.address_reader.rows.values()}
1787
+ )
1788
+ return self
1789
+
1790
+ def read_init_quanta(self) -> PredictedQuantumGraphReader:
1791
+ """Read the list of special quanta that represent init-inputs and
1792
+ init-outputs.
1793
+ """
1794
+ if not self.components.init_quanta.root:
1795
+ self.components.init_quanta = self._read_single_block("init_quanta", PredictedInitQuantaModel)
1796
+ return self
1797
+
1798
+ def read_dimension_data(self) -> PredictedQuantumGraphReader:
1799
+ """Read all dimension records.
1800
+
1801
+ Record data IDs will be immediately deserialized, while other fields
1802
+ will be left in serialized form until they are needed.
1803
+ """
1804
+ if self.components.dimension_data is None:
1805
+ serializable_dimension_data = self._read_single_block("dimension_data", SerializableDimensionData)
1806
+ self.components.dimension_data = DimensionDataAttacher(
1807
+ deserializers=[
1808
+ DimensionRecordSetDeserializer.from_raw(
1809
+ self.components.pipeline_graph.universe[element], serialized_records
1810
+ )
1811
+ for element, serialized_records in serializable_dimension_data.root.items()
1812
+ ],
1813
+ dimensions=DimensionGroup.union(
1814
+ *self.components.pipeline_graph.group_by_dimensions(prerequisites=True).keys(),
1815
+ universe=self.components.pipeline_graph.universe,
1816
+ ),
1817
+ )
1818
+ return self
1819
+
1820
+ def read_quantum_datasets(
1821
+ self, quantum_ids: Iterable[uuid.UUID] | None = None
1822
+ ) -> PredictedQuantumGraphReader:
1823
+ """Read information about all datasets produced and consumed by the
1824
+ given quantum IDs.
1825
+
1826
+ Parameters
1827
+ ----------
1828
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
1829
+ Iterable of quantum IDs to load. If not provided, all quanta will
1830
+ be loaded. The UUIDs of special init quanta will be ignored.
1831
+ """
1832
+ quantum_datasets: PredictedQuantumDatasetsModel | None
1833
+ if quantum_ids is None:
1834
+ if len(self.components.quantum_datasets) != self.header.n_quanta:
1835
+ for quantum_datasets in MultiblockReader.read_all_models_in_zip(
1836
+ self.zf,
1837
+ "quantum_datasets",
1838
+ PredictedQuantumDatasetsModel,
1839
+ self.decompressor,
1840
+ int_size=self.components.header.int_size,
1841
+ page_size=self.page_size,
1842
+ ):
1843
+ self.components.quantum_datasets.setdefault(quantum_datasets.quantum_id, quantum_datasets)
1844
+ self.address_reader.read_all()
1845
+ for address_row in self.address_reader.rows.values():
1846
+ self.components.quantum_indices[address_row.key] = address_row.index
1847
+ return self
1848
+ with MultiblockReader.open_in_zip(
1849
+ self.zf, "quantum_datasets", int_size=self.components.header.int_size
1850
+ ) as mb_reader:
1851
+ for quantum_id in quantum_ids:
1852
+ if quantum_id in self.components.quantum_datasets:
1853
+ continue
1854
+ address_row = self.address_reader.find(quantum_id)
1855
+ self.components.quantum_indices[address_row.key] = address_row.index
1856
+ quantum_datasets = mb_reader.read_model(
1857
+ address_row.addresses[0], PredictedQuantumDatasetsModel, self.decompressor
1858
+ )
1859
+ if quantum_datasets is not None:
1860
+ self.components.quantum_datasets[address_row.key] = quantum_datasets
1861
+ return self
1862
+
1863
+ def read_execution_quanta(
1864
+ self, quantum_ids: Iterable[uuid.UUID] | None = None
1865
+ ) -> PredictedQuantumGraphReader:
1866
+ """Read all information needed to execute the given quanta.
1867
+
1868
+ Parameters
1869
+ ----------
1870
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
1871
+ Iterable of quantum IDs to load. If not provided, all quanta will
1872
+ be loaded. The UUIDs of special init quanta will be ignored.
1873
+ """
1874
+ return self.read_init_quanta().read_dimension_data().read_quantum_datasets(quantum_ids)