lsst-pipe-base 29.2025.3900__py3-none-any.whl → 29.2025.4100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. lsst/pipe/base/_task_metadata.py +15 -0
  2. lsst/pipe/base/dot_tools.py +14 -152
  3. lsst/pipe/base/exec_fixup_data_id.py +17 -44
  4. lsst/pipe/base/execution_graph_fixup.py +49 -18
  5. lsst/pipe/base/graph/_versionDeserializers.py +6 -5
  6. lsst/pipe/base/graph/graph.py +30 -10
  7. lsst/pipe/base/graph/graphSummary.py +30 -0
  8. lsst/pipe/base/graph_walker.py +119 -0
  9. lsst/pipe/base/log_capture.py +5 -2
  10. lsst/pipe/base/mermaid_tools.py +11 -64
  11. lsst/pipe/base/mp_graph_executor.py +298 -236
  12. lsst/pipe/base/pipeline_graph/io.py +1 -1
  13. lsst/pipe/base/quantum_graph/__init__.py +32 -0
  14. lsst/pipe/base/quantum_graph/_common.py +632 -0
  15. lsst/pipe/base/quantum_graph/_multiblock.py +808 -0
  16. lsst/pipe/base/quantum_graph/_predicted.py +1950 -0
  17. lsst/pipe/base/quantum_graph/visualization.py +302 -0
  18. lsst/pipe/base/quantum_graph_builder.py +292 -34
  19. lsst/pipe/base/quantum_graph_executor.py +2 -1
  20. lsst/pipe/base/quantum_provenance_graph.py +16 -7
  21. lsst/pipe/base/quantum_reports.py +45 -0
  22. lsst/pipe/base/separable_pipeline_executor.py +126 -15
  23. lsst/pipe/base/simple_pipeline_executor.py +44 -43
  24. lsst/pipe/base/single_quantum_executor.py +1 -40
  25. lsst/pipe/base/tests/mocks/__init__.py +1 -1
  26. lsst/pipe/base/tests/mocks/_pipeline_task.py +16 -1
  27. lsst/pipe/base/tests/mocks/{_in_memory_repo.py → _repo.py} +324 -45
  28. lsst/pipe/base/tests/mocks/_storage_class.py +51 -0
  29. lsst/pipe/base/tests/simpleQGraph.py +11 -5
  30. lsst/pipe/base/version.py +1 -1
  31. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/METADATA +2 -1
  32. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/RECORD +40 -34
  33. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/WHEEL +0 -0
  34. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/entry_points.txt +0 -0
  35. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/COPYRIGHT +0 -0
  36. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/LICENSE +0 -0
  37. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/bsd_license.txt +0 -0
  38. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/gpl-v3.0.txt +0 -0
  39. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/top_level.txt +0 -0
  40. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/_predicted.py
@@ -0,0 +1,1950 @@
1
+ # This file is part of pipe_base.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (http://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # This software is dual licensed under the GNU General Public License and also
10
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
11
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12
+ # respectively. If you choose the GPL option then the following text applies
13
+ # (but note that there is still no warranty even if you opt for BSD instead):
14
+ #
15
+ # This program is free software: you can redistribute it and/or modify
16
+ # it under the terms of the GNU General Public License as published by
17
+ # the Free Software Foundation, either version 3 of the License, or
18
+ # (at your option) any later version.
19
+ #
20
+ # This program is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ # GNU General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU General Public License
26
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
+
28
+ from __future__ import annotations
29
+
30
+ __all__ = (
31
+ "PredictedDatasetInfo",
32
+ "PredictedDatasetModel",
33
+ "PredictedInitQuantaModel",
34
+ "PredictedQuantumDatasetsModel",
35
+ "PredictedQuantumGraph",
36
+ "PredictedQuantumGraphComponents",
37
+ "PredictedQuantumGraphReader",
38
+ "PredictedQuantumInfo",
39
+ "PredictedThinGraphModel",
40
+ "PredictedThinQuantumModel",
41
+ )
42
+
43
+ import dataclasses
44
+ import itertools
45
+ import logging
46
+ import operator
47
+ import sys
48
+ import uuid
49
+ import warnings
50
+ from collections import defaultdict
51
+ from collections.abc import Iterable, Iterator, Mapping, Sequence
52
+ from contextlib import AbstractContextManager, contextmanager
53
+ from typing import TYPE_CHECKING, Any, TypeVar, cast
54
+
55
+ import networkx
56
+ import networkx.algorithms.bipartite
57
+ import pydantic
58
+ import zstandard
59
+
60
+ from lsst.daf.butler import (
61
+ Config,
62
+ DataCoordinate,
63
+ DataIdValue,
64
+ DatasetRef,
65
+ DatasetType,
66
+ DimensionDataAttacher,
67
+ DimensionDataExtractor,
68
+ DimensionGroup,
69
+ DimensionRecordSetDeserializer,
70
+ LimitedButler,
71
+ Quantum,
72
+ QuantumBackedButler,
73
+ SerializableDimensionData,
74
+ )
75
+ from lsst.daf.butler._rubin import generate_uuidv7
76
+ from lsst.daf.butler.datastore.record_data import DatastoreRecordData, SerializedDatastoreRecordData
77
+ from lsst.daf.butler.registry import ConflictingDefinitionError
78
+ from lsst.resources import ResourcePath, ResourcePathExpression
79
+ from lsst.utils.packages import Packages
80
+
81
+ from .. import automatic_connection_constants as acc
82
+ from ..pipeline import TaskDef
83
+ from ..pipeline_graph import (
84
+ PipelineGraph,
85
+ TaskImportMode,
86
+ TaskInitNode,
87
+ TaskNode,
88
+ compare_packages,
89
+ log_config_mismatch,
90
+ )
91
+ from ._common import (
92
+ BaseQuantumGraph,
93
+ BaseQuantumGraphReader,
94
+ BaseQuantumGraphWriter,
95
+ ConnectionName,
96
+ DataCoordinateValues,
97
+ DatasetInfo,
98
+ DatasetTypeName,
99
+ DatastoreName,
100
+ HeaderModel,
101
+ IncompleteQuantumGraphError,
102
+ QuantumIndex,
103
+ QuantumInfo,
104
+ TaskLabel,
105
+ )
106
+ from ._multiblock import DEFAULT_PAGE_SIZE, MultiblockReader, MultiblockWriter
107
+
108
+ if TYPE_CHECKING:
109
+ from ..config import PipelineTaskConfig
110
+ from ..graph import QgraphSummary, QuantumGraph
111
+
112
+ _LOG = logging.getLogger(__name__)
113
+
114
+
115
+ _T = TypeVar("_T", bound=pydantic.BaseModel)
116
+
117
+
118
+ class PredictedThinQuantumModel(pydantic.BaseModel):
119
+ """Data model for a quantum data ID and internal integer ID in a predicted
120
+ quantum graph.
121
+ """
122
+
123
+ quantum_index: QuantumIndex
124
+ """Internal integer ID for this quantum."""
125
+
126
+ data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
127
+ """Full (required and implied) data coordinate values for this quantum."""
128
+
129
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
130
+ # when we inherit those docstrings in our public classes.
131
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
132
+
133
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
134
+ """See `pydantic.BaseModel.copy`."""
135
+ return super().copy(*args, **kwargs)
136
+
137
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
138
+ """See `pydantic.BaseModel.model_dump`."""
139
+ return super().model_dump(*args, **kwargs)
140
+
141
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
142
+ """See `pydantic.BaseModel.model_dump_json`."""
143
+ return super().model_dump_json(*args, **kwargs)
144
+
145
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
146
+ """See `pydantic.BaseModel.model_copy`."""
147
+ return super().model_copy(*args, **kwargs)
148
+
149
+ @classmethod
150
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
151
+ """See `pydantic.BaseModel.model_construct`."""
152
+ return super().model_construct(*args, **kwargs)
153
+
154
+ @classmethod
155
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
156
+ """See `pydantic.BaseModel.model_json_schema`."""
157
+ return super().model_json_schema(*args, **kwargs)
158
+
159
+ @classmethod
160
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
161
+ """See `pydantic.BaseModel.model_validate`."""
162
+ return super().model_validate(*args, **kwargs)
163
+
164
+ @classmethod
165
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
166
+ """See `pydantic.BaseModel.model_validate_json`."""
167
+ return super().model_validate_json(*args, **kwargs)
168
+
169
+ @classmethod
170
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
171
+ """See `pydantic.BaseModel.model_validate_strings`."""
172
+ return super().model_validate_strings(*args, **kwargs)
173
+
174
+
175
+ class PredictedThinGraphModel(pydantic.BaseModel):
176
+ """Data model for the predicted quantum graph component that maps each
177
+ task label to the data IDs and internal integer IDs of its quanta.
178
+ """
179
+
180
+ quanta: dict[TaskLabel, list[PredictedThinQuantumModel]] = pydantic.Field(default_factory=dict)
181
+ """Minimal descriptions of all quanta, grouped by task label."""
182
+
183
+ edges: list[tuple[QuantumIndex, QuantumIndex]] = pydantic.Field(default_factory=list)
184
+ """Pairs of (predecessor, successor) internal integer quantum IDs."""
185
+
186
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
187
+ # when we inherit those docstrings in our public classes.
188
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
189
+
190
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
191
+ """See `pydantic.BaseModel.copy`."""
192
+ return super().copy(*args, **kwargs)
193
+
194
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
195
+ """See `pydantic.BaseModel.model_dump`."""
196
+ return super().model_dump(*args, **kwargs)
197
+
198
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
199
+ """See `pydantic.BaseModel.model_dump_json`."""
200
+ return super().model_dump_json(*args, **kwargs)
201
+
202
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
203
+ """See `pydantic.BaseModel.model_copy`."""
204
+ return super().model_copy(*args, **kwargs)
205
+
206
+ @classmethod
207
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
208
+ """See `pydantic.BaseModel.model_construct`."""
209
+ return super().model_construct(*args, **kwargs)
210
+
211
+ @classmethod
212
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
213
+ """See `pydantic.BaseModel.model_json_schema`."""
214
+ return super().model_json_schema(*args, **kwargs)
215
+
216
+ @classmethod
217
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
218
+ """See `pydantic.BaseModel.model_validate`."""
219
+ return super().model_validate(*args, **kwargs)
220
+
221
+ @classmethod
222
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
223
+ """See `pydantic.BaseModel.model_validate_json`."""
224
+ return super().model_validate_json(*args, **kwargs)
225
+
226
+ @classmethod
227
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
228
+ """See `pydantic.BaseModel.model_validate_strings`."""
229
+ return super().model_validate_strings(*args, **kwargs)
230
+
231
+
232
+ class PredictedDatasetModel(pydantic.BaseModel):
233
+ """Data model for the datasets in a predicted quantum graph file."""
234
+
235
+ dataset_id: uuid.UUID
236
+ """Universally unique ID for the dataset."""
237
+
238
+ dataset_type_name: DatasetTypeName
239
+ """Name of the type of this dataset.
240
+
241
+ This is always a parent dataset type name, not a component.
242
+
243
+ Note that full dataset type definitions are stored in the pipeline graph.
244
+ """
245
+
246
+ data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
247
+ """The full values (required and implied) of this dataset's data ID."""
248
+
249
+ run: str
250
+ """This dataset's RUN collection name."""
251
+
252
+ @classmethod
253
+ def from_dataset_ref(cls, ref: DatasetRef) -> PredictedDatasetModel:
254
+ """Construct from a butler `~lsst.daf.butler.DatasetRef`.
255
+
256
+ Parameters
257
+ ----------
258
+ ref : `lsst.daf.butler.DatasetRef`
259
+ Dataset reference.
260
+
261
+ Returns
262
+ -------
263
+ model : `PredictedDatasetModel`
264
+ Model for the dataset.
265
+ """
266
+ dataset_type_name, _ = DatasetType.splitDatasetTypeName(ref.datasetType.name)
267
+ return cls.model_construct(
268
+ dataset_id=ref.id,
269
+ dataset_type_name=dataset_type_name,
270
+ data_coordinate=list(ref.dataId.full_values),
271
+ run=ref.run,
272
+ )
273
+
274
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
275
+ # when we inherit those docstrings in our public classes.
276
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
277
+
278
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
279
+ """See `pydantic.BaseModel.copy`."""
280
+ return super().copy(*args, **kwargs)
281
+
282
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
283
+ """See `pydantic.BaseModel.model_dump`."""
284
+ return super().model_dump(*args, **kwargs)
285
+
286
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
287
+ """See `pydantic.BaseModel.model_dump_json`."""
288
+ return super().model_dump_json(*args, **kwargs)
289
+
290
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
291
+ """See `pydantic.BaseModel.model_copy`."""
292
+ return super().model_copy(*args, **kwargs)
293
+
294
+ @classmethod
295
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
296
+ """See `pydantic.BaseModel.model_construct`."""
297
+ return super().model_construct(*args, **kwargs)
298
+
299
+ @classmethod
300
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
301
+ """See `pydantic.BaseModel.model_json_schema`."""
302
+ return super().model_json_schema(*args, **kwargs)
303
+
304
+ @classmethod
305
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
306
+ """See `pydantic.BaseModel.model_validate`."""
307
+ return super().model_validate(*args, **kwargs)
308
+
309
+ @classmethod
310
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
311
+ """See `pydantic.BaseModel.model_validate_json`."""
312
+ return super().model_validate_json(*args, **kwargs)
313
+
314
+ @classmethod
315
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
316
+ """See `pydantic.BaseModel.model_validate_strings`."""
317
+ return super().model_validate_strings(*args, **kwargs)
318
+
319
+
320
+ class PredictedQuantumDatasetsModel(pydantic.BaseModel):
321
+ """Data model for a description of a single predicted quantum that includes
322
+ its inputs and outputs.
323
+ """
324
+
325
+ quantum_id: uuid.UUID
326
+ """Universally unique ID for the quantum."""
327
+
328
+ task_label: TaskLabel
329
+ """Label of the task.
330
+
331
+ Note that task label definitions are stored in the pipeline graph.
332
+ """
333
+
334
+ data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
335
+ """The full values (required and implied) of this quantum's data ID."""
336
+
337
+ inputs: dict[ConnectionName, list[PredictedDatasetModel]] = pydantic.Field(default_factory=dict)
338
+ """The input datasets to this quantum, grouped by connection name."""
339
+
340
+ outputs: dict[ConnectionName, list[PredictedDatasetModel]] = pydantic.Field(default_factory=dict)
341
+ """The datasets output by this quantum, grouped by connection name."""
342
+
343
+ datastore_records: dict[DatastoreName, SerializedDatastoreRecordData] = pydantic.Field(
344
+ default_factory=dict
345
+ )
346
+ """Datastore records for inputs to this quantum that are already present in
347
+ the data repository.
348
+ """
349
+
350
+ def iter_dataset_ids(self) -> Iterator[uuid.UUID]:
351
+ """Return an iterator over the UUIDs of all datasets referenced by this
352
+ quantum.
353
+
354
+ Returns
355
+ -------
356
+ iter : `~collections.abc.Iterator` [ `uuid.UUID` ]
357
+ Iterator over dataset IDs.
358
+ """
359
+ for datasets in itertools.chain(self.inputs.values(), self.outputs.values()):
360
+ for dataset in datasets:
361
+ yield dataset.dataset_id
362
+
363
+ def deserialize_datastore_records(self) -> dict[DatastoreName, DatastoreRecordData]:
364
+ """Deserialize the mapping of datastore records."""
365
+ return {
366
+ datastore_name: DatastoreRecordData.from_simple(serialized_records)
367
+ for datastore_name, serialized_records in self.datastore_records.items()
368
+ }
369
+
370
+ @classmethod
371
+ def from_execution_quantum(
372
+ cls, task_node: TaskNode, quantum: Quantum, quantum_id: uuid.UUID
373
+ ) -> PredictedQuantumDatasetsModel:
374
+ """Construct from an `lsst.daf.butler.Quantum` instance.
375
+
376
+ Parameters
377
+ ----------
378
+ task_node : `.pipeline_graph.TaskNode`
379
+ Task node from the pipeline graph.
380
+ quantum : `lsst.daf.butler.Quantum`
381
+ Quantum object.
382
+ quantum_id : `uuid.UUID`
383
+ ID for this quantum.
384
+
385
+ Returns
386
+ -------
387
+ model : `PredictedQuantumDatasetsModel`
388
+ Model for this quantum.
389
+ """
390
+ result: PredictedQuantumDatasetsModel = cls.model_construct(
391
+ quantum_id=quantum_id,
392
+ task_label=task_node.label,
393
+ data_coordinate=list(cast(DataCoordinate, quantum.dataId).full_values),
394
+ )
395
+ for read_edge in task_node.iter_all_inputs():
396
+ refs = sorted(quantum.inputs[read_edge.dataset_type_name], key=lambda ref: ref.dataId)
397
+ result.inputs[read_edge.connection_name] = [
398
+ PredictedDatasetModel.from_dataset_ref(ref) for ref in refs
399
+ ]
400
+ for write_edge in task_node.iter_all_outputs():
401
+ refs = sorted(quantum.outputs[write_edge.dataset_type_name], key=lambda ref: ref.dataId)
402
+ result.outputs[write_edge.connection_name] = [
403
+ PredictedDatasetModel.from_dataset_ref(ref) for ref in refs
404
+ ]
405
+ result.datastore_records = {
406
+ store_name: records.to_simple() for store_name, records in quantum.datastore_records.items()
407
+ }
408
+ return result
409
+
410
+ @classmethod
411
+ def from_old_quantum_graph_init(
412
+ cls, task_init_node: TaskInitNode, old_quantum_graph: QuantumGraph
413
+ ) -> PredictedQuantumDatasetsModel:
414
+ """Construct from the init-input and init-output dataset types of a
415
+ task in an old `QuantumGraph` instance.
416
+
417
+ Parameters
418
+ ----------
419
+ task_init_node : `.pipeline_graph.TaskInitNode`
420
+ Task init node from the pipeline graph.
421
+ old_quantum_graph : `QuantumGraph`
422
+ Quantum graph.
423
+
424
+ Returns
425
+ -------
426
+ model : `PredictedQuantumDatasetsModel`
427
+ Model for this "init" quantum.
428
+ """
429
+ task_def = old_quantum_graph.findTaskDefByLabel(task_init_node.label)
430
+ assert task_def is not None
431
+ init_input_refs = {
432
+ ref.datasetType.name: ref for ref in (old_quantum_graph.initInputRefs(task_def) or [])
433
+ }
434
+ init_output_refs = {
435
+ ref.datasetType.name: ref for ref in (old_quantum_graph.initOutputRefs(task_def) or [])
436
+ }
437
+ init_input_ids = {ref.id for ref in init_input_refs.values()}
438
+ result: PredictedQuantumDatasetsModel = cls.model_construct(
439
+ quantum_id=generate_uuidv7(), task_label=task_init_node.label
440
+ )
441
+ for read_edge in task_init_node.iter_all_inputs():
442
+ ref = init_input_refs[read_edge.dataset_type_name]
443
+ result.inputs[read_edge.connection_name] = [PredictedDatasetModel.from_dataset_ref(ref)]
444
+ for write_edge in task_init_node.iter_all_outputs():
445
+ ref = init_output_refs[write_edge.dataset_type_name]
446
+ result.outputs[write_edge.connection_name] = [PredictedDatasetModel.from_dataset_ref(ref)]
447
+ datastore_records: dict[str, DatastoreRecordData] = {}
448
+ for quantum in old_quantum_graph.get_task_quanta(task_init_node.label).values():
449
+ for store_name, records in quantum.datastore_records.items():
450
+ subset = records.subset(init_input_ids)
451
+ if subset is not None:
452
+ datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)
453
+ break  # All quanta have the same init-inputs, so we only need one.
454
+ result.datastore_records = {
455
+ store_name: records.to_simple() for store_name, records in datastore_records.items()
456
+ }
457
+ return result
458
+
459
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
460
+ # when we inherit those docstrings in our public classes.
461
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
462
+
463
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
464
+ """See `pydantic.BaseModel.copy`."""
465
+ return super().copy(*args, **kwargs)
466
+
467
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
468
+ """See `pydantic.BaseModel.model_dump`."""
469
+ return super().model_dump(*args, **kwargs)
470
+
471
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
472
+ """See `pydantic.BaseModel.model_dump_json`."""
473
+ return super().model_dump_json(*args, **kwargs)
474
+
475
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
476
+ """See `pydantic.BaseModel.model_copy`."""
477
+ return super().model_copy(*args, **kwargs)
478
+
479
+ @classmethod
480
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
481
+ """See `pydantic.BaseModel.model_construct`."""
482
+ return super().model_construct(*args, **kwargs)
483
+
484
+ @classmethod
485
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
486
+ """See `pydantic.BaseModel.model_json_schema`."""
487
+ return super().model_json_schema(*args, **kwargs)
488
+
489
+ @classmethod
490
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
491
+ """See `pydantic.BaseModel.model_validate`."""
492
+ return super().model_validate(*args, **kwargs)
493
+
494
+ @classmethod
495
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
496
+ """See `pydantic.BaseModel.model_validate_json`."""
497
+ return super().model_validate_json(*args, **kwargs)
498
+
499
+ @classmethod
500
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
501
+ """See `pydantic.BaseModel.model_validate_strings`."""
502
+ return super().model_validate_strings(*args, **kwargs)
503
+
504
+
505
+ class PredictedInitQuantaModel(pydantic.RootModel):
506
+ """Data model for the init-inputs and init-outputs of a predicted quantum
507
+ graph.
508
+ """
509
+
510
+ root: list[PredictedQuantumDatasetsModel] = pydantic.Field(default_factory=list)
511
+ """List of special "init" quanta: one for each task, and another for global
512
+ init-outputs.
513
+ """
514
+
515
+ def update_from_old_quantum_graph(self, old_quantum_graph: QuantumGraph) -> None:
516
+ """Update this model in-place by extracting from an old `QuantumGraph`
517
+ instance.
518
+
519
+ Parameters
520
+ ----------
521
+ old_quantum_graph : `QuantumGraph`
522
+ Quantum graph.
523
+ """
524
+ global_init_quantum = PredictedQuantumDatasetsModel.model_construct(
525
+ quantum_id=generate_uuidv7(), task_label=""
526
+ )
527
+ for ref in old_quantum_graph.globalInitOutputRefs():
528
+ global_init_quantum.outputs[ref.datasetType.name] = [PredictedDatasetModel.from_dataset_ref(ref)]
529
+ self.root.append(global_init_quantum)
530
+ for task_node in old_quantum_graph.pipeline_graph.tasks.values():
531
+ self.root.append(
532
+ PredictedQuantumDatasetsModel.from_old_quantum_graph_init(task_node.init, old_quantum_graph)
533
+ )
534
+
535
+ # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
536
+ # when we inherit those docstrings in our public classes.
537
+ if "sphinx" in sys.modules and not TYPE_CHECKING:
538
+
539
+ def copy(self, *args: Any, **kwargs: Any) -> Any:
540
+ """See `pydantic.BaseModel.copy`."""
541
+ return super().copy(*args, **kwargs)
542
+
543
+ def model_dump(self, *args: Any, **kwargs: Any) -> Any:
544
+ """See `pydantic.BaseModel.model_dump`."""
545
+ return super().model_dump(*args, **kwargs)
546
+
547
+ def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
548
+ """See `pydantic.BaseModel.model_dump_json`."""
549
+ return super().model_dump_json(*args, **kwargs)
550
+
551
+ def model_copy(self, *args: Any, **kwargs: Any) -> Any:
552
+ """See `pydantic.BaseModel.model_copy`."""
553
+ return super().model_copy(*args, **kwargs)
554
+
555
+ @classmethod
556
+ def model_construct(cls, *args: Any, **kwargs: Any) -> Any: # type: ignore[misc, override]
557
+ """See `pydantic.BaseModel.model_construct`."""
558
+ return super().model_construct(*args, **kwargs)
559
+
560
+ @classmethod
561
+ def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
562
+ """See `pydantic.BaseModel.model_json_schema`."""
563
+ return super().model_json_schema(*args, **kwargs)
564
+
565
+ @classmethod
566
+ def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
567
+ """See `pydantic.BaseModel.model_validate`."""
568
+ return super().model_validate(*args, **kwargs)
569
+
570
+ @classmethod
571
+ def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
572
+ """See `pydantic.BaseModel.model_validate_json`."""
573
+ return super().model_validate_json(*args, **kwargs)
574
+
575
+ @classmethod
576
+ def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
577
+ """See `pydantic.BaseModel.model_validate_strings`."""
578
+ return super().model_validate_strings(*args, **kwargs)
579
+
580
+
581
+ class PredictedQuantumInfo(QuantumInfo):
582
+ """A typed dictionary that annotates the attributes of the NetworkX graph
583
+ node data for a predicted quantum.
584
+
585
+ Since NetworkX types are not generic over their node mapping type, this has
586
+ to be used explicitly, e.g.::
587
+
588
+ node_data: PredictedQuantumInfo = xgraph.nodes[quantum_id]
589
+
590
+ where ``xgraph`` can be either `PredictedQuantumGraph.quantum_only_xgraph`
591
+ or `PredictedQuantumGraph.bipartite_xgraph`.
592
+ """
593
+
594
+ quantum: Quantum
595
+ """Quantum object that can be passed directly to an executor.
596
+
597
+ This attribute is only present if
598
+ `PredictedQuantumGraph.build_execution_quanta` has been run on this node's
599
+ quantum ID already.
600
+ """
601
+
602
+
603
+ class PredictedDatasetInfo(DatasetInfo):
604
+ """A typed dictionary that annotates the attributes of the NetworkX graph
605
+ node data for a dataset.
606
+
607
+ Since NetworkX types are not generic over their node mapping type, this has
608
+ to be used explicitly, e.g.::
609
+
610
+ node_data: PredictedDatasetInfo = xgraph.nodes[dataset_id]
611
+
612
+ where ``xgraph`` is from the `PredictedQuantumGraph.bipartite_xgraph`
613
+ property.
614
+ """
615
+
616
+
617
+ class PredictedQuantumGraph(BaseQuantumGraph):
618
+ """A directed acyclic graph that predicts a processing run and supports it
619
+ during execution.
620
+
621
+ Parameters
622
+ ----------
623
+ components : `PredictedQuantumGraphComponents`
624
+ A struct of components used to construct the graph.
625
+
626
+ Notes
627
+ -----
628
+ Iteration over a `PredictedQuantumGraph` yields loaded quantum IDs in
629
+ deterministic topological order (but the tiebreaker is unspecified). The
630
+ `len` of a `PredictedQuantumGraph` is the number of loaded non-init quanta,
631
+ i.e. the same as the number of quanta iterated over.
632
+ """
633
+
634
+ def __init__(self, components: PredictedQuantumGraphComponents):
635
+ if not components.header.graph_type == "predicted":
636
+ raise TypeError(f"Header is for a {components.header.graph_type!r} graph, not 'predicted'.")
637
+ super().__init__(components.header, components.pipeline_graph)
638
+ self._quantum_only_xgraph = networkx.DiGraph()
639
+ self._bipartite_xgraph = networkx.DiGraph()
640
+ self._quanta_by_task_label: dict[str, dict[DataCoordinate, uuid.UUID]] = {
641
+ task_label: {} for task_label in self.pipeline_graph.tasks.keys()
642
+ }
643
+ self._datasets_by_type: dict[str, dict[DataCoordinate, uuid.UUID]] = {
644
+ dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
645
+ }
646
+ self._datasets_by_type[self.pipeline_graph.packages_dataset_type.name] = {}
647
+ self._dimension_data = components.dimension_data
648
+ self._add_init_quanta(components.init_quanta)
649
+ self._quantum_datasets: dict[uuid.UUID, PredictedQuantumDatasetsModel] = {}
650
+ self._expanded_data_ids: dict[DataCoordinate, DataCoordinate] = {}
651
+ self._add_thin_graph(components.thin_graph, components.quantum_indices)
652
+ for quantum_datasets in components.quantum_datasets.values():
653
+ self._add_quantum_datasets(quantum_datasets)
654
+ if not components.thin_graph.edges:
655
+ # If we loaded the thin_graph, we've already populated this graph.
656
+ self._quantum_only_xgraph.update(
657
+ networkx.algorithms.bipartite.projected_graph(
658
+ networkx.DiGraph(self._bipartite_xgraph),
659
+ self._quantum_only_xgraph.nodes.keys(),
660
+ )
661
+ )
662
+ if _LOG.isEnabledFor(logging.DEBUG):
663
+ for quantum_id in self:
664
+ _LOG.debug(
665
+ "%s: %s @ %s",
666
+ quantum_id,
667
+ self._quantum_only_xgraph.nodes[quantum_id]["task_label"],
668
+ self._quantum_only_xgraph.nodes[quantum_id]["data_id"].required,
669
+ )
670
+
671
+ def _add_init_quanta(self, component: PredictedInitQuantaModel) -> None:
672
+ self._init_quanta = {q.task_label: q for q in component.root}
673
+ empty_data_id = DataCoordinate.make_empty(self.pipeline_graph.universe)
674
+ for quantum_datasets in self._init_quanta.values():
675
+ for init_datasets in itertools.chain(
676
+ quantum_datasets.inputs.values(), quantum_datasets.outputs.values()
677
+ ):
678
+ for init_dataset in init_datasets:
679
+ self._datasets_by_type[init_dataset.dataset_type_name][empty_data_id] = (
680
+ init_dataset.dataset_id
681
+ )
682
+ _LOG.debug(
683
+ "%s: %s @ init",
684
+ quantum_datasets.quantum_id,
685
+ quantum_datasets.task_label,
686
+ )
687
+
688
+ def _add_thin_graph(
689
+ self, component: PredictedThinGraphModel, indices: Mapping[uuid.UUID, QuantumIndex]
690
+ ) -> None:
691
+ uuid_by_index = {v: k for k, v in indices.items()}
692
+ for index1, index2 in component.edges:
693
+ self._quantum_only_xgraph.add_edge(uuid_by_index[index1], uuid_by_index[index2])
694
+ for task_label, thin_quanta_for_task in component.quanta.items():
695
+ for thin_quantum in thin_quanta_for_task:
696
+ self._add_quantum(
697
+ uuid_by_index[thin_quantum.quantum_index],
698
+ task_label,
699
+ thin_quantum.data_coordinate,
700
+ )
701
+
702
+ def _add_quantum_datasets(self, quantum_datasets: PredictedQuantumDatasetsModel) -> None:
703
+ self._quantum_datasets[quantum_datasets.quantum_id] = quantum_datasets
704
+ self._add_quantum(
705
+ quantum_datasets.quantum_id, quantum_datasets.task_label, quantum_datasets.data_coordinate
706
+ )
707
+ task_node = self.pipeline_graph.tasks[quantum_datasets.task_label]
708
+ for connection_name, input_datasets in quantum_datasets.inputs.items():
709
+ pipeline_edge = task_node.get_input_edge(connection_name)
710
+ for input_dataset in input_datasets:
711
+ self._add_dataset(input_dataset)
712
+ self._bipartite_xgraph.add_edge(
713
+ input_dataset.dataset_id,
714
+ quantum_datasets.quantum_id,
715
+ key=connection_name,
716
+ is_read=True,
717
+ )
718
+ # There might be multiple input connections for the same
719
+ # dataset type.
720
+ self._bipartite_xgraph.edges[
721
+ input_dataset.dataset_id, quantum_datasets.quantum_id
722
+ ].setdefault("pipeline_edges", []).append(pipeline_edge)
723
+ for connection_name, output_datasets in quantum_datasets.outputs.items():
724
+ pipeline_edges = [task_node.get_output_edge(connection_name)]
725
+ for output_dataset in output_datasets:
726
+ self._add_dataset(output_dataset)
727
+ self._bipartite_xgraph.add_edge(
728
+ quantum_datasets.quantum_id,
729
+ output_dataset.dataset_id,
730
+ key=connection_name,
731
+ is_read=False,
732
+ pipeline_edges=pipeline_edges,
733
+ )
734
+
735
+ def _add_quantum(
736
+ self, quantum_id: uuid.UUID, task_label: str, data_coordinate_values: Sequence[DataIdValue]
737
+ ) -> None:
738
+ task_node = self.pipeline_graph.tasks[task_label]
739
+ self._quantum_only_xgraph.add_node(quantum_id, task_label=task_label, pipeline_node=task_node)
740
+ self._bipartite_xgraph.add_node(quantum_id, task_label=task_label, pipeline_node=task_node)
741
+ data_coordinate_values = tuple(data_coordinate_values)
742
+ dimensions = self.pipeline_graph.tasks[task_label].dimensions
743
+ data_id = DataCoordinate.from_full_values(dimensions, tuple(data_coordinate_values))
744
+ self._quantum_only_xgraph.nodes[quantum_id].setdefault("data_id", data_id)
745
+ self._bipartite_xgraph.nodes[quantum_id].setdefault("data_id", data_id)
746
+ self._quanta_by_task_label[task_label][data_id] = quantum_id
747
+
748
+ def _add_dataset(self, model: PredictedDatasetModel) -> None:
749
+ dataset_type_node = self.pipeline_graph.dataset_types[model.dataset_type_name]
750
+ data_id = DataCoordinate.from_full_values(dataset_type_node.dimensions, tuple(model.data_coordinate))
751
+ self._bipartite_xgraph.add_node(
752
+ model.dataset_id,
753
+ dataset_type_name=dataset_type_node.name,
754
+ pipeline_node=dataset_type_node,
755
+ run=model.run,
756
+ )
757
+ self._bipartite_xgraph.nodes[model.dataset_id].setdefault("data_id", data_id)
758
+ self._datasets_by_type[model.dataset_type_name][data_id] = model.dataset_id
759
+
760
+ @classmethod
761
+ def open(
762
+ cls,
763
+ uri: ResourcePathExpression,
764
+ page_size: int = DEFAULT_PAGE_SIZE,
765
+ import_mode: TaskImportMode = TaskImportMode.ASSUME_CONSISTENT_EDGES,
766
+ ) -> AbstractContextManager[PredictedQuantumGraphReader]:
767
+ """Open a quantum graph and return a reader to load from it.
768
+
769
+ Parameters
770
+ ----------
771
+ uri : convertible to `lsst.resources.ResourcePath`
772
+ URI to open. Should have a ``.qg`` extension.
773
+ page_size : `int`, optional
774
+ Approximate number of bytes to read at once from address files.
775
+ Note that this does not set a page size for *all* reads, but it
776
+ does affect the smallest, most numerous reads.
777
+ import_mode : `..pipeline_graph.TaskImportMode`, optional
778
+ How to handle importing the task classes referenced in the pipeline
779
+ graph.
780
+
781
+ Returns
782
+ -------
783
+ reader : `contextlib.AbstractContextManager` [ \
784
+ `PredictedQuantumGraphReader` ]
785
+ A context manager that returns the reader when entered.
786
+ """
787
+ return PredictedQuantumGraphReader.open(uri, page_size=page_size, import_mode=import_mode)
788
+
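A minimal usage sketch (an editorial addition, not part of the diffed file), assuming the class is re-exported from the new ``lsst.pipe.base.quantum_graph`` package and that ``graph.qg`` is a hypothetical file path; only the open/close pattern is shown, since the reader's own methods are defined elsewhere in this package:

    from lsst.pipe.base.quantum_graph import PredictedQuantumGraph

    with PredictedQuantumGraph.open("graph.qg") as reader:
        ...  # use the PredictedQuantumGraphReader to load selected components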
789
+ @classmethod
790
+ def read_execution_quanta(
791
+ cls,
792
+ uri: ResourcePathExpression,
793
+ quantum_ids: Iterable[uuid.UUID] | None = None,
794
+ page_size: int = DEFAULT_PAGE_SIZE,
795
+ ) -> PredictedQuantumGraph:
796
+ """Read one or more executable quanta from a quantum graph file.
797
+
798
+ Parameters
799
+ ----------
800
+ uri : convertible to `lsst.resources.ResourcePath`
801
+ URI to open. Should have a ``.qg`` extension for new quantum graph
802
+ files, or ``.qgraph`` for the old format.
803
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
804
+ Iterable of quantum IDs to load. If not provided, all quanta will
805
+ be loaded. The UUIDs of special init quanta will be ignored.
806
+ page_size : `int`, optional
807
+ Approximate number of bytes to read at once from address files.
808
+ Note that this does not set a page size for *all* reads, but it
809
+ does affect the smallest, most numerous reads.
810
+
811
+ Returns
812
+ -------
813
+ quantum_graph : `PredictedQuantumGraph`
814
+ A quantum graph that can build execution quanta for all of the
815
+ given IDs.
816
+ """
817
+ return PredictedQuantumGraphComponents.read_execution_quanta(
818
+ uri,
819
+ quantum_ids,
820
+ page_size=page_size,
821
+ ).assemble()
822
+
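A hedged end-to-end sketch of the read-then-execute path, using only APIs defined in this module (``graph.qg`` is a hypothetical path; in practice ``quantum_ids`` would usually restrict the load):

    from lsst.pipe.base.quantum_graph import PredictedQuantumGraph

    # Load every quantum (pass quantum_ids=[...] to load a subset instead).
    qg = PredictedQuantumGraph.read_execution_quanta("graph.qg")
    # Build lsst.daf.butler.Quantum objects, keyed by quantum UUID.
    quanta = qg.build_execution_quanta()
    for quantum_id in qg:  # deterministic topological order
        quantum = quanta[quantum_id]  # ready to hand to an executor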
823
+ @property
824
+ def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
825
+ """A nested mapping of all quanta, keyed first by task name and then by
826
+ data ID.
827
+
828
+ Notes
829
+ -----
830
+ This is populated by the ``thin_graph`` component (all quanta are
831
+ added) and the ``quantum_datasets`` component (only loaded quanta are
832
+ added). All tasks in the pipeline graph are included, even if none of
833
+ their quanta were loaded (i.e. nested mappings may be empty).
834
+
835
+ The returned object may be an internal dictionary; as the type
836
+ annotation indicates, it should not be modified in place.
837
+ """
838
+ return self._quanta_by_task_label
839
+
840
+ @property
841
+ def datasets_by_type(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
842
+ """A nested mapping of all datasets, keyed first by dataset type name
843
+ and then by data ID.
844
+
845
+ Notes
846
+ -----
847
+ This is populated only by the ``quantum_datasets`` and ``init_quanta``
848
+ components, and only datasets referenced by loaded quanta are present.
849
+ All dataset types in the pipeline graph are included, even if none of
850
+ their datasets were loaded (i.e. nested mappings may be empty).
851
+
852
+ The returned object may be an internal dictionary; as the type
853
+ annotation indicates, it should not be modified in place.
854
+ """
855
+ return self._datasets_by_type
856
+
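For orientation, a small editorial sketch of how these two mappings are keyed (assuming ``qg`` is a loaded `PredictedQuantumGraph`):

    # Task label -> data ID -> quantum UUID.
    for task_label, quanta_for_task in qg.quanta_by_task.items():
        for data_id, quantum_id in quanta_for_task.items():
            print(task_label, data_id, quantum_id)

    # Dataset type name -> data ID -> dataset UUID.
    for dataset_type_name, datasets_for_type in qg.datasets_by_type.items():
        for data_id, dataset_id in datasets_for_type.items():
            print(dataset_type_name, data_id, dataset_id)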
857
+ @property
858
+ def quantum_only_xgraph(self) -> networkx.DiGraph:
859
+ """A directed acyclic graph with quanta as nodes and datasets elided.
860
+
861
+ Notes
862
+ -----
863
+ Node keys are quantum UUIDs, and are populated by the ``thin_graph``
864
+ component (all nodes and edges) and ``quantum_datasets`` component
865
+ (only those that were loaded).
866
+
867
+ Node state dictionaries are described by the
868
+ `PredictedQuantumInfo` type.
869
+
870
+ The returned object is a read-only view of an internal one.
871
+ """
872
+ return self._quantum_only_xgraph.copy(as_view=True)
873
+
874
+ @property
875
+ def bipartite_xgraph(self) -> networkx.MultiDiGraph:
876
+ """A directed acyclic graph with quantum and dataset nodes.
877
+
878
+ This graph never includes init-input and init-output datasets.
879
+
880
+ Notes
881
+ -----
882
+ Node keys are quantum or dataset UUIDs. Nodes for quanta are present
883
+ if the ``thin_graph`` component is loaded (all nodes) or if the
884
+ ``quantum_datasets`` component is loaded (just loaded quanta). Edges
885
+ and dataset nodes are only present for quanta whose
886
+ ``quantum_datasets`` were loaded.
887
+
888
+ Node state dictionaries are described by the
889
+ `PredictedQuantumInfo` and `PredictedDatasetInfo` types.
890
+
891
+ The returned object is a read-only view of an internal one.
892
+ """
893
+ return self._bipartite_xgraph.copy(as_view=True)
894
+
895
+ @property
896
+ def dimension_data(self) -> DimensionDataAttacher | None:
897
+ """All dimension records needed to expand the data IDS in the graph.
898
+
899
+ This may be `None` if the dimension data was not loaded. If all
900
+ execution quanta have been built, all records are guaranteed to have
901
+ been deserialized and the ``records`` attribute is complete. In other
902
+ cases some records may still only be present in the ``deserializers``
903
+ attribute.
904
+ """
905
+ return self._dimension_data
906
+
907
+ def __iter__(self) -> Iterator[uuid.UUID]:
908
+ for quanta_for_task in self.quanta_by_task.values():
909
+ for data_id in sorted(quanta_for_task.keys()):
910
+ yield quanta_for_task[data_id]
911
+
912
+ def __len__(self) -> int:
913
+ return len(self._quantum_only_xgraph)
914
+
915
+ def get_init_inputs(self, task_label: str) -> dict[ConnectionName, DatasetRef]:
916
+ """Return the init-input datasets for the given task.
917
+
918
+ Parameters
919
+ ----------
920
+ task_label : `str`
921
+ Label of the task.
922
+
923
+ Returns
924
+ -------
925
+ init_inputs : `dict` [ `str`, `lsst.daf.butler.DatasetRef` ]
926
+ Dataset references for init-input datasets, keyed by connection
927
+ name. Dataset type storage classes match the task connection
928
+ declarations, not necessarily the data repository, and may be
929
+ components.
930
+ """
931
+ if self._init_quanta is None:
932
+ raise IncompleteQuantumGraphError("The init_quanta component was not loaded.")
933
+ task_init_node = self.pipeline_graph.tasks[task_label].init
934
+ return {
935
+ connection_name: task_init_node.inputs[connection_name].adapt_dataset_ref(
936
+ self._make_init_ref(datasets[0])
937
+ )
938
+ for connection_name, datasets in self._init_quanta[task_label].inputs.items()
939
+ }
940
+
941
+ def get_init_outputs(self, task_label: str) -> dict[ConnectionName, DatasetRef]:
942
+ """Return the init-output datasets for the given task.
943
+
944
+ Parameters
945
+ ----------
946
+ task_label : `str`
947
+ Label of the task. ``""`` may be used to get global init-outputs.
948
+
949
+ Returns
950
+ -------
951
+ init_outputs : `dict` [ `str`, `lsst.daf.butler.DatasetRef` ]
952
+ Dataset references for init-outputs datasets, keyed by connection
953
+ name. Dataset types storage classes match the task connection
954
+ declarations, not necessarily the data repository.
955
+ """
956
+ if self._init_quanta is None:
957
+ raise IncompleteQuantumGraphError("The init_quanta component was not loaded.")
958
+ if not task_label:
959
+ (datasets,) = self._init_quanta[""].outputs.values()
960
+ return {
961
+ acc.PACKAGES_INIT_OUTPUT_NAME: DatasetRef(
962
+ self.pipeline_graph.packages_dataset_type,
963
+ DataCoordinate.make_empty(self.pipeline_graph.universe),
964
+ run=datasets[0].run,
965
+ id=datasets[0].dataset_id,
966
+ conform=False,
967
+ )
968
+ }
969
+ task_init_node = self.pipeline_graph.tasks[task_label].init
970
+ result: dict[ConnectionName, DatasetRef] = {}
971
+ for connection_name, datasets in self._init_quanta[task_label].outputs.items():
972
+ if connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
973
+ edge = task_init_node.config_output
974
+ else:
975
+ edge = task_init_node.outputs[connection_name]
976
+ result[connection_name] = edge.adapt_dataset_ref(self._make_init_ref(datasets[0]))
977
+ return result
978
+
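A brief editorial sketch of the init-dataset accessors (assuming ``qg`` has its ``init_quanta`` component loaded; ``"isr"`` is a hypothetical task label):

    init_inputs = qg.get_init_inputs("isr")     # {connection name: DatasetRef}
    init_outputs = qg.get_init_outputs("isr")   # includes the config init-output
    global_outputs = qg.get_init_outputs("")    # global init-outputs (packages dataset)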
979
+ def _make_init_ref(self, dataset: PredictedDatasetModel) -> DatasetRef:
980
+ dataset_type = self.pipeline_graph.dataset_types[dataset.dataset_type_name].dataset_type
981
+ return DatasetRef(
982
+ dataset_type,
983
+ DataCoordinate.make_empty(self.pipeline_graph.universe),
984
+ run=dataset.run,
985
+ id=dataset.dataset_id,
986
+ conform=False,
987
+ )
988
+
989
+ def build_execution_quanta(
990
+ self,
991
+ quantum_ids: Iterable[uuid.UUID] | None = None,
992
+ task_label: str | None = None,
993
+ ) -> dict[uuid.UUID, Quantum]:
994
+ """Build `lsst.daf.butler.Quantum` objects suitable for executing
995
+ tasks.
996
+
997
+ In addition to returning the quantum objects directly, this also causes
998
+ the `quantum_only_xgraph` and `bipartite_xgraph` graphs to include a
999
+ ``quantum`` attribute for the affected quanta.
1000
+
1001
+ Parameters
1002
+ ----------
1003
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
1004
+ IDs of all quanta to return. If not provided, all quanta for the
1005
+ given task label (if given) or graph are returned.
1006
+ task_label : `str`, optional
1007
+ Task label whose quanta should be generated. Ignored if
1008
+ ``quantum_ids`` is not `None`.
1009
+
1010
+ Returns
1011
+ -------
1012
+ quanta : `dict` [ `uuid.UUID`, `lsst.daf.butler.Quantum` ]
1013
+ Mapping of quanta, keyed by UUID. All dataset types are adapted to
1014
+ the task's storage class declarations and inputs may be components.
1015
+ All data IDs have dimension records attached.
1016
+ """
1017
+ if not self._init_quanta:
1018
+ raise IncompleteQuantumGraphError(
1019
+ "Cannot build execution quanta without loading the ``init_quanta`` component."
1020
+ )
1021
+ if quantum_ids is None:
1022
+ if task_label is not None:
1023
+ quantum_ids = self._quanta_by_task_label[task_label].values()
1024
+ else:
1025
+ quantum_ids = self._quantum_only_xgraph.nodes.keys()
1026
+ else:
1027
+ # Guard against single-pass iterators.
1028
+ quantum_ids = list(quantum_ids)
1029
+ del task_label # make sure we don't accidentally use this.
1030
+ result: dict[uuid.UUID, Quantum] = {}
1031
+ self._expand_execution_quantum_data_ids(quantum_ids)
1032
+ task_init_datastore_records: dict[TaskLabel, dict[DatastoreName, DatastoreRecordData]] = {}
1033
+ for quantum_id in quantum_ids:
1034
+ quantum_node_dict: PredictedQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
1035
+ if "quantum" in quantum_node_dict:
1036
+ result[quantum_id] = quantum_node_dict["quantum"]
1037
+ continue
1038
+ # We've declared the info dict keys to all be required because that
1039
+ # saves a lot of casting, but the reality is that they can either
1040
+ # be fully populated or totally unpopulated. But that makes mypy
1041
+ # think the check above always succeeds.
1042
+ try: # type:ignore [unreachable]
1043
+ quantum_datasets = self._quantum_datasets[quantum_id]
1044
+ except KeyError:
1045
+ raise IncompleteQuantumGraphError(
1046
+ f"Full quantum information for {quantum_id} was not loaded."
1047
+ ) from None
1048
+ task_node = self.pipeline_graph.tasks[quantum_datasets.task_label]
1049
+ quantum_data_id = self._expanded_data_ids[self._bipartite_xgraph.nodes[quantum_id]["data_id"]]
1050
+ inputs = self._build_execution_quantum_refs(task_node, quantum_datasets.inputs)
1051
+ outputs = self._build_execution_quantum_refs(task_node, quantum_datasets.outputs)
1052
+ if task_node.label not in task_init_datastore_records:
1053
+ task_init_datastore_records[task_node.label] = self._init_quanta[
1054
+ task_node.label
1055
+ ].deserialize_datastore_records()
1056
+ quantum = Quantum(
1057
+ taskName=task_node.task_class_name,
1058
+ taskClass=task_node.task_class,
1059
+ dataId=quantum_data_id,
1060
+ initInputs={
1061
+ ref.datasetType: ref for ref in self.get_init_inputs(quantum_datasets.task_label).values()
1062
+ },
1063
+ inputs=inputs,
1064
+ outputs=outputs,
1065
+ datastore_records=DatastoreRecordData.merge_mappings(
1066
+ quantum_datasets.deserialize_datastore_records(),
1067
+ task_init_datastore_records[task_node.label],
1068
+ ),
1069
+ )
1070
+ self._quantum_only_xgraph.nodes[quantum_id]["quantum"] = quantum
1071
+ self._bipartite_xgraph.nodes[quantum_id]["quantum"] = quantum
1072
+ result[quantum_id] = quantum
1073
+ return result
1074
+
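A short editorial sketch of the per-task form and the caching behavior described above (assuming ``qg`` has the required components loaded; ``"isr"`` is a hypothetical task label):

    quanta = qg.build_execution_quanta(task_label="isr")
    for quantum_id, quantum in quanta.items():
        # The built Quantum is also cached on the graph's node data.
        assert qg.quantum_only_xgraph.nodes[quantum_id]["quantum"] is quantum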
1075
+ def _expand_execution_quantum_data_ids(self, quantum_ids: Iterable[uuid.UUID]) -> None:
1076
+ if self._dimension_data is None:
1077
+ raise IncompleteQuantumGraphError(
1078
+ "Cannot build execution quanta without loading the ``dimension_data`` component."
1079
+ )
1080
+ data_ids_to_expand: dict[DimensionGroup, set[DataCoordinate]] = defaultdict(set)
1081
+ for quantum_id in quantum_ids:
1082
+ data_id: DataCoordinate = self._bipartite_xgraph.nodes[quantum_id]["data_id"]
1083
+ if data_id.hasRecords():
1084
+ self._expanded_data_ids[data_id] = data_id
1085
+ else:
1086
+ data_ids_to_expand[data_id.dimensions].add(data_id)
1087
+ for dataset_id in itertools.chain(
1088
+ self._bipartite_xgraph.predecessors(quantum_id),
1089
+ self._bipartite_xgraph.successors(quantum_id),
1090
+ ):
1091
+ data_id = self._bipartite_xgraph.nodes[dataset_id]["data_id"]
1092
+ if data_id.hasRecords():
1093
+ self._expanded_data_ids[data_id] = data_id
1094
+ else:
1095
+ data_ids_to_expand[data_id.dimensions].add(data_id)
1096
+ for dimensions, data_ids_for_dimensions in data_ids_to_expand.items():
1097
+ self._expanded_data_ids.update(
1098
+ (d, d) for d in self._dimension_data.attach(dimensions, data_ids_for_dimensions)
1099
+ )
1100
+
1101
+ def _build_execution_quantum_refs(
1102
+ self, task_node: TaskNode, model_mapping: dict[ConnectionName, list[PredictedDatasetModel]]
1103
+ ) -> dict[DatasetType, list[DatasetRef]]:
1104
+ results: dict[DatasetType, list[DatasetRef]] = {}
1105
+ for connection_name, datasets in model_mapping.items():
1106
+ edge = task_node.get_edge(connection_name)
1107
+ dataset_type = edge.adapt_dataset_type(
1108
+ self.pipeline_graph.dataset_types[edge.parent_dataset_type_name].dataset_type
1109
+ )
1110
+ results[dataset_type] = [self._make_general_ref(dataset_type, d.dataset_id) for d in datasets]
1111
+ return results
1112
+
1113
+ def _make_general_ref(self, dataset_type: DatasetType, dataset_id: uuid.UUID) -> DatasetRef:
1114
+ node_state = self._bipartite_xgraph.nodes[dataset_id]
1115
+ data_id = self._expanded_data_ids[node_state["data_id"]]
1116
+ return DatasetRef(dataset_type, data_id, run=node_state["run"], id=dataset_id)
1117
+
1118
+ def make_init_qbb(
1119
+ self,
1120
+ butler_config: Config | ResourcePathExpression,
1121
+ *,
1122
+ config_search_paths: Iterable[str] | None = None,
1123
+ ) -> QuantumBackedButler:
1124
+ """Construct an quantum-backed butler suitable for reading and writing
1125
+ init input and init output datasets, respectively.
1126
+
1127
+ This only requires the ``init_quanta`` component to have been loaded.
1128
+
1129
+ Parameters
1130
+ ----------
1131
+ butler_config : `~lsst.daf.butler.Config` or \
1132
+ `~lsst.resources.ResourcePathExpression`
1133
+ A butler repository root, configuration filename, or configuration
1134
+ instance.
1135
+ config_search_paths : `~collections.abc.Iterable` [ `str` ], optional
1136
+ Additional search paths for butler configuration.
1137
+
1138
+ Returns
1139
+ -------
1140
+ qbb : `~lsst.daf.butler.QuantumBackedButler`
1141
+ A limited butler that can ``get`` init-input datasets and ``put``
1142
+ init-output datasets.
1143
+ """
1144
+ # Collect all init input/output dataset IDs.
1145
+ predicted_inputs: set[uuid.UUID] = set()
1146
+ predicted_outputs: set[uuid.UUID] = set()
1147
+ datastore_record_maps: list[dict[DatastoreName, DatastoreRecordData]] = []
1148
+ for init_quantum_datasets in self._init_quanta.values():
1149
+ predicted_inputs.update(
1150
+ d.dataset_id for d in itertools.chain.from_iterable(init_quantum_datasets.inputs.values())
1151
+ )
1152
+ predicted_outputs.update(
1153
+ d.dataset_id for d in itertools.chain.from_iterable(init_quantum_datasets.outputs.values())
1154
+ )
1155
+ datastore_record_maps.append(
1156
+ {
1157
+ datastore_name: DatastoreRecordData.from_simple(serialized_records)
1158
+ for datastore_name, serialized_records in init_quantum_datasets.datastore_records.items()
1159
+ }
1160
+ )
1161
+ # Remove intermediates from inputs.
1162
+ predicted_inputs -= predicted_outputs
1163
+ dataset_types = {d.name: d.dataset_type for d in self.pipeline_graph.dataset_types.values()}
1164
+ # Make butler from everything.
1165
+ return QuantumBackedButler.from_predicted(
1166
+ config=butler_config,
1167
+ predicted_inputs=predicted_inputs,
1168
+ predicted_outputs=predicted_outputs,
1169
+ dimensions=self.pipeline_graph.universe,
1170
+ datastore_records=DatastoreRecordData.merge_mappings(*datastore_record_maps),
1171
+ search_paths=list(config_search_paths) if config_search_paths is not None else None,
1172
+ dataset_types=dataset_types,
1173
+ )
1174
+
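A hedged editorial sketch of initializing an output run through a quantum-backed butler (``/repo/butler.yaml`` is a hypothetical butler configuration path; ``qg`` must have its ``init_quanta`` component loaded):

    qbb = qg.make_init_qbb("/repo/butler.yaml")
    qg.init_output_run(qbb)  # writes configs, packages, and other init-outputs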
1175
+ def write_init_outputs(self, butler: LimitedButler, skip_existing: bool = True) -> None:
1176
+ """Write the init-output datasets for all tasks in the quantum graph.
1177
+
1178
+ This only requires the ``init_quanta`` component to have been loaded.
1179
+
1180
+ Parameters
1181
+ ----------
1182
+ butler : `lsst.daf.butler.LimitedButler`
1183
+ A limited butler data repository client.
1184
+ skip_existing : `bool`, optional
1185
+ If `True` (default) ignore init-outputs that already exist. If
1186
+ `False`, raise.
1187
+
1188
+ Raises
1189
+ ------
1190
+ lsst.daf.butler.registry.ConflictingDefinitionError
1191
+ Raised if an init-output dataset already exists and
1192
+ ``skip_existing=False``.
1193
+ """
1194
+ # Extract init-input and init-output refs from the QG.
1195
+ input_refs: dict[str, DatasetRef] = {}
1196
+ output_refs: dict[str, DatasetRef] = {}
1197
+ for task_node in self.pipeline_graph.tasks.values():
1198
+ if task_node.label not in self._init_quanta:
1199
+ continue
1200
+ input_refs.update(
1201
+ {ref.datasetType.name: ref for ref in self.get_init_inputs(task_node.label).values()}
1202
+ )
1203
+ output_refs.update(
1204
+ {
1205
+ ref.datasetType.name: ref
1206
+ for ref in self.get_init_outputs(task_node.label).values()
1207
+ if ref.datasetType.name != task_node.init.config_output.dataset_type_name
1208
+ }
1209
+ )
1210
+ for ref, is_stored in butler.stored_many(output_refs.values()).items():
1211
+ if is_stored:
1212
+ if not skip_existing:
1213
+ raise ConflictingDefinitionError(f"Init-output dataset {ref} already exists.")
1214
+ # We'll `put` whatever's left in output_refs at the end.
1215
+ del output_refs[ref.datasetType.name]
1216
+ # Instantiate tasks, reading overall init-inputs and gathering
1217
+ # init-output in-memory objects.
1218
+ init_outputs: list[tuple[Any, DatasetType]] = []
1219
+ self.pipeline_graph.instantiate_tasks(
1220
+ get_init_input=lambda dataset_type: butler.get(
1221
+ input_refs[dataset_type.name].overrideStorageClass(dataset_type.storageClass)
1222
+ ),
1223
+ init_outputs=init_outputs,
1224
+ # A task can be in the pipeline graph without having an init
1225
+ # quantum if it doesn't have any regular quanta either (e.g. they
1226
+ # were all skipped), and the _init_quanta has a "" entry for global
1227
+ # init-outputs that we don't want to pass here.
1228
+ labels=self.pipeline_graph.tasks.keys() & self._init_quanta.keys(),
1229
+ )
1230
+ # Write init-outputs that weren't already present.
1231
+ for obj, dataset_type in init_outputs:
1232
+ if new_ref := output_refs.get(dataset_type.name):
1233
+ assert new_ref.datasetType.storageClass_name == dataset_type.storageClass_name, (
1234
+ "QG init refs should use task connection storage classes."
1235
+ )
1236
+ butler.put(obj, new_ref)
1237
+
1238
+ def write_configs(self, butler: LimitedButler, compare_existing: bool = True) -> None:
1239
+ """Write the config datasets for all tasks in the quantum graph.
1240
+
1241
+ Parameters
1242
+ ----------
1243
+ butler : `lsst.daf.butler.LimitedButler`
1244
+ A limited butler data repository client.
1245
+ compare_existing : `bool`, optional
1246
+ If `True` check configs that already exist for consistency. If
1247
+ `False`, always raise if configs already exist.
1248
+
1249
+ Raises
1250
+ ------
1251
+ lsst.daf.butler.registry.ConflictingDefinitionError
1252
+ Raised if a config dataset already exists and
1253
+ ``compare_existing=False``, or if the existing config is not
1254
+ consistent with the config in the quantum graph.
1255
+ """
1256
+ to_put: list[tuple[PipelineTaskConfig, DatasetRef]] = []
1257
+ for task_node in self.pipeline_graph.tasks.values():
1258
+ if task_node.label not in self._init_quanta:
1259
+ continue
1260
+ dataset_type_name = task_node.init.config_output.dataset_type_name
1261
+ ref = self.get_init_outputs(task_node.label)[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME]
1262
+ try:
1263
+ old_config = butler.get(ref)
1264
+ except (LookupError, FileNotFoundError):
1265
+ old_config = None
1266
+ if old_config is not None:
1267
+ if not compare_existing:
1268
+ raise ConflictingDefinitionError(f"Config dataset {ref} already exists.")
1269
+ if not task_node.config.compare(old_config, shortcut=False, output=log_config_mismatch):
1270
+ raise ConflictingDefinitionError(
1271
+ f"Config does not match existing task config {dataset_type_name!r} in "
1272
+ "butler; tasks configurations must be consistent within the same run collection."
1273
+ )
1274
+ else:
1275
+ to_put.append((task_node.config, ref))
1276
+ # We do writes at the end to minimize the mess we leave behind when we
1277
+ # raise an exception.
1278
+ for config, ref in to_put:
1279
+ butler.put(config, ref)
1280
+
1281
+ def write_packages(self, butler: LimitedButler, compare_existing: bool = True) -> None:
1282
+ """Write the 'packages' dataset for the currently-active software
1283
+ versions.
1284
+
1285
+ Parameters
1286
+ ----------
1287
+ butler : `lsst.daf.butler.LimitedButler`
1288
+ A limited butler data repository client.
1289
+ compare_existing : `bool`, optional
1290
+ If `True` check packages that already exist for consistency. If
1291
+ `False`, always raise if the packages dataset already exists.
1292
+
1293
+ Raises
1294
+ ------
1295
+ lsst.daf.butler.registry.ConflictingDefinitionError
1296
+ Raised if the packages dataset already exists and ``compare_existing=False``,
1297
+ or if the existing packages are not consistent with the current packages.
1298
+ """
1299
+ new_packages = Packages.fromSystem()
1300
+ (ref,) = self.get_init_outputs("").values()
1301
+ try:
1302
+ packages = butler.get(ref)
1303
+ except (LookupError, FileNotFoundError):
1304
+ packages = None
1305
+ if packages is not None:
1306
+ if not compare_existing:
1307
+ raise ConflictingDefinitionError(f"Packages dataset {ref} already exists.")
1308
+ if compare_packages(packages, new_packages):
1309
+ # Have to remove the existing dataset first; butler has no
1310
+ # replace option.
1311
+ butler.pruneDatasets([ref], unstore=True, purge=True)
1312
+ butler.put(packages, ref)
1313
+ else:
1314
+ butler.put(new_packages, ref)
1315
+
1316
+ def init_output_run(self, butler: LimitedButler, existing: bool = True) -> None:
1317
+ """Initialize a new output RUN collection by writing init-output
1318
+ datasets (including configs and packages).
1319
+
1320
+ Parameters
1321
+ ----------
1322
+ butler : `lsst.daf.butler.LimitedButler`
1323
+ A limited butler data repository client.
1324
+ existing : `bool`, optional
1325
+ If `True`, check or skip outputs that already exist. If
1326
+ `False`, always raise if an output dataset already exists.
1327
+
1328
+ Raises
1329
+ ------
1330
+ lsst.daf.butler.registry.ConflictingDefinitionError
1331
+ Raised if there are existing init output datasets, and either
1332
+ ``existing=False`` or their contents are not compatible with this
1333
+ graph.
1334
+ """
1335
+ self.write_configs(butler, compare_existing=existing)
1336
+ self.write_packages(butler, compare_existing=existing)
1337
+ self.write_init_outputs(butler, skip_existing=existing)
1338
+
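A minimal usage sketch of run initialization; ``qg`` and ``butler`` are hypothetical names for a fully-read `PredictedQuantumGraph` and a `lsst.daf.butler.LimitedButler` writing to the graph's output run:

    # Write configs, packages, and task init-outputs; existing datasets are
    # checked for consistency (or skipped) rather than rejected.
    qg.init_output_run(butler, existing=True)
    # Pass existing=False to require a completely fresh run instead.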
1339
+ @classmethod
1340
+ def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> PredictedQuantumGraph:
1341
+ """Construct from an old `QuantumGraph` instance.
1342
+
1343
+ Parameters
1344
+ ----------
1345
+ old_quantum_graph : `QuantumGraph`
1346
+ Quantum graph to transform.
1347
+
1348
+ Returns
1349
+ -------
1350
+ predicted_quantum_graph : `PredictedQuantumGraph`
1351
+ A new predicted quantum graph.
1352
+ """
1353
+ return PredictedQuantumGraphComponents.from_old_quantum_graph(old_quantum_graph).assemble()
1354
+
1355
+ def to_old_quantum_graph(self) -> QuantumGraph:
1356
+ """Transform into an old `QuantumGraph` instance.
1357
+
1358
+ Returns
1359
+ -------
1360
+ old_quantum_graph : `QuantumGraph`
1361
+ Old quantum graph.
1362
+
1363
+ Notes
1364
+ -----
1365
+ This can only be called on graphs that have loaded all quantum
1366
+ datasets, init datasets, and dimension records.
1367
+ """
1368
+ from ..graph import QuantumGraph
1369
+
1370
+ quanta: dict[TaskDef, set[Quantum]] = {}
1371
+ quantum_to_quantum_id: dict[Quantum, uuid.UUID] = {}
1372
+ init_inputs: dict[TaskDef, list[DatasetRef]] = {}
1373
+ init_outputs: dict[TaskDef, list[DatasetRef]] = {}
1374
+ for task_def in self.pipeline_graph._iter_task_defs():
1375
+ if not self._quanta_by_task_label.get(task_def.label):
1376
+ continue
1377
+ quanta_for_task: set[Quantum] = set()
1378
+ for quantum_id, quantum in self.build_execution_quanta(task_label=task_def.label).items():
1379
+ quanta_for_task.add(quantum)
1380
+ quantum_to_quantum_id[quantum] = quantum_id
1381
+ quanta[task_def] = quanta_for_task
1382
+ init_inputs[task_def] = list(self.get_init_inputs(task_def.label).values())
1383
+ init_outputs[task_def] = list(self.get_init_outputs(task_def.label).values())
1384
+ global_init_outputs = list(self.get_init_outputs("").values())
1385
+ registry_dataset_types = [d.dataset_type for d in self.pipeline_graph.dataset_types.values()]
1386
+ result = object.__new__(QuantumGraph)
1387
+ result._buildGraphs(
1388
+ quanta,
1389
+ _quantumToNodeId=quantum_to_quantum_id,
1390
+ metadata=self.header.to_old_metadata(),
1391
+ universe=self.pipeline_graph.universe,
1392
+ initInputs=init_inputs,
1393
+ initOutputs=init_outputs,
1394
+ globalInitOutputs=global_init_outputs,
1395
+ registryDatasetTypes=registry_dataset_types,
1396
+ )
1397
+ return result
1398
+
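A sketch of round-tripping between the old and new in-memory representations, assuming ``old_qg`` is a `QuantumGraph` obtained elsewhere (hypothetical name):

    # Old-format graph -> new predicted graph.
    predicted = PredictedQuantumGraph.from_old_quantum_graph(old_qg)
    # And back; as noted above, this requires that all quantum datasets,
    # init datasets, and dimension records have been loaded.
    old_again = predicted.to_old_quantum_graph()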
1399
+ def _make_summary(self) -> QgraphSummary:
1400
+ from ..graph import QgraphSummary, QgraphTaskSummary
1401
+
1402
+ summary = QgraphSummary(
1403
+ cmdLine=self.header.command or None,
1404
+ creationUTC=str(self.header.timestamp) if self.header.timestamp is not None else None,
1405
+ inputCollection=self.header.inputs or None,
1406
+ outputCollection=self.header.output,
1407
+ outputRun=self.header.output_run,
1408
+ )
1409
+ for task_label, quanta_for_task in self.quanta_by_task.items():
1410
+ task_summary = QgraphTaskSummary(taskLabel=task_label, numQuanta=len(quanta_for_task))
1411
+ task_node = self.pipeline_graph.tasks[task_label]
1412
+ for quantum_id in quanta_for_task.values():
1413
+ quantum_datasets = self._quantum_datasets[quantum_id]
1414
+ for connection_name, input_datasets in quantum_datasets.inputs.items():
1415
+ task_summary.numInputs[
1416
+ task_node.get_input_edge(connection_name).parent_dataset_type_name
1417
+ ] += len(input_datasets)
1418
+ for connection_name, output_datasets in quantum_datasets.outputs.items():
1419
+ task_summary.numOutputs[
1420
+ task_node.get_output_edge(connection_name).parent_dataset_type_name
1421
+ ] += len(output_datasets)
1422
+ summary.qgraphTaskSummaries[task_label] = task_summary
1423
+ return summary
1424
+
1425
+
1426
+ @dataclasses.dataclass(kw_only=True)
1427
+ class PredictedQuantumGraphComponents:
1428
+ """A helper class for building and writing predicted quantum graphs.
1429
+
1430
+ Notes
1431
+ -----
1432
+ This class is a simple struct of model classes to allow different tools
1433
+ that build predicted quantum graphs to assemble them in whatever order they
1434
+ prefer. It does not enforce any internal invariants (e.g. the quantum and
1435
+ dataset counts in the header, different representations of quanta, internal
1436
+ ID sorting, etc.), but it does provide methods that can satisfy them.
1437
+ """
1438
+
1439
+ def __post_init__(self) -> None:
1440
+ self.header.graph_type = "predicted"
1441
+
1442
+ header: HeaderModel = dataclasses.field(default_factory=HeaderModel)
1443
+ """Basic metadata about the graph."""
1444
+
1445
+ pipeline_graph: PipelineGraph
1446
+ """Description of the pipeline this graph runs, including all task label
1447
+ and dataset type definitions.
1448
+
1449
+ This may include tasks that do not have any quanta (e.g. due to skipping
1450
+ already-executed tasks).
1451
+
1452
+ This also includes the dimension universe used to construct the graph.
1453
+ """
1454
+
1455
+ dimension_data: DimensionDataAttacher | None = None
1456
+ """Object that can attach dimension records to data IDs.
1457
+ """
1458
+
1459
+ init_quanta: PredictedInitQuantaModel = dataclasses.field(default_factory=PredictedInitQuantaModel)
1460
+ """A list of special quanta that describe the init-inputs and init-outputs
1461
+ of the graph.
1462
+
1463
+ Tasks that are included in the pipeline graph but do not have any quanta
1464
+ may or may not have an init quantum, but tasks that do have regular quanta
1465
+ always have an init quantum as well.
1466
+
1467
+ When used to construct a `PredictedQuantumGraph`, this must have either
1468
+ zero entries or entries for all tasks in the pipeline.
1469
+ """
1470
+
1471
+ thin_graph: PredictedThinGraphModel = dataclasses.field(default_factory=PredictedThinGraphModel)
1472
+ """A lightweight quantum-quantum DAG with task labels and data IDs only.
1473
+
1474
+ This uses internal integer IDs ("indexes") for node IDs.
1475
+
1476
+ This does not include the special "init" quanta.
1477
+ """
1478
+
1479
+ quantum_datasets: dict[uuid.UUID, PredictedQuantumDatasetsModel] = dataclasses.field(default_factory=dict)
1480
+ """The full descriptions of all quanta, including input and output
1481
+ datasets, keyed by UUID.
1482
+
1483
+ When used to construct a `PredictedQuantumGraph`, this need not have all
1484
+ entries.
1485
+
1486
+ This does not include special "init" quanta.
1487
+ """
1488
+
1489
+ quantum_indices: dict[uuid.UUID, QuantumIndex] = dataclasses.field(default_factory=dict)
1490
+ """A mapping from external universal quantum ID to internal integer ID.
1491
+
1492
+ While this `dict` does not need to be sorted, the internal integer IDs do
1493
+ need to correspond exactly to ``enumerate(sorted(uuids))``.
1494
+
1495
+ When used to construct a `PredictedQuantumGraph`, this must be fully
1496
+ populated if `thin_graph` is. It can be empty otherwise.
1497
+
1498
+ This does include special "init" quanta.
1499
+ """
1500
+
1501
+ def set_quantum_indices(self) -> None:
1502
+ """Populate the `quantum_indices` component by sorting the UUIDs in the
1503
+ `init_quanta` and `quantum_datasets` components (which must both be
1504
+ complete).
1505
+ """
1506
+ all_quantum_ids = [q.quantum_id for q in self.init_quanta.root]
1507
+ all_quantum_ids.extend(self.quantum_datasets.keys())
1508
+ all_quantum_ids.sort(key=operator.attrgetter("int"))
1509
+ self.quantum_indices = {quantum_id: index for index, quantum_id in enumerate(all_quantum_ids)}
1510
+
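The sorting contract can be illustrated with a standalone standard-library sketch (illustrative only, not taken from this module):

    import operator
    import uuid

    quantum_ids = [uuid.uuid4() for _ in range(4)]
    # Internal integer IDs are the positions of the UUIDs when sorted by
    # their 128-bit integer value, exactly as set_quantum_indices does.
    indices = {q: i for i, q in enumerate(sorted(quantum_ids, key=operator.attrgetter("int")))}
    assert [indices[q] for q in sorted(quantum_ids, key=operator.attrgetter("int"))] == [0, 1, 2, 3]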
1511
+ def set_thin_graph(self) -> None:
1512
+ """Populate the `thin_graph` component from the `pipeline_graph`,
1513
+ `quantum_datasets` and `quantum_indices` components (which must all be
1514
+ complete).
1515
+ """
1516
+ bipartite_xgraph = networkx.DiGraph()
1517
+ self.thin_graph.quanta = {task_label: [] for task_label in self.pipeline_graph.tasks}
1518
+ graph_quantum_indices = []
1519
+ for quantum_datasets in self.quantum_datasets.values():
1520
+ quantum_index = self.quantum_indices[quantum_datasets.quantum_id]
1521
+ self.thin_graph.quanta[quantum_datasets.task_label].append(
1522
+ PredictedThinQuantumModel.model_construct(
1523
+ quantum_index=quantum_index,
1524
+ data_coordinate=quantum_datasets.data_coordinate,
1525
+ )
1526
+ )
1527
+ for dataset in itertools.chain.from_iterable(quantum_datasets.inputs.values()):
1528
+ bipartite_xgraph.add_edge(dataset.dataset_id, quantum_index)
1529
+ for dataset in itertools.chain.from_iterable(quantum_datasets.outputs.values()):
1530
+ bipartite_xgraph.add_edge(quantum_index, dataset.dataset_id)
1531
+ graph_quantum_indices.append(quantum_index)
1532
+ quantum_only_xgraph: networkx.DiGraph = networkx.bipartite.projected_graph(
1533
+ bipartite_xgraph, graph_quantum_indices
1534
+ )
1535
+ self.thin_graph.edges = list(quantum_only_xgraph.edges)
1536
+
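The dataset-mediated edges are collapsed with the same `networkx.bipartite.projected_graph` call used above; a toy sketch of the projection, with string nodes standing in for dataset UUIDs and integers for quantum indexes:

    import networkx

    bipartite = networkx.DiGraph()
    bipartite.add_edge("d1", 0)  # d1 is an input of quantum 0
    bipartite.add_edge(0, "d2")  # d2 is an output of quantum 0
    bipartite.add_edge("d2", 1)  # d2 is an input of quantum 1
    quantum_only = networkx.bipartite.projected_graph(bipartite, [0, 1])
    assert list(quantum_only.edges) == [(0, 1)]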
1537
+ def set_header_counts(self) -> None:
1538
+ """Populate the quantum and dataset counts in the header from the
1539
+ `quantum_indices`, `thin_graph`, `init_quanta`, and `quantum_datasets`
1540
+ components.
1541
+ """
1542
+ self.header.n_quanta = len(self.quantum_indices) - len(self.init_quanta.root)
1543
+ self.header.n_task_quanta = {
1544
+ task_label: len(thin_quanta) for task_label, thin_quanta in self.thin_graph.quanta.items()
1545
+ }
1546
+ all_dataset_ids: set[uuid.UUID] = set()
1547
+ for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
1548
+ all_dataset_ids.update(quantum_datasets.iter_dataset_ids())
1549
+ self.header.n_datasets = len(all_dataset_ids)
1550
+
1551
+ def update_output_run(self, output_run: str) -> None:
1552
+ """Update the output `~lsst.daf.butler.CollectionType.RUN` collection
1553
+ name in all datasets and regenerate all output dataset and quantum
1554
+ UUIDs.
1555
+
1556
+ Parameters
1557
+ ----------
1558
+ output_run : `str`
1559
+ New output `~lsst.daf.butler.CollectionType.RUN` collection name.
1560
+ """
1561
+ uuid_map: dict[uuid.UUID, uuid.UUID] = {}
1562
+ # Do all outputs and then all inputs in separate passes so we don't
1563
+ # need to rely on topological ordering of anything.
1564
+ for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
1565
+ new_quantum_id = generate_uuidv7()
1566
+ quantum_datasets.quantum_id = new_quantum_id
1567
+ for output_dataset in itertools.chain.from_iterable(quantum_datasets.outputs.values()):
1568
+ assert output_dataset.run == self.header.output_run, (
1569
+ f"Incorrect run {output_dataset.run} for output dataset {output_dataset.dataset_id}."
1570
+ )
1571
+ new_dataset_id = generate_uuidv7()
1572
+ uuid_map[output_dataset.dataset_id] = new_dataset_id
1573
+ output_dataset.dataset_id = new_dataset_id
1574
+ output_dataset.run = output_run
1575
+ for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
1576
+ for input_dataset in itertools.chain.from_iterable(quantum_datasets.inputs.values()):
1577
+ if input_dataset.run == self.header.output_run:
1578
+ input_dataset.run = output_run
1579
+ input_dataset.dataset_id = uuid_map.get(
1580
+ input_dataset.dataset_id,
1581
+ # This dataset isn't necessarily an output of the graph
1582
+ # just because it's in the output run; the graph could
1583
+ # have been built with extend_run=True.
1584
+ input_dataset.dataset_id,
1585
+ )
1586
+ # Update the keys of the quantum_datasets dict.
1587
+ self.quantum_datasets = {qd.quantum_id: qd for qd in self.quantum_datasets.values()}
1588
+ # Since the UUIDs have changed, the indices need to change, too.
1589
+ self.set_quantum_indices()
1590
+ self.set_thin_graph()
1591
+ # Update the header last, since we use it above to get the old run.
1592
+ self.header.output_run = output_run
1593
+
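A sketch of retargeting a graph at a new output run before saving it; ``components`` and the collection name are hypothetical:

    # Rewrites the RUN collection on every predicted output and regenerates
    # all quantum and output dataset UUIDs, the indices, and the thin graph.
    components.update_output_run("u/someone/new_run")
    components.write("retargeted.qg")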
1594
+ def assemble(self) -> PredictedQuantumGraph:
1595
+ """Construct a `PredictedQuantumGraph` from these components."""
1596
+ return PredictedQuantumGraph(self)
1597
+
1598
+ @classmethod
1599
+ def read_execution_quanta(
1600
+ cls,
1601
+ uri: ResourcePathExpression,
1602
+ quantum_ids: Iterable[uuid.UUID] | None = None,
1603
+ page_size: int = DEFAULT_PAGE_SIZE,
1604
+ ) -> PredictedQuantumGraphComponents:
1605
+ """Read one or more executable quanta from a quantum graph file.
1606
+
1607
+ Parameters
1608
+ ----------
1609
+ uri : convertible to `lsst.resources.ResourcePath`
1610
+ URI to open. Should have a ``.qg`` extension for new quantum graph
1611
+ files, or ``.qgraph`` for the old format.
1612
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
1613
+ Iterable of quantum IDs to load. If not provided, all quanta will
1614
+ be loaded. The UUIDs of special init quanta will be ignored.
1615
+ page_size : `int`, optional
1616
+ Approximate number of bytes to read at once from address files.
1617
+ Note that this does not set a page size for *all* reads, but it
1618
+ does affect the smallest, most numerous reads.
1619
+
1620
+ Returns
1621
+ -------
1622
+ components : `PredictedQuantumGraphComponents`
1623
+ Components for a quantum graph that can build execution quanta for
1624
+ all of the given IDs.
1625
+ """
1626
+ uri = ResourcePath(uri)
1627
+ if uri.getExtension() == ".qgraph":
1628
+ _LOG.warning(
1629
+ f"Reading and converting old quantum graph {uri}. "
1630
+ "Use the '.qg' extension to write in the new format."
1631
+ )
1632
+ from ..graph import QuantumGraph
1633
+
1634
+ old_qg = QuantumGraph.loadUri(uri, nodes=quantum_ids)
1635
+ return PredictedQuantumGraphComponents.from_old_quantum_graph(old_qg)
1636
+
1637
+ with PredictedQuantumGraph.open(uri, page_size=page_size) as reader:
1638
+ reader.read_execution_quanta(quantum_ids)
1639
+ return reader.components
1640
+
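A sketch of this single-call path for loading only the quanta a worker needs; ``my_quantum_ids`` is a hypothetical iterable of quantum UUIDs:

    components = PredictedQuantumGraphComponents.read_execution_quanta(
        "graph.qg", quantum_ids=my_quantum_ids
    )
    qg = components.assemble()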
1641
+ @classmethod
1642
+ def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> PredictedQuantumGraphComponents:
1643
+ """Construct from an old `QuantumGraph` instance.
1644
+
1645
+ Parameters
1646
+ ----------
1647
+ old_quantum_graph : `QuantumGraph`
1648
+ Quantum graph to transform.
1649
+
1650
+ Returns
1651
+ -------
1652
+ components : `PredictedQuantumGraphComponents`
1653
+ Components for a new predicted quantum graph.
1654
+ """
1655
+ header = HeaderModel.from_old_quantum_graph(old_quantum_graph)
1656
+ result = cls(header=header, pipeline_graph=old_quantum_graph.pipeline_graph)
1657
+ result.init_quanta.update_from_old_quantum_graph(old_quantum_graph)
1658
+ dimension_data_extractor = DimensionDataExtractor.from_dimension_group(
1659
+ old_quantum_graph.pipeline_graph.get_all_dimensions()
1660
+ )
1661
+ for task_node in old_quantum_graph.pipeline_graph.tasks.values():
1662
+ task_quanta = old_quantum_graph.get_task_quanta(task_node.label)
1663
+ for quantum_id, quantum in task_quanta.items():
1664
+ result.quantum_datasets[quantum_id] = PredictedQuantumDatasetsModel.from_execution_quantum(
1665
+ task_node, quantum, quantum_id
1666
+ )
1667
+ dimension_data_extractor.update([cast(DataCoordinate, quantum.dataId)])
1668
+ for refs in itertools.chain(quantum.inputs.values(), quantum.outputs.values()):
1669
+ dimension_data_extractor.update(ref.dataId for ref in refs)
1670
+ result.dimension_data = DimensionDataAttacher(
1671
+ records=dimension_data_extractor.records.values(),
1672
+ dimensions=result.pipeline_graph.get_all_dimensions(),
1673
+ )
1674
+ result.set_quantum_indices()
1675
+ result.set_thin_graph()
1676
+ result.set_header_counts()
1677
+ return result
1678
+
1679
+ def write(
1680
+ self,
1681
+ uri: ResourcePathExpression,
1682
+ *,
1683
+ zstd_level: int = 10,
1684
+ zstd_dict_size: int = 32768,
1685
+ zstd_dict_n_inputs: int = 512,
1686
+ ) -> None:
1687
+ """Write the graph to a file.
1688
+
1689
+ Parameters
1690
+ ----------
1691
+ uri : convertible to `lsst.resources.ResourcePath`
1692
+ Path to write to. Should have a ``.qg`` extension, or ``.qgraph``
1693
+ to force writing the old format.
1694
+ zstd_level : `int`, optional
1695
+ ZStandard compression level to use on JSON blocks.
1696
+ zstd_dict_size : `int`, optional
1697
+ Size of a ZStandard dictionary that shares compression information
1698
+ across components. Set to zero to disable the dictionary.
1699
+ Dictionary compression is automatically disabled if the number of
1700
+ quanta is smaller than ``zstd_dict_n_inputs``.
1701
+ zstd_dict_n_inputs : `int`, optional
1702
+ Maximum number of `PredictedQuantumDatasetsModel` JSON
1703
+ representations to feed the ZStandard dictionary training routine.
1704
+
1705
+ Notes
1706
+ -----
1707
+ Only a complete predicted quantum graph with all components fully
1708
+ populated should be written.
1709
+ """
1710
+ if self.header.n_quanta + len(self.init_quanta.root) != len(self.quantum_indices):
1711
+ raise RuntimeError(
1712
+ f"Cannot save graph after partial read of quanta: expected {self.header.n_quanta}, "
1713
+ f"got {len(self.quantum_indices)}."
1714
+ )
1715
+ uri = ResourcePath(uri)
1716
+ match uri.getExtension():
1717
+ case ".qg":
1718
+ pass
1719
+ case ".qgraph":
1720
+ _LOG.warning(
1721
+ "Converting to an old-format quantum graph.. "
1722
+ "Use '.qg' instead of '.qgraph' to save in the new format."
1723
+ )
1724
+ old_qg = self.assemble().to_old_quantum_graph()
1725
+ old_qg.saveUri(uri)
1726
+ return
1727
+ case ext:
1728
+ raise ValueError(
1729
+ f"Unsupported extension {ext!r} for quantum graph; "
1730
+ "expected '.qg' (or '.qgraph' to force the old format)."
1731
+ )
1732
+ cdict: zstandard.ZstdCompressionDict | None = None
1733
+ cdict_data: bytes | None = None
1734
+ quantum_datasets_json: dict[uuid.UUID, bytes] = {}
1735
+ if len(self.quantum_datasets) < zstd_dict_n_inputs:
1736
+ # ZStandard will fail if we ask to use a compression dict without
1737
+ # giving it enough data, and it only helps if we have a lot of
1738
+ # quanta.
1739
+ zstd_dict_size = 0
1740
+ if zstd_dict_size:
1741
+ quantum_datasets_json = {
1742
+ quantum_model.quantum_id: quantum_model.model_dump_json().encode()
1743
+ for quantum_model in itertools.islice(self.quantum_datasets.values(), zstd_dict_n_inputs)
1744
+ }
1745
+ try:
1746
+ cdict = zstandard.train_dictionary(
1747
+ zstd_dict_size,
1748
+ list(quantum_datasets_json.values()),
1749
+ level=zstd_level,
1750
+ )
1751
+ except zstandard.ZstdError as err:
1752
+ warnings.warn(f"Not using a compression dictionary: {err}.")
1753
+ cdict = None
1754
+ else:
1755
+ cdict_data = cdict.as_bytes()
1756
+ compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
1757
+ with BaseQuantumGraphWriter.open(
1758
+ uri,
1759
+ header=self.header,
1760
+ pipeline_graph=self.pipeline_graph,
1761
+ indices=self.quantum_indices,
1762
+ address_filename="quanta",
1763
+ compressor=compressor,
1764
+ cdict_data=cdict_data,
1765
+ ) as writer:
1766
+ writer.write_single_model("thin_graph", self.thin_graph)
1767
+ if self.dimension_data is None:
1768
+ raise IncompleteQuantumGraphError(
1769
+ "Cannot save predicted quantum graph with no dimension data."
1770
+ )
1771
+ serialized_dimension_data = self.dimension_data.serialized()
1772
+ writer.write_single_model("dimension_data", serialized_dimension_data)
1773
+ del serialized_dimension_data
1774
+ writer.write_single_model("init_quanta", self.init_quanta)
1775
+ with MultiblockWriter.open_in_zip(
1776
+ writer.zf, "quantum_datasets", writer.int_size
1777
+ ) as quantum_datasets_mb:
1778
+ for quantum_model in self.quantum_datasets.values():
1779
+ if json_data := quantum_datasets_json.get(quantum_model.quantum_id):
1780
+ quantum_datasets_mb.write_bytes(
1781
+ quantum_model.quantum_id, writer.compressor.compress(json_data)
1782
+ )
1783
+ else:
1784
+ quantum_datasets_mb.write_model(
1785
+ quantum_model.quantum_id, quantum_model, writer.compressor
1786
+ )
1787
+ writer.address_writer.addresses.append(quantum_datasets_mb.addresses)
1788
+
1789
+
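A sketch of writing with the compression knobs documented above; ``components`` is a hypothetical fully-populated instance:

    # Defaults: zstd level 10 with a shared dictionary trained on up to 512
    # per-quantum JSON blobs (dictionary use is skipped for small graphs).
    components.write("graph.qg")
    # Disable dictionary compression explicitly.
    components.write("graph_nodict.qg", zstd_dict_size=0)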
1790
+ @dataclasses.dataclass
1791
+ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
1792
+ """A helper class for reading predicted quantum graphs."""
1793
+
1794
+ components: PredictedQuantumGraphComponents = dataclasses.field(init=False)
1795
+ """Quantum graph components populated by this reader's methods."""
1796
+
1797
+ @classmethod
1798
+ @contextmanager
1799
+ def open(
1800
+ cls,
1801
+ uri: ResourcePathExpression,
1802
+ *,
1803
+ page_size: int = DEFAULT_PAGE_SIZE,
1804
+ import_mode: TaskImportMode = TaskImportMode.ASSUME_CONSISTENT_EDGES,
1805
+ ) -> Iterator[PredictedQuantumGraphReader]:
1806
+ """Construct a reader from a URI.
1807
+
1808
+ Parameters
1809
+ ----------
1810
+ uri : convertible to `lsst.resources.ResourcePath`
1811
+ URI to open. Should have a ``.qg`` extension.
1812
+ page_size : `int`, optional
1813
+ Approximate number of bytes to read at once from address files.
1814
+ Note that this does not set a page size for *all* reads, but it
1815
+ does affect the smallest, most numerous reads.
1816
+ import_mode : `..pipeline_graph.TaskImportMode`, optional
1817
+ How to handle importing the task classes referenced in the pipeline
1818
+ graph.
1819
+
1820
+ Returns
1821
+ -------
1822
+ reader : `contextlib.AbstractContextManager` [ \
1823
+ `PredictedQuantumGraphReader` ]
1824
+ A context manager that returns the reader when entered.
1825
+ """
1826
+ with cls._open(
1827
+ uri,
1828
+ graph_type="predicted",
1829
+ address_filename="quanta",
1830
+ page_size=page_size,
1831
+ import_mode=import_mode,
1832
+ n_addresses=1,
1833
+ ) as self:
1834
+ yield self
1835
+
1836
+ def __post_init__(self) -> None:
1837
+ self.components = PredictedQuantumGraphComponents(
1838
+ header=self.header, pipeline_graph=self.pipeline_graph
1839
+ )
1840
+
1841
+ def finish(self) -> PredictedQuantumGraph:
1842
+ """Construct a `PredictedQuantumGraph` instance from this reader."""
1843
+ return self.components.assemble()
1844
+
1845
+ def read_all(self) -> PredictedQuantumGraphReader:
1846
+ """Read all components in full."""
1847
+ return self.read_thin_graph().read_execution_quanta()
1848
+
1849
+ def read_thin_graph(self) -> PredictedQuantumGraphReader:
1850
+ """Read the thin graph.
1851
+
1852
+ The thin graph is a quantum-quantum DAG with internal integer IDs for
1853
+ nodes and just task labels and data IDs as node attributes. It always
1854
+ includes all regular quanta, and does not include init-input or
1855
+ init-output information.
1856
+ """
1857
+ if not self.components.thin_graph.quanta:
1858
+ self.components.thin_graph = self._read_single_block("thin_graph", PredictedThinGraphModel)
1859
+ if len(self.components.quantum_indices) != self.components.header.n_quanta:
1860
+ self.address_reader.read_all()
1861
+ self.components.quantum_indices.update(
1862
+ {row.key: row.index for row in self.address_reader.rows.values()}
1863
+ )
1864
+ return self
1865
+
1866
+ def read_init_quanta(self) -> PredictedQuantumGraphReader:
1867
+ """Read the list of special quanta that represent init-inputs and
1868
+ init-outputs.
1869
+ """
1870
+ if not self.components.init_quanta.root:
1871
+ self.components.init_quanta = self._read_single_block("init_quanta", PredictedInitQuantaModel)
1872
+ return self
1873
+
1874
+ def read_dimension_data(self) -> PredictedQuantumGraphReader:
1875
+ """Read all dimension records.
1876
+
1877
+ Record data IDs will be immediately deserialized, while other fields
1878
+ will be left in serialized form until they are needed.
1879
+ """
1880
+ if self.components.dimension_data is None:
1881
+ serializable_dimension_data = self._read_single_block("dimension_data", SerializableDimensionData)
1882
+ self.components.dimension_data = DimensionDataAttacher(
1883
+ deserializers=[
1884
+ DimensionRecordSetDeserializer.from_raw(
1885
+ self.components.pipeline_graph.universe[element], serialized_records
1886
+ )
1887
+ for element, serialized_records in serializable_dimension_data.root.items()
1888
+ ],
1889
+ dimensions=DimensionGroup.union(
1890
+ *self.components.pipeline_graph.group_by_dimensions(prerequisites=True).keys(),
1891
+ universe=self.components.pipeline_graph.universe,
1892
+ ),
1893
+ )
1894
+ return self
1895
+
1896
+ def read_quantum_datasets(
1897
+ self, quantum_ids: Iterable[uuid.UUID] | None = None
1898
+ ) -> PredictedQuantumGraphReader:
1899
+ """Read information about all datasets produced and consumed by the
1900
+ given quantum IDs.
1901
+
1902
+ Parameters
1903
+ ----------
1904
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
1905
+ Iterable of quantum IDs to load. If not provided, all quanta will
1906
+ be loaded. The UUIDs of special init quanta will be ignored.
1907
+ """
1908
+ quantum_datasets: PredictedQuantumDatasetsModel | None
1909
+ if quantum_ids is None:
1910
+ if len(self.components.quantum_datasets) != self.header.n_quanta:
1911
+ for quantum_datasets in MultiblockReader.read_all_models_in_zip(
1912
+ self.zf,
1913
+ "quantum_datasets",
1914
+ PredictedQuantumDatasetsModel,
1915
+ self.decompressor,
1916
+ int_size=self.components.header.int_size,
1917
+ page_size=self.page_size,
1918
+ ):
1919
+ self.components.quantum_datasets.setdefault(quantum_datasets.quantum_id, quantum_datasets)
1920
+ self.address_reader.read_all()
1921
+ for address_row in self.address_reader.rows.values():
1922
+ self.components.quantum_indices[address_row.key] = address_row.index
1923
+ return self
1924
+ with MultiblockReader.open_in_zip(
1925
+ self.zf, "quantum_datasets", int_size=self.components.header.int_size
1926
+ ) as mb_reader:
1927
+ for quantum_id in quantum_ids:
1928
+ if quantum_id in self.components.quantum_datasets:
1929
+ continue
1930
+ address_row = self.address_reader.find(quantum_id)
1931
+ self.components.quantum_indices[address_row.key] = address_row.index
1932
+ quantum_datasets = mb_reader.read_model(
1933
+ address_row.addresses[0], PredictedQuantumDatasetsModel, self.decompressor
1934
+ )
1935
+ if quantum_datasets is not None:
1936
+ self.components.quantum_datasets[address_row.key] = quantum_datasets
1937
+ return self
1938
+
1939
+ def read_execution_quanta(
1940
+ self, quantum_ids: Iterable[uuid.UUID] | None = None
1941
+ ) -> PredictedQuantumGraphReader:
1942
+ """Read all information needed to execute the given quanta.
1943
+
1944
+ Parameters
1945
+ ----------
1946
+ quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
1947
+ Iterable of quantum IDs to load. If not provided, all quanta will
1948
+ be loaded. The UUIDs of special init quanta will be ignored.
1949
+ """
1950
+ return self.read_init_quanta().read_dimension_data().read_quantum_datasets(quantum_ids)
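A sketch of a typical partial-read session with the reader, assuming ``uri`` points at a ``.qg`` file and ``id1``/``id2`` are quantum UUIDs (hypothetical names):

    with PredictedQuantumGraphReader.open(uri) as reader:
        # Cheap overview: task labels, data IDs, and the quantum-only DAG.
        reader.read_thin_graph()
        # Everything needed to execute two specific quanta.
        reader.read_execution_quanta([id1, id2])
        qg = reader.finish()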