lsst-pipe-base 29.2025.3900__py3-none-any.whl → 29.2025.4100__py3-none-any.whl
- lsst/pipe/base/_task_metadata.py +15 -0
- lsst/pipe/base/dot_tools.py +14 -152
- lsst/pipe/base/exec_fixup_data_id.py +17 -44
- lsst/pipe/base/execution_graph_fixup.py +49 -18
- lsst/pipe/base/graph/_versionDeserializers.py +6 -5
- lsst/pipe/base/graph/graph.py +30 -10
- lsst/pipe/base/graph/graphSummary.py +30 -0
- lsst/pipe/base/graph_walker.py +119 -0
- lsst/pipe/base/log_capture.py +5 -2
- lsst/pipe/base/mermaid_tools.py +11 -64
- lsst/pipe/base/mp_graph_executor.py +298 -236
- lsst/pipe/base/pipeline_graph/io.py +1 -1
- lsst/pipe/base/quantum_graph/__init__.py +32 -0
- lsst/pipe/base/quantum_graph/_common.py +632 -0
- lsst/pipe/base/quantum_graph/_multiblock.py +808 -0
- lsst/pipe/base/quantum_graph/_predicted.py +1950 -0
- lsst/pipe/base/quantum_graph/visualization.py +302 -0
- lsst/pipe/base/quantum_graph_builder.py +292 -34
- lsst/pipe/base/quantum_graph_executor.py +2 -1
- lsst/pipe/base/quantum_provenance_graph.py +16 -7
- lsst/pipe/base/quantum_reports.py +45 -0
- lsst/pipe/base/separable_pipeline_executor.py +126 -15
- lsst/pipe/base/simple_pipeline_executor.py +44 -43
- lsst/pipe/base/single_quantum_executor.py +1 -40
- lsst/pipe/base/tests/mocks/__init__.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +16 -1
- lsst/pipe/base/tests/mocks/{_in_memory_repo.py → _repo.py} +324 -45
- lsst/pipe/base/tests/mocks/_storage_class.py +51 -0
- lsst/pipe/base/tests/simpleQGraph.py +11 -5
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/METADATA +2 -1
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/RECORD +40 -34
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/zip-safe +0 -0
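The largest change is the new `lsst/pipe/base/quantum_graph` package, whose `_predicted.py` module (reproduced below) defines the next-generation quantum-graph file format (`.qg`) and its in-memory representation. As a minimal orientation sketch (the file name is hypothetical, and it assumes `PredictedQuantumGraph` is re-exported from `lsst.pipe.base.quantum_graph` by the new `__init__.py`), reading a graph and building executable quanta looks like:

    from lsst.pipe.base.quantum_graph import PredictedQuantumGraph

    # Load every quantum from a new-format (.qg) or old-format (.qgraph)
    # file; pass an iterable of UUIDs to load only a subset.
    qg = PredictedQuantumGraph.read_execution_quanta("pipeline.qg")
    # Build lsst.daf.butler.Quantum objects, keyed by quantum UUID, that
    # can be handed directly to an executor.
    quanta = qg.build_execution_quanta()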
lsst/pipe/base/quantum_graph/_predicted.py (new file)

@@ -0,0 +1,1950 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = (
+    "PredictedDatasetInfo",
+    "PredictedDatasetModel",
+    "PredictedInitQuantaModel",
+    "PredictedQuantumDatasetsModel",
+    "PredictedQuantumGraph",
+    "PredictedQuantumGraphComponents",
+    "PredictedQuantumGraphReader",
+    "PredictedQuantumInfo",
+    "PredictedThinGraphModel",
+    "PredictedThinQuantumModel",
+)
+
+import dataclasses
+import itertools
+import logging
+import operator
+import sys
+import uuid
+import warnings
+from collections import defaultdict
+from collections.abc import Iterable, Iterator, Mapping, Sequence
+from contextlib import AbstractContextManager, contextmanager
+from typing import TYPE_CHECKING, Any, TypeVar, cast
+
+import networkx
+import networkx.algorithms.bipartite
+import pydantic
+import zstandard
+
+from lsst.daf.butler import (
+    Config,
+    DataCoordinate,
+    DataIdValue,
+    DatasetRef,
+    DatasetType,
+    DimensionDataAttacher,
+    DimensionDataExtractor,
+    DimensionGroup,
+    DimensionRecordSetDeserializer,
+    LimitedButler,
+    Quantum,
+    QuantumBackedButler,
+    SerializableDimensionData,
+)
+from lsst.daf.butler._rubin import generate_uuidv7
+from lsst.daf.butler.datastore.record_data import DatastoreRecordData, SerializedDatastoreRecordData
+from lsst.daf.butler.registry import ConflictingDefinitionError
+from lsst.resources import ResourcePath, ResourcePathExpression
+from lsst.utils.packages import Packages
+
+from .. import automatic_connection_constants as acc
+from ..pipeline import TaskDef
+from ..pipeline_graph import (
+    PipelineGraph,
+    TaskImportMode,
+    TaskInitNode,
+    TaskNode,
+    compare_packages,
+    log_config_mismatch,
+)
+from ._common import (
+    BaseQuantumGraph,
+    BaseQuantumGraphReader,
+    BaseQuantumGraphWriter,
+    ConnectionName,
+    DataCoordinateValues,
+    DatasetInfo,
+    DatasetTypeName,
+    DatastoreName,
+    HeaderModel,
+    IncompleteQuantumGraphError,
+    QuantumIndex,
+    QuantumInfo,
+    TaskLabel,
+)
+from ._multiblock import DEFAULT_PAGE_SIZE, MultiblockReader, MultiblockWriter
+
+if TYPE_CHECKING:
+    from ..config import PipelineTaskConfig
+    from ..graph import QgraphSummary, QuantumGraph
+
+_LOG = logging.getLogger(__name__)
+
+
+_T = TypeVar("_T", bound=pydantic.BaseModel)
+
+
+class PredictedThinQuantumModel(pydantic.BaseModel):
+    """Data model for a quantum data ID and internal integer ID in a predicted
+    quantum graph.
+    """
+
+    quantum_index: QuantumIndex
+    """Internal integer ID for this quantum."""
+
+    data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
+    """Full (required and implied) data coordinate values for this quantum."""
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class PredictedThinGraphModel(pydantic.BaseModel):
+    """Data model for the predicted quantum graph component that maps each
+    task label to the data IDs and internal integer IDs of its quanta.
+    """
+
+    quanta: dict[TaskLabel, list[PredictedThinQuantumModel]] = pydantic.Field(default_factory=dict)
+    """Minimal descriptions of all quanta, grouped by task label."""
+
+    edges: list[tuple[QuantumIndex, QuantumIndex]] = pydantic.Field(default_factory=list)
+    """Pairs of (predecessor, successor) internal integer quantum IDs."""
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class PredictedDatasetModel(pydantic.BaseModel):
+    """Data model for the datasets in a predicted quantum graph file."""
+
+    dataset_id: uuid.UUID
+    """Universally unique ID for the dataset."""
+
+    dataset_type_name: DatasetTypeName
+    """Name of the type of this dataset.
+
+    This is always a parent dataset type name, not a component.
+
+    Note that full dataset type definitions are stored in the pipeline graph.
+    """
+
+    data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
+    """The full values (required and implied) of this dataset's data ID."""
+
+    run: str
+    """This dataset's RUN collection name."""
+
+    @classmethod
+    def from_dataset_ref(cls, ref: DatasetRef) -> PredictedDatasetModel:
+        """Construct from a butler `~lsst.daf.butler.DatasetRef`.
+
+        Parameters
+        ----------
+        ref : `lsst.daf.butler.DatasetRef`
+            Dataset reference.
+
+        Returns
+        -------
+        model : `PredictedDatasetModel`
+            Model for the dataset.
+        """
+        dataset_type_name, _ = DatasetType.splitDatasetTypeName(ref.datasetType.name)
+        return cls.model_construct(
+            dataset_id=ref.id,
+            dataset_type_name=dataset_type_name,
+            data_coordinate=list(ref.dataId.full_values),
+            run=ref.run,
+        )
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class PredictedQuantumDatasetsModel(pydantic.BaseModel):
+    """Data model for a description of a single predicted quantum that includes
+    its inputs and outputs.
+    """
+
+    quantum_id: uuid.UUID
+    """Universally unique ID for the quantum."""
+
+    task_label: TaskLabel
+    """Label of the task.
+
+    Note that task label definitions are stored in the pipeline graph.
+    """
+
+    data_coordinate: DataCoordinateValues = pydantic.Field(default_factory=list)
+    """The full values (required and implied) of this quantum's data ID."""
+
+    inputs: dict[ConnectionName, list[PredictedDatasetModel]] = pydantic.Field(default_factory=dict)
+    """The input datasets to this quantum, grouped by connection name."""
+
+    outputs: dict[ConnectionName, list[PredictedDatasetModel]] = pydantic.Field(default_factory=dict)
+    """The datasets output by this quantum, grouped by connection name."""
+
+    datastore_records: dict[DatastoreName, SerializedDatastoreRecordData] = pydantic.Field(
+        default_factory=dict
+    )
+    """Datastore records for inputs to this quantum that are already present in
+    the data repository.
+    """
+
+    def iter_dataset_ids(self) -> Iterator[uuid.UUID]:
+        """Return an iterator over the UUIDs of all datasets referenced by this
+        quantum.
+
+        Returns
+        -------
+        iter : `~collections.abc.Iterator` [ `uuid.UUID` ]
+            Iterator over dataset IDs.
+        """
+        for datasets in itertools.chain(self.inputs.values(), self.outputs.values()):
+            for dataset in datasets:
+                yield dataset.dataset_id
+
+    def deserialize_datastore_records(self) -> dict[DatastoreName, DatastoreRecordData]:
+        """Deserialize the mapping of datastore records."""
+        return {
+            datastore_name: DatastoreRecordData.from_simple(serialized_records)
+            for datastore_name, serialized_records in self.datastore_records.items()
+        }
+
+    @classmethod
+    def from_execution_quantum(
+        cls, task_node: TaskNode, quantum: Quantum, quantum_id: uuid.UUID
+    ) -> PredictedQuantumDatasetsModel:
+        """Construct from an `lsst.daf.butler.Quantum` instance.
+
+        Parameters
+        ----------
+        task_node : `.pipeline_graph.TaskNode`
+            Task node from the pipeline graph.
+        quantum : `lsst.daf.butler.Quantum`
+            Quantum object.
+        quantum_id : `uuid.UUID`
+            ID for this quantum.
+
+        Returns
+        -------
+        model : `PredictedQuantumDatasetsModel`
+            Model for this quantum.
+        """
+        result: PredictedQuantumDatasetsModel = cls.model_construct(
+            quantum_id=quantum_id,
+            task_label=task_node.label,
+            data_coordinate=list(cast(DataCoordinate, quantum.dataId).full_values),
+        )
+        for read_edge in task_node.iter_all_inputs():
+            refs = sorted(quantum.inputs[read_edge.dataset_type_name], key=lambda ref: ref.dataId)
+            result.inputs[read_edge.connection_name] = [
+                PredictedDatasetModel.from_dataset_ref(ref) for ref in refs
+            ]
+        for write_edge in task_node.iter_all_outputs():
+            refs = sorted(quantum.outputs[write_edge.dataset_type_name], key=lambda ref: ref.dataId)
+            result.outputs[write_edge.connection_name] = [
+                PredictedDatasetModel.from_dataset_ref(ref) for ref in refs
+            ]
+        result.datastore_records = {
+            store_name: records.to_simple() for store_name, records in quantum.datastore_records.items()
+        }
+        return result
+
+    @classmethod
+    def from_old_quantum_graph_init(
+        cls, task_init_node: TaskInitNode, old_quantum_graph: QuantumGraph
+    ) -> PredictedQuantumDatasetsModel:
+        """Construct from the init-input and init-output dataset types of a
+        task in an old `QuantumGraph` instance.
+
+        Parameters
+        ----------
+        task_init_node : `.pipeline_graph.TaskInitNode`
+            Task init node from the pipeline graph.
+        old_quantum_graph : `QuantumGraph`
+            Quantum graph.
+
+        Returns
+        -------
+        model : `PredictedQuantumDatasetsModel`
+            Model for this "init" quantum.
+        """
+        task_def = old_quantum_graph.findTaskDefByLabel(task_init_node.label)
+        assert task_def is not None
+        init_input_refs = {
+            ref.datasetType.name: ref for ref in (old_quantum_graph.initInputRefs(task_def) or [])
+        }
+        init_output_refs = {
+            ref.datasetType.name: ref for ref in (old_quantum_graph.initOutputRefs(task_def) or [])
+        }
+        init_input_ids = {ref.id for ref in init_input_refs.values()}
+        result: PredictedQuantumDatasetsModel = cls.model_construct(
+            quantum_id=generate_uuidv7(), task_label=task_init_node.label
+        )
+        for read_edge in task_init_node.iter_all_inputs():
+            ref = init_input_refs[read_edge.dataset_type_name]
+            result.inputs[read_edge.connection_name] = [PredictedDatasetModel.from_dataset_ref(ref)]
+        for write_edge in task_init_node.iter_all_outputs():
+            ref = init_output_refs[write_edge.dataset_type_name]
+            result.outputs[write_edge.connection_name] = [PredictedDatasetModel.from_dataset_ref(ref)]
+        datastore_records: dict[str, DatastoreRecordData] = {}
+        for quantum in old_quantum_graph.get_task_quanta(task_init_node.label).values():
+            for store_name, records in quantum.datastore_records.items():
+                subset = records.subset(init_input_ids)
+                if subset is not None:
+                    datastore_records.setdefault(store_name, DatastoreRecordData()).update(subset)
+            break  # All quanta have same init-inputs, so we only need one.
+        result.datastore_records = {
+            store_name: records.to_simple() for store_name, records in datastore_records.items()
+        }
+        return result
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class PredictedInitQuantaModel(pydantic.RootModel):
+    """Data model for the init-inputs and init-outputs of a predicted quantum
+    graph.
+    """
+
+    root: list[PredictedQuantumDatasetsModel] = pydantic.Field(default_factory=list)
+    """List of special "init" quanta: one for each task, and another for global
+    init-outputs.
+    """
+
+    def update_from_old_quantum_graph(self, old_quantum_graph: QuantumGraph) -> None:
+        """Update this model in-place by extracting from an old `QuantumGraph`
+        instance.
+
+        Parameters
+        ----------
+        old_quantum_graph : `QuantumGraph`
+            Quantum graph.
+        """
+        global_init_quantum = PredictedQuantumDatasetsModel.model_construct(
+            quantum_id=generate_uuidv7(), task_label=""
+        )
+        for ref in old_quantum_graph.globalInitOutputRefs():
+            global_init_quantum.outputs[ref.datasetType.name] = [PredictedDatasetModel.from_dataset_ref(ref)]
+        self.root.append(global_init_quantum)
+        for task_node in old_quantum_graph.pipeline_graph.tasks.values():
+            self.root.append(
+                PredictedQuantumDatasetsModel.from_old_quantum_graph_init(task_node.init, old_quantum_graph)
+            )
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class PredictedQuantumInfo(QuantumInfo):
+    """A typed dictionary that annotates the attributes of the NetworkX graph
+    node data for a predicted quantum.
+
+    Since NetworkX types are not generic over their node mapping type, this has
+    to be used explicitly, e.g.::
+
+        node_data: PredictedQuantumInfo = xgraph.nodes[quantum_id]
+
+    where ``xgraph`` can be either `PredictedQuantumGraph.quantum_only_xgraph`
+    or `PredictedQuantumGraph.bipartite_xgraph`.
+    """
+
+    quantum: Quantum
+    """Quantum object that can be passed directly to an executor.
+
+    This attribute is only present if
+    `PredictedQuantumGraph.build_execution_quanta` has been run on this node's
+    quantum ID already.
+    """
+
+
+class PredictedDatasetInfo(DatasetInfo):
+    """A typed dictionary that annotates the attributes of the NetworkX graph
+    node data for a dataset.
+
+    Since NetworkX types are not generic over their node mapping type, this has
+    to be used explicitly, e.g.::
+
+        node_data: PredictedDatasetInfo = xgraph.nodes[dataset_id]
+
+    where ``xgraph`` is from the `PredictedQuantumGraph.bipartite_xgraph`
+    property.
+    """
+
+
+class PredictedQuantumGraph(BaseQuantumGraph):
+    """A directed acyclic graph that predicts a processing run and supports it
+    during execution.
+
+    Parameters
+    ----------
+    components : `PredictedQuantumGraphComponents`
+        A struct of components used to construct the graph.
+
+    Notes
+    -----
+    Iteration over a `PredictedQuantumGraph` yields loaded quantum IDs in
+    deterministic topological order (but the tiebreaker is unspecified). The
+    `len` of a `PredictedQuantumGraph` is the number of loaded non-init quanta,
+    i.e. the same as the number of quanta iterated over.
+    """
+
+    def __init__(self, components: PredictedQuantumGraphComponents):
+        if components.header.graph_type != "predicted":
+            raise TypeError(f"Header is for a {components.header.graph_type!r} graph, not 'predicted'.")
+        super().__init__(components.header, components.pipeline_graph)
+        self._quantum_only_xgraph = networkx.DiGraph()
+        self._bipartite_xgraph = networkx.DiGraph()
+        self._quanta_by_task_label: dict[str, dict[DataCoordinate, uuid.UUID]] = {
+            task_label: {} for task_label in self.pipeline_graph.tasks.keys()
+        }
+        self._datasets_by_type: dict[str, dict[DataCoordinate, uuid.UUID]] = {
+            dataset_type_name: {} for dataset_type_name in self.pipeline_graph.dataset_types.keys()
+        }
+        self._datasets_by_type[self.pipeline_graph.packages_dataset_type.name] = {}
+        self._dimension_data = components.dimension_data
+        self._add_init_quanta(components.init_quanta)
+        self._quantum_datasets: dict[uuid.UUID, PredictedQuantumDatasetsModel] = {}
+        self._expanded_data_ids: dict[DataCoordinate, DataCoordinate] = {}
+        self._add_thin_graph(components.thin_graph, components.quantum_indices)
+        for quantum_datasets in components.quantum_datasets.values():
+            self._add_quantum_datasets(quantum_datasets)
+        if not components.thin_graph.edges:
+            # If we loaded the thin_graph, we've already populated this graph.
+            self._quantum_only_xgraph.update(
+                networkx.algorithms.bipartite.projected_graph(
+                    networkx.DiGraph(self._bipartite_xgraph),
+                    self._quantum_only_xgraph.nodes.keys(),
+                )
+            )
+        if _LOG.isEnabledFor(logging.DEBUG):
+            for quantum_id in self:
+                _LOG.debug(
+                    "%s: %s @ %s",
+                    quantum_id,
+                    self._quantum_only_xgraph.nodes[quantum_id]["task_label"],
+                    self._quantum_only_xgraph.nodes[quantum_id]["data_id"].required,
+                )
+
+    def _add_init_quanta(self, component: PredictedInitQuantaModel) -> None:
+        self._init_quanta = {q.task_label: q for q in component.root}
+        empty_data_id = DataCoordinate.make_empty(self.pipeline_graph.universe)
+        for quantum_datasets in self._init_quanta.values():
+            for init_datasets in itertools.chain(
+                quantum_datasets.inputs.values(), quantum_datasets.outputs.values()
+            ):
+                for init_dataset in init_datasets:
+                    self._datasets_by_type[init_dataset.dataset_type_name][empty_data_id] = (
+                        init_dataset.dataset_id
+                    )
+            _LOG.debug(
+                "%s: %s @ init",
+                quantum_datasets.quantum_id,
+                quantum_datasets.task_label,
+            )
+
+    def _add_thin_graph(
+        self, component: PredictedThinGraphModel, indices: Mapping[uuid.UUID, QuantumIndex]
+    ) -> None:
+        uuid_by_index = {v: k for k, v in indices.items()}
+        for index1, index2 in component.edges:
+            self._quantum_only_xgraph.add_edge(uuid_by_index[index1], uuid_by_index[index2])
+        for task_label, thin_quanta_for_task in component.quanta.items():
+            for thin_quantum in thin_quanta_for_task:
+                self._add_quantum(
+                    uuid_by_index[thin_quantum.quantum_index],
+                    task_label,
+                    thin_quantum.data_coordinate,
+                )
+
+    def _add_quantum_datasets(self, quantum_datasets: PredictedQuantumDatasetsModel) -> None:
+        self._quantum_datasets[quantum_datasets.quantum_id] = quantum_datasets
+        self._add_quantum(
+            quantum_datasets.quantum_id, quantum_datasets.task_label, quantum_datasets.data_coordinate
+        )
+        task_node = self.pipeline_graph.tasks[quantum_datasets.task_label]
+        for connection_name, input_datasets in quantum_datasets.inputs.items():
+            pipeline_edge = task_node.get_input_edge(connection_name)
+            for input_dataset in input_datasets:
+                self._add_dataset(input_dataset)
+                self._bipartite_xgraph.add_edge(
+                    input_dataset.dataset_id,
+                    quantum_datasets.quantum_id,
+                    key=connection_name,
+                    is_read=True,
+                )
+                # There might be multiple input connections for the same
+                # dataset type.
+                self._bipartite_xgraph.edges[
+                    input_dataset.dataset_id, quantum_datasets.quantum_id
+                ].setdefault("pipeline_edges", []).append(pipeline_edge)
+        for connection_name, output_datasets in quantum_datasets.outputs.items():
+            pipeline_edges = [task_node.get_output_edge(connection_name)]
+            for output_dataset in output_datasets:
+                self._add_dataset(output_dataset)
+                self._bipartite_xgraph.add_edge(
+                    quantum_datasets.quantum_id,
+                    output_dataset.dataset_id,
+                    key=connection_name,
+                    is_read=False,
+                    pipeline_edges=pipeline_edges,
+                )
+
+    def _add_quantum(
+        self, quantum_id: uuid.UUID, task_label: str, data_coordinate_values: Sequence[DataIdValue]
+    ) -> None:
+        task_node = self.pipeline_graph.tasks[task_label]
+        self._quantum_only_xgraph.add_node(quantum_id, task_label=task_label, pipeline_node=task_node)
+        self._bipartite_xgraph.add_node(quantum_id, task_label=task_label, pipeline_node=task_node)
+        data_coordinate_values = tuple(data_coordinate_values)
+        dimensions = self.pipeline_graph.tasks[task_label].dimensions
+        data_id = DataCoordinate.from_full_values(dimensions, tuple(data_coordinate_values))
+        self._quantum_only_xgraph.nodes[quantum_id].setdefault("data_id", data_id)
+        self._bipartite_xgraph.nodes[quantum_id].setdefault("data_id", data_id)
+        self._quanta_by_task_label[task_label][data_id] = quantum_id
+
+    def _add_dataset(self, model: PredictedDatasetModel) -> None:
+        dataset_type_node = self.pipeline_graph.dataset_types[model.dataset_type_name]
+        data_id = DataCoordinate.from_full_values(dataset_type_node.dimensions, tuple(model.data_coordinate))
+        self._bipartite_xgraph.add_node(
+            model.dataset_id,
+            dataset_type_name=dataset_type_node.name,
+            pipeline_node=dataset_type_node,
+            run=model.run,
+        )
+        self._bipartite_xgraph.nodes[model.dataset_id].setdefault("data_id", data_id)
+        self._datasets_by_type[model.dataset_type_name][data_id] = model.dataset_id
+
+    @classmethod
+    def open(
+        cls,
+        uri: ResourcePathExpression,
+        page_size: int = DEFAULT_PAGE_SIZE,
+        import_mode: TaskImportMode = TaskImportMode.ASSUME_CONSISTENT_EDGES,
+    ) -> AbstractContextManager[PredictedQuantumGraphReader]:
+        """Open a quantum graph and return a reader to load from it.
+
+        Parameters
+        ----------
+        uri : convertible to `lsst.resources.ResourcePath`
+            URI to open. Should have a ``.qg`` extension.
+        page_size : `int`, optional
+            Approximate number of bytes to read at once from address files.
+            Note that this does not set a page size for *all* reads, but it
+            does affect the smallest, most numerous reads.
+        import_mode : `.pipeline_graph.TaskImportMode`, optional
+            How to handle importing the task classes referenced in the pipeline
+            graph.
+
+        Returns
+        -------
+        reader : `contextlib.AbstractContextManager` [ \
+                `PredictedQuantumGraphReader` ]
+            A context manager that returns the reader when entered.
+        """
+        return PredictedQuantumGraphReader.open(uri, page_size=page_size, import_mode=import_mode)
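+
+    # For example (sketch; hypothetical file name), the reader is used as a
+    # context manager:
+    #
+    #     with PredictedQuantumGraph.open("pipeline.qg") as reader:
+    #         ...  # load components via the reader before it closes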
+
+    @classmethod
+    def read_execution_quanta(
+        cls,
+        uri: ResourcePathExpression,
+        quantum_ids: Iterable[uuid.UUID] | None = None,
+        page_size: int = DEFAULT_PAGE_SIZE,
+    ) -> PredictedQuantumGraph:
+        """Read one or more executable quanta from a quantum graph file.
+
+        Parameters
+        ----------
+        uri : convertible to `lsst.resources.ResourcePath`
+            URI to open. Should have a ``.qg`` extension for new quantum graph
+            files, or ``.qgraph`` for the old format.
+        quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
+            Iterable of quantum IDs to load. If not provided, all quanta will
+            be loaded. The UUIDs of special init quanta will be ignored.
+        page_size : `int`, optional
+            Approximate number of bytes to read at once from address files.
+            Note that this does not set a page size for *all* reads, but it
+            does affect the smallest, most numerous reads.
+
+        Returns
+        -------
+        quantum_graph : `PredictedQuantumGraph`
+            A quantum graph that can build execution quanta for all of the
+            given IDs.
+        """
+        return PredictedQuantumGraphComponents.read_execution_quanta(
+            uri,
+            quantum_ids,
+            page_size=page_size,
+        ).assemble()
+
+    @property
+    def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
+        """A nested mapping of all quanta, keyed first by task name and then by
+        data ID.
+
+        Notes
+        -----
+        This is populated by the ``thin_graph`` component (all quanta are
+        added) and the ``quantum_datasets`` component (only loaded quanta are
+        added). All tasks in the pipeline graph are included, even if none of
+        their quanta were loaded (i.e. nested mappings may be empty).
+
+        The returned object may be an internal dictionary; as the type
+        annotation indicates, it should not be modified in place.
+        """
+        return self._quanta_by_task_label
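+
+    # For example (sketch), the UUID of a single quantum can be looked up as
+    # ``qg.quanta_by_task["isr"][data_id]``, where ``qg`` is a
+    # `PredictedQuantumGraph` instance and "isr" is a hypothetical task label.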
+
+    @property
+    def datasets_by_type(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
+        """A nested mapping of all datasets, keyed first by dataset type name
+        and then by data ID.
+
+        Notes
+        -----
+        This is populated only by the ``quantum_datasets`` and ``init_quanta``
+        components, and only datasets referenced by loaded quanta are present.
+        All dataset types in the pipeline graph are included, even if none of
+        their datasets were loaded (i.e. nested mappings may be empty).
+
+        The returned object may be an internal dictionary; as the type
+        annotation indicates, it should not be modified in place.
+        """
+        return self._datasets_by_type
+
+    @property
+    def quantum_only_xgraph(self) -> networkx.DiGraph:
+        """A directed acyclic graph with quanta as nodes and datasets elided.
+
+        Notes
+        -----
+        Node keys are quantum UUIDs, and are populated by the ``thin_graph``
+        component (all nodes and edges) and ``quantum_datasets`` component
+        (only those that were loaded).
+
+        Node state dictionaries are described by the
+        `PredictedQuantumInfo` type.
+
+        The returned object is a read-only view of an internal one.
+        """
+        return self._quantum_only_xgraph.copy(as_view=True)
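+
+    # For example (sketch), dependency-ordered traversal of the loaded quanta
+    # can use networkx directly, where ``qg`` is a `PredictedQuantumGraph`:
+    #
+    #     for quantum_id in networkx.topological_sort(qg.quantum_only_xgraph):
+    #         ...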
+
+    @property
+    def bipartite_xgraph(self) -> networkx.MultiDiGraph:
+        """A directed acyclic graph with quantum and dataset nodes.
+
+        This graph never includes init-input and init-output datasets.
+
+        Notes
+        -----
+        Node keys are quantum or dataset UUIDs. Nodes for quanta are present
+        if the ``thin_graph`` component is loaded (all nodes) or if the
+        ``quantum_datasets`` component is loaded (just loaded quanta). Edges
+        and dataset nodes are only present for quanta whose
+        ``quantum_datasets`` were loaded.
+
+        Node state dictionaries are described by the
+        `PredictedQuantumInfo` and `PredictedDatasetInfo` types.
+
+        The returned object is a read-only view of an internal one.
+        """
+        return self._bipartite_xgraph.copy(as_view=True)
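+
+    # For example (sketch), the datasets consumed and produced by a loaded
+    # quantum are its graph neighbors, where ``qg`` is a
+    # `PredictedQuantumGraph` instance:
+    #
+    #     input_dataset_ids = list(qg.bipartite_xgraph.predecessors(quantum_id))
+    #     output_dataset_ids = list(qg.bipartite_xgraph.successors(quantum_id))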
+
+    @property
+    def dimension_data(self) -> DimensionDataAttacher | None:
+        """All dimension records needed to expand the data IDs in the graph.
+
+        This may be `None` if the dimension data was not loaded. If all
+        execution quanta have been built, all records are guaranteed to have
+        been deserialized and the ``records`` attribute is complete. In other
+        cases some records may still only be present in the ``deserializers``
+        attribute.
+        """
+        return self._dimension_data
+
+    def __iter__(self) -> Iterator[uuid.UUID]:
+        for quanta_for_task in self.quanta_by_task.values():
+            for data_id in sorted(quanta_for_task.keys()):
+                yield quanta_for_task[data_id]
+
+    def __len__(self) -> int:
+        return len(self._quantum_only_xgraph)
+
+    def get_init_inputs(self, task_label: str) -> dict[ConnectionName, DatasetRef]:
+        """Return the init-input datasets for the given task.
+
+        Parameters
+        ----------
+        task_label : `str`
+            Label of the task.
+
+        Returns
+        -------
+        init_inputs : `dict` [ `str`, `lsst.daf.butler.DatasetRef` ]
+            Dataset references for init-input datasets, keyed by connection
+            name. Dataset type storage classes match the task connection
+            declarations, not necessarily the data repository, and may be
+            components.
+        """
+        if self._init_quanta is None:
+            raise IncompleteQuantumGraphError("The init_quanta component was not loaded.")
+        task_init_node = self.pipeline_graph.tasks[task_label].init
+        return {
+            connection_name: task_init_node.inputs[connection_name].adapt_dataset_ref(
+                self._make_init_ref(datasets[0])
+            )
+            for connection_name, datasets in self._init_quanta[task_label].inputs.items()
+        }
+
+    def get_init_outputs(self, task_label: str) -> dict[ConnectionName, DatasetRef]:
+        """Return the init-output datasets for the given task.
+
+        Parameters
+        ----------
+        task_label : `str`
+            Label of the task. ``""`` may be used to get global init-outputs.
+
+        Returns
+        -------
+        init_outputs : `dict` [ `str`, `lsst.daf.butler.DatasetRef` ]
+            Dataset references for init-output datasets, keyed by connection
+            name. Dataset type storage classes match the task connection
+            declarations, not necessarily the data repository.
+        """
+        if self._init_quanta is None:
+            raise IncompleteQuantumGraphError("The init_quanta component was not loaded.")
+        if not task_label:
+            (datasets,) = self._init_quanta[""].outputs.values()
+            return {
+                acc.PACKAGES_INIT_OUTPUT_NAME: DatasetRef(
+                    self.pipeline_graph.packages_dataset_type,
+                    DataCoordinate.make_empty(self.pipeline_graph.universe),
+                    run=datasets[0].run,
+                    id=datasets[0].dataset_id,
+                    conform=False,
+                )
+            }
+        task_init_node = self.pipeline_graph.tasks[task_label].init
+        result: dict[ConnectionName, DatasetRef] = {}
+        for connection_name, datasets in self._init_quanta[task_label].outputs.items():
+            if connection_name == acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME:
+                edge = task_init_node.config_output
+            else:
+                edge = task_init_node.outputs[connection_name]
+            result[connection_name] = edge.adapt_dataset_ref(self._make_init_ref(datasets[0]))
+        return result
+
+    def _make_init_ref(self, dataset: PredictedDatasetModel) -> DatasetRef:
+        dataset_type = self.pipeline_graph.dataset_types[dataset.dataset_type_name].dataset_type
+        return DatasetRef(
+            dataset_type,
+            DataCoordinate.make_empty(self.pipeline_graph.universe),
+            run=dataset.run,
+            id=dataset.dataset_id,
+            conform=False,
+        )
+
+    def build_execution_quanta(
+        self,
+        quantum_ids: Iterable[uuid.UUID] | None = None,
+        task_label: str | None = None,
+    ) -> dict[uuid.UUID, Quantum]:
+        """Build `lsst.daf.butler.Quantum` objects suitable for executing
+        tasks.
+
+        In addition to returning the quantum objects directly, this also causes
+        the `quantum_only_xgraph` and `bipartite_xgraph` graphs to include a
+        ``quantum`` attribute for the affected quanta.
+
+        Parameters
+        ----------
+        quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
+            IDs of all quanta to return. If not provided, all quanta for the
+            given task label (if given) or graph are returned.
+        task_label : `str`, optional
+            Task label whose quanta should be generated. Ignored if
+            ``quantum_ids`` is not `None`.
+
+        Returns
+        -------
+        quanta : `dict` [ `uuid.UUID`, `lsst.daf.butler.Quantum` ]
+            Mapping of quanta, keyed by UUID. All dataset types are adapted to
+            the task's storage class declarations and inputs may be components.
+            All data IDs have dimension records attached.
+        """
+        if not self._init_quanta:
+            raise IncompleteQuantumGraphError(
+                "Cannot build execution quanta without loading the ``init_quanta`` component."
+            )
+        if quantum_ids is None:
+            if task_label is not None:
+                quantum_ids = self._quanta_by_task_label[task_label].values()
+            else:
+                quantum_ids = self._quantum_only_xgraph.nodes.keys()
+        else:
+            # Guard against single-pass iterators.
+            quantum_ids = list(quantum_ids)
+        del task_label  # make sure we don't accidentally use this.
+        result: dict[uuid.UUID, Quantum] = {}
+        self._expand_execution_quantum_data_ids(quantum_ids)
+        task_init_datastore_records: dict[TaskLabel, dict[DatastoreName, DatastoreRecordData]] = {}
+        for quantum_id in quantum_ids:
+            quantum_node_dict: PredictedQuantumInfo = self._quantum_only_xgraph.nodes[quantum_id]
+            if "quantum" in quantum_node_dict:
+                result[quantum_id] = quantum_node_dict["quantum"]
+                continue
+            # We've declared the info dict keys to all be required because that
+            # saves a lot of casting, but the reality is that they can either
+            # be fully populated or totally unpopulated. But that makes mypy
+            # think the check above always succeeds.
+            try:  # type: ignore [unreachable]
+                quantum_datasets = self._quantum_datasets[quantum_id]
+            except KeyError:
+                raise IncompleteQuantumGraphError(
+                    f"Full quantum information for {quantum_id} was not loaded."
+                ) from None
+            task_node = self.pipeline_graph.tasks[quantum_datasets.task_label]
+            quantum_data_id = self._expanded_data_ids[self._bipartite_xgraph.nodes[quantum_id]["data_id"]]
+            inputs = self._build_execution_quantum_refs(task_node, quantum_datasets.inputs)
+            outputs = self._build_execution_quantum_refs(task_node, quantum_datasets.outputs)
+            if task_node.label not in task_init_datastore_records:
+                task_init_datastore_records[task_node.label] = self._init_quanta[
+                    task_node.label
+                ].deserialize_datastore_records()
+            quantum = Quantum(
+                taskName=task_node.task_class_name,
+                taskClass=task_node.task_class,
+                dataId=quantum_data_id,
+                initInputs={
+                    ref.datasetType: ref for ref in self.get_init_inputs(quantum_datasets.task_label).values()
+                },
+                inputs=inputs,
+                outputs=outputs,
+                datastore_records=DatastoreRecordData.merge_mappings(
+                    quantum_datasets.deserialize_datastore_records(),
+                    task_init_datastore_records[task_node.label],
+                ),
+            )
+            self._quantum_only_xgraph.nodes[quantum_id]["quantum"] = quantum
+            self._bipartite_xgraph.nodes[quantum_id]["quantum"] = quantum
+            result[quantum_id] = quantum
+        return result
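+
+    # For example (sketch), building the executable quanta for one task, where
+    # ``qg`` is a `PredictedQuantumGraph` with the required components loaded
+    # and "isr" is a hypothetical task label:
+    #
+    #     quanta = qg.build_execution_quanta(task_label="isr")
+    #     for quantum_id, quantum in quanta.items():
+    #         ...  # hand each Quantum to a single-quantum executor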
+
+    def _expand_execution_quantum_data_ids(self, quantum_ids: Iterable[uuid.UUID]) -> None:
+        if self._dimension_data is None:
+            raise IncompleteQuantumGraphError(
+                "Cannot build execution quanta without loading the ``dimension_data`` component."
+            )
+        data_ids_to_expand: dict[DimensionGroup, set[DataCoordinate]] = defaultdict(set)
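+        # Gather the data IDs of each quantum and its neighboring datasets,
+        # batching any that still lack dimension records by dimension group,
+        # so that each group can be attached in a single pass below.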
+        for quantum_id in quantum_ids:
+            data_id: DataCoordinate = self._bipartite_xgraph.nodes[quantum_id]["data_id"]
+            if data_id.hasRecords():
+                self._expanded_data_ids[data_id] = data_id
+            else:
+                data_ids_to_expand[data_id.dimensions].add(data_id)
+            for dataset_id in itertools.chain(
+                self._bipartite_xgraph.predecessors(quantum_id),
+                self._bipartite_xgraph.successors(quantum_id),
+            ):
+                data_id = self._bipartite_xgraph.nodes[dataset_id]["data_id"]
+                if data_id.hasRecords():
+                    self._expanded_data_ids[data_id] = data_id
+                else:
+                    data_ids_to_expand[data_id.dimensions].add(data_id)
+        for dimensions, data_ids_for_dimensions in data_ids_to_expand.items():
+            self._expanded_data_ids.update(
+                (d, d) for d in self._dimension_data.attach(dimensions, data_ids_for_dimensions)
+            )
+
+    def _build_execution_quantum_refs(
+        self, task_node: TaskNode, model_mapping: dict[ConnectionName, list[PredictedDatasetModel]]
+    ) -> dict[DatasetType, list[DatasetRef]]:
+        results: dict[DatasetType, list[DatasetRef]] = {}
+        for connection_name, datasets in model_mapping.items():
+            edge = task_node.get_edge(connection_name)
+            dataset_type = edge.adapt_dataset_type(
+                self.pipeline_graph.dataset_types[edge.parent_dataset_type_name].dataset_type
+            )
+            results[dataset_type] = [self._make_general_ref(dataset_type, d.dataset_id) for d in datasets]
+        return results
+
+    def _make_general_ref(self, dataset_type: DatasetType, dataset_id: uuid.UUID) -> DatasetRef:
+        node_state = self._bipartite_xgraph.nodes[dataset_id]
+        data_id = self._expanded_data_ids[node_state["data_id"]]
+        return DatasetRef(dataset_type, data_id, run=node_state["run"], id=dataset_id)
+
+    def make_init_qbb(
+        self,
+        butler_config: Config | ResourcePathExpression,
+        *,
+        config_search_paths: Iterable[str] | None = None,
+    ) -> QuantumBackedButler:
+        """Construct a quantum-backed butler suitable for reading and writing
+        init input and init output datasets, respectively.
+
+        This only requires the ``init_quanta`` component to have been loaded.
+
+        Parameters
+        ----------
+        butler_config : `~lsst.daf.butler.Config` or \
+                `~lsst.resources.ResourcePathExpression`
+            A butler repository root, configuration filename, or configuration
+            instance.
+        config_search_paths : `~collections.abc.Iterable` [ `str` ], optional
+            Additional search paths for butler configuration.
+
+        Returns
+        -------
+        qbb : `~lsst.daf.butler.QuantumBackedButler`
+            A limited butler that can ``get`` init-input datasets and ``put``
+            init-output datasets.
+        """
+        # Collect all init input/output dataset IDs.
+        predicted_inputs: set[uuid.UUID] = set()
+        predicted_outputs: set[uuid.UUID] = set()
+        datastore_record_maps: list[dict[DatastoreName, DatastoreRecordData]] = []
+        for init_quantum_datasets in self._init_quanta.values():
+            predicted_inputs.update(
+                d.dataset_id for d in itertools.chain.from_iterable(init_quantum_datasets.inputs.values())
+            )
+            predicted_outputs.update(
+                d.dataset_id for d in itertools.chain.from_iterable(init_quantum_datasets.outputs.values())
+            )
+            datastore_record_maps.append(
+                {
+                    datastore_name: DatastoreRecordData.from_simple(serialized_records)
+                    for datastore_name, serialized_records in init_quantum_datasets.datastore_records.items()
+                }
+            )
+        # Remove intermediates from inputs.
+        predicted_inputs -= predicted_outputs
+        dataset_types = {d.name: d.dataset_type for d in self.pipeline_graph.dataset_types.values()}
+        # Make butler from everything.
+        return QuantumBackedButler.from_predicted(
+            config=butler_config,
+            predicted_inputs=predicted_inputs,
+            predicted_outputs=predicted_outputs,
+            dimensions=self.pipeline_graph.universe,
+            datastore_records=DatastoreRecordData.merge_mappings(*datastore_record_maps),
+            search_paths=list(config_search_paths) if config_search_paths is not None else None,
+            dataset_types=dataset_types,
+        )
|
|
1174
|
+
|
|
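A minimal usage sketch (an annotation, not part of the diff): the file and repository paths below are hypothetical, and per the docstring only the ``init_quanta`` component needs to be loaded first.

    # Load just the init quanta, then build a quantum-backed butler.
    with PredictedQuantumGraphReader.open("graph.qg") as reader:
        qg = reader.read_init_quanta().finish()
    qbb = qg.make_init_qbb("/repo/butler.yaml")
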
+    def write_init_outputs(self, butler: LimitedButler, skip_existing: bool = True) -> None:
+        """Write the init-output datasets for all tasks in the quantum graph.
+
+        This only requires the ``init_quanta`` component to have been loaded.
+
+        Parameters
+        ----------
+        butler : `lsst.daf.butler.LimitedButler`
+            A limited butler data repository client.
+        skip_existing : `bool`, optional
+            If `True` (default), ignore init-outputs that already exist. If
+            `False`, raise.
+
+        Raises
+        ------
+        lsst.daf.butler.registry.ConflictingDefinitionError
+            Raised if an init-output dataset already exists and
+            ``skip_existing=False``.
+        """
+        # Extract init-input and init-output refs from the QG.
+        input_refs: dict[str, DatasetRef] = {}
+        output_refs: dict[str, DatasetRef] = {}
+        for task_node in self.pipeline_graph.tasks.values():
+            if task_node.label not in self._init_quanta:
+                continue
+            input_refs.update(
+                {ref.datasetType.name: ref for ref in self.get_init_inputs(task_node.label).values()}
+            )
+            output_refs.update(
+                {
+                    ref.datasetType.name: ref
+                    for ref in self.get_init_outputs(task_node.label).values()
+                    if ref.datasetType.name != task_node.init.config_output.dataset_type_name
+                }
+            )
+        for ref, is_stored in butler.stored_many(output_refs.values()).items():
+            if is_stored:
+                if not skip_existing:
+                    raise ConflictingDefinitionError(f"Init-output dataset {ref} already exists.")
+                # We'll `put` whatever's left in output_refs at the end.
+                del output_refs[ref.datasetType.name]
+        # Instantiate tasks, reading overall init-inputs and gathering
+        # init-output in-memory objects.
+        init_outputs: list[tuple[Any, DatasetType]] = []
+        self.pipeline_graph.instantiate_tasks(
+            get_init_input=lambda dataset_type: butler.get(
+                input_refs[dataset_type.name].overrideStorageClass(dataset_type.storageClass)
+            ),
+            init_outputs=init_outputs,
+            # A task can be in the pipeline graph without having an init
+            # quantum if it doesn't have any regular quanta either (e.g. they
+            # were all skipped), and the _init_quanta has a "" entry for global
+            # init-outputs that we don't want to pass here.
+            labels=self.pipeline_graph.tasks.keys() & self._init_quanta.keys(),
+        )
+        # Write init-outputs that weren't already present.
+        for obj, dataset_type in init_outputs:
+            if new_ref := output_refs.get(dataset_type.name):
+                assert new_ref.datasetType.storageClass_name == dataset_type.storageClass_name, (
+                    "QG init refs should use task connection storage classes."
+                )
+                butler.put(obj, new_ref)
+
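For illustration, a hedged sketch of how ``write_init_outputs`` composes with ``make_init_qbb`` (names carried over from the sketch above; error handling omitted):

    # Instantiate the tasks and write any init-outputs not already stored;
    # skip_existing=True tolerates a partially initialized run.
    qbb = qg.make_init_qbb("/repo/butler.yaml")
    qg.write_init_outputs(qbb, skip_existing=True)
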
+    def write_configs(self, butler: LimitedButler, compare_existing: bool = True) -> None:
+        """Write the config datasets for all tasks in the quantum graph.
+
+        Parameters
+        ----------
+        butler : `lsst.daf.butler.LimitedButler`
+            A limited butler data repository client.
+        compare_existing : `bool`, optional
+            If `True`, check configs that already exist for consistency. If
+            `False`, always raise if configs already exist.
+
+        Raises
+        ------
+        lsst.daf.butler.registry.ConflictingDefinitionError
+            Raised if a config dataset already exists and
+            ``compare_existing=False``, or if the existing config is not
+            consistent with the config in the quantum graph.
+        """
+        to_put: list[tuple[PipelineTaskConfig, DatasetRef]] = []
+        for task_node in self.pipeline_graph.tasks.values():
+            if task_node.label not in self._init_quanta:
+                continue
+            dataset_type_name = task_node.init.config_output.dataset_type_name
+            ref = self.get_init_outputs(task_node.label)[acc.CONFIG_INIT_OUTPUT_CONNECTION_NAME]
+            try:
+                old_config = butler.get(ref)
+            except (LookupError, FileNotFoundError):
+                old_config = None
+            if old_config is not None:
+                if not compare_existing:
+                    raise ConflictingDefinitionError(f"Config dataset {ref} already exists.")
+                if not task_node.config.compare(old_config, shortcut=False, output=log_config_mismatch):
+                    raise ConflictingDefinitionError(
+                        f"Config does not match existing task config {dataset_type_name!r} in "
+                        "butler; task configurations must be consistent within the same run collection."
+                    )
+            else:
+                to_put.append((task_node.config, ref))
+        # We do writes at the end to minimize the mess we leave behind when we
+        # raise an exception.
+        for config, ref in to_put:
+            butler.put(config, ref)
+
+    def write_packages(self, butler: LimitedButler, compare_existing: bool = True) -> None:
+        """Write the 'packages' dataset for the currently-active software
+        versions.
+
+        Parameters
+        ----------
+        butler : `lsst.daf.butler.LimitedButler`
+            A limited butler data repository client.
+        compare_existing : `bool`, optional
+            If `True`, check packages that already exist for consistency. If
+            `False`, always raise if the packages dataset already exists.
+
+        Raises
+        ------
+        lsst.daf.butler.registry.ConflictingDefinitionError
+            Raised if the packages dataset already exists and is not
+            consistent with the current packages.
+        """
+        new_packages = Packages.fromSystem()
+        (ref,) = self.get_init_outputs("").values()
+        try:
+            packages = butler.get(ref)
+        except (LookupError, FileNotFoundError):
+            packages = None
+        if packages is not None:
+            if not compare_existing:
+                raise ConflictingDefinitionError(f"Packages dataset {ref} already exists.")
+            if compare_packages(packages, new_packages):
+                # have to remove existing dataset first; butler has no
+                # replace option.
+                butler.pruneDatasets([ref], unstore=True, purge=True)
+                butler.put(packages, ref)
+        else:
+            butler.put(new_packages, ref)
+
+    def init_output_run(self, butler: LimitedButler, existing: bool = True) -> None:
+        """Initialize a new output RUN collection by writing init-output
+        datasets (including configs and packages).
+
+        Parameters
+        ----------
+        butler : `lsst.daf.butler.LimitedButler`
+            A limited butler data repository client.
+        existing : `bool`, optional
+            If `True`, check or ignore outputs that already exist. If
+            `False`, always raise if an output dataset already exists.
+
+        Raises
+        ------
+        lsst.daf.butler.registry.ConflictingDefinitionError
+            Raised if there are existing init output datasets, and either
+            ``existing=False`` or their contents are not compatible with this
+            graph.
+        """
+        self.write_configs(butler, compare_existing=existing)
+        self.write_packages(butler, compare_existing=existing)
+        self.write_init_outputs(butler, skip_existing=existing)
+
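The three write methods above are bundled by ``init_output_run``; a sketch, reusing the hypothetical ``qbb`` from the earlier annotations:

    # Configs first, then packages, then init-outputs, matching the body
    # above; existing=True makes repeat initialization of the same run
    # tolerant as long as the existing datasets are consistent.
    qg.init_output_run(qbb, existing=True)
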
+    @classmethod
+    def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> PredictedQuantumGraph:
+        """Construct from an old `QuantumGraph` instance.
+
+        Parameters
+        ----------
+        old_quantum_graph : `QuantumGraph`
+            Quantum graph to transform.
+
+        Returns
+        -------
+        predicted_quantum_graph : `PredictedQuantumGraph`
+            A new predicted quantum graph.
+        """
+        return PredictedQuantumGraphComponents.from_old_quantum_graph(old_quantum_graph).assemble()
+
+    def to_old_quantum_graph(self) -> QuantumGraph:
+        """Transform into an old `QuantumGraph` instance.
+
+        Returns
+        -------
+        old_quantum_graph : `QuantumGraph`
+            Old quantum graph.
+
+        Notes
+        -----
+        This can only be called on graphs that have loaded all quantum
+        datasets, init datasets, and dimension records.
+        """
+        from ..graph import QuantumGraph
+
+        quanta: dict[TaskDef, set[Quantum]] = {}
+        quantum_to_quantum_id: dict[Quantum, uuid.UUID] = {}
+        init_inputs: dict[TaskDef, list[DatasetRef]] = {}
+        init_outputs: dict[TaskDef, list[DatasetRef]] = {}
+        for task_def in self.pipeline_graph._iter_task_defs():
+            if not self._quanta_by_task_label.get(task_def.label):
+                continue
+            quanta_for_task: set[Quantum] = set()
+            for quantum_id, quantum in self.build_execution_quanta(task_label=task_def.label).items():
+                quanta_for_task.add(quantum)
+                quantum_to_quantum_id[quantum] = quantum_id
+            quanta[task_def] = quanta_for_task
+            init_inputs[task_def] = list(self.get_init_inputs(task_def.label).values())
+            init_outputs[task_def] = list(self.get_init_outputs(task_def.label).values())
+        global_init_outputs = list(self.get_init_outputs("").values())
+        registry_dataset_types = [d.dataset_type for d in self.pipeline_graph.dataset_types.values()]
+        result = object.__new__(QuantumGraph)
+        result._buildGraphs(
+            quanta,
+            _quantumToNodeId=quantum_to_quantum_id,
+            metadata=self.header.to_old_metadata(),
+            universe=self.pipeline_graph.universe,
+            initInputs=init_inputs,
+            initOutputs=init_outputs,
+            globalInitOutputs=global_init_outputs,
+            registryDatasetTypes=registry_dataset_types,
+        )
+        return result
+
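An illustrative round trip between the two representations (annotation only; per the Notes above the graph must be fully loaded first):

    # Quantum UUIDs survive the conversion in both directions: outbound via
    # the _quantumToNodeId mapping, inbound via from_old_quantum_graph.
    old_qg = qg.to_old_quantum_graph()
    qg_again = PredictedQuantumGraph.from_old_quantum_graph(old_qg)
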
+    def _make_summary(self) -> QgraphSummary:
+        from ..graph import QgraphSummary, QgraphTaskSummary
+
+        summary = QgraphSummary(
+            cmdLine=self.header.command or None,
+            creationUTC=str(self.header.timestamp) if self.header.timestamp is not None else None,
+            inputCollection=self.header.inputs or None,
+            outputCollection=self.header.output,
+            outputRun=self.header.output_run,
+        )
+        for task_label, quanta_for_task in self.quanta_by_task.items():
+            task_summary = QgraphTaskSummary(taskLabel=task_label, numQuanta=len(quanta_for_task))
+            task_node = self.pipeline_graph.tasks[task_label]
+            for quantum_id in quanta_for_task.values():
+                quantum_datasets = self._quantum_datasets[quantum_id]
+                for connection_name, input_datasets in quantum_datasets.inputs.items():
+                    task_summary.numInputs[
+                        task_node.get_input_edge(connection_name).parent_dataset_type_name
+                    ] += len(input_datasets)
+                for connection_name, output_datasets in quantum_datasets.outputs.items():
+                    task_summary.numOutputs[
+                        task_node.get_output_edge(connection_name).parent_dataset_type_name
+                    ] += len(output_datasets)
+            summary.qgraphTaskSummaries[task_label] = task_summary
+        return summary
+
+
+@dataclasses.dataclass(kw_only=True)
+class PredictedQuantumGraphComponents:
+    """A helper class for building and writing predicted quantum graphs.
+
+    Notes
+    -----
+    This class is a simple struct of model classes to allow different tools
+    that build predicted quantum graphs to assemble them in whatever order
+    they prefer. It does not enforce any internal invariants (e.g. the
+    quantum and dataset counts in the header, different representations of
+    quanta, internal ID sorting, etc.), but it does provide methods that can
+    satisfy them.
+    """
+
+    def __post_init__(self) -> None:
+        self.header.graph_type = "predicted"
+
+    header: HeaderModel = dataclasses.field(default_factory=HeaderModel)
+    """Basic metadata about the graph."""
+
+    pipeline_graph: PipelineGraph
+    """Description of the pipeline this graph runs, including all task label
+    and dataset type definitions.
+
+    This may include tasks that do not have any quanta (e.g. due to skipping
+    already-executed tasks).
+
+    This also includes the dimension universe used to construct the graph.
+    """
+
+    dimension_data: DimensionDataAttacher | None = None
+    """Object that can attach dimension records to data IDs."""
+
+    init_quanta: PredictedInitQuantaModel = dataclasses.field(default_factory=PredictedInitQuantaModel)
+    """A list of special quanta that describe the init-inputs and init-outputs
+    of the graph.
+
+    Tasks that are included in the pipeline graph but do not have any quanta
+    may or may not have an init quantum, but tasks that do have regular quanta
+    always have an init quantum as well.
+
+    When used to construct a `PredictedQuantumGraph`, this must have either
+    zero entries or all tasks in the pipeline.
+    """
+
+    thin_graph: PredictedThinGraphModel = dataclasses.field(default_factory=PredictedThinGraphModel)
+    """A lightweight quantum-quantum DAG with task labels and data IDs only.
+
+    This uses internal integer IDs ("indexes") for node IDs.
+
+    This does not include the special "init" quanta.
+    """
+
+    quantum_datasets: dict[uuid.UUID, PredictedQuantumDatasetsModel] = dataclasses.field(default_factory=dict)
+    """The full descriptions of all quanta, including input and output
+    datasets, keyed by UUID.
+
+    When used to construct a `PredictedQuantumGraph`, this need not have all
+    entries.
+
+    This does not include special "init" quanta.
+    """
+
+    quantum_indices: dict[uuid.UUID, QuantumIndex] = dataclasses.field(default_factory=dict)
+    """A mapping from external universal quantum ID to internal integer ID.
+
+    While this `dict` does not need to be sorted, the internal integer IDs do
+    need to correspond exactly to ``enumerate(sorted(uuids))``.
+
+    When used to construct a `PredictedQuantumGraph`, this must be fully
+    populated if `thin_graph` is. It can be empty otherwise.
+
+    This does include special "init" quanta.
+    """
+
+    def set_quantum_indices(self) -> None:
+        """Populate the `quantum_indices` component by sorting the UUIDs in
+        the `init_quanta` and `quantum_datasets` components (which must both
+        be complete).
+        """
+        all_quantum_ids = [q.quantum_id for q in self.init_quanta.root]
+        all_quantum_ids.extend(self.quantum_datasets.keys())
+        all_quantum_ids.sort(key=operator.attrgetter("int"))
+        self.quantum_indices = {quantum_id: index for index, quantum_id in enumerate(all_quantum_ids)}
+
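A self-contained illustration of the index invariant documented on `quantum_indices` (standalone snippet, not library code):

    import uuid

    # Indices must follow the integer order of the UUIDs, regardless of the
    # order in which the quanta were added to the components.
    ids = [uuid.UUID(int=30), uuid.UUID(int=10), uuid.UUID(int=20)]
    indices = {u: i for i, u in enumerate(sorted(ids, key=lambda u: u.int))}
    assert indices[uuid.UUID(int=10)] == 0
    assert indices[uuid.UUID(int=30)] == 2
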
+    def set_thin_graph(self) -> None:
+        """Populate the `thin_graph` component from the `pipeline_graph`,
+        `quantum_datasets` and `quantum_indices` components (which must all be
+        complete).
+        """
+        bipartite_xgraph = networkx.DiGraph()
+        self.thin_graph.quanta = {task_label: [] for task_label in self.pipeline_graph.tasks}
+        graph_quantum_indices = []
+        for quantum_datasets in self.quantum_datasets.values():
+            quantum_index = self.quantum_indices[quantum_datasets.quantum_id]
+            self.thin_graph.quanta[quantum_datasets.task_label].append(
+                PredictedThinQuantumModel.model_construct(
+                    quantum_index=quantum_index,
+                    data_coordinate=quantum_datasets.data_coordinate,
+                )
+            )
+            for dataset in itertools.chain.from_iterable(quantum_datasets.inputs.values()):
+                bipartite_xgraph.add_edge(dataset.dataset_id, quantum_index)
+            for dataset in itertools.chain.from_iterable(quantum_datasets.outputs.values()):
+                bipartite_xgraph.add_edge(quantum_index, dataset.dataset_id)
+            graph_quantum_indices.append(quantum_index)
+        quantum_only_xgraph: networkx.DiGraph = networkx.bipartite.projected_graph(
+            bipartite_xgraph, graph_quantum_indices
+        )
+        self.thin_graph.edges = list(quantum_only_xgraph.edges)
+
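The quantum-to-quantum edges come from a bipartite projection; a standalone sketch of the same `networkx` call on a toy graph:

    import networkx

    # Quantum 0 produces dataset "d"; quantum 1 consumes it. Projecting
    # onto the quantum nodes yields the direct edge 0 -> 1.
    bipartite = networkx.DiGraph([(0, "d"), ("d", 1)])
    projected = networkx.bipartite.projected_graph(bipartite, [0, 1])
    assert list(projected.edges) == [(0, 1)]
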
+    def set_header_counts(self) -> None:
+        """Populate the quantum and dataset counts in the header from the
+        `quantum_indices`, `thin_graph`, `init_quanta`, and `quantum_datasets`
+        components.
+        """
+        self.header.n_quanta = len(self.quantum_indices) - len(self.init_quanta.root)
+        self.header.n_task_quanta = {
+            task_label: len(thin_quanta) for task_label, thin_quanta in self.thin_graph.quanta.items()
+        }
+        all_dataset_ids: set[uuid.UUID] = set()
+        for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
+            all_dataset_ids.update(quantum_datasets.iter_dataset_ids())
+        self.header.n_datasets = len(all_dataset_ids)
+
+    def update_output_run(self, output_run: str) -> None:
+        """Update the output `~lsst.daf.butler.CollectionType.RUN` collection
+        name in all datasets and regenerate all output dataset and quantum
+        UUIDs.
+
+        Parameters
+        ----------
+        output_run : `str`
+            New output `~lsst.daf.butler.CollectionType.RUN` collection name.
+        """
+        uuid_map: dict[uuid.UUID, uuid.UUID] = {}
+        # Do all outputs and then all inputs in separate passes so we don't
+        # need to rely on topological ordering of anything.
+        for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
+            new_quantum_id = generate_uuidv7()
+            quantum_datasets.quantum_id = new_quantum_id
+            for output_dataset in itertools.chain.from_iterable(quantum_datasets.outputs.values()):
+                assert output_dataset.run == self.header.output_run, (
+                    f"Incorrect run {output_dataset.run} for output dataset {output_dataset.dataset_id}."
+                )
+                new_dataset_id = generate_uuidv7()
+                uuid_map[output_dataset.dataset_id] = new_dataset_id
+                output_dataset.dataset_id = new_dataset_id
+                output_dataset.run = output_run
+        for quantum_datasets in itertools.chain(self.init_quanta.root, self.quantum_datasets.values()):
+            for input_dataset in itertools.chain.from_iterable(quantum_datasets.inputs.values()):
+                if input_dataset.run == self.header.output_run:
+                    input_dataset.run = output_run
+                    input_dataset.dataset_id = uuid_map.get(
+                        input_dataset.dataset_id,
+                        # This dataset isn't necessarily an output of the
+                        # graph just because it's in the output run; the graph
+                        # could have been built with extend_run=True.
+                        input_dataset.dataset_id,
+                    )
+        # Update the keys of the quantum_datasets dict.
+        self.quantum_datasets = {qd.quantum_id: qd for qd in self.quantum_datasets.values()}
+        # Since the UUIDs have changed, the indices need to change, too.
+        self.set_quantum_indices()
+        self.set_thin_graph()
+        # Update the header last, since we use it above to get the old run.
+        self.header.output_run = output_run
+
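A sketch of retargeting a graph at a fresh output run before execution (the run name is a placeholder):

    # All output dataset and quantum UUIDs are regenerated; input refs that
    # pointed at outputs of this same graph are remapped to match.
    components.update_output_run("u/someone/new-run")
    qg = components.assemble()
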
+    def assemble(self) -> PredictedQuantumGraph:
+        """Construct a `PredictedQuantumGraph` from these components."""
+        return PredictedQuantumGraph(self)
+
+    @classmethod
+    def read_execution_quanta(
+        cls,
+        uri: ResourcePathExpression,
+        quantum_ids: Iterable[uuid.UUID] | None = None,
+        page_size: int = DEFAULT_PAGE_SIZE,
+    ) -> PredictedQuantumGraphComponents:
+        """Read one or more executable quanta from a quantum graph file.
+
+        Parameters
+        ----------
+        uri : convertible to `lsst.resources.ResourcePath`
+            URI to open. Should have a ``.qg`` extension for new quantum
+            graph files, or ``.qgraph`` for the old format.
+        quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
+            Iterable of quantum IDs to load. If not provided, all quanta will
+            be loaded. The UUIDs of special init quanta will be ignored.
+        page_size : `int`, optional
+            Approximate number of bytes to read at once from address files.
+            Note that this does not set a page size for *all* reads, but it
+            does affect the smallest, most numerous reads.
+
+        Returns
+        -------
+        components : `PredictedQuantumGraphComponents`
+            Components for a quantum graph that can build execution quanta
+            for all of the given IDs.
+        """
+        uri = ResourcePath(uri)
+        if uri.getExtension() == ".qgraph":
+            _LOG.warning(
+                f"Reading and converting old quantum graph {uri}. "
+                "Use the '.qg' extension to write in the new format."
+            )
+            from ..graph import QuantumGraph
+
+            old_qg = QuantumGraph.loadUri(uri, nodes=quantum_ids)
+            return PredictedQuantumGraphComponents.from_old_quantum_graph(old_qg)
+
+        with PredictedQuantumGraph.open(uri, page_size=page_size) as reader:
+            reader.read_execution_quanta(quantum_ids)
+            return reader.components
+
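A sketch of a targeted partial load (``some_quantum_id`` is a placeholder `uuid.UUID`; the file name is hypothetical):

    # Read only what is needed to execute one quantum, then assemble.
    components = PredictedQuantumGraphComponents.read_execution_quanta(
        "graph.qg", quantum_ids=[some_quantum_id]
    )
    qg = components.assemble()
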
+    @classmethod
+    def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> PredictedQuantumGraphComponents:
+        """Construct from an old `QuantumGraph` instance.
+
+        Parameters
+        ----------
+        old_quantum_graph : `QuantumGraph`
+            Quantum graph to transform.
+
+        Returns
+        -------
+        components : `PredictedQuantumGraphComponents`
+            Components for a new predicted quantum graph.
+        """
+        header = HeaderModel.from_old_quantum_graph(old_quantum_graph)
+        result = cls(header=header, pipeline_graph=old_quantum_graph.pipeline_graph)
+        result.init_quanta.update_from_old_quantum_graph(old_quantum_graph)
+        dimension_data_extractor = DimensionDataExtractor.from_dimension_group(
+            old_quantum_graph.pipeline_graph.get_all_dimensions()
+        )
+        for task_node in old_quantum_graph.pipeline_graph.tasks.values():
+            task_quanta = old_quantum_graph.get_task_quanta(task_node.label)
+            for quantum_id, quantum in task_quanta.items():
+                result.quantum_datasets[quantum_id] = PredictedQuantumDatasetsModel.from_execution_quantum(
+                    task_node, quantum, quantum_id
+                )
+                dimension_data_extractor.update([cast(DataCoordinate, quantum.dataId)])
+                for refs in itertools.chain(quantum.inputs.values(), quantum.outputs.values()):
+                    dimension_data_extractor.update(ref.dataId for ref in refs)
+        result.dimension_data = DimensionDataAttacher(
+            records=dimension_data_extractor.records.values(),
+            dimensions=result.pipeline_graph.get_all_dimensions(),
+        )
+        result.set_quantum_indices()
+        result.set_thin_graph()
+        result.set_header_counts()
+        return result
+
+    def write(
+        self,
+        uri: ResourcePathExpression,
+        *,
+        zstd_level: int = 10,
+        zstd_dict_size: int = 32768,
+        zstd_dict_n_inputs: int = 512,
+    ) -> None:
+        """Write the graph to a file.
+
+        Parameters
+        ----------
+        uri : convertible to `lsst.resources.ResourcePath`
+            Path to write to. Should have a ``.qg`` extension, or ``.qgraph``
+            to force writing the old format.
+        zstd_level : `int`, optional
+            ZStandard compression level to use on JSON blocks.
+        zstd_dict_size : `int`, optional
+            Size of a ZStandard dictionary that shares compression information
+            across components. Set to zero to disable the dictionary.
+            Dictionary compression is automatically disabled if the number of
+            quanta is smaller than ``zstd_dict_n_inputs``.
+        zstd_dict_n_inputs : `int`, optional
+            Maximum number of `PredictedQuantumDatasetsModel` JSON
+            representations to feed the ZStandard dictionary training routine.
+
+        Notes
+        -----
+        Only a complete predicted quantum graph with all components fully
+        populated should be written.
+        """
+        if self.header.n_quanta + len(self.init_quanta.root) != len(self.quantum_indices):
+            raise RuntimeError(
+                f"Cannot save graph after partial read of quanta: expected {self.header.n_quanta}, "
+                f"got {len(self.quantum_indices)}."
+            )
+        uri = ResourcePath(uri)
+        match uri.getExtension():
+            case ".qg":
+                pass
+            case ".qgraph":
+                _LOG.warning(
+                    "Converting to an old-format quantum graph. "
+                    "Use '.qg' instead of '.qgraph' to save in the new format."
+                )
+                old_qg = self.assemble().to_old_quantum_graph()
+                old_qg.saveUri(uri)
+                return
+            case ext:
+                raise ValueError(
+                    f"Unsupported extension {ext!r} for quantum graph; "
+                    "expected '.qg' (or '.qgraph' to force the old format)."
+                )
+        cdict: zstandard.ZstdCompressionDict | None = None
+        cdict_data: bytes | None = None
+        quantum_datasets_json: dict[uuid.UUID, bytes] = {}
+        if len(self.quantum_datasets) < zstd_dict_n_inputs:
+            # ZStandard will fail if we ask to use a compression dict without
+            # giving it enough data, and it only helps if we have a lot of
+            # quanta.
+            zstd_dict_size = 0
+        if zstd_dict_size:
+            quantum_datasets_json = {
+                quantum_model.quantum_id: quantum_model.model_dump_json().encode()
+                for quantum_model in itertools.islice(self.quantum_datasets.values(), zstd_dict_n_inputs)
+            }
+            try:
+                cdict = zstandard.train_dictionary(
+                    zstd_dict_size,
+                    list(quantum_datasets_json.values()),
+                    level=zstd_level,
+                )
+            except zstandard.ZstdError as err:
+                warnings.warn(f"Not using a compression dictionary: {err}.")
+                cdict = None
+            else:
+                cdict_data = cdict.as_bytes()
+        compressor = zstandard.ZstdCompressor(level=zstd_level, dict_data=cdict)
+        with BaseQuantumGraphWriter.open(
+            uri,
+            header=self.header,
+            pipeline_graph=self.pipeline_graph,
+            indices=self.quantum_indices,
+            address_filename="quanta",
+            compressor=compressor,
+            cdict_data=cdict_data,
+        ) as writer:
+            writer.write_single_model("thin_graph", self.thin_graph)
+            if self.dimension_data is None:
+                raise IncompleteQuantumGraphError(
+                    "Cannot save a predicted quantum graph with no dimension data."
+                )
+            serialized_dimension_data = self.dimension_data.serialized()
+            writer.write_single_model("dimension_data", serialized_dimension_data)
+            del serialized_dimension_data
+            writer.write_single_model("init_quanta", self.init_quanta)
+            with MultiblockWriter.open_in_zip(
+                writer.zf, "quantum_datasets", writer.int_size
+            ) as quantum_datasets_mb:
+                for quantum_model in self.quantum_datasets.values():
+                    if json_data := quantum_datasets_json.get(quantum_model.quantum_id):
+                        quantum_datasets_mb.write_bytes(
+                            quantum_model.quantum_id, writer.compressor.compress(json_data)
+                        )
+                    else:
+                        quantum_datasets_mb.write_model(
+                            quantum_model.quantum_id, quantum_model, writer.compressor
+                        )
+            writer.address_writer.addresses.append(quantum_datasets_mb.addresses)
+
+
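A sketch of the write-side knobs (values are illustrative, not recommendations; ``components`` is a populated `PredictedQuantumGraphComponents`):

    # Stronger compression with the dictionary disabled; for large graphs
    # the trained dictionary usually pays for itself and can be left on.
    components.write("graph.qg", zstd_level=15, zstd_dict_size=0)
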
+@dataclasses.dataclass
+class PredictedQuantumGraphReader(BaseQuantumGraphReader):
+    """A helper class for reading predicted quantum graphs."""
+
+    components: PredictedQuantumGraphComponents = dataclasses.field(init=False)
+    """Quantum graph components populated by this reader's methods."""
+
+    @classmethod
+    @contextmanager
+    def open(
+        cls,
+        uri: ResourcePathExpression,
+        *,
+        page_size: int = DEFAULT_PAGE_SIZE,
+        import_mode: TaskImportMode = TaskImportMode.ASSUME_CONSISTENT_EDGES,
+    ) -> Iterator[PredictedQuantumGraphReader]:
+        """Construct a reader from a URI.
+
+        Parameters
+        ----------
+        uri : convertible to `lsst.resources.ResourcePath`
+            URI to open. Should have a ``.qg`` extension.
+        page_size : `int`, optional
+            Approximate number of bytes to read at once from address files.
+            Note that this does not set a page size for *all* reads, but it
+            does affect the smallest, most numerous reads.
+        import_mode : `..pipeline_graph.TaskImportMode`, optional
+            How to handle importing the task classes referenced in the
+            pipeline graph.
+
+        Returns
+        -------
+        reader : `contextlib.AbstractContextManager` [ \
+                `PredictedQuantumGraphReader` ]
+            A context manager that returns the reader when entered.
+        """
+        with cls._open(
+            uri,
+            graph_type="predicted",
+            address_filename="quanta",
+            page_size=page_size,
+            import_mode=import_mode,
+            n_addresses=1,
+        ) as self:
+            yield self
+
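The read methods below return ``self`` so loads can be chained; a sketch (hypothetical file name):

    # Open, load the lightweight components, and assemble a graph object.
    with PredictedQuantumGraphReader.open("graph.qg") as reader:
        qg = reader.read_thin_graph().read_init_quanta().finish()
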
+    def __post_init__(self) -> None:
+        self.components = PredictedQuantumGraphComponents(
+            header=self.header, pipeline_graph=self.pipeline_graph
+        )
+
+    def finish(self) -> PredictedQuantumGraph:
+        """Construct a `PredictedQuantumGraph` instance from this reader."""
+        return self.components.assemble()
+
+    def read_all(self) -> PredictedQuantumGraphReader:
+        """Read all components in full."""
+        return self.read_thin_graph().read_execution_quanta()
+
+    def read_thin_graph(self) -> PredictedQuantumGraphReader:
+        """Read the thin graph.
+
+        The thin graph is a quantum-quantum DAG with internal integer IDs for
+        nodes and just task labels and data IDs as node attributes. It always
+        includes all regular quanta, and does not include init-input or
+        init-output information.
+        """
+        if not self.components.thin_graph.quanta:
+            self.components.thin_graph = self._read_single_block("thin_graph", PredictedThinGraphModel)
+        if len(self.components.quantum_indices) != self.components.header.n_quanta:
+            self.address_reader.read_all()
+            self.components.quantum_indices.update(
+                {row.key: row.index for row in self.address_reader.rows.values()}
+            )
+        return self
+
+    def read_init_quanta(self) -> PredictedQuantumGraphReader:
+        """Read the list of special quanta that represent init-inputs and
+        init-outputs.
+        """
+        if not self.components.init_quanta.root:
+            self.components.init_quanta = self._read_single_block("init_quanta", PredictedInitQuantaModel)
+        return self
+
+    def read_dimension_data(self) -> PredictedQuantumGraphReader:
+        """Read all dimension records.
+
+        Record data IDs will be immediately deserialized, while other fields
+        will be left in serialized form until they are needed.
+        """
+        if self.components.dimension_data is None:
+            serializable_dimension_data = self._read_single_block("dimension_data", SerializableDimensionData)
+            self.components.dimension_data = DimensionDataAttacher(
+                deserializers=[
+                    DimensionRecordSetDeserializer.from_raw(
+                        self.components.pipeline_graph.universe[element], serialized_records
+                    )
+                    for element, serialized_records in serializable_dimension_data.root.items()
+                ],
+                dimensions=DimensionGroup.union(
+                    *self.components.pipeline_graph.group_by_dimensions(prerequisites=True).keys(),
+                    universe=self.components.pipeline_graph.universe,
+                ),
+            )
+        return self
+
+    def read_quantum_datasets(
+        self, quantum_ids: Iterable[uuid.UUID] | None = None
+    ) -> PredictedQuantumGraphReader:
+        """Read information about all datasets produced and consumed by the
+        given quantum IDs.
+
+        Parameters
+        ----------
+        quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
+            Iterable of quantum IDs to load. If not provided, all quanta will
+            be loaded. The UUIDs of special init quanta will be ignored.
+        """
+        quantum_datasets: PredictedQuantumDatasetsModel | None
+        if quantum_ids is None:
+            if len(self.components.quantum_datasets) != self.header.n_quanta:
+                for quantum_datasets in MultiblockReader.read_all_models_in_zip(
+                    self.zf,
+                    "quantum_datasets",
+                    PredictedQuantumDatasetsModel,
+                    self.decompressor,
+                    int_size=self.components.header.int_size,
+                    page_size=self.page_size,
+                ):
+                    self.components.quantum_datasets.setdefault(quantum_datasets.quantum_id, quantum_datasets)
+                self.address_reader.read_all()
+                for address_row in self.address_reader.rows.values():
+                    self.components.quantum_indices[address_row.key] = address_row.index
+            return self
+        with MultiblockReader.open_in_zip(
+            self.zf, "quantum_datasets", int_size=self.components.header.int_size
+        ) as mb_reader:
+            for quantum_id in quantum_ids:
+                if quantum_id in self.components.quantum_datasets:
+                    continue
+                address_row = self.address_reader.find(quantum_id)
+                self.components.quantum_indices[address_row.key] = address_row.index
+                quantum_datasets = mb_reader.read_model(
+                    address_row.addresses[0], PredictedQuantumDatasetsModel, self.decompressor
+                )
+                if quantum_datasets is not None:
+                    self.components.quantum_datasets[address_row.key] = quantum_datasets
+        return self
+
+    def read_execution_quanta(
+        self, quantum_ids: Iterable[uuid.UUID] | None = None
+    ) -> PredictedQuantumGraphReader:
+        """Read all information needed to execute the given quanta.
+
+        Parameters
+        ----------
+        quantum_ids : `~collections.abc.Iterable` [ `uuid.UUID` ], optional
+            Iterable of quantum IDs to load. If not provided, all quanta will
+            be loaded. The UUIDs of special init quanta will be ignored.
+        """
+        return self.read_init_quanta().read_dimension_data().read_quantum_datasets(quantum_ids)
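A closing sketch tying the reader methods together (placeholder file name and UUID):

    # read_execution_quanta() is the one-stop load for executors: init
    # quanta, dimension records, and the per-quantum dataset models.
    with PredictedQuantumGraphReader.open("graph.qg") as reader:
        reader.read_execution_quanta([some_quantum_id])
        qg = reader.finish()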