lsst-pipe-base 29.2025.3900__py3-none-any.whl → 29.2025.4100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. lsst/pipe/base/_task_metadata.py +15 -0
  2. lsst/pipe/base/dot_tools.py +14 -152
  3. lsst/pipe/base/exec_fixup_data_id.py +17 -44
  4. lsst/pipe/base/execution_graph_fixup.py +49 -18
  5. lsst/pipe/base/graph/_versionDeserializers.py +6 -5
  6. lsst/pipe/base/graph/graph.py +30 -10
  7. lsst/pipe/base/graph/graphSummary.py +30 -0
  8. lsst/pipe/base/graph_walker.py +119 -0
  9. lsst/pipe/base/log_capture.py +5 -2
  10. lsst/pipe/base/mermaid_tools.py +11 -64
  11. lsst/pipe/base/mp_graph_executor.py +298 -236
  12. lsst/pipe/base/pipeline_graph/io.py +1 -1
  13. lsst/pipe/base/quantum_graph/__init__.py +32 -0
  14. lsst/pipe/base/quantum_graph/_common.py +632 -0
  15. lsst/pipe/base/quantum_graph/_multiblock.py +808 -0
  16. lsst/pipe/base/quantum_graph/_predicted.py +1950 -0
  17. lsst/pipe/base/quantum_graph/visualization.py +302 -0
  18. lsst/pipe/base/quantum_graph_builder.py +292 -34
  19. lsst/pipe/base/quantum_graph_executor.py +2 -1
  20. lsst/pipe/base/quantum_provenance_graph.py +16 -7
  21. lsst/pipe/base/quantum_reports.py +45 -0
  22. lsst/pipe/base/separable_pipeline_executor.py +126 -15
  23. lsst/pipe/base/simple_pipeline_executor.py +44 -43
  24. lsst/pipe/base/single_quantum_executor.py +1 -40
  25. lsst/pipe/base/tests/mocks/__init__.py +1 -1
  26. lsst/pipe/base/tests/mocks/_pipeline_task.py +16 -1
  27. lsst/pipe/base/tests/mocks/{_in_memory_repo.py → _repo.py} +324 -45
  28. lsst/pipe/base/tests/mocks/_storage_class.py +51 -0
  29. lsst/pipe/base/tests/simpleQGraph.py +11 -5
  30. lsst/pipe/base/version.py +1 -1
  31. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/METADATA +2 -1
  32. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/RECORD +40 -34
  33. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/WHEEL +0 -0
  34. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/entry_points.txt +0 -0
  35. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/COPYRIGHT +0 -0
  36. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/LICENSE +0 -0
  37. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/bsd_license.txt +0 -0
  38. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/licenses/gpl-v3.0.txt +0 -0
  39. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/top_level.txt +0 -0
  40. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4100.dist-info}/zip-safe +0 -0
@@ -650,7 +650,7 @@ class SerializedTaskSubset(pydantic.BaseModel):
         """
         members = set(self.tasks)
         if label in steps:
-            steps.set_dimensions(label, self.dimensions)
+            steps._dimensions_by_label[label] = frozenset(self.dimensions)
         return TaskSubset(xgraph, label, members, self.description, steps)


@@ -0,0 +1,32 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+from ._common import *
+from ._multiblock import *
+from ._predicted import *
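
The subpackage's public interface is assembled from the three modules above via wildcard re-exports, so callers import from the package rather than its private modules. A minimal sketch (names taken from the `__all__` tuple in `_common.py` below):

    from lsst.pipe.base.quantum_graph import BaseQuantumGraph, HeaderModel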
@@ -0,0 +1,632 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = (
+    "BaseQuantumGraph",
+    "BaseQuantumGraphReader",
+    "BipartiteEdgeInfo",
+    "DatasetInfo",
+    "HeaderModel",
+    "QuantumInfo",
+)
+import dataclasses
+import datetime
+import getpass
+import os
+import sys
+import uuid
+import zipfile
+from abc import ABC, abstractmethod
+from collections.abc import Iterator, Mapping
+from contextlib import contextmanager
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Self,
+    TypeAlias,
+    TypedDict,
+    TypeVar,
+)
+
+import networkx
+import networkx.algorithms.bipartite
+import pydantic
+import zstandard
+
+from lsst.daf.butler import DataCoordinate, DataIdValue
+from lsst.resources import ResourcePath, ResourcePathExpression
+
+from ..pipeline_graph import DatasetTypeNode, Edge, PipelineGraph, TaskImportMode, TaskNode
+from ..pipeline_graph.io import SerializedPipelineGraph
+from ._multiblock import (
+    DEFAULT_PAGE_SIZE,
+    AddressReader,
+    AddressWriter,
+    Compressor,
+    Decompressor,
+)
+
+if TYPE_CHECKING:
+    from ..graph import QuantumGraph
+
+
+# These aliases make it a lot easier to see how the various pydantic models
+# are structured, but they're too verbose to be worth exporting to code
+# outside the quantum_graph subpackage.
+TaskLabel: TypeAlias = str
+DatasetTypeName: TypeAlias = str
+ConnectionName: TypeAlias = str
+QuantumIndex: TypeAlias = int
+DatastoreName: TypeAlias = str
+DimensionElementName: TypeAlias = str
+DataCoordinateValues: TypeAlias = list[DataIdValue]
+
+
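
The payoff shows up in annotations later in this file; for instance, `HeaderModel.n_task_quanta` below is declared as

    n_task_quanta: dict[TaskLabel, int]

which documents itself in a way that a bare `dict[str, int]` would not.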
+_T = TypeVar("_T", bound=pydantic.BaseModel)
+
+
+class IncompleteQuantumGraphError(RuntimeError):
+    pass
+
+
+class HeaderModel(pydantic.BaseModel):
+    """Data model for the header of a quantum graph file."""
+
+    version: int = 0
+    """File format / data model version number."""
+
+    graph_type: str = ""
+    """Type of quantum graph stored in this file."""
+
+    inputs: list[str] = pydantic.Field(default_factory=list)
+    """List of input collections used to build the quantum graph."""
+
+    output: str | None = ""
+    """Output CHAINED collection provided when building the quantum graph."""
+
+    output_run: str = ""
+    """Output RUN collection for all output datasets in this graph."""
+
+    user: str = pydantic.Field(default_factory=getpass.getuser)
+    """Username of the process that built this quantum graph."""
+
+    timestamp: datetime.datetime = pydantic.Field(default_factory=datetime.datetime.now)
+    """Timestamp for when this quantum graph was built.
+
+    It is unspecified exactly at which point during quantum-graph generation
+    this timestamp is recorded.
+    """
+
+    command: str = pydantic.Field(default_factory=lambda: " ".join(sys.argv))
+    """Command-line invocation that created this graph."""
+
+    metadata: dict[str, Any] = pydantic.Field(default_factory=dict)
+    """Free-form metadata associated with this quantum graph at build time."""
+
+    int_size: int = 8
+    """Number of bytes in the integers used in this file's multi-block and
+    address files.
+    """
+
+    n_quanta: int = 0
+    """Total number of quanta in this graph.
+
+    This does not include special "init" quanta, but it does include quanta
+    that were not loaded in a partial read (except when reading from an old
+    quantum graph file).
+    """
+
+    n_datasets: int = 0
+    """Total number of distinct datasets in the full graph. This includes
+    datasets whose related quanta were not loaded in a partial read (except
+    when reading from an old quantum graph file).
+    """
+
+    n_task_quanta: dict[TaskLabel, int] = pydantic.Field(default_factory=dict)
+    """Number of quanta for each task label.
+
+    This does not include special "init" quanta, but it does include quanta
+    that were not loaded in a partial read (except when reading from an old
+    quantum graph file).
+    """
+
+    @classmethod
+    def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> HeaderModel:
+        """Extract a header from an old `QuantumGraph` instance.
+
+        Parameters
+        ----------
+        old_quantum_graph : `QuantumGraph`
+            Quantum graph to extract a header from.
+
+        Returns
+        -------
+        header : `HeaderModel`
+            Header extracted from the old quantum graph.
+        """
+        metadata = dict(old_quantum_graph.metadata)
+        metadata.pop("packages", None)
+        if (time_str := metadata.pop("time", None)) is not None:
+            timestamp = datetime.datetime.fromisoformat(time_str)
+        else:
+            timestamp = datetime.datetime.now()
+        return cls(
+            inputs=list(metadata.pop("input", []) or []),  # Guard against explicit None and missing key.
+            output=metadata.pop("output", None),
+            output_run=metadata.pop("output_run", ""),
+            user=metadata.pop("user", ""),
+            command=metadata.pop("full_command", ""),
+            timestamp=timestamp,
+            metadata=metadata,
+        )
+
+    def to_old_metadata(self) -> dict[str, Any]:
+        """Return a dictionary using the key conventions of old quantum
+        graph files.
+        """
+        result = self.metadata.copy()
+        result["input"] = self.inputs
+        result["output"] = self.output
+        result["output_run"] = self.output_run
+        result["full_command"] = self.command
+        result["user"] = self.user
+        result["time"] = str(self.timestamp)
+        return result
+
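
Taken together, these two methods bridge the old and new metadata conventions. A brief sketch, where `old_qg` stands in for a previously loaded old-format `QuantumGraph` instance:

    header = HeaderModel.from_old_quantum_graph(old_qg)
    legacy = header.to_old_metadata()
    assert legacy["full_command"] == header.command  # round-trips the key rename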
+    # Work around the fact that Sphinx chokes on Pydantic docstring
+    # formatting when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class QuantumInfo(TypedDict):
+    """A typed dictionary that annotates the attributes of the NetworkX graph
+    node data for a quantum.
+
+    Since NetworkX types are not generic over their node mapping type, this
+    has to be used explicitly, e.g.::
+
+        node_data: QuantumInfo = xgraph.nodes[quantum_id]
+
+    where ``xgraph`` can be either `BaseQuantumGraph.quantum_only_xgraph`
+    or `BaseQuantumGraph.bipartite_xgraph`.
+    """
+
+    data_id: DataCoordinate
+    """Data ID of the quantum."""
+
+    task_label: str
+    """Label of the task for this quantum."""
+
+    pipeline_node: TaskNode
+    """Node in the pipeline graph for this quantum's task."""
+
+
+class DatasetInfo(TypedDict):
+    """A typed dictionary that annotates the attributes of the NetworkX graph
+    node data for a dataset.
+
+    Since NetworkX types are not generic over their node mapping type, this
+    has to be used explicitly, e.g.::
+
+        node_data: DatasetInfo = xgraph.nodes[dataset_id]
+
+    where ``xgraph`` is from the `BaseQuantumGraph.bipartite_xgraph` property.
+    """
+
+    data_id: DataCoordinate
+    """Data ID of the dataset."""
+
+    dataset_type_name: DatasetTypeName
+    """Name of the type of this dataset.
+
+    This is always the general dataset type that matches the data repository
+    storage class, which may differ from any particular task-adapted dataset
+    type whose storage class has been overridden to match the task connections.
+    This means it is never a component.
+    """
+
+    run: str
+    """Name of the `~lsst.daf.butler.CollectionType.RUN` collection that holds
+    or will hold this dataset.
+    """
+
+    pipeline_node: DatasetTypeNode
+    """Node in the pipeline graph for this dataset's type."""
+
+
+class BipartiteEdgeInfo(TypedDict):
+    """A typed dictionary that annotates the attributes of the NetworkX graph
+    edge data in a bipartite graph.
+    """
+
+    is_read: bool
+    """`True` if this is a dataset -> quantum edge; `False` if it is a
+    quantum -> dataset edge.
+    """
+
+    pipeline_edges: list[Edge]
+    """Corresponding edges in the pipeline graph.
+
+    Note that there may be more than one pipeline edge since a quantum can
+    consume a particular dataset via multiple connections.
+    """
+
+
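
A minimal sketch of consuming these annotations, assuming `qg` is an instance of a concrete `BaseQuantumGraph` subclass:

    for tail, head, data in qg.bipartite_xgraph.edges(data=True):
        edge_info: BipartiteEdgeInfo = data
        if edge_info["is_read"]:
            ...  # `tail` is a dataset UUID consumed by quantum `head`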
+class BaseQuantumGraph(ABC):
+    """An abstract base for quantum graphs.
+
+    Parameters
+    ----------
+    header : `HeaderModel`
+        Structured metadata for the graph.
+    pipeline_graph : `..pipeline_graph.PipelineGraph`
+        Graph of tasks and dataset types. May contain a superset of the tasks
+        and dataset types that actually have quanta and datasets in the
+        quantum graph.
+    """
+
+    def __init__(self, header: HeaderModel, pipeline_graph: PipelineGraph):
+        self.header = header
+        self.pipeline_graph = pipeline_graph
+
+    @property
+    @abstractmethod
+    def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
+        """A nested mapping of all quanta, keyed first by task name and then
+        by data ID.
+
+        Notes
+        -----
+        Partial loads may not fully populate this mapping, but it can always
+        be accessed.
+
+        The returned object may be an internal dictionary; as the type
+        annotation indicates, it should not be modified in place.
+        """
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def datasets_by_type(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
+        """A nested mapping of all datasets, keyed first by dataset type name
+        and then by data ID.
+
+        Notes
+        -----
+        Partial loads may not fully populate this mapping, but it can always
+        be accessed.
+
+        The returned object may be an internal dictionary; as the type
+        annotation indicates, it should not be modified in place.
+        """
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def quantum_only_xgraph(self) -> networkx.DiGraph:
+        """A directed acyclic graph with quanta as nodes and datasets elided.
+
+        Notes
+        -----
+        Partial loads may not fully populate this graph, but it can always be
+        accessed.
+
+        Node state dictionaries are described by the `QuantumInfo` type
+        (or a subtype thereof).
+
+        The returned object is a read-only view of an internal one.
+        """
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def bipartite_xgraph(self) -> networkx.DiGraph:
+        """A directed acyclic graph with quantum and dataset nodes.
+
+        Notes
+        -----
+        Partial loads may not fully populate this graph, but it can always be
+        accessed.
+
+        Node state dictionaries are described by the `QuantumInfo` and
+        `DatasetInfo` types (or subtypes thereof). Edges have state
+        dictionaries described by `BipartiteEdgeInfo`.
+
+        The returned object is a read-only view of an internal one.
+        """
+        raise NotImplementedError()
+
+
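
A sketch of navigating from the mappings to annotated node data, again assuming `qg` is a concrete subclass instance:

    for task_label, quanta in qg.quanta_by_task.items():
        for data_id, quantum_id in quanta.items():
            info: QuantumInfo = qg.quantum_only_xgraph.nodes[quantum_id]
            assert info["task_label"] == task_label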
+@dataclasses.dataclass
+class BaseQuantumGraphWriter:
+    """A helper class for writing quantum graphs."""
+
+    zf: zipfile.ZipFile
+    """The zip archive that represents the quantum graph on disk."""
+
+    compressor: Compressor
+    """A compressor for all compressed JSON blocks."""
+
+    address_writer: AddressWriter
+    """A helper object for writing addresses into the multi-block files."""
+
+    int_size: int
+    """Size (in bytes) used to write integers to binary files."""
+
+    @classmethod
+    @contextmanager
+    def open(
+        cls,
+        uri: ResourcePathExpression,
+        header: HeaderModel,
+        pipeline_graph: PipelineGraph,
+        indices: dict[uuid.UUID, int],
+        *,
+        address_filename: str,
+        compressor: Compressor,
+        cdict_data: bytes | None = None,
+    ) -> Iterator[Self]:
+        uri = ResourcePath(uri)
+        address_writer = AddressWriter(indices)
+        with uri.open(mode="wb") as stream:
+            with zipfile.ZipFile(stream, mode="w", compression=zipfile.ZIP_STORED) as zf:
+                self = cls(zf, compressor, address_writer, header.int_size)
+                self.write_single_model("header", header)
+                if cdict_data is not None:
+                    zf.writestr("compression_dict", cdict_data)
+                self.write_single_model("pipeline_graph", SerializedPipelineGraph.serialize(pipeline_graph))
+                yield self
+                address_writer.write_to_zip(zf, address_filename, int_size=self.int_size)
+
+    def write_single_model(self, name: str, model: pydantic.BaseModel) -> None:
+        """Write a single compressed JSON block as a 'file' in a zip archive.
+
+        Parameters
+        ----------
+        name : `str`
+            Base name of the file. An extension will be added.
+        model : `pydantic.BaseModel`
+            Pydantic model to convert to JSON.
+        """
+        json_data = model.model_dump_json().encode()
+        self.write_single_block(name, json_data)
+
+    def write_single_block(self, name: str, json_data: bytes) -> None:
+        """Write a single compressed JSON block as a 'file' in a zip archive.
+
+        Parameters
+        ----------
+        name : `str`
+            Base name of the file. An extension will be added.
+        json_data : `bytes`
+            Raw JSON to compress and write.
+        """
+        json_data = self.compressor.compress(json_data)
+        self.zf.writestr(f"{name}.json.zst", json_data)
+
+
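
A hedged sketch of driving the writer, assuming `header` and `pipeline_graph` are already built; the file name, the empty `indices`, and the extra block written in the `with` body are illustrative, and `zstandard.ZstdCompressor` is assumed to satisfy the `Compressor` protocol via its `compress` method:

    import uuid
    import zstandard

    indices: dict[uuid.UUID, int] = {}
    with BaseQuantumGraphWriter.open(
        "example.qg",
        header,
        pipeline_graph,
        indices,
        address_filename="addresses",
        compressor=zstandard.ZstdCompressor(),
    ) as writer:
        writer.write_single_block("extra", b"{}")
    # The address table is written after the body completes, before the zip
    # closes.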
+@dataclasses.dataclass
+class BaseQuantumGraphReader:
+    """A helper class for reading quantum graphs."""
+
+    header: HeaderModel
+    """Header metadata for the quantum graph."""
+
+    pipeline_graph: PipelineGraph
+    """Graph of tasks and dataset type names that appear in the quantum
+    graph.
+    """
+
+    zf: zipfile.ZipFile
+    """The zip archive that represents the quantum graph on disk."""
+
+    decompressor: Decompressor
+    """A decompressor for all compressed JSON blocks."""
+
+    address_reader: AddressReader
+    """A helper object for reading addresses into the multi-block files."""
+
+    page_size: int
+    """Approximate number of bytes to read at a time.
+
+    Note that this does not set a page size for *all* reads, but it
+    does affect the smallest, most numerous reads.
+    """
+
+    @classmethod
+    @contextmanager
+    def _open(
+        cls,
+        uri: ResourcePathExpression,
+        *,
+        address_filename: str,
+        graph_type: str,
+        n_addresses: int,
+        page_size: int | None = None,
+        import_mode: TaskImportMode = TaskImportMode.ASSUME_CONSISTENT_EDGES,
+    ) -> Iterator[Self]:
+        """Construct a reader from a URI.
+
+        Parameters
+        ----------
+        uri : convertible to `lsst.resources.ResourcePath`
+            URI to open. Should have a ``.qg`` extension.
+        address_filename : `str`
+            Base filename for the address file.
+        graph_type : `str`
+            Value to expect for `HeaderModel.graph_type`.
+        n_addresses : `int`
+            Number of addresses to expect per row in the address file.
+        page_size : `int`, optional
+            Approximate number of bytes to read at once from address files.
+            Note that this does not set a page size for *all* reads, but it
+            does affect the smallest, most numerous reads. When `None`, the
+            ``LSST_QG_PAGE_SIZE`` environment variable is checked before
+            falling back to a default of 5MB.
+        import_mode : `..pipeline_graph.TaskImportMode`, optional
+            How to handle importing the task classes referenced in the
+            pipeline graph.
+
+        Returns
+        -------
+        reader : `contextlib.AbstractContextManager` [ \
+                `BaseQuantumGraphReader` ]
+            A context manager that returns the reader when entered.
+        """
+        if page_size is None:
+            page_size = int(os.environ.get("LSST_QG_PAGE_SIZE", DEFAULT_PAGE_SIZE))
+        uri = ResourcePath(uri)
+        cdict: zstandard.ZstdCompressionDict | None = None
+        with uri.open(mode="rb") as zf_stream:
+            with zipfile.ZipFile(zf_stream, "r") as zf:
+                if (cdict_path := zipfile.Path(zf, "compression_dict")).exists():
+                    cdict = zstandard.ZstdCompressionDict(cdict_path.read_bytes())
+                decompressor = zstandard.ZstdDecompressor(cdict)
+                header = cls._read_single_block_static("header", HeaderModel, zf, decompressor)
+                if header.graph_type != graph_type:
+                    raise TypeError(
+                        f"Header is for a {header.graph_type!r} graph, not a {graph_type!r} graph."
+                    )
+                serialized_pipeline_graph = cls._read_single_block_static(
+                    "pipeline_graph", SerializedPipelineGraph, zf, decompressor
+                )
+                pipeline_graph = serialized_pipeline_graph.deserialize(import_mode)
+                with AddressReader.open_in_zip(
+                    zf,
+                    address_filename,
+                    page_size=page_size,
+                    int_size=header.int_size,
+                    n_addresses=n_addresses,
+                ) as address_reader:
+                    yield cls(
+                        header=header,
+                        pipeline_graph=pipeline_graph,
+                        zf=zf,
+                        decompressor=decompressor,
+                        address_reader=address_reader,
+                        page_size=page_size,
+                    )
+
+    @staticmethod
+    def _read_single_block_static(
+        name: str, model_type: type[_T], zf: zipfile.ZipFile, decompressor: Decompressor
+    ) -> _T:
+        """Read a single compressed JSON block from a 'file' in a zip archive.
+
+        Parameters
+        ----------
+        name : `str`
+            Base name of the file. An extension will be added.
+        model_type : `type` [ `pydantic.BaseModel` ]
+            Pydantic model to validate JSON with.
+        zf : `zipfile.ZipFile`
+            Zip archive to read the file from.
+        decompressor : `Decompressor`
+            Object with a `decompress` method that takes and returns `bytes`.
+
+        Returns
+        -------
+        model : `pydantic.BaseModel`
+            Validated model.
+        """
+        compressed_data = zf.read(f"{name}.json.zst")
+        json_data = decompressor.decompress(compressed_data)
+        return model_type.model_validate_json(json_data)
+
+    def _read_single_block(self, name: str, model_type: type[_T]) -> _T:
+        """Read a single compressed JSON block from a 'file' in a zip archive.
+
+        Parameters
+        ----------
+        name : `str`
+            Base name of the file. An extension will be added.
+        model_type : `type` [ `pydantic.BaseModel` ]
+            Pydantic model to validate JSON with.
+
+        Returns
+        -------
+        model : `pydantic.BaseModel`
+            Validated model.
+        """
+        return self._read_single_block_static(name, model_type, self.zf, self.decompressor)
+
+    def _read_single_block_raw(self, name: str) -> bytes:
+        """Read a single compressed block from a 'file' in a zip archive.
+
+        Parameters
+        ----------
+        name : `str`
+            Base name of the file. An extension will be added.
+
+        Returns
+        -------
+        data : `bytes`
+            Decompressed bytes.
+        """
+        compressed_data = self.zf.read(f"{name}.json.zst")
+        return self.decompressor.decompress(compressed_data)
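
A hedged sketch of how a concrete reader might build on the shared `_open` helper; the subclass body, filenames, and argument values are illustrative only:

    from contextlib import contextmanager

    class ExampleReader(BaseQuantumGraphReader):  # hypothetical subclass
        @classmethod
        @contextmanager
        def open(cls, uri):
            # Delegate to the shared helper with this graph type's conventions.
            with cls._open(
                uri, address_filename="addresses", graph_type="example", n_addresses=2
            ) as reader:
                yield reader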