lsst-pipe-base 29.2025.3900-py3-none-any.whl → 29.2025.4000-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. lsst/pipe/base/dot_tools.py +14 -152
  2. lsst/pipe/base/exec_fixup_data_id.py +17 -44
  3. lsst/pipe/base/execution_graph_fixup.py +49 -18
  4. lsst/pipe/base/graph/graph.py +28 -9
  5. lsst/pipe/base/graph_walker.py +119 -0
  6. lsst/pipe/base/log_capture.py +5 -2
  7. lsst/pipe/base/mermaid_tools.py +11 -64
  8. lsst/pipe/base/mp_graph_executor.py +298 -236
  9. lsst/pipe/base/quantum_graph/__init__.py +32 -0
  10. lsst/pipe/base/quantum_graph/_common.py +610 -0
  11. lsst/pipe/base/quantum_graph/_multiblock.py +737 -0
  12. lsst/pipe/base/quantum_graph/_predicted.py +1874 -0
  13. lsst/pipe/base/quantum_graph/visualization.py +302 -0
  14. lsst/pipe/base/quantum_graph_builder.py +292 -34
  15. lsst/pipe/base/quantum_graph_executor.py +2 -1
  16. lsst/pipe/base/quantum_provenance_graph.py +16 -7
  17. lsst/pipe/base/separable_pipeline_executor.py +126 -15
  18. lsst/pipe/base/simple_pipeline_executor.py +44 -43
  19. lsst/pipe/base/single_quantum_executor.py +1 -40
  20. lsst/pipe/base/tests/mocks/__init__.py +1 -1
  21. lsst/pipe/base/tests/mocks/_pipeline_task.py +16 -1
  22. lsst/pipe/base/tests/mocks/{_in_memory_repo.py → _repo.py} +324 -45
  23. lsst/pipe/base/tests/mocks/_storage_class.py +6 -0
  24. lsst/pipe/base/tests/simpleQGraph.py +11 -5
  25. lsst/pipe/base/version.py +1 -1
  26. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/METADATA +2 -1
  27. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/RECORD +35 -29
  28. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/WHEEL +0 -0
  29. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/entry_points.txt +0 -0
  30. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/COPYRIGHT +0 -0
  31. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/LICENSE +0 -0
  32. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/bsd_license.txt +0 -0
  33. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/gpl-v3.0.txt +0 -0
  34. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/top_level.txt +0 -0
  35. {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/__init__.py
@@ -0,0 +1,32 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ from ._common import *
+ from ._multiblock import *
+ from ._predicted import *
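
The wildcard imports above mean downstream code can pull the public quantum-graph API straight from the new subpackage. A minimal usage sketch (illustrative, not part of the diff; it assumes only names exported via `__all__` in the modules below):

    from lsst.pipe.base.quantum_graph import BaseQuantumGraph, HeaderModel

    def describe(qg: BaseQuantumGraph) -> str:
        # `header` and its fields are documented in _common.py below.
        return f"{qg.header.graph_type} graph built by {qg.header.user}"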
lsst/pipe/base/quantum_graph/_common.py
@@ -0,0 +1,610 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ __all__ = (
+     "BaseQuantumGraph",
+     "BaseQuantumGraphReader",
+     "BipartiteEdgeInfo",
+     "DatasetInfo",
+     "HeaderModel",
+     "QuantumInfo",
+ )
+ import dataclasses
+ import datetime
+ import getpass
+ import os
+ import sys
+ import uuid
+ import zipfile
+ from abc import ABC, abstractmethod
+ from collections.abc import Iterator, Mapping
+ from contextlib import contextmanager
+ from typing import (
+     TYPE_CHECKING,
+     Any,
+     Self,
+     TypeAlias,
+     TypedDict,
+     TypeVar,
+ )
+
+ import networkx
+ import networkx.algorithms.bipartite
+ import pydantic
+ import zstandard
+
+ from lsst.daf.butler import DataCoordinate, DataIdValue
+ from lsst.resources import ResourcePath, ResourcePathExpression
+
+ from ..pipeline_graph import DatasetTypeNode, Edge, PipelineGraph, TaskImportMode, TaskNode
+ from ..pipeline_graph.io import SerializedPipelineGraph
+ from ._multiblock import (
+     DEFAULT_PAGE_SIZE,
+     AddressReader,
+     AddressWriter,
+     Compressor,
+     Decompressor,
+ )
+
+ if TYPE_CHECKING:
+     from ..graph import QuantumGraph
+
+
+ # These aliases make it a lot easier to see how the various pydantic models
+ # are structured, but they're too verbose to be worth exporting to code
+ # outside the quantum_graph subpackage.
+ TaskLabel: TypeAlias = str
+ DatasetTypeName: TypeAlias = str
+ ConnectionName: TypeAlias = str
+ QuantumIndex: TypeAlias = int
+ DatastoreName: TypeAlias = str
+ DimensionElementName: TypeAlias = str
+ DataCoordinateValues: TypeAlias = list[DataIdValue]
+
+
+ _T = TypeVar("_T", bound=pydantic.BaseModel)
+
+
+ class IncompleteQuantumGraphError(RuntimeError):
+     """Raised when an operation requires parts of a quantum graph that were not loaded."""
+
+
+ class HeaderModel(pydantic.BaseModel):
+     """Data model for the header of a quantum graph file."""
+
+     version: int = 0
+     """File format / data model version number."""
+
+     graph_type: str = ""
+     """Type of quantum graph stored in this file."""
+
+     inputs: list[str] = pydantic.Field(default_factory=list)
+     """List of input collections used to build the quantum graph."""
+
+     output: str | None = ""
+     """Output CHAINED collection provided when building the quantum graph."""
+
+     output_run: str = ""
+     """Output RUN collection for all output datasets in this graph."""
+
+     user: str = pydantic.Field(default_factory=getpass.getuser)
+     """Username of the process that built this quantum graph."""
+
+     timestamp: datetime.datetime = pydantic.Field(default_factory=datetime.datetime.now)
+     """Timestamp for when this quantum graph was built.
+
+     It is unspecified at exactly which point during quantum-graph generation
+     this timestamp is recorded.
+     """
+
+     command: str = pydantic.Field(default_factory=lambda: " ".join(sys.argv))
+     """Command-line invocation that created this graph."""
+
+     metadata: dict[str, Any] = pydantic.Field(default_factory=dict)
+     """Free-form metadata associated with this quantum graph at build time."""
+
+     int_size: int = 8
+     """Number of bytes in the integers used in this file's multi-block and
+     address files.
+     """
+
+     n_quanta: int = 0
+     """Total number of quanta in this graph.
+
+     This does not include special "init" quanta, but it does include quanta
+     that were not loaded in a partial read (except when reading from an old
+     quantum graph file).
+     """
+
+     n_datasets: int = 0
+     """Total number of distinct datasets in the full graph. This includes
+     datasets whose related quanta were not loaded in a partial read (except
+     when reading from an old quantum graph file).
+     """
+
+     n_task_quanta: dict[TaskLabel, int] = pydantic.Field(default_factory=dict)
+     """Number of quanta for each task label.
+
+     This does not include special "init" quanta, but it does include quanta
+     that were not loaded in a partial read (except when reading from an old
+     quantum graph file).
+     """
+
+     @classmethod
+     def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> HeaderModel:
+         """Extract a header from an old `QuantumGraph` instance.
+
+         Parameters
+         ----------
+         old_quantum_graph : `QuantumGraph`
+             Quantum graph to extract a header from.
+
+         Returns
+         -------
+         header : `HeaderModel`
+             Header for a new quantum graph.
+         """
+         metadata = dict(old_quantum_graph.metadata)
+         metadata.pop("packages", None)
+         if (time_str := metadata.pop("time", None)) is not None:
+             timestamp = datetime.datetime.fromisoformat(time_str)
+         else:
+             timestamp = datetime.datetime.now()
+         return cls(
+             inputs=list(metadata.pop("input", []) or []),  # Guard against explicit None and missing key.
+             output=metadata.pop("output", None),
+             output_run=metadata.pop("output_run", ""),
+             user=metadata.pop("user", ""),
+             command=metadata.pop("full_command", ""),
+             timestamp=timestamp,
+             metadata=metadata,
+         )
+
+     def to_old_metadata(self) -> dict[str, Any]:
+         """Return a dictionary using the key conventions of old quantum
+         graph files.
+         """
+         result = self.metadata.copy()
+         result["input"] = self.inputs
+         result["output"] = self.output
+         result["output_run"] = self.output_run
+         result["full_command"] = self.command
+         result["user"] = self.user
+         result["time"] = str(self.timestamp)
+         return result
+
+     # Work around the fact that Sphinx chokes on Pydantic docstring formatting
+     # when we inherit those docstrings in our public classes.
+     if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+         def copy(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.copy`."""
+             return super().copy(*args, **kwargs)
+
+         def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_dump`."""
+             return super().model_dump(*args, **kwargs)
+
+         def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_dump_json`."""
+             return super().model_dump_json(*args, **kwargs)
+
+         def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_copy`."""
+             return super().model_copy(*args, **kwargs)
+
+         @classmethod
+         def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+             """See `pydantic.BaseModel.model_construct`."""
+             return super().model_construct(*args, **kwargs)
+
+         @classmethod
+         def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+             """See `pydantic.BaseModel.model_json_schema`."""
+             return super().model_json_schema(*args, **kwargs)
+
+
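Since `HeaderModel` mirrors the loosely-typed metadata dictionary of old quantum graph files, here is a quick sketch of the round trip using only the fields and methods shown above (collection names are made up for illustration):

    header = HeaderModel(
        graph_type="predicted",
        inputs=["HSC/raw/all"],
        output_run="u/someone/run",
    )
    old_style = header.to_old_metadata()
    assert old_style["input"] == ["HSC/raw/all"]
    assert old_style["output_run"] == "u/someone/run"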
+ class QuantumInfo(TypedDict):
+     """A typed dictionary that annotates the attributes of the NetworkX graph
+     node data for a quantum.
+
+     Since NetworkX types are not generic over their node mapping type, this
+     has to be used explicitly, e.g.::
+
+         node_data: QuantumInfo = xgraph.nodes[quantum_id]
+
+     where ``xgraph`` can be either `BaseQuantumGraph.quantum_only_xgraph`
+     or `BaseQuantumGraph.bipartite_xgraph`.
+     """
+
+     data_id: DataCoordinate
+     """Data ID of the quantum."""
+
+     task_label: str
+     """Label of the task for this quantum."""
+
+     pipeline_node: TaskNode
+     """Node in the pipeline graph for this quantum's task."""
+
+
+ class DatasetInfo(TypedDict):
+     """A typed dictionary that annotates the attributes of the NetworkX graph
+     node data for a dataset.
+
+     Since NetworkX types are not generic over their node mapping type, this
+     has to be used explicitly, e.g.::
+
+         node_data: DatasetInfo = xgraph.nodes[dataset_id]
+
+     where ``xgraph`` is from the `BaseQuantumGraph.bipartite_xgraph` property.
+     """
+
+     data_id: DataCoordinate
+     """Data ID of the dataset."""
+
+     dataset_type_name: DatasetTypeName
+     """Name of the type of this dataset.
+
+     This is always the general dataset type that matches the data repository
+     storage class, which may differ from any particular task-adapted dataset
+     type whose storage class has been overridden to match the task
+     connections. This means it is never a component.
+     """
+
+     run: str
+     """Name of the `~lsst.daf.butler.CollectionType.RUN` collection that holds
+     or will hold this dataset.
+     """
+
+     pipeline_node: DatasetTypeNode
+     """Node in the pipeline graph for this dataset's type."""
+
+
+ class BipartiteEdgeInfo(TypedDict):
+     """A typed dictionary that annotates the attributes of the NetworkX graph
+     edge data in a bipartite graph.
+     """
+
+     is_read: bool
+     """`True` if this is a dataset -> quantum edge; `False` if it is a
+     quantum -> dataset edge.
+     """
+
+     pipeline_edges: list[Edge]
+     """Corresponding edges in the pipeline graph.
+
+     Note that there may be more than one pipeline edge, since a quantum can
+     consume a particular dataset via multiple connections.
+     """
+
+
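A hedged sketch of how these edge annotations might be consumed, assuming `qg` is a concrete `BaseQuantumGraph` implementation (the variable names are illustrative):

    # Collect every dataset -> quantum (read) edge in the bipartite view.
    read_edges = [
        (dataset_id, quantum_id)
        for dataset_id, quantum_id, edge_data in qg.bipartite_xgraph.edges(data=True)
        if edge_data["is_read"]
    ]
    print(f"{len(read_edges)} dataset reads in the graph")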
+ class BaseQuantumGraph(ABC):
+     """An abstract base for quantum graphs.
+
+     Parameters
+     ----------
+     header : `HeaderModel`
+         Structured metadata for the graph.
+     pipeline_graph : `..pipeline_graph.PipelineGraph`
+         Graph of tasks and dataset types. May contain a superset of the
+         tasks and dataset types that actually have quanta and datasets in
+         the quantum graph.
+     """
+
+     def __init__(self, header: HeaderModel, pipeline_graph: PipelineGraph):
+         self.header = header
+         self.pipeline_graph = pipeline_graph
+
+     @property
+     @abstractmethod
+     def quanta_by_task(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
+         """A nested mapping of all quanta, keyed first by task name and then
+         by data ID.
+
+         Notes
+         -----
+         Partial loads may not fully populate this mapping, but it can always
+         be accessed.
+
+         The returned object may be an internal dictionary; as the type
+         annotation indicates, it should not be modified in place.
+         """
+         raise NotImplementedError()
+
+     @property
+     @abstractmethod
+     def datasets_by_type(self) -> Mapping[str, Mapping[DataCoordinate, uuid.UUID]]:
+         """A nested mapping of all datasets, keyed first by dataset type name
+         and then by data ID.
+
+         Notes
+         -----
+         Partial loads may not fully populate this mapping, but it can always
+         be accessed.
+
+         The returned object may be an internal dictionary; as the type
+         annotation indicates, it should not be modified in place.
+         """
+         raise NotImplementedError()
+
+     @property
+     @abstractmethod
+     def quantum_only_xgraph(self) -> networkx.DiGraph:
+         """A directed acyclic graph with quanta as nodes and datasets elided.
+
+         Notes
+         -----
+         Partial loads may not fully populate this graph, but it can always be
+         accessed.
+
+         Node state dictionaries are described by the `QuantumInfo` type
+         (or a subtype thereof).
+
+         The returned object is a read-only view of an internal one.
+         """
+         raise NotImplementedError()
+
+     @property
+     @abstractmethod
+     def bipartite_xgraph(self) -> networkx.DiGraph:
+         """A directed acyclic graph with quantum and dataset nodes.
+
+         Notes
+         -----
+         Partial loads may not fully populate this graph, but it can always be
+         accessed.
+
+         Node state dictionaries are described by the `QuantumInfo` and
+         `DatasetInfo` types (or subtypes thereof). Edges have state
+         dictionaries described by `BipartiteEdgeInfo`.
+
+         The returned object is a read-only view of an internal one.
+         """
+         raise NotImplementedError()
+
+
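The two graph views support different traversals; a minimal sketch, assuming `qg` is a concrete implementation of the abstract API above:

    import networkx

    # Count quanta per task via the nested mapping.
    for task_label, quanta in qg.quanta_by_task.items():
        print(task_label, len(quanta))

    # Visit quanta in dependency order, with datasets elided.
    for quantum_id in networkx.topological_sort(qg.quantum_only_xgraph):
        info = qg.quantum_only_xgraph.nodes[quantum_id]
        print(info["task_label"], info["data_id"])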
+ @dataclasses.dataclass
+ class BaseQuantumGraphWriter:
+     """A helper class for writing quantum graphs."""
+
+     zf: zipfile.ZipFile
+     """The zip archive that represents the quantum graph on disk."""
+
+     compressor: Compressor
+     """A compressor for all compressed JSON blocks."""
+
+     address_writer: AddressWriter
+     """A helper object for writing addresses that point into the multi-block
+     files.
+     """
+
+     int_size: int
+     """Size (in bytes) used to write integers to binary files."""
+
+     @classmethod
+     @contextmanager
+     def open(
+         cls,
+         uri: ResourcePathExpression,
+         header: HeaderModel,
+         pipeline_graph: PipelineGraph,
+         indices: dict[uuid.UUID, int],
+         *,
+         address_filename: str,
+         compressor: Compressor,
+         cdict_data: bytes | None = None,
+     ) -> Iterator[Self]:
+         uri = ResourcePath(uri)
+         address_writer = AddressWriter(indices)
+         with uri.open(mode="wb") as stream:
+             with zipfile.ZipFile(stream, mode="w", compression=zipfile.ZIP_STORED) as zf:
+                 self = cls(zf, compressor, address_writer, header.int_size)
+                 self.write_single_model("header", header)
+                 if cdict_data is not None:
+                     zf.writestr("compression_dict", cdict_data)
+                 self.write_single_model("pipeline_graph", SerializedPipelineGraph.serialize(pipeline_graph))
+                 yield self
+                 address_writer.write_to_zip(zf, address_filename, int_size=self.int_size)
+
+     def write_single_model(self, name: str, model: pydantic.BaseModel) -> None:
+         """Write a single pydantic model as a compressed JSON block in the zip archive.
+
+         Parameters
+         ----------
+         name : `str`
+             Base name of the file. An extension will be added.
+         model : `pydantic.BaseModel`
+             Pydantic model to convert to JSON.
+         """
+         json_data = model.model_dump_json().encode()
+         self.write_single_block(name, json_data)
+
+     def write_single_block(self, name: str, json_data: bytes) -> None:
+         """Write a single compressed JSON block as a 'file' in a zip archive.
+
+         Parameters
+         ----------
+         name : `str`
+             Base name of the file. An extension will be added.
+         json_data : `bytes`
+             Raw JSON to compress and write.
+         """
+         json_data = self.compressor.compress(json_data)
+         self.zf.writestr(f"{name}.json.zst", json_data)
+
+
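A hedged usage sketch for the writer context manager; the URI, the address file name, the empty index mapping, and the use of a bare `zstandard.ZstdCompressor` as the `Compressor` are illustrative assumptions, not conventions fixed by this diff:

    import zstandard

    with BaseQuantumGraphWriter.open(
        "example.qg",
        header,          # a HeaderModel
        pipeline_graph,  # an existing PipelineGraph
        indices={},      # uuid.UUID -> int positions for the address file
        address_filename="addresses",
        compressor=zstandard.ZstdCompressor(),
    ) as writer:
        # Extra blocks can be written inside the context.
        writer.write_single_block("extra", b"{}")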
+ @dataclasses.dataclass
+ class BaseQuantumGraphReader:
+     """A helper class for reading quantum graphs."""
+
+     header: HeaderModel
+     """Header metadata for the quantum graph."""
+
+     pipeline_graph: PipelineGraph
+     """Graph of tasks and dataset type names that appear in the quantum
+     graph.
+     """
+
+     zf: zipfile.ZipFile
+     """The zip archive that represents the quantum graph on disk."""
+
+     decompressor: Decompressor
+     """A decompressor for all compressed JSON blocks."""
+
+     address_reader: AddressReader
+     """A helper object for reading addresses that point into the multi-block
+     files.
+     """
+
+     page_size: int
+     """Approximate number of bytes to read at a time.
+
+     Note that this does not set a page size for *all* reads, but it
+     does affect the smallest, most numerous reads.
+     """
+
+     @classmethod
+     @contextmanager
+     def _open(
+         cls,
+         uri: ResourcePathExpression,
+         *,
+         address_filename: str,
+         graph_type: str,
+         page_size: int | None = None,
+         import_mode: TaskImportMode = TaskImportMode.ASSUME_CONSISTENT_EDGES,
+     ) -> Iterator[Self]:
+         """Construct a reader from a URI.
+
+         Parameters
+         ----------
+         uri : convertible to `lsst.resources.ResourcePath`
+             URI to open. Should have a ``.qg`` extension.
+         address_filename : `str`
+             Base filename for the address file.
+         graph_type : `str`
+             Value to expect for `HeaderModel.graph_type`.
+         page_size : `int`, optional
+             Approximate number of bytes to read at once from address files.
+             Note that this does not set a page size for *all* reads, but it
+             does affect the smallest, most numerous reads. When `None`, the
+             ``LSST_QG_PAGE_SIZE`` environment variable is checked before
+             falling back to a default of 5 MB.
+         import_mode : `..pipeline_graph.TaskImportMode`, optional
+             How to handle importing the task classes referenced in the
+             pipeline graph.
+
+         Returns
+         -------
+         reader : `contextlib.AbstractContextManager` [ \
+                 `BaseQuantumGraphReader` ]
+             A context manager that returns the reader when entered.
+         """
+         if page_size is None:
+             page_size = int(os.environ.get("LSST_QG_PAGE_SIZE", DEFAULT_PAGE_SIZE))
+         uri = ResourcePath(uri)
+         cdict: zstandard.ZstdCompressionDict | None = None
+         with uri.open(mode="rb") as zf_stream:
+             with zipfile.ZipFile(zf_stream, "r") as zf:
+                 if (cdict_path := zipfile.Path(zf, "compression_dict")).exists():
+                     cdict = zstandard.ZstdCompressionDict(cdict_path.read_bytes())
+                 decompressor = zstandard.ZstdDecompressor(cdict)
+                 header = cls._read_single_block_static("header", HeaderModel, zf, decompressor)
+                 if header.graph_type != graph_type:
+                     raise TypeError(f"Header is for a {header.graph_type!r} graph, not a {graph_type!r} graph.")
+                 serialized_pipeline_graph = cls._read_single_block_static(
+                     "pipeline_graph", SerializedPipelineGraph, zf, decompressor
+                 )
+                 pipeline_graph = serialized_pipeline_graph.deserialize(import_mode)
+                 with AddressReader.open_in_zip(
+                     zf, address_filename, page_size=page_size, int_size=header.int_size
+                 ) as address_reader:
+                     yield cls(
+                         header=header,
+                         pipeline_graph=pipeline_graph,
+                         zf=zf,
+                         decompressor=decompressor,
+                         address_reader=address_reader,
+                         page_size=page_size,
+                     )
+
+     @staticmethod
+     def _read_single_block_static(
+         name: str, model_type: type[_T], zf: zipfile.ZipFile, decompressor: Decompressor
+     ) -> _T:
+         """Read a single compressed JSON block from a 'file' in a zip archive.
+
+         Parameters
+         ----------
+         name : `str`
+             Base name of the file. An extension will be added.
+         model_type : `type` [ `pydantic.BaseModel` ]
+             Pydantic model to validate JSON with.
+         zf : `zipfile.ZipFile`
+             Zip archive to read the file from.
+         decompressor : `Decompressor`
+             Object with a `decompress` method that takes and returns `bytes`.
+
+         Returns
+         -------
+         model : `pydantic.BaseModel`
+             Validated model.
+         """
+         compressed_data = zf.read(f"{name}.json.zst")
+         json_data = decompressor.decompress(compressed_data)
+         return model_type.model_validate_json(json_data)
+
+     def _read_single_block(self, name: str, model_type: type[_T]) -> _T:
+         """Read a single compressed JSON block from a 'file' in a zip archive.
+
+         Parameters
+         ----------
+         name : `str`
+             Base name of the file. An extension will be added.
+         model_type : `type` [ `pydantic.BaseModel` ]
+             Pydantic model to validate JSON with.
+
+         Returns
+         -------
+         model : `pydantic.BaseModel`
+             Validated model.
+         """
+         return self._read_single_block_static(name, model_type, self.zf, self.decompressor)
+
+     def _read_single_block_raw(self, name: str) -> bytes:
+         """Read a single compressed block from a 'file' in a zip archive.
+
+         Parameters
+         ----------
+         name : `str`
+             Base name of the file. An extension will be added.
+
+         Returns
+         -------
+         data : `bytes`
+             Decompressed bytes.
+         """
+         compressed_data = self.zf.read(f"{name}.json.zst")
+         return self.decompressor.decompress(compressed_data)
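
To make the single-block convention concrete, here is a self-contained sketch of the `<name>.json.zst` layout that the reader and writer above agree on (file contents and names are illustrative):

    import io
    import zipfile

    import zstandard

    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, mode="w", compression=zipfile.ZIP_STORED) as zf:
        # Each logical block is an independently zstd-compressed JSON 'file'.
        zf.writestr("header.json.zst", zstandard.ZstdCompressor().compress(b'{"version": 0}'))
    with zipfile.ZipFile(buffer, mode="r") as zf:
        json_data = zstandard.ZstdDecompressor().decompress(zf.read("header.json.zst"))
    assert json_data == b'{"version": 0}'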