lsst-pipe-base 29.2025.4500__py3-none-any.whl → 29.2025.4700__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +156 -11
- lsst/pipe/base/log_capture.py +98 -7
- lsst/pipe/base/pipeline_graph/expressions.py +3 -3
- lsst/pipe/base/quantum_graph/_common.py +21 -1
- lsst/pipe/base/quantum_graph/_multiblock.py +14 -39
- lsst/pipe/base/quantum_graph/_predicted.py +90 -90
- lsst/pipe/base/quantum_graph/_provenance.py +345 -200
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +19 -19
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +201 -72
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +45 -35
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +15 -17
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +57 -149
- lsst/pipe/base/quantum_graph_builder.py +0 -1
- lsst/pipe/base/quantum_provenance_graph.py +2 -44
- lsst/pipe/base/single_quantum_executor.py +43 -9
- lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/RECORD +29 -29
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4500.dist-info → lsst_pipe_base-29.2025.4700.dist-info}/zip-safe +0 -0
lsst/pipe/base/_status.py
CHANGED
@@ -27,28 +27,37 @@
 
 from __future__ import annotations
 
+__all__ = (
+    "AlgorithmError",
+    "AnnotatedPartialOutputsError",
+    "ExceptionInfo",
+    "InvalidQuantumError",
+    "NoWorkFound",
+    "QuantumAttemptStatus",
+    "QuantumSuccessCaveats",
+    "RepeatableQuantumError",
+    "UnprocessableDataError",
+    "UpstreamFailureNoWorkFound",
+)
+
 import abc
 import enum
 import logging
+import sys
 from typing import TYPE_CHECKING, Any, ClassVar, Protocol
 
+import pydantic
+
 from lsst.utils import introspection
+from lsst.utils.logging import LsstLogAdapter, getLogger
 
 from ._task_metadata import GetSetDictMetadata, NestedMetadataDict
 
 if TYPE_CHECKING:
-    from
+    from ._task_metadata import TaskMetadata
 
-
-
-    "AnnotatedPartialOutputsError",
-    "InvalidQuantumError",
-    "NoWorkFound",
-    "QuantumSuccessCaveats",
-    "RepeatableQuantumError",
-    "UnprocessableDataError",
-    "UpstreamFailureNoWorkFound",
-)
+
+_LOG = getLogger(__name__)
 
 
 class QuantumSuccessCaveats(enum.Flag):
@@ -175,6 +184,142 @@ class QuantumSuccessCaveats(enum.Flag):
     }
 
 
+class ExceptionInfo(pydantic.BaseModel):
+    """Information about an exception that was raised."""
+
+    type_name: str
+    """Fully-qualified Python type name for the exception raised."""
+
+    message: str
+    """String message included in the exception."""
+
+    metadata: dict[str, float | int | str | bool | None]
+    """Additional metadata included in the exception."""
+
+    @classmethod
+    def _from_metadata(cls, md: TaskMetadata) -> ExceptionInfo:
+        """Construct from task metadata.
+
+        Parameters
+        ----------
+        md : `TaskMetadata`
+            Metadata about the error, as written by
+            `AnnotatedPartialOutputsError`.
+
+        Returns
+        -------
+        info : `ExceptionInfo`
+            Information about the exception.
+        """
+        result = cls(type_name=md["type"], message=md["message"], metadata={})
+        if "metadata" in md:
+            raw_err_metadata = md["metadata"].to_dict()
+            for k, v in raw_err_metadata.items():
+                # Guard against error metadata we wouldn't be able to serialize
+                # later via Pydantic; don't want one weird value bringing down
+                # our ability to report on an entire run.
+                if isinstance(v, float | int | str | bool):
+                    result.metadata[k] = v
+                else:
+                    _LOG.debug(
+                        "Not propagating nested or JSON-incompatible exception metadata key %s=%r.", k, v
+                    )
+        return result
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class QuantumAttemptStatus(enum.Enum):
+    """Enum summarizing an attempt to run a quantum."""
+
+    UNKNOWN = -3
+    """The status of this attempt is unknown.
+
+    This usually means no logs or metadata were written, and it at least could
+    not be determined whether the quantum was blocked by an upstream failure
+    (if it was definitely blocked, `BLOCKED` is set instead).
+    """
+
+    LOGS_MISSING = -2
+    """Task metadata was written for this attempt but logs were not.
+
+    This is a rare condition that requires a hard failure (i.e. the kind that
+    can prevent a ``finally`` block from running or I/O from being durable) at
+    a very precise time.
+    """
+
+    FAILED = -1
+    """Execution of the quantum failed.
+
+    This is always set if the task metadata dataset was not written but logs
+    were, as is the case when a Python exception is caught and handled by the
+    execution system. It may also be set in cases where logs were not written
+    either, but other information was available (e.g. from higher-level
+    orchestration tooling) to mark it as a failure.
+    """
+
+    BLOCKED = 0
+    """This quantum was not executed because an upstream quantum failed.
+
+    Upstream quanta with status `UNKNOWN` or `FAILED` are considered blockers;
+    `LOGS_MISSING` is not.
+    """
+
+    SUCCESSFUL = 1
+    """This quantum was successfully executed.
+
+    Quanta may be considered successful even if they do not write any outputs
+    or shortcut early by raising `NoWorkFound` or one of its variants. They
+    may even be considered successful if they raise
+    `AnnotatedPartialOutputsError` if the executor is configured to treat that
+    exception as a non-failure. See `QuantumSuccessCaveats` for details on how
+    these "successes with caveats" are reported.
+    """
+
+
 class GetSetDictMetadataHolder(Protocol):
     """Protocol for objects that have a ``metadata`` attribute that satisfies
     `GetSetDictMetadata`.
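The new ExceptionInfo class is a plain Pydantic record, so it round-trips through JSON with standard Pydantic v2 calls. The snippet below is only an illustration: ExceptionInfoSketch is a hypothetical stand-in that mirrors the fields added above, and the exception name and metadata values are made up.

import pydantic

class ExceptionInfoSketch(pydantic.BaseModel):
    # Mirrors the fields added to lsst/pipe/base/_status.py above.
    type_name: str
    message: str
    metadata: dict[str, float | int | str | bool | None]

# Round-trip through JSON, as provenance tooling would when the record is
# stored alongside captured logs.
info = ExceptionInfoSketch(
    type_name="lsst.pipe.base.RepeatableQuantumError",
    message="PSF fit did not converge",
    metadata={"n_iterations": 25, "chi2": 13.7},
)
restored = ExceptionInfoSketch.model_validate_json(info.model_dump_json())
assert restored == info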
lsst/pipe/base/log_capture.py
CHANGED
@@ -29,28 +29,105 @@ from __future__ import annotations
 
 __all__ = ["LogCapture"]
 
+import dataclasses
 import logging
 import os
 import shutil
 import tempfile
+import uuid
 from collections.abc import Iterator
 from contextlib import contextmanager, suppress
 from logging import FileHandler
 
-
-from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
+import pydantic
 
-from .
+from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
+from lsst.daf.butler.logging import (
+    ButlerLogRecord,
+    ButlerLogRecordHandler,
+    ButlerLogRecords,
+    ButlerMDC,
+    JsonLogFormatter,
+)
+
+from ._status import ExceptionInfo, InvalidQuantumError
+from ._task_metadata import TaskMetadata
 from .automatic_connection_constants import METADATA_OUTPUT_TEMPLATE
 from .pipeline_graph import TaskNode
 
 _LOG = logging.getLogger(__name__)
 
 
-class
-    """
+class _ExecutionLogRecordsExtra(pydantic.BaseModel):
+    """Extra information about a quantum's execution stored with logs.
+
+    This middleware-private model includes information that is not directly
+    available via any public interface, as it is used exclusively for
+    provenance extraction and then made available through the provenance
+    quantum graph.
+    """
+
+    exception: ExceptionInfo | None = None
+    """Exception information for this quantum, if it failed.
+    """
+
+    metadata: TaskMetadata | None = None
+    """Metadata for this quantum, if it failed.
+
+    Metadata datasets are written if and only if a quantum succeeds, but we
+    still want to capture metadata from failed attempts, so we store it in the
+    log dataset. This field is always `None` when the quantum succeeds,
+    because in that case the metadata is already stored separately.
+    """
+
+    previous_process_quanta: list[uuid.UUID] = pydantic.Field(default_factory=list)
+    """The IDs of other quanta previously executed in the same process as this
+    one.
+    """
+
+    logs: list[ButlerLogRecord] = pydantic.Field(default_factory=list)
+    """Logs for this attempt.
+
+    This is always empty for the most recent attempt, because that stores logs
+    in the main section of the butler log records.
+    """
+
+    previous_attempts: list[_ExecutionLogRecordsExtra] = pydantic.Field(default_factory=list)
+    """Information about previous attempts to run this task within the same
+    `~lsst.daf.butler.CollectionType.RUN` collection.
+
+    This is always empty for any attempt other than the most recent one,
+    as all previous attempts are flattened into one list.
+    """
+
+    def attach_previous_attempt(self, log_records: ButlerLogRecords) -> None:
+        """Attach logs from a previous attempt to this struct.
+
+        Parameters
+        ----------
+        log_records : `ButlerLogRecords`
+            Logs from a past attempt to run a quantum.
+        """
+        previous = self.model_validate(log_records.extra)
+        previous.logs.extend(log_records)
+        self.previous_attempts.extend(previous.previous_attempts)
+        self.previous_attempts.append(previous)
+        previous.previous_attempts.clear()
+
+
+@dataclasses.dataclass
+class _LogCaptureContext:
+    """Controls for log capture returned by the `LogCapture.capture_logging`
+    context manager.
+    """
 
     store: bool = True
+    """Whether to store logs at all."""
+
+    extra: _ExecutionLogRecordsExtra = dataclasses.field(default_factory=_ExecutionLogRecordsExtra)
+    """Extra information about the quantum's execution to store for provenance
+    extraction.
+    """
 
 
 class LogCapture:
@@ -88,7 +165,7 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[
+    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -121,7 +198,7 @@
             metadata_ref = quantum.outputs[METADATA_OUTPUT_TEMPLATE.format(label=task_node.label)][0]
             mdc["RUN"] = metadata_ref.run
 
-        ctx =
+        ctx = _LogCaptureContext()
         log_dataset_name = (
             task_node.log_output.dataset_type_name if task_node.log_output is not None else None
         )
@@ -154,6 +231,12 @@
                 # Ensure that the logs are stored in butler.
                 logging.getLogger().removeHandler(log_handler_file)
                 log_handler_file.close()
+                if ctx.extra:
+                    with open(log_file, "a") as log_stream:
+                        ButlerLogRecords.write_streaming_extra(
+                            log_stream,
+                            ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                        )
                 if ctx.store:
                     self._ingest_log_records(quantum, log_dataset_name, log_file)
                 shutil.rmtree(tmpdir, ignore_errors=True)
@@ -165,7 +248,15 @@
            try:
                with ButlerMDC.set_mdc(mdc):
                    yield ctx
+            except:
+                raise
+            else:
+                # If the quantum succeeded, we don't need to save the
+                # metadata in the logs, because we'll have saved them in
+                # the metadata.
+                ctx.extra.metadata = None
            finally:
+                log_handler_memory.records.extra = ctx.extra.model_dump()
                # Ensure that the logs are stored in butler.
                logging.getLogger().removeHandler(log_handler_memory)
                if ctx.store:
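The attach_previous_attempt logic above keeps retry history flat: each older attempt's own previous_attempts are folded into the newest attempt's list rather than nested. Below is a minimal self-contained sketch of that flattening, with a simplified hypothetical model (AttemptExtraSketch) standing in for _ExecutionLogRecordsExtra and a plain dict standing in for the stored log-records "extra" payload; it is not the package's code.

import pydantic

class AttemptExtraSketch(pydantic.BaseModel):
    # Simplified stand-in for _ExecutionLogRecordsExtra: just enough fields to
    # show how earlier attempts end up in one flat list.
    logs: list[str] = pydantic.Field(default_factory=list)
    previous_attempts: list["AttemptExtraSketch"] = pydantic.Field(default_factory=list)

    def attach_previous_attempt(self, extra_payload: dict, old_logs: list[str]) -> None:
        # Validate the "extra" payload saved with an older log dataset, fold its
        # own previous attempts into this one, then append it with its history cleared.
        previous = self.model_validate(extra_payload)
        previous.logs.extend(old_logs)
        self.previous_attempts.extend(previous.previous_attempts)
        self.previous_attempts.append(previous)
        previous.previous_attempts.clear()

AttemptExtraSketch.model_rebuild()

current = AttemptExtraSketch()
current.attach_previous_attempt({"previous_attempts": [{"logs": ["attempt 0"]}]}, ["attempt 1"])
# Both earlier attempts now sit in one flat list on the newest attempt.
assert [a.logs for a in current.previous_attempts] == [["attempt 0"], ["attempt 1"]]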
lsst/pipe/base/pipeline_graph/expressions.py
CHANGED
@@ -45,13 +45,13 @@ import dataclasses
 import functools
 from typing import TYPE_CHECKING, Any, Literal, TypeAlias
 
-from lsst.daf.butler.
+from lsst.daf.butler.queries.expressions.parser.ply import lex, yacc
 
 from ._exceptions import InvalidExpressionError
 
 if TYPE_CHECKING:
-    from lsst.daf.butler.
-    from lsst.daf.butler.
+    from lsst.daf.butler.queries.expressions.parser.parserLex import LexToken
+    from lsst.daf.butler.queries.expressions.parser.parserYacc import YaccProduction
 
 
 class _ParserLex:
lsst/pipe/base/quantum_graph/_common.py
CHANGED
@@ -28,6 +28,7 @@
 from __future__ import annotations
 
 __all__ = (
+    "FORMAT_VERSION",
     "BaseQuantumGraph",
     "BaseQuantumGraphReader",
     "BipartiteEdgeInfo",
@@ -60,6 +61,7 @@ import pydantic
 import zstandard
 
 from lsst.daf.butler import DataCoordinate, DataIdValue
+from lsst.daf.butler._rubin import generate_uuidv7
 from lsst.resources import ResourcePath, ResourcePathExpression
 
 from ..pipeline_graph import DatasetTypeNode, Edge, PipelineGraph, TaskImportMode, TaskNode
@@ -91,6 +93,19 @@ DataCoordinateValues: TypeAlias = list[DataIdValue]
 
 _T = TypeVar("_T", bound=pydantic.BaseModel)
 
+FORMAT_VERSION: int = 1
+"""
+File format version number for new files.
+
+This applies to both predicted and provenance QGs, since they usually change
+in concert.
+
+CHANGELOG:
+
+- 0: Initial version.
+- 1: Switched from internal integer IDs to UUIDs in all models.
+"""
+
 
 class IncompleteQuantumGraphError(RuntimeError):
     pass
@@ -99,7 +114,7 @@ class IncompleteQuantumGraphError(RuntimeError):
 class HeaderModel(pydantic.BaseModel):
     """Data model for the header of a quantum graph file."""
 
-    version: int =
+    version: int = FORMAT_VERSION
     """File format / data model version number."""
 
     graph_type: str = ""
@@ -157,6 +172,11 @@
     quantum graph file).
     """
 
+    provenance_dataset_id: uuid.UUID = pydantic.Field(default_factory=generate_uuidv7)
+    """The dataset ID for provenance quantum graph when it is ingested into
+    a butler repository.
+    """
+
     @classmethod
     def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> HeaderModel:
         """Extract a header from an old `QuantumGraph` instance.
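HeaderModel.version now defaults to the shared FORMAT_VERSION constant, whose docstring carries its own changelog. How readers treat the version field is not shown in this diff; the snippet below is only a generic sketch of the usual pattern, and SUPPORTED_VERSIONS is an assumed example rather than anything defined by the package.

# Hypothetical reader-side check; FORMAT_VERSION matches the constant added
# above, SUPPORTED_VERSIONS is an assumed example.
FORMAT_VERSION: int = 1
SUPPORTED_VERSIONS = frozenset({0, 1})

def check_header_version(version: int) -> None:
    # Reject files written by a newer (or otherwise unknown) format revision.
    if version not in SUPPORTED_VERSIONS:
        raise RuntimeError(
            f"Quantum graph file has format version {version}; this build reads "
            f"versions {sorted(SUPPORTED_VERSIONS)} and writes version {FORMAT_VERSION}."
        )

check_header_version(1)  # accepted
# check_header_version(2) would raise RuntimeError.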
lsst/pipe/base/quantum_graph/_multiblock.py
CHANGED
@@ -323,10 +323,11 @@ class AddressReader:
     rows: dict[uuid.UUID, AddressRow] = dataclasses.field(default_factory=dict)
     """Rows that have already been read."""
 
-    rows_by_index: dict[int, AddressRow] = dataclasses.field(default_factory=dict)
-    """Rows that have already been read, keyed by integer index."""
-
     pages: list[AddressPage] = dataclasses.field(default_factory=list)
+    """Descriptions of the file offsets and integer row indexes of pages and
+    flags for whether they have been read already.
+    """
+
     page_bounds: dict[int, PageBounds] = dataclasses.field(default_factory=dict)
     """Mapping from page index to page boundary information."""
 
@@ -502,32 +503,23 @@ class AddressReader:
         self.pages.clear()
         return self.rows
 
-    def find(self, key: uuid.UUID
+    def find(self, key: uuid.UUID) -> AddressRow:
         """Read the row for the given UUID or integer index.
 
         Parameters
         ----------
-        key : `uuid.UUID`
-            UUID
+        key : `uuid.UUID`
+            UUID to find.
 
         Returns
         -------
         row : `AddressRow`
             Addresses for the given UUID.
         """
-
-            case uuid.UUID():
-                return self._find_uuid(key)
-            case int():
-                return self._find_index(key)
-            case _:
-                raise TypeError(f"Invalid argument: {key}.")
-
-    def _find_uuid(self, target: uuid.UUID) -> AddressRow:
-        if (row := self.rows.get(target)) is not None:
+        if (row := self.rows.get(key)) is not None:
             return row
         if self.n_rows == 0 or not self.pages:
-            raise LookupError(f"Address for {
+            raise LookupError(f"Address for {key} not found.")
 
         # Use a binary search to find the page containing the target UUID.
         left = 0
@@ -535,35 +527,19 @@
         while left <= right:
             mid = left + ((right - left) // 2)
             self._read_page(mid)
-            if (row := self.rows.get(
+            if (row := self.rows.get(key)) is not None:
                 return row
             bounds = self.page_bounds[mid]
-            if
+            if key.int < bounds.uuid_int_begin:
                 right = mid - 1
-            elif
+            elif key.int > bounds.uuid_int_end:
                 left = mid + 1
             else:
                 # Should have been on this page, but it wasn't.
-                raise LookupError(f"Address for {
+                raise LookupError(f"Address for {key} not found.")
 
         # Ran out of pages to search.
-        raise LookupError(f"Address for {
-
-    def _find_index(self, target: int) -> AddressRow:
-        # First shortcut if we've already loaded this row.
-        if (row := self.rows_by_index.get(target)) is not None:
-            return row
-        if target < 0 or target >= self.n_rows:
-            raise LookupError(f"Address for index {target} not found.")
-        # Since all indexes should be present, we can predict the right page
-        # exactly.
-        page_index = target // self.rows_per_page
-        self._read_page(page_index)
-        try:
-            return self.rows_by_index[target]
-        except KeyError:
-            _LOG.debug("Index find failed: %s should have been in page %s.", target, page_index)
-            raise LookupError(f"Address for {target} not found.") from None
+        raise LookupError(f"Address for {key} not found.")
 
     def _read_page(self, page_index: int, page_stream: BytesIO | None = None) -> bool:
         page = self.pages[page_index]
@@ -586,7 +562,6 @@
     def _read_row(self, page_stream: BytesIO) -> AddressRow:
         row = AddressRow.read(page_stream, self.n_addresses, self.int_size)
         self.rows[row.key] = row
-        self.rows_by_index[row.index] = row
         _LOG.debug("Read address row %s.", row)
         return row
 
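With the integer-index lookup removed, AddressReader.find resolves a UUID by binary-searching pages whose bounds are expressed as UUID integer values (uuid.UUID.int). Below is a self-contained sketch of that search; PageSketch is a hypothetical stand-in for the reader's page and bounds bookkeeping, and the real method additionally caches rows it has already read.

import uuid
from dataclasses import dataclass

@dataclass
class PageSketch:
    # Stand-in for a page of address rows: contiguous UUID-int bounds plus the
    # rows the page contains. Pages are assumed to be sorted by their bounds.
    uuid_int_begin: int
    uuid_int_end: int
    rows: dict[uuid.UUID, str]

def find_sketch(pages: list[PageSketch], key: uuid.UUID) -> str:
    # Binary search over pages by UUID integer value, mirroring the rewritten
    # AddressReader.find.
    left, right = 0, len(pages) - 1
    while left <= right:
        mid = left + ((right - left) // 2)
        page = pages[mid]
        if (row := page.rows.get(key)) is not None:
            return row
        if key.int < page.uuid_int_begin:
            right = mid - 1
        elif key.int > page.uuid_int_end:
            left = mid + 1
        else:
            # The key falls inside this page's bounds but is not present.
            raise LookupError(f"Address for {key} not found.")
    raise LookupError(f"Address for {key} not found.")

ids = sorted((uuid.uuid4() for _ in range(4)), key=lambda u: u.int)
pages = [
    PageSketch(ids[0].int, ids[1].int, {ids[0]: "row-0", ids[1]: "row-1"}),
    PageSketch(ids[2].int, ids[3].int, {ids[2]: "row-2", ids[3]: "row-3"}),
]
assert find_sketch(pages, ids[2]) == "row-2"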