lsst-pipe-base 29.2025.4400-py3-none-any.whl → 29.2025.4600-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_status.py +156 -11
- lsst/pipe/base/log_capture.py +98 -7
- lsst/pipe/base/pipelineIR.py +36 -3
- lsst/pipe/base/pipeline_graph/expressions.py +3 -3
- lsst/pipe/base/quantum_graph/_common.py +6 -0
- lsst/pipe/base/quantum_graph/_predicted.py +13 -17
- lsst/pipe/base/quantum_graph/_provenance.py +322 -106
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +9 -9
- lsst/pipe/base/quantum_graph/aggregator/_progress.py +77 -84
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +154 -53
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +27 -34
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +8 -7
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +5 -8
- lsst/pipe/base/quantum_provenance_graph.py +2 -44
- lsst/pipe/base/single_quantum_executor.py +43 -9
- lsst/pipe/base/tests/mocks/_data_id_match.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +1 -1
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/RECORD +28 -28
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4400.dist-info → lsst_pipe_base-29.2025.4600.dist-info}/zip-safe +0 -0
lsst/pipe/base/_status.py
CHANGED
@@ -27,28 +27,37 @@
 
 from __future__ import annotations
 
+__all__ = (
+    "AlgorithmError",
+    "AnnotatedPartialOutputsError",
+    "ExceptionInfo",
+    "InvalidQuantumError",
+    "NoWorkFound",
+    "QuantumAttemptStatus",
+    "QuantumSuccessCaveats",
+    "RepeatableQuantumError",
+    "UnprocessableDataError",
+    "UpstreamFailureNoWorkFound",
+)
+
 import abc
 import enum
 import logging
+import sys
 from typing import TYPE_CHECKING, Any, ClassVar, Protocol
 
+import pydantic
+
 from lsst.utils import introspection
+from lsst.utils.logging import LsstLogAdapter, getLogger
 
 from ._task_metadata import GetSetDictMetadata, NestedMetadataDict
 
 if TYPE_CHECKING:
-    from
+    from ._task_metadata import TaskMetadata
 
-__all__ = (
-    "AlgorithmError",
-    "AnnotatedPartialOutputsError",
-    "InvalidQuantumError",
-    "NoWorkFound",
-    "QuantumSuccessCaveats",
-    "RepeatableQuantumError",
-    "UnprocessableDataError",
-    "UpstreamFailureNoWorkFound",
-)
+
+_LOG = getLogger(__name__)
 
 
 class QuantumSuccessCaveats(enum.Flag):
@@ -175,6 +184,142 @@ class QuantumSuccessCaveats(enum.Flag):
     }
 
 
+class ExceptionInfo(pydantic.BaseModel):
+    """Information about an exception that was raised."""
+
+    type_name: str
+    """Fully-qualified Python type name for the exception raised."""
+
+    message: str
+    """String message included in the exception."""
+
+    metadata: dict[str, float | int | str | bool | None]
+    """Additional metadata included in the exception."""
+
+    @classmethod
+    def _from_metadata(cls, md: TaskMetadata) -> ExceptionInfo:
+        """Construct from task metadata.
+
+        Parameters
+        ----------
+        md : `TaskMetadata`
+            Metadata about the error, as written by
+            `AnnotatedPartialOutputsError`.
+
+        Returns
+        -------
+        info : `ExceptionInfo`
+            Information about the exception.
+        """
+        result = cls(type_name=md["type"], message=md["message"], metadata={})
+        if "metadata" in md:
+            raw_err_metadata = md["metadata"].to_dict()
+            for k, v in raw_err_metadata.items():
+                # Guard against error metadata we wouldn't be able to serialize
+                # later via Pydantic; don't want one weird value bringing down
+                # our ability to report on an entire run.
+                if isinstance(v, float | int | str | bool):
+                    result.metadata[k] = v
+                else:
+                    _LOG.debug(
+                        "Not propagating nested or JSON-incompatible exception metadata key %s=%r.", k, v
+                    )
+        return result
+
+    # Work around the fact that Sphinx chokes on Pydantic docstring formatting,
+    # when we inherit those docstrings in our public classes.
+    if "sphinx" in sys.modules and not TYPE_CHECKING:
+
+        def copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.copy`."""
+            return super().copy(*args, **kwargs)
+
+        def model_dump(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump`."""
+            return super().model_dump(*args, **kwargs)
+
+        def model_dump_json(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_dump_json`."""
+            return super().model_dump_json(*args, **kwargs)
+
+        def model_copy(self, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_copy`."""
+            return super().model_copy(*args, **kwargs)
+
+        @classmethod
+        def model_construct(cls, *args: Any, **kwargs: Any) -> Any:  # type: ignore[misc, override]
+            """See `pydantic.BaseModel.model_construct`."""
+            return super().model_construct(*args, **kwargs)
+
+        @classmethod
+        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_json_schema`."""
+            return super().model_json_schema(*args, **kwargs)
+
+        @classmethod
+        def model_validate(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate`."""
+            return super().model_validate(*args, **kwargs)
+
+        @classmethod
+        def model_validate_json(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_json`."""
+            return super().model_validate_json(*args, **kwargs)
+
+        @classmethod
+        def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Any:
+            """See `pydantic.BaseModel.model_validate_strings`."""
+            return super().model_validate_strings(*args, **kwargs)
+
+
+class QuantumAttemptStatus(enum.Enum):
+    """Enum summarizing an attempt to run a quantum."""
+
+    UNKNOWN = -3
+    """The status of this attempt is unknown.
+
+    This usually means no logs or metadata were written, and it could not
+    even be determined whether the quantum was blocked by an upstream failure
+    (if it was definitely blocked, `BLOCKED` is set instead).
+    """
+
+    LOGS_MISSING = -2
+    """Task metadata was written for this attempt but logs were not.
+
+    This is a rare condition that requires a hard failure (i.e. the kind that
+    can prevent a ``finally`` block from running or I/O from being durable) at
+    a very precise time.
+    """
+
+    FAILED = -1
+    """Execution of the quantum failed.
+
+    This is always set if the task metadata dataset was not written but logs
+    were, as is the case when a Python exception is caught and handled by the
+    execution system.  It may also be set in cases where logs were not written
+    either, but other information was available (e.g. from higher-level
+    orchestration tooling) to mark it as a failure.
+    """
+
+    BLOCKED = 0
+    """This quantum was not executed because an upstream quantum failed.
+
+    Upstream quanta with status `UNKNOWN` or `FAILED` are considered blockers;
+    `LOGS_MISSING` is not.
+    """
+
+    SUCCESSFUL = 1
+    """This quantum was successfully executed.
+
+    Quanta may be considered successful even if they do not write any outputs
+    or shortcut early by raising `NoWorkFound` or one of its variants.  They
+    may even be considered successful if they raise
+    `AnnotatedPartialOutputsError`, when the executor is configured to treat
+    that exception as a non-failure.  See `QuantumSuccessCaveats` for details
+    on how these "successes with caveats" are reported.
+    """
+
+
 class GetSetDictMetadataHolder(Protocol):
    """Protocol for objects that have a ``metadata`` attribute that satisfies
    `GetSetDictMetadata`.
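
The new `ExceptionInfo` model and `QuantumAttemptStatus` enum carry the bulk of this change to `_status.py`. A minimal, runnable sketch of how the two fit together; the classes below are standalone stand-ins mirroring only the fields shown in the diff, not imports from the package:

    import enum

    import pydantic


    class ExceptionInfo(pydantic.BaseModel):
        """Stand-in with the same fields as the new model."""

        type_name: str
        message: str
        metadata: dict[str, float | int | str | bool | None]


    class QuantumAttemptStatus(enum.Enum):
        """Stand-in mirroring the new enum's values."""

        UNKNOWN = -3
        LOGS_MISSING = -2
        FAILED = -1
        BLOCKED = 0
        SUCCESSFUL = 1


    # Only scalar values survive the guard in ExceptionInfo._from_metadata;
    # nested mappings are dropped rather than breaking serialization later.
    raw = {"n_sources": 42, "threshold": 5.0, "nested": {"a": 1}}
    info = ExceptionInfo(
        type_name="lsst.pipe.base.NoWorkFound",
        message="no sources above threshold",
        metadata={k: v for k, v in raw.items() if isinstance(v, float | int | str | bool)},
    )
    print(info.model_dump_json())  # "nested" is absent from the output
    assert QuantumAttemptStatus(-1) is QuantumAttemptStatus.FAILED

Note the sign convention: negative values are abnormal outcomes, zero is blocked, and positive is success.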
lsst/pipe/base/log_capture.py
CHANGED
@@ -29,28 +29,105 @@ from __future__ import annotations
 
 __all__ = ["LogCapture"]
 
+import dataclasses
 import logging
 import os
 import shutil
 import tempfile
+import uuid
 from collections.abc import Iterator
 from contextlib import contextmanager, suppress
 from logging import FileHandler
 
-
-from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
+import pydantic
 
-from .
+from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
+from lsst.daf.butler.logging import (
+    ButlerLogRecord,
+    ButlerLogRecordHandler,
+    ButlerLogRecords,
+    ButlerMDC,
+    JsonLogFormatter,
+)
+
+from ._status import ExceptionInfo, InvalidQuantumError
+from ._task_metadata import TaskMetadata
 from .automatic_connection_constants import METADATA_OUTPUT_TEMPLATE
 from .pipeline_graph import TaskNode
 
 _LOG = logging.getLogger(__name__)
 
 
-class
-    """
+class _ExecutionLogRecordsExtra(pydantic.BaseModel):
+    """Extra information about a quantum's execution stored with logs.
+
+    This middleware-private model includes information that is not directly
+    available via any public interface, as it is used exclusively for
+    provenance extraction and then made available through the provenance
+    quantum graph.
+    """
+
+    exception: ExceptionInfo | None = None
+    """Exception information for this quantum, if it failed."""
+
+    metadata: TaskMetadata | None = None
+    """Metadata for this quantum, if it failed.
+
+    Metadata datasets are written if and only if a quantum succeeds, but we
+    still want to capture metadata from failed attempts, so we store it in the
+    log dataset.  This field is always `None` when the quantum succeeds,
+    because in that case the metadata is already stored separately.
+    """
+
+    previous_process_quanta: list[uuid.UUID] = pydantic.Field(default_factory=list)
+    """The IDs of other quanta previously executed in the same process as this
+    one.
+    """
+
+    logs: list[ButlerLogRecord] = pydantic.Field(default_factory=list)
+    """Logs for this attempt.
+
+    This is always empty for the most recent attempt, because that stores logs
+    in the main section of the butler log records.
+    """
+
+    previous_attempts: list[_ExecutionLogRecordsExtra] = pydantic.Field(default_factory=list)
+    """Information about previous attempts to run this task within the same
+    `~lsst.daf.butler.CollectionType.RUN` collection.
+
+    This is always empty for any attempt other than the most recent one,
+    as all previous attempts are flattened into one list.
+    """
+
+    def attach_previous_attempt(self, log_records: ButlerLogRecords) -> None:
+        """Attach logs from a previous attempt to this struct.
+
+        Parameters
+        ----------
+        log_records : `ButlerLogRecords`
+            Logs from a past attempt to run a quantum.
+        """
+        previous = self.model_validate(log_records.extra)
+        previous.logs.extend(log_records)
+        self.previous_attempts.extend(previous.previous_attempts)
+        self.previous_attempts.append(previous)
+        previous.previous_attempts.clear()
+
+
+@dataclasses.dataclass
+class _LogCaptureContext:
+    """Controls for log capture returned by the `LogCapture.capture_logging`
+    context manager.
+    """
 
     store: bool = True
+    """Whether to store logs at all."""
+
+    extra: _ExecutionLogRecordsExtra = dataclasses.field(default_factory=_ExecutionLogRecordsExtra)
+    """Extra information about the quantum's execution to store for provenance
+    extraction.
+    """
 
 
 class LogCapture:
@@ -88,7 +165,7 @@ class LogCapture:
         return cls(butler, butler)
 
     @contextmanager
-    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[
+    def capture_logging(self, task_node: TaskNode, /, quantum: Quantum) -> Iterator[_LogCaptureContext]:
         """Configure logging system to capture logs for execution of this task.
 
         Parameters
@@ -121,7 +198,7 @@ class LogCapture:
         metadata_ref = quantum.outputs[METADATA_OUTPUT_TEMPLATE.format(label=task_node.label)][0]
         mdc["RUN"] = metadata_ref.run
 
-        ctx =
+        ctx = _LogCaptureContext()
         log_dataset_name = (
             task_node.log_output.dataset_type_name if task_node.log_output is not None else None
         )
@@ -154,6 +231,12 @@ class LogCapture:
             # Ensure that the logs are stored in butler.
             logging.getLogger().removeHandler(log_handler_file)
             log_handler_file.close()
+            if ctx.extra:
+                with open(log_file, "a") as log_stream:
+                    ButlerLogRecords.write_streaming_extra(
+                        log_stream,
+                        ctx.extra.model_dump_json(exclude_unset=True, exclude_defaults=True),
+                    )
             if ctx.store:
                 self._ingest_log_records(quantum, log_dataset_name, log_file)
             shutil.rmtree(tmpdir, ignore_errors=True)
@@ -165,7 +248,15 @@ class LogCapture:
         try:
             with ButlerMDC.set_mdc(mdc):
                 yield ctx
+        except:
+            raise
+        else:
+            # If the quantum succeeded, we don't need to save the metadata
+            # in the logs, because it will have been saved in the metadata
+            # dataset.
+            ctx.extra.metadata = None
        finally:
+            log_handler_memory.records.extra = ctx.extra.model_dump()
            # Ensure that the logs are stored in butler.
            logging.getLogger().removeHandler(log_handler_memory)
            if ctx.store:
lsst/pipe/base/pipelineIR.py
CHANGED
@@ -45,7 +45,7 @@ import warnings
 from collections import Counter
 from collections.abc import Generator, Hashable, Iterable, MutableMapping
 from dataclasses import dataclass, field
-from typing import Any, Literal
+from typing import Any, Literal, cast
 
 import yaml
 
@@ -461,6 +461,8 @@ class ImportIR:
     """list of tasks that should be excluded when inheriting this pipeline.
     Either the include or exclude attributes may be specified, but not both.
     """
+    rename: dict[str, str] = field(default_factory=dict)
+    """dict of tasks to rename, keyed by old label, with the new label as the value."""
     importContracts: bool = True
     """Boolean attribute to dictate if contracts should be inherited with the
     pipeline or not.
@@ -497,18 +499,49 @@ class ImportIR:
                 "An include list and an exclude list cannot both be specified"
                 " when declaring a pipeline import."
             )
+        if rename_keys := self.rename.keys():
+            rename_values_set = set(self.rename.values())
+            if len(rename_values_set) != len(rename_keys):
+                raise ValueError(f"rename {rename_keys=} must not have duplicates")
+            if rename_values_set.intersection(rename_keys):
+                raise ValueError(
+                    f"rename keys={rename_keys} must not intersect with values={self.rename.values()}"
+                )
+
         tmp_pipeline = PipelineIR.from_uri(os.path.expandvars(self.location))
         if self.instrument is not _Tags.KeepInstrument:
             tmp_pipeline.instrument = self.instrument
 
         included_labels = set()
+        renamed_tasks = {}
         for label in tmp_pipeline.tasks:
+            is_included = self.include and label in self.include
             if (
-                (self.include and label in self.include)
+                is_included
                 or (self.exclude and label not in self.exclude)
                 or (self.include is None and self.exclude is None)
             ):
-                included_labels.add(label)
+                if (label_new := self.rename.get(label)) is not None:
+                    renamed_tasks[label] = label_new
+                    if is_included:
+                        self.include = [
+                            label_new if (x == label) else x for x in cast(list[str], self.include)
+                        ]
+                else:
+                    label_new = label
+                included_labels.add(label_new)
+
+        rename_errors = []
+        for label, label_new in renamed_tasks.items():
+            if label_new in tmp_pipeline.tasks:
+                rename_errors.append(f"Can't rename {label=} to existing {label_new=}")
+            else:
+                task = tmp_pipeline.tasks.pop(label)
+                task.label = label_new
+                tmp_pipeline.tasks[label_new] = task
+
+        if rename_errors:
+            raise ValueError("; ".join(rename_errors))
 
         # Handle labeled subsets being specified in the include or exclude
         # list, adding or removing labels.
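
The new `rename` field enforces three rules before any relabeling happens. Restated as a runnable sketch on plain dicts (the task labels here are hypothetical):

    # Mirrors ImportIR's validation plus the collision check applied after
    # filtering the imported pipeline.
    rename = {"isr": "isrFixed", "calibrate": "calibrateFixed"}
    imported_labels = {"isr", "calibrate", "characterizeImage"}

    # 1. New labels must be unique across the mapping.
    assert len(set(rename.values())) == len(rename)
    # 2. No label may appear as both an old name and a new name.
    assert not set(rename.values()) & rename.keys()
    # 3. A new label must not collide with a label already in the imported
    #    pipeline; violations are collected and reported together.
    for old, new in rename.items():
        assert new not in imported_labels, f"Can't rename {old!r} to existing {new!r}"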
lsst/pipe/base/pipeline_graph/expressions.py
CHANGED
@@ -45,13 +45,13 @@ import dataclasses
 import functools
 from typing import TYPE_CHECKING, Any, Literal, TypeAlias
 
-from lsst.daf.butler.
+from lsst.daf.butler.queries.expressions.parser.ply import lex, yacc
 
 from ._exceptions import InvalidExpressionError
 
 if TYPE_CHECKING:
-    from lsst.daf.butler.
-    from lsst.daf.butler.
+    from lsst.daf.butler.queries.expressions.parser.parserLex import LexToken
+    from lsst.daf.butler.queries.expressions.parser.parserYacc import YaccProduction
 
 
 class _ParserLex:
lsst/pipe/base/quantum_graph/_common.py
CHANGED
@@ -60,6 +60,7 @@ import pydantic
 import zstandard
 
 from lsst.daf.butler import DataCoordinate, DataIdValue
+from lsst.daf.butler._rubin import generate_uuidv7
 from lsst.resources import ResourcePath, ResourcePathExpression
 
 from ..pipeline_graph import DatasetTypeNode, Edge, PipelineGraph, TaskImportMode, TaskNode
@@ -157,6 +158,11 @@ class HeaderModel(pydantic.BaseModel):
     quantum graph file).
     """
 
+    provenance_dataset_id: uuid.UUID = pydantic.Field(default_factory=generate_uuidv7)
+    """The dataset ID for the provenance quantum graph when it is ingested
+    into a butler repository.
+    """
+
     @classmethod
     def from_old_quantum_graph(cls, old_quantum_graph: QuantumGraph) -> HeaderModel:
         """Extract a header from an old `QuantumGraph` instance.
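
`generate_uuidv7` is Rubin-internal (`lsst.daf.butler._rubin`), but the pattern itself is ordinary pydantic: a `default_factory` fixes the provenance graph's eventual butler dataset ID at the moment the header is built. A sketch with a stand-in factory (`fake_uuid7` is hypothetical; the real function returns time-ordered UUIDv7 values):

    import uuid

    import pydantic


    def fake_uuid7() -> uuid.UUID:
        """Stand-in for lsst.daf.butler._rubin.generate_uuidv7."""
        return uuid.uuid4()  # the real factory returns a time-ordered UUIDv7


    class Header(pydantic.BaseModel):
        # Each header gets a fresh ID at construction time, so the ID the
        # provenance graph will have after ingest is known up front.
        provenance_dataset_id: uuid.UUID = pydantic.Field(default_factory=fake_uuid7)


    a, b = Header(), Header()
    assert a.provenance_dataset_id != b.provenance_dataset_id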
lsst/pipe/base/quantum_graph/_predicted.py
CHANGED
@@ -1899,11 +1899,12 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
         """Construct a `PredictedQuantumGraph` instance from this reader."""
         return self.components.assemble()
 
-    def read_all(self) -> PredictedQuantumGraphReader:
+    def read_all(self) -> None:
         """Read all components in full."""
-
+        self.read_thin_graph()
+        self.read_execution_quanta()
 
-    def read_thin_graph(self) -> PredictedQuantumGraphReader:
+    def read_thin_graph(self) -> None:
         """Read the thin graph.
 
         The thin graph is a quantum-quantum DAG with internal integer IDs for
@@ -1918,17 +1919,15 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
         self.components.quantum_indices.update(
             {row.key: row.index for row in self.address_reader.rows.values()}
         )
-        return self
 
-    def read_init_quanta(self) -> PredictedQuantumGraphReader:
+    def read_init_quanta(self) -> None:
         """Read the list of special quanta that represent init-inputs and
         init-outputs.
         """
         if not self.components.init_quanta.root:
             self.components.init_quanta = self._read_single_block("init_quanta", PredictedInitQuantaModel)
-        return self
 
-    def read_dimension_data(self) -> PredictedQuantumGraphReader:
+    def read_dimension_data(self) -> None:
         """Read all dimension records.
 
         Record data IDs will be immediately deserialized, while other fields
@@ -1948,11 +1947,8 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
                 universe=self.components.pipeline_graph.universe,
             ),
         )
-        return self
 
-    def read_quantum_datasets(
-        self, quantum_ids: Iterable[uuid.UUID] | None = None
-    ) -> PredictedQuantumGraphReader:
+    def read_quantum_datasets(self, quantum_ids: Iterable[uuid.UUID] | None = None) -> None:
         """Read information about all datasets produced and consumed by the
         given quantum IDs.
 
@@ -1977,7 +1973,7 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
             self.address_reader.read_all()
             for address_row in self.address_reader.rows.values():
                 self.components.quantum_indices[address_row.key] = address_row.index
-            return self
+            return
         with MultiblockReader.open_in_zip(
             self.zf, "quantum_datasets", int_size=self.components.header.int_size
         ) as mb_reader:
@@ -1991,11 +1987,9 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
                 )
                 if quantum_datasets is not None:
                     self.components.quantum_datasets[address_row.key] = quantum_datasets
-        return self
+        return
 
-    def read_execution_quanta(
-        self, quantum_ids: Iterable[uuid.UUID] | None = None
-    ) -> PredictedQuantumGraphReader:
+    def read_execution_quanta(self, quantum_ids: Iterable[uuid.UUID] | None = None) -> None:
         """Read all information needed to execute the given quanta.
 
         Parameters
@@ -2004,4 +1998,6 @@ class PredictedQuantumGraphReader(BaseQuantumGraphReader):
             Iterable of quantum IDs to load. If not provided, all quanta will
             be loaded. The UUIDs of special init quanta will be ignored.
         """
-
+        self.read_init_quanta()
+        self.read_dimension_data()
+        self.read_quantum_datasets(quantum_ids)
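
The net effect of this hunk is an API shift: the reader's `read_*` methods previously returned `self`, allowing fluent chaining, and now return `None`. A sketch of a call site after the change; the import path is assumed and `load_everything` is a hypothetical helper:

    from lsst.pipe.base.quantum_graph import PredictedQuantumGraphReader  # assumed export


    def load_everything(reader: PredictedQuantumGraphReader) -> None:
        # Before: reader.read_thin_graph().read_execution_quanta() (each
        # method returned ``self``).  After: each call returns None, so the
        # reads become separate statements.
        reader.read_thin_graph()
        # Per the new body, read_execution_quanta() also triggers
        # read_init_quanta(), read_dimension_data(), and
        # read_quantum_datasets().
        reader.read_execution_quanta()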