lsst-pipe-base 29.2025.4800__py3-none-any.whl → 30.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_instrument.py +6 -5
- lsst/pipe/base/caching_limited_butler.py +3 -0
- lsst/pipe/base/log_capture.py +39 -79
- lsst/pipe/base/log_on_close.py +79 -0
- lsst/pipe/base/mp_graph_executor.py +51 -15
- lsst/pipe/base/quantum_graph/_common.py +4 -3
- lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
- lsst/pipe/base/quantum_graph/_predicted.py +106 -12
- lsst/pipe/base/quantum_graph/_provenance.py +657 -6
- lsst/pipe/base/quantum_graph/aggregator/_communicators.py +18 -50
- lsst/pipe/base/quantum_graph/aggregator/_ingester.py +14 -3
- lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -232
- lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -113
- lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +10 -5
- lsst/pipe/base/quantum_graph/aggregator/_writer.py +31 -348
- lsst/pipe/base/quantum_graph/formatter.py +101 -0
- lsst/pipe/base/quantum_graph_builder.py +12 -1
- lsst/pipe/base/quantum_graph_executor.py +116 -13
- lsst/pipe/base/quantum_graph_skeleton.py +1 -7
- lsst/pipe/base/script/register_instrument.py +4 -4
- lsst/pipe/base/script/retrieve_artifacts_for_quanta.py +5 -6
- lsst/pipe/base/script/transfer_from_graph.py +42 -42
- lsst/pipe/base/script/zip_from_graph.py +7 -8
- lsst/pipe/base/separable_pipeline_executor.py +18 -2
- lsst/pipe/base/simple_pipeline_executor.py +4 -3
- lsst/pipe/base/single_quantum_executor.py +70 -34
- lsst/pipe/base/tests/mocks/_repo.py +44 -16
- lsst/pipe/base/tests/simpleQGraph.py +43 -35
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/RECORD +39 -37
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/WHEEL +1 -1
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# This file is part of pipe_base.
|
|
2
|
+
#
|
|
3
|
+
# Developed for the LSST Data Management System.
|
|
4
|
+
# This product includes software developed by the LSST Project
|
|
5
|
+
# (http://www.lsst.org).
|
|
6
|
+
# See the COPYRIGHT file at the top-level directory of this distribution
|
|
7
|
+
# for details of code ownership.
|
|
8
|
+
#
|
|
9
|
+
# This software is dual licensed under the GNU General Public License and also
|
|
10
|
+
# under a 3-clause BSD license. Recipients may choose which of these licenses
|
|
11
|
+
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
|
|
12
|
+
# respectively. If you choose the GPL option then the following text applies
|
|
13
|
+
# (but note that there is still no warranty even if you opt for BSD instead):
|
|
14
|
+
#
|
|
15
|
+
# This program is free software: you can redistribute it and/or modify
|
|
16
|
+
# it under the terms of the GNU General Public License as published by
|
|
17
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
18
|
+
# (at your option) any later version.
|
|
19
|
+
#
|
|
20
|
+
# This program is distributed in the hope that it will be useful,
|
|
21
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
22
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
23
|
+
# GNU General Public License for more details.
|
|
24
|
+
#
|
|
25
|
+
# You should have received a copy of the GNU General Public License
|
|
26
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
__all__ = ("ProvenanceFormatter",)
|
|
31
|
+
|
|
32
|
+
import uuid
|
|
33
|
+
from typing import Any, ClassVar
|
|
34
|
+
|
|
35
|
+
import pydantic
|
|
36
|
+
|
|
37
|
+
from lsst.daf.butler import FormatterV2
|
|
38
|
+
from lsst.resources import ResourcePath
|
|
39
|
+
from lsst.utils.logging import getLogger
|
|
40
|
+
|
|
41
|
+
from ..pipeline_graph import TaskImportMode
|
|
42
|
+
from ._provenance import ProvenanceQuantumGraphReader
|
|
43
|
+
|
|
44
|
+
# Name the logger after the module's import path rather than its file path so
# that it participates in the package's dotted logger hierarchy and can be
# configured along with the rest of the package.
_LOG = getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class _ProvenanceFormatterParameters(pydantic.BaseModel):
    """A Pydantic model for validating and applying defaults to the
    read parameters of `ProvenanceFormatter`.

    Notes
    -----
    ``quanta`` and ``datasets`` accept any iterable of IDs (it is converted
    to a `list` before Pydantic validates the elements) or `None`.
    """

    # Forwarded to ``ProvenanceQuantumGraphReader.open`` by the formatter.
    import_mode: TaskImportMode = TaskImportMode.DO_NOT_IMPORT
    # Forwarded to ``reader.read_quanta`` when the full graph is read, and
    # used to select nodes for the node-based components.
    quanta: list[uuid.UUID] | None = None
    # Forwarded to ``reader.read_datasets`` when the full graph is read, and
    # used to select nodes for the node-based components.
    datasets: list[uuid.UUID] | None = None
    # Whether ``reader.read_init_quanta`` is called when the full graph is
    # read.
    read_init_quanta: bool = True

    @pydantic.field_validator("quanta", mode="before")
    @classmethod
    def quanta_to_list(cls, v: Any) -> list[uuid.UUID] | None:
        """Coerce an arbitrary iterable of quantum IDs to a `list`.

        `None` is passed through unchanged: the field is declared optional,
        and calling ``list(None)`` would raise a `TypeError` that Pydantic
        does not translate into a validation error.
        """
        return None if v is None else list(v)

    @pydantic.field_validator("datasets", mode="before")
    @classmethod
    def datasets_to_list(cls, v: Any) -> list[uuid.UUID] | None:
        """Coerce an arbitrary iterable of dataset IDs to a `list`.

        `None` is passed through unchanged, for the same reason as in
        `quanta_to_list`.
        """
        return None if v is None else list(v)

    @property
    def nodes(self) -> list[uuid.UUID]:
        """The requested quantum IDs followed by the requested dataset IDs.

        Raises
        ------
        ValueError
            Raised if neither ``quanta`` nor ``datasets`` was provided.
        """
        if self.quanta is None and self.datasets is None:
            raise ValueError("'datasets' and/or 'quanta' parameters are required for this component")
        return (self.quanta or []) + (self.datasets or [])
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class ProvenanceFormatter(FormatterV2):
    """Formatter that lets a butler read `ProvenanceQuantumGraph` files.

    Notes
    -----
    Read parameters are validated (and given defaults) by
    `_ProvenanceFormatterParameters`.  The supported components are
    ``"metadata"``, ``"logs"`` and ``"packages"``; passing no component reads
    the graph itself.
    """

    default_extension: ClassVar[str] = ".qg"
    can_read_from_uri: ClassVar[bool] = True

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the graph, or one of its components, directly from a URI.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            Location of the file to open.
        component : `str` or `None`, optional
            Component to read, or `None` to read the graph itself.
        expected_size : `int`, optional
            Not used by this implementation.

        Returns
        -------
        result : `typing.Any`
            ``reader.graph`` when no component is requested; otherwise
            whatever the reader's corresponding ``fetch_*`` method returns.

        Raises
        ------
        AssertionError
            Raised if ``component`` is not one of the recognized values.
        """
        raw_parameters = self.file_descriptor.parameters or {}
        parameters = _ProvenanceFormatterParameters.model_validate(raw_parameters)
        with ProvenanceQuantumGraphReader.open(uri, import_mode=parameters.import_mode) as reader:
            if component is None:
                # Full read: optionally the init quanta, then the requested
                # quanta and datasets.
                if parameters.read_init_quanta:
                    reader.read_init_quanta()
                reader.read_quanta(parameters.quanta)
                reader.read_datasets(parameters.datasets)
                return reader.graph
            if component == "metadata":
                return reader.fetch_metadata(parameters.nodes)
            if component == "logs":
                return reader.fetch_logs(parameters.nodes)
            if component == "packages":
                return reader.fetch_packages()
        raise AssertionError(f"Unexpected component {component!r}.")
|
|
@@ -1095,11 +1095,13 @@ class QuantumGraphBuilder(ABC):
|
|
|
1095
1095
|
to `lsst.daf.butler.DatastoreRecordData`, as used by
|
|
1096
1096
|
`lsst.daf.butler.Quantum`.
|
|
1097
1097
|
"""
|
|
1098
|
+
self.log.info("Fetching and attaching datastore records for all overall inputs.")
|
|
1098
1099
|
overall_inputs = skeleton.extract_overall_inputs()
|
|
1099
1100
|
exported_records = self.butler._datastore.export_records(overall_inputs.values())
|
|
1100
1101
|
for task_label in self._pipeline_graph.tasks:
|
|
1101
1102
|
if not skeleton.has_task(task_label):
|
|
1102
1103
|
continue
|
|
1104
|
+
self.log.verbose("Fetching and attaching datastore records for task %s.", task_label)
|
|
1103
1105
|
task_init_key = skeleton.get_task_init_node(task_label)
|
|
1104
1106
|
init_input_ids = {
|
|
1105
1107
|
ref.id
|
|
@@ -1152,12 +1154,14 @@ class QuantumGraphBuilder(ABC):
|
|
|
1152
1154
|
"""
|
|
1153
1155
|
from .graph import QuantumGraph
|
|
1154
1156
|
|
|
1157
|
+
self.log.info("Transforming graph skeleton into a QuantumGraph instance.")
|
|
1155
1158
|
quanta: dict[TaskDef, set[Quantum]] = {}
|
|
1156
1159
|
init_inputs: dict[TaskDef, Iterable[DatasetRef]] = {}
|
|
1157
1160
|
init_outputs: dict[TaskDef, Iterable[DatasetRef]] = {}
|
|
1158
1161
|
for task_def in self._pipeline_graph._iter_task_defs():
|
|
1159
1162
|
if not skeleton.has_task(task_def.label):
|
|
1160
1163
|
continue
|
|
1164
|
+
self.log.verbose("Transforming graph skeleton nodes for task %s.", task_def.label)
|
|
1161
1165
|
task_node = self._pipeline_graph.tasks[task_def.label]
|
|
1162
1166
|
task_init_key = skeleton.get_task_init_node(task_def.label)
|
|
1163
1167
|
task_init_state = skeleton[task_init_key]
|
|
@@ -1198,7 +1202,8 @@ class QuantumGraphBuilder(ABC):
|
|
|
1198
1202
|
ref = skeleton.get_dataset_ref(dataset_key)
|
|
1199
1203
|
assert ref is not None, "Global init input refs should be resolved already."
|
|
1200
1204
|
global_init_outputs.append(ref)
|
|
1201
|
-
|
|
1205
|
+
self.log.verbose("Invoking QuantumGraph class constructor.")
|
|
1206
|
+
result = QuantumGraph(
|
|
1202
1207
|
quanta,
|
|
1203
1208
|
metadata=all_metadata,
|
|
1204
1209
|
universe=self.universe,
|
|
@@ -1207,6 +1212,8 @@ class QuantumGraphBuilder(ABC):
|
|
|
1207
1212
|
globalInitOutputs=global_init_outputs,
|
|
1208
1213
|
registryDatasetTypes=registry_dataset_types,
|
|
1209
1214
|
)
|
|
1215
|
+
self.log.info("Graph build complete.")
|
|
1216
|
+
return result
|
|
1210
1217
|
|
|
1211
1218
|
@final
|
|
1212
1219
|
@timeMethod
|
|
@@ -1243,6 +1250,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
1243
1250
|
PredictedQuantumGraphComponents,
|
|
1244
1251
|
)
|
|
1245
1252
|
|
|
1253
|
+
self.log.info("Transforming graph skeleton into PredictedQuantumGraph components.")
|
|
1246
1254
|
components = PredictedQuantumGraphComponents(pipeline_graph=self._pipeline_graph)
|
|
1247
1255
|
components.header.inputs = list(self.input_collections)
|
|
1248
1256
|
components.header.output_run = self.output_run
|
|
@@ -1270,6 +1278,7 @@ class QuantumGraphBuilder(ABC):
|
|
|
1270
1278
|
for task_node in self._pipeline_graph.tasks.values():
|
|
1271
1279
|
if not skeleton.has_task(task_node.label):
|
|
1272
1280
|
continue
|
|
1281
|
+
self.log.verbose("Transforming graph skeleton nodes for task %s.", task_node.label)
|
|
1273
1282
|
task_init_key = TaskInitKey(task_node.label)
|
|
1274
1283
|
init_quantum_datasets = PredictedQuantumDatasetsModel.model_construct(
|
|
1275
1284
|
quantum_id=generate_uuidv7(),
|
|
@@ -1315,8 +1324,10 @@ class QuantumGraphBuilder(ABC):
|
|
|
1315
1324
|
},
|
|
1316
1325
|
)
|
|
1317
1326
|
components.quantum_datasets[quantum_datasets.quantum_id] = quantum_datasets
|
|
1327
|
+
self.log.verbose("Building the thin summary graph.")
|
|
1318
1328
|
components.set_thin_graph()
|
|
1319
1329
|
components.set_header_counts()
|
|
1330
|
+
self.log.info("Graph build complete.")
|
|
1320
1331
|
return components
|
|
1321
1332
|
|
|
1322
1333
|
@staticmethod
|
|
@@ -27,23 +27,113 @@
|
|
|
27
27
|
|
|
28
28
|
from __future__ import annotations
|
|
29
29
|
|
|
30
|
-
__all__ = ["QuantumExecutor", "QuantumGraphExecutor"]
|
|
30
|
+
__all__ = ["QuantumExecutionResult", "QuantumExecutor", "QuantumGraphExecutor"]
|
|
31
31
|
|
|
32
32
|
from abc import ABC, abstractmethod
|
|
33
|
-
from typing import TYPE_CHECKING
|
|
33
|
+
from typing import TYPE_CHECKING, Self
|
|
34
|
+
|
|
35
|
+
from lsst.daf.butler import Quantum
|
|
34
36
|
|
|
35
37
|
from .quantum_reports import QuantumReport, Report
|
|
36
38
|
|
|
37
39
|
if TYPE_CHECKING:
|
|
38
40
|
import uuid
|
|
39
41
|
|
|
40
|
-
from lsst.daf.butler import
|
|
42
|
+
from lsst.daf.butler.logging import ButlerLogRecords
|
|
41
43
|
|
|
44
|
+
from ._task_metadata import TaskMetadata
|
|
42
45
|
from .graph import QuantumGraph
|
|
43
46
|
from .pipeline_graph import TaskNode
|
|
44
47
|
from .quantum_graph import PredictedQuantumGraph
|
|
45
48
|
|
|
46
49
|
|
|
50
|
+
class QuantumExecutionResult(tuple[Quantum, QuantumReport | None]):
|
|
51
|
+
"""A result struct that captures information about a single quantum's
|
|
52
|
+
execution.
|
|
53
|
+
|
|
54
|
+
Parameters
|
|
55
|
+
----------
|
|
56
|
+
quantum : `lsst.daf.butler.Quantum`
|
|
57
|
+
Quantum that was executed.
|
|
58
|
+
report : `.quantum_reports.QuantumReport`
|
|
59
|
+
Report with basic information about the execution.
|
|
60
|
+
task_metadata : `TaskMetadata`, optional
|
|
61
|
+
Metadata saved by the task and executor during execution.
|
|
62
|
+
skipped_existing : `bool`, optional
|
|
63
|
+
If `True`, this quantum was not executed because it appeared to have
|
|
64
|
+
already been executed successfully.
|
|
65
|
+
adjusted_no_work : `bool`, optional
|
|
66
|
+
If `True`, this quantum was not executed because the
|
|
67
|
+
`PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
|
|
68
|
+
|
|
69
|
+
Notes
|
|
70
|
+
-----
|
|
71
|
+
For backwards compatibility, this class is a two-element tuple that allows
|
|
72
|
+
the ``quantum`` and ``report`` attributes to be unpacked. Additional
|
|
73
|
+
regular attributes may be added by executors (but the tuple must remain
|
|
74
|
+
only two elements to enable the current unpacking interface).
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
def __new__(
|
|
78
|
+
cls,
|
|
79
|
+
quantum: Quantum,
|
|
80
|
+
report: QuantumReport | None,
|
|
81
|
+
*,
|
|
82
|
+
task_metadata: TaskMetadata | None = None,
|
|
83
|
+
skipped_existing: bool | None = None,
|
|
84
|
+
adjusted_no_work: bool | None = None,
|
|
85
|
+
) -> Self:
|
|
86
|
+
return super().__new__(cls, (quantum, report))
|
|
87
|
+
|
|
88
|
+
# We need to define both __init__ and __new__ because tuple inheritance
|
|
89
|
+
# requires __new__ and numpydoc requires __init__.
|
|
90
|
+
|
|
91
|
+
def __init__(
|
|
92
|
+
self,
|
|
93
|
+
quantum: Quantum,
|
|
94
|
+
report: QuantumReport | None,
|
|
95
|
+
*,
|
|
96
|
+
task_metadata: TaskMetadata | None = None,
|
|
97
|
+
skipped_existing: bool | None = None,
|
|
98
|
+
adjusted_no_work: bool | None = None,
|
|
99
|
+
):
|
|
100
|
+
self._task_metadata = task_metadata
|
|
101
|
+
self._skipped_existing = skipped_existing
|
|
102
|
+
self._adjusted_no_work = adjusted_no_work
|
|
103
|
+
|
|
104
|
+
@property
|
|
105
|
+
def quantum(self) -> Quantum:
|
|
106
|
+
"""The quantum actually executed."""
|
|
107
|
+
return self[0]
|
|
108
|
+
|
|
109
|
+
@property
|
|
110
|
+
def report(self) -> QuantumReport | None:
|
|
111
|
+
"""Structure describing the status of the execution of a quantum.
|
|
112
|
+
|
|
113
|
+
This is `None` if the implementation does not support this feature.
|
|
114
|
+
"""
|
|
115
|
+
return self[1]
|
|
116
|
+
|
|
117
|
+
@property
|
|
118
|
+
def task_metadata(self) -> TaskMetadata | None:
|
|
119
|
+
"""Metadata saved by the task and executor during execution."""
|
|
120
|
+
return self._task_metadata
|
|
121
|
+
|
|
122
|
+
@property
|
|
123
|
+
def skipped_existing(self) -> bool | None:
|
|
124
|
+
"""If `True`, this quantum was not executed because it appeared to have
|
|
125
|
+
already been executed successfully.
|
|
126
|
+
"""
|
|
127
|
+
return self._skipped_existing
|
|
128
|
+
|
|
129
|
+
@property
|
|
130
|
+
def adjusted_no_work(self) -> bool | None:
|
|
131
|
+
"""If `True`, this quantum was not executed because the
|
|
132
|
+
`PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
|
|
133
|
+
"""
|
|
134
|
+
return self._adjusted_no_work
|
|
135
|
+
|
|
136
|
+
|
|
47
137
|
class QuantumExecutor(ABC):
|
|
48
138
|
"""Class which abstracts execution of a single Quantum.
|
|
49
139
|
|
|
@@ -55,8 +145,14 @@ class QuantumExecutor(ABC):
|
|
|
55
145
|
|
|
56
146
|
@abstractmethod
|
|
57
147
|
def execute(
|
|
58
|
-
self,
|
|
59
|
-
|
|
148
|
+
self,
|
|
149
|
+
task_node: TaskNode,
|
|
150
|
+
/,
|
|
151
|
+
quantum: Quantum,
|
|
152
|
+
quantum_id: uuid.UUID | None = None,
|
|
153
|
+
*,
|
|
154
|
+
log_records: ButlerLogRecords | None = None,
|
|
155
|
+
) -> QuantumExecutionResult:
|
|
60
156
|
"""Execute single quantum.
|
|
61
157
|
|
|
62
158
|
Parameters
|
|
@@ -67,15 +163,18 @@ class QuantumExecutor(ABC):
|
|
|
67
163
|
Quantum for this execution.
|
|
68
164
|
quantum_id : `uuid.UUID` or `None`, optional
|
|
69
165
|
The ID of the quantum to be executed.
|
|
166
|
+
log_records : `lsst.daf.butler.ButlerLogRecords`, optional
|
|
167
|
+
Container that should be used to store logs in memory before
|
|
168
|
+
writing them to the butler. This disables streaming log (since
|
|
169
|
+
we'd have to store them in memory anyway), but it permits the
|
|
170
|
+
caller to prepend logs to be stored in the butler and allows task
|
|
171
|
+
logs to be inspected by the caller after execution is complete.
|
|
70
172
|
|
|
71
173
|
Returns
|
|
72
174
|
-------
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
Structure describing the status of the execution of a quantum.
|
|
77
|
-
`None` is returned if implementation does not support this
|
|
78
|
-
feature.
|
|
175
|
+
result : `QuantumExecutionResult`
|
|
176
|
+
Result struct. May also be unpacked as a 2-tuple (see type
|
|
177
|
+
documentation).
|
|
79
178
|
|
|
80
179
|
Notes
|
|
81
180
|
-----
|
|
@@ -93,7 +192,9 @@ class QuantumGraphExecutor(ABC):
|
|
|
93
192
|
"""
|
|
94
193
|
|
|
95
194
|
@abstractmethod
|
|
96
|
-
def execute(
|
|
195
|
+
def execute(
|
|
196
|
+
self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
|
|
197
|
+
) -> None:
|
|
97
198
|
"""Execute whole graph.
|
|
98
199
|
|
|
99
200
|
Implementation of this method depends on particular execution model
|
|
@@ -103,8 +204,10 @@ class QuantumGraphExecutor(ABC):
|
|
|
103
204
|
|
|
104
205
|
Parameters
|
|
105
206
|
----------
|
|
106
|
-
graph : `.QuantumGraph`
|
|
207
|
+
graph : `.QuantumGraph` or `.quantum_graph.PredictedQuantumGraph`
|
|
107
208
|
Execution graph.
|
|
209
|
+
provenance_graph_file : `str`, optional
|
|
210
|
+
A filename to write provenance to.
|
|
108
211
|
"""
|
|
109
212
|
raise NotImplementedError()
|
|
110
213
|
|
|
@@ -383,12 +383,6 @@ class QuantumGraphSkeleton:
|
|
|
383
383
|
The dataset ref of the prerequisite.
|
|
384
384
|
**attrs : `~typing.Any`
|
|
385
385
|
Additional attributes for the node.
|
|
386
|
-
|
|
387
|
-
Notes
|
|
388
|
-
-----
|
|
389
|
-
This automatically sets the 'existing_input' ref attribute (see
|
|
390
|
-
`set_existing_input_ref`), since prerequisites are always overall
|
|
391
|
-
inputs.
|
|
392
386
|
"""
|
|
393
387
|
key = PrerequisiteDatasetKey(ref.datasetType.name, ref.id.bytes)
|
|
394
388
|
self._xgraph.add_node(key, data_id=ref.dataId, ref=ref, **attrs)
|
|
@@ -606,7 +600,7 @@ class QuantumGraphSkeleton:
|
|
|
606
600
|
|
|
607
601
|
def set_output_in_the_way(self, ref: DatasetRef) -> None:
|
|
608
602
|
"""Associate a dataset node with a `DatasetRef` that represents an
|
|
609
|
-
existing output in the output RUN
|
|
603
|
+
existing output in the output RUN collection.
|
|
610
604
|
|
|
611
605
|
Parameters
|
|
612
606
|
----------
|
|
@@ -53,7 +53,7 @@ def register_instrument(repo: str, instrument: list[str], update: bool = False)
|
|
|
53
53
|
Raised iff the instrument is not a subclass of
|
|
54
54
|
`lsst.pipe.base.Instrument`.
|
|
55
55
|
"""
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
56
|
+
with Butler.from_config(repo, writeable=True) as butler:
|
|
57
|
+
for string in instrument:
|
|
58
|
+
instrument_instance = Instrument.from_string(string, butler.registry)
|
|
59
|
+
instrument_instance.register(butler.registry, update=update)
|
|
@@ -93,16 +93,15 @@ def retrieve_artifacts_for_quanta(
|
|
|
93
93
|
dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}
|
|
94
94
|
|
|
95
95
|
# Make QBB, its config is the same as output Butler.
|
|
96
|
-
|
|
96
|
+
with QuantumBackedButler.from_predicted(
|
|
97
97
|
config=repo,
|
|
98
98
|
predicted_inputs=[ref.id for ref in refs],
|
|
99
99
|
predicted_outputs=[],
|
|
100
100
|
dimensions=qgraph.universe,
|
|
101
101
|
datastore_records=datastore_records,
|
|
102
102
|
dataset_types=dataset_types,
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
)
|
|
103
|
+
) as qbb:
|
|
104
|
+
paths = qbb.retrieve_artifacts(
|
|
105
|
+
refs, dest, transfer=transfer, overwrite=clobber, preserve_path=preserve_path
|
|
106
|
+
)
|
|
108
107
|
return paths
|
|
@@ -85,52 +85,52 @@ def transfer_from_graph(
|
|
|
85
85
|
# Get data repository dataset type definitions from the QuantumGraph.
|
|
86
86
|
dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}
|
|
87
87
|
|
|
88
|
-
# Make QBB, its config is the same as output Butler.
|
|
89
|
-
qbb = QuantumBackedButler.from_predicted(
|
|
90
|
-
config=dest,
|
|
91
|
-
predicted_inputs=[ref.id for ref in output_refs],
|
|
92
|
-
predicted_outputs=[],
|
|
93
|
-
dimensions=qgraph.universe,
|
|
94
|
-
datastore_records={},
|
|
95
|
-
dataset_types=dataset_types,
|
|
96
|
-
)
|
|
97
|
-
|
|
98
88
|
# Filter the refs based on requested dataset types.
|
|
99
89
|
filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
|
|
100
90
|
_LOG.verbose("After filtering by dataset_type, number of datasets to transfer: %d", len(filtered_refs))
|
|
101
91
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
92
|
+
# Make QBB, its config is the same as output Butler.
|
|
93
|
+
with (
|
|
94
|
+
QuantumBackedButler.from_predicted(
|
|
95
|
+
config=dest,
|
|
96
|
+
predicted_inputs=[ref.id for ref in output_refs],
|
|
97
|
+
predicted_outputs=[],
|
|
98
|
+
dimensions=qgraph.universe,
|
|
99
|
+
datastore_records={},
|
|
100
|
+
dataset_types=dataset_types,
|
|
101
|
+
) as qbb,
|
|
102
|
+
Butler.from_config(dest, writeable=True) as dest_butler,
|
|
103
|
+
):
|
|
104
|
+
# For faster restarts, filter out those the destination already knows.
|
|
105
|
+
filtered_refs = filter_by_existence(dest_butler, filtered_refs)
|
|
106
|
+
|
|
107
|
+
# Transfer in chunks
|
|
108
|
+
chunk_size = 50_000
|
|
109
|
+
n_chunks = math.ceil(len(filtered_refs) / chunk_size)
|
|
110
|
+
chunk_num = 0
|
|
111
|
+
count = 0
|
|
112
|
+
for chunk in chunk_iterable(filtered_refs, chunk_size=chunk_size):
|
|
113
|
+
chunk_num += 1
|
|
114
|
+
if n_chunks > 1:
|
|
115
|
+
_LOG.verbose("Transferring %d datasets in chunk %d/%d", len(chunk), chunk_num, n_chunks)
|
|
116
|
+
transferred = dest_butler.transfer_from(
|
|
117
|
+
qbb,
|
|
118
|
+
chunk,
|
|
119
|
+
transfer="auto",
|
|
120
|
+
register_dataset_types=register_dataset_types,
|
|
121
|
+
transfer_dimensions=transfer_dimensions,
|
|
122
|
+
dry_run=dry_run,
|
|
123
|
+
)
|
|
124
|
+
count += len(transferred)
|
|
125
|
+
|
|
126
|
+
# If asked to do so, update output chain definition.
|
|
127
|
+
if update_output_chain and (metadata := qgraph.metadata) is not None:
|
|
128
|
+
# These are defined in CmdLineFwk.
|
|
129
|
+
output_run = metadata.get("output_run")
|
|
130
|
+
output = metadata.get("output")
|
|
131
|
+
input = metadata.get("input")
|
|
132
|
+
if output_run is not None and output is not None:
|
|
133
|
+
_update_chain(dest_butler, output, output_run, input)
|
|
134
134
|
|
|
135
135
|
return count
|
|
136
136
|
|
|
@@ -72,19 +72,18 @@ def zip_from_graph(
|
|
|
72
72
|
# Get data repository dataset type definitions from the QuantumGraph.
|
|
73
73
|
dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}
|
|
74
74
|
|
|
75
|
+
# Filter the refs based on requested dataset types.
|
|
76
|
+
filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
|
|
77
|
+
|
|
75
78
|
# Make QBB, its config is the same as output Butler.
|
|
76
|
-
|
|
79
|
+
with QuantumBackedButler.from_predicted(
|
|
77
80
|
config=repo,
|
|
78
81
|
predicted_inputs=[ref.id for ref in output_refs],
|
|
79
82
|
predicted_outputs=[],
|
|
80
83
|
dimensions=qgraph.universe,
|
|
81
84
|
datastore_records={},
|
|
82
85
|
dataset_types=dataset_types,
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
|
|
87
|
-
|
|
88
|
-
_LOG.info("Retrieving artifacts for %d datasets and storing in Zip file.", len(filtered_refs))
|
|
89
|
-
zip = qbb.retrieve_artifacts_zip(filtered_refs, dest)
|
|
86
|
+
) as qbb:
|
|
87
|
+
_LOG.info("Retrieving artifacts for %d datasets and storing in Zip file.", len(filtered_refs))
|
|
88
|
+
zip = qbb.retrieve_artifacts_zip(filtered_refs, dest)
|
|
90
89
|
return zip
|
|
@@ -40,7 +40,8 @@ from collections.abc import Iterable
|
|
|
40
40
|
from typing import Any
|
|
41
41
|
|
|
42
42
|
import lsst.resources
|
|
43
|
-
from lsst.daf.butler import Butler
|
|
43
|
+
from lsst.daf.butler import Butler, DatasetRef
|
|
44
|
+
from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest
|
|
44
45
|
|
|
45
46
|
from ._quantumContext import ExecutionResources
|
|
46
47
|
from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
|
|
@@ -362,6 +363,8 @@ class SeparablePipelineExecutor:
|
|
|
362
363
|
fail_fast: bool = False,
|
|
363
364
|
graph_executor: QuantumGraphExecutor | None = None,
|
|
364
365
|
num_proc: int = 1,
|
|
366
|
+
*,
|
|
367
|
+
provenance_dataset_ref: DatasetRef | None = None,
|
|
365
368
|
) -> None:
|
|
366
369
|
"""Run a pipeline in the form of a prepared quantum graph.
|
|
367
370
|
|
|
@@ -384,6 +387,14 @@ class SeparablePipelineExecutor:
|
|
|
384
387
|
The number of processes that can be used to run the pipeline. The
|
|
385
388
|
default value ensures that no subprocess is created. Only used with
|
|
386
389
|
the default graph executor.
|
|
390
|
+
provenance_dataset_ref : `lsst.daf.butler.DatasetRef`, optional
|
|
391
|
+
Dataset that should be used to save provenance. Provenance is only
|
|
392
|
+
supported when running in a single process (at least for the
|
|
393
|
+
default quantum executor), and should not be used with
|
|
394
|
+
``skip_existing_in=[output_run]`` when retrying a previous
|
|
395
|
+
execution attempt. The caller is responsible for registering the
|
|
396
|
+
dataset type and for ensuring that the dimensions of this dataset
|
|
397
|
+
do not lead to uniqueness conflicts.
|
|
387
398
|
"""
|
|
388
399
|
if not graph_executor:
|
|
389
400
|
quantum_executor = SingleQuantumExecutor(
|
|
@@ -404,4 +415,9 @@ class SeparablePipelineExecutor:
|
|
|
404
415
|
# forked processes.
|
|
405
416
|
self._butler.registry.resetConnectionPool()
|
|
406
417
|
|
|
407
|
-
|
|
418
|
+
if provenance_dataset_ref is not None:
|
|
419
|
+
with TemporaryForIngest(self._butler, provenance_dataset_ref) as temporary:
|
|
420
|
+
graph_executor.execute(graph, provenance_graph_file=temporary.ospath)
|
|
421
|
+
temporary.ingest()
|
|
422
|
+
else:
|
|
423
|
+
graph_executor.execute(graph)
|
|
@@ -40,6 +40,7 @@ from lsst.daf.butler import (
|
|
|
40
40
|
DatasetRef,
|
|
41
41
|
Quantum,
|
|
42
42
|
)
|
|
43
|
+
from lsst.daf.butler.registry import RegistryDefaults
|
|
43
44
|
from lsst.pex.config import Config
|
|
44
45
|
|
|
45
46
|
from ._instrument import Instrument
|
|
@@ -152,9 +153,9 @@ class SimplePipelineExecutor:
|
|
|
152
153
|
collections = [output_run]
|
|
153
154
|
collections.extend(inputs)
|
|
154
155
|
butler.registry.setCollectionChain(output, collections)
|
|
155
|
-
#
|
|
156
|
-
|
|
157
|
-
return
|
|
156
|
+
# Override the registry defaults. No need to clone.
|
|
157
|
+
butler.registry.defaults = RegistryDefaults(collections=[output], run=output_run)
|
|
158
|
+
return butler
|
|
158
159
|
|
|
159
160
|
@classmethod
|
|
160
161
|
def from_pipeline_filename(
|