lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/_datasetQueryConstraints.py +1 -1
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +6 -4
- lsst/pipe/base/connectionTypes.py +19 -19
- lsst/pipe/base/connections.py +2 -2
- lsst/pipe/base/exec_fixup_data_id.py +131 -0
- lsst/pipe/base/execution_graph_fixup.py +69 -0
- lsst/pipe/base/log_capture.py +227 -0
- lsst/pipe/base/mp_graph_executor.py +774 -0
- lsst/pipe/base/quantum_graph_builder.py +43 -42
- lsst/pipe/base/quantum_graph_executor.py +125 -0
- lsst/pipe/base/quantum_reports.py +334 -0
- lsst/pipe/base/script/transfer_from_graph.py +4 -1
- lsst/pipe/base/separable_pipeline_executor.py +296 -0
- lsst/pipe/base/simple_pipeline_executor.py +674 -0
- lsst/pipe/base/single_quantum_executor.py +636 -0
- lsst/pipe/base/taskFactory.py +18 -12
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/RECORD +27 -18
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
# This file is part of pipe_base.
|
|
2
|
+
#
|
|
3
|
+
# Developed for the LSST Data Management System.
|
|
4
|
+
# This product includes software developed by the LSST Project
|
|
5
|
+
# (http://www.lsst.org).
|
|
6
|
+
# See the COPYRIGHT file at the top-level directory of this distribution
|
|
7
|
+
# for details of code ownership.
|
|
8
|
+
#
|
|
9
|
+
# This software is dual licensed under the GNU General Public License and also
|
|
10
|
+
# under a 3-clause BSD license. Recipients may choose which of these licenses
|
|
11
|
+
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
|
|
12
|
+
# respectively. If you choose the GPL option then the following text applies
|
|
13
|
+
# (but note that there is still no warranty even if you opt for BSD instead):
|
|
14
|
+
#
|
|
15
|
+
# This program is free software: you can redistribute it and/or modify
|
|
16
|
+
# it under the terms of the GNU General Public License as published by
|
|
17
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
18
|
+
# (at your option) any later version.
|
|
19
|
+
#
|
|
20
|
+
# This program is distributed in the hope that it will be useful,
|
|
21
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
22
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
23
|
+
# GNU General Public License for more details.
|
|
24
|
+
#
|
|
25
|
+
# You should have received a copy of the GNU General Public License
|
|
26
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
__all__ = ("SimplePipelineExecutor",)
|
|
31
|
+
|
|
32
|
+
import datetime
|
|
33
|
+
import getpass
|
|
34
|
+
import itertools
|
|
35
|
+
import os
|
|
36
|
+
from collections.abc import Iterable, Iterator, Mapping
|
|
37
|
+
from typing import Any, cast
|
|
38
|
+
|
|
39
|
+
from lsst.daf.butler import (
|
|
40
|
+
Butler,
|
|
41
|
+
CollectionType,
|
|
42
|
+
DataCoordinate,
|
|
43
|
+
DatasetRef,
|
|
44
|
+
DimensionDataExtractor,
|
|
45
|
+
DimensionGroup,
|
|
46
|
+
Quantum,
|
|
47
|
+
)
|
|
48
|
+
from lsst.pex.config import Config
|
|
49
|
+
|
|
50
|
+
from ._instrument import Instrument
|
|
51
|
+
from ._quantumContext import ExecutionResources
|
|
52
|
+
from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
|
|
53
|
+
from .graph import QuantumGraph
|
|
54
|
+
from .pipeline import Pipeline
|
|
55
|
+
from .pipeline_graph import PipelineGraph
|
|
56
|
+
from .pipelineTask import PipelineTask
|
|
57
|
+
from .single_quantum_executor import SingleQuantumExecutor
|
|
58
|
+
from .taskFactory import TaskFactory
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class SimplePipelineExecutor:
|
|
62
|
+
"""A simple, high-level executor for pipelines.
|
|
63
|
+
|
|
64
|
+
Parameters
|
|
65
|
+
----------
|
|
66
|
+
quantum_graph : `.QuantumGraph`
|
|
67
|
+
Graph to be executed.
|
|
68
|
+
butler : `~lsst.daf.butler.Butler`
|
|
69
|
+
Object that manages all I/O. Must be initialized with `collections`
|
|
70
|
+
and `run` properties that correspond to the input and output
|
|
71
|
+
collections, which must be consistent with those used to create
|
|
72
|
+
``quantum_graph``.
|
|
73
|
+
resources : `.ExecutionResources`
|
|
74
|
+
The resources available to each quantum being executed.
|
|
75
|
+
raise_on_partial_outputs : `bool`, optional
|
|
76
|
+
If `True` raise exceptions chained by `.AnnotatedPartialOutputsError`
|
|
77
|
+
immediately, instead of considering the partial result a success and
|
|
78
|
+
continuing to run downstream tasks.
|
|
79
|
+
|
|
80
|
+
Notes
|
|
81
|
+
-----
|
|
82
|
+
Most callers should use one of the `classmethod` factory functions
|
|
83
|
+
(`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
|
|
84
|
+
invoking the constructor directly; these guarantee that the
|
|
85
|
+
`~lsst.daf.butler.Butler` and `.QuantumGraph` are created consistently.
|
|
86
|
+
|
|
87
|
+
This class is intended primarily to support unit testing and small-scale
|
|
88
|
+
integration testing of `.PipelineTask` classes. It deliberately lacks many
|
|
89
|
+
features present in the command-line-only ``pipetask`` tool in order to
|
|
90
|
+
keep the implementation simple. Python callers that need more
|
|
91
|
+
sophistication should call lower-level tools like
|
|
92
|
+
`~.quantum_graph_builder.QuantumGraphBuilder` and
|
|
93
|
+
`.single_quantum_executor.SingleQuantumExecutor` directly.
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
def __init__(
    self,
    quantum_graph: QuantumGraph,
    butler: Butler,
    resources: ExecutionResources | None = None,
    raise_on_partial_outputs: bool = True,
):
    # Graph of quanta to be executed; assumed consistent with ``butler``'s
    # input/output collections (see class docstring).
    self.quantum_graph = quantum_graph
    # Butler that performs all dataset I/O during execution.
    self.butler = butler
    # Per-quantum execution resources; ``None`` means executor defaults.
    self.resources = resources
    # If True, exceptions chained by AnnotatedPartialOutputsError are raised
    # immediately instead of treating partial outputs as success.
    self.raise_on_partial_outputs = raise_on_partial_outputs
|
|
107
|
+
|
|
108
|
+
@classmethod
def prep_butler(
    cls,
    root: str,
    inputs: Iterable[str],
    output: str,
    output_run: str | None = None,
) -> Butler:
    """Construct a writeable `~lsst.daf.butler.Butler` with collections
    set up for processing.

    Parameters
    ----------
    root : `str`
        Root of the butler data repository; must already exist, with all
        necessary input data.
    inputs : `~collections.abc.Iterable` [ `str` ]
        Collections to search for all input datasets, in search order.
    output : `str`
        Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
        collection to create that will combine both inputs and outputs.
    output_run : `str`, optional
        Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
        directly hold all output datasets.  If not provided, a name will be
        created from ``output`` and a timestamp.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        Butler client instance compatible with all `classmethod` factories.
        Always writeable.
    """
    run_name = output_run if output_run is not None else f"{output}/{Instrument.makeCollectionTimestamp()}"
    # Connect with no default collections first; they don't exist yet.
    bootstrap = Butler.from_config(root, writeable=True)
    bootstrap.registry.registerCollection(run_name, CollectionType.RUN)
    bootstrap.registry.registerCollection(output, CollectionType.CHAINED)
    # The output RUN goes first in the chain so new outputs shadow inputs.
    bootstrap.registry.setCollectionChain(output, [run_name, *inputs])
    # Reconnect so the butler can infer default data IDs from the
    # now-existing collections.
    return Butler.from_config(butler=bootstrap, collections=[output], run=run_name)
|
|
155
|
+
|
|
156
|
+
@classmethod
def from_pipeline_filename(
    cls,
    pipeline_filename: str,
    *,
    where: str = "",
    bind: Mapping[str, Any] | None = None,
    butler: Butler,
    resources: ExecutionResources | None = None,
    raise_on_partial_outputs: bool = True,
    attach_datastore_records: bool = False,
    output: str | None = None,
    output_run: str | None = None,
) -> SimplePipelineExecutor:
    """Create an executor by building a QuantumGraph from an on-disk
    pipeline YAML file.

    Parameters
    ----------
    pipeline_filename : `str`
        Name of the YAML file to load the pipeline definition from.
    where : `str`, optional
        Data ID query expression that constrains the quanta generated.
    bind : `~collections.abc.Mapping`, optional
        Literal values to inject into the ``where`` expression, keyed by
        the identifiers they replace.
    butler : `~lsst.daf.butler.Butler`
        Butler that manages all I/O.  `prep_butler` can be used to create
        one.
    resources : `.ExecutionResources`
        The resources available to each quantum being executed.
    raise_on_partial_outputs : `bool`, optional
        If `True` raise exceptions chained by
        `.AnnotatedPartialOutputsError` immediately, instead of considering
        the partial result a success and continuing to run downstream
        tasks.
    attach_datastore_records : `bool`, optional
        Whether to attach datastore records to the quantum graph; usually
        unnecessary outside datastore-record-dependent tests.
    output : `str`, optional
        Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
        collection combining both inputs and outputs.
    output_run : `str`, optional
        Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
        directly hold all output datasets.  If not provided, a name will be
        created from ``output`` and a timestamp.

    Returns
    -------
    executor : `SimplePipelineExecutor`
        An executor instance containing the constructed `.QuantumGraph` and
        `~lsst.daf.butler.Butler`, ready for `run` to be called.
    """
    loaded_pipeline = Pipeline.fromFile(pipeline_filename)
    # Forward everything except the pipeline itself unchanged.
    forwarded: dict[str, Any] = dict(
        butler=butler,
        where=where,
        bind=bind,
        resources=resources,
        raise_on_partial_outputs=raise_on_partial_outputs,
        attach_datastore_records=attach_datastore_records,
        output=output,
        output_run=output_run,
    )
    return cls.from_pipeline(loaded_pipeline, **forwarded)
|
|
222
|
+
|
|
223
|
+
@classmethod
def from_task_class(
    cls,
    task_class: type[PipelineTask],
    config: Config | None = None,
    label: str | None = None,
    *,
    where: str = "",
    bind: Mapping[str, Any] | None = None,
    butler: Butler,
    resources: ExecutionResources | None = None,
    raise_on_partial_outputs: bool = True,
    attach_datastore_records: bool = False,
    output: str | None = None,
    output_run: str | None = None,
) -> SimplePipelineExecutor:
    """Create an executor by building a QuantumGraph from a pipeline
    containing a single task.

    Parameters
    ----------
    task_class : `type`
        A concrete `.PipelineTask` subclass.
    config : `~lsst.pex.config.Config`, optional
        Configuration for the task.  If not provided, task-level defaults
        will be used (no per-instrument overrides).
    label : `str`, optional
        Label for the task in its pipeline; defaults to
        ``task_class._DefaultName``.
    where : `str`, optional
        Data ID query expression that constrains the quanta generated.
    bind : `~collections.abc.Mapping`, optional
        Literal values to inject into the ``where`` expression, keyed by
        the identifiers they replace.
    butler : `~lsst.daf.butler.Butler`
        Butler that manages all I/O.  `prep_butler` can be used to create
        one.
    resources : `.ExecutionResources`
        The resources available to each quantum being executed.
    raise_on_partial_outputs : `bool`, optional
        If `True` raise exceptions chained by
        `.AnnotatedPartialOutputsError` immediately, instead of considering
        the partial result a success and continuing to run downstream
        tasks.
    attach_datastore_records : `bool`, optional
        Whether to attach datastore records to the quantum graph; usually
        unnecessary outside datastore-record-dependent tests.
    output : `str`, optional
        Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
        collection combining both inputs and outputs.
    output_run : `str`, optional
        Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
        directly hold all output datasets.  If not provided, a name will be
        created from ``output`` and a timestamp.

    Returns
    -------
    executor : `SimplePipelineExecutor`
        An executor instance containing the constructed `.QuantumGraph` and
        `~lsst.daf.butler.Butler`, ready for `run` to be called.

    Raises
    ------
    TypeError
        If ``config`` is not an instance of ``task_class.ConfigClass``.
    """
    # Defaults must be filled in *before* the isinstance check so that an
    # omitted config (None) is accepted.
    effective_config = task_class.ConfigClass() if config is None else config
    effective_label = task_class._DefaultName if label is None else label
    if not isinstance(effective_config, task_class.ConfigClass):
        raise TypeError(
            f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
            f"got {type(effective_config).__name__}."
        )
    graph = PipelineGraph()
    graph.add_task(label=effective_label, task_class=task_class, config=effective_config)
    return cls.from_pipeline_graph(
        graph,
        butler=butler,
        where=where,
        bind=bind,
        resources=resources,
        raise_on_partial_outputs=raise_on_partial_outputs,
        attach_datastore_records=attach_datastore_records,
        output=output,
        output_run=output_run,
    )
|
|
307
|
+
|
|
308
|
+
@classmethod
def from_pipeline(
    cls,
    pipeline: Pipeline,
    *,
    where: str = "",
    bind: Mapping[str, Any] | None = None,
    butler: Butler,
    resources: ExecutionResources | None = None,
    raise_on_partial_outputs: bool = True,
    attach_datastore_records: bool = False,
    output: str | None = None,
    output_run: str | None = None,
) -> SimplePipelineExecutor:
    """Create an executor by building a QuantumGraph from an in-memory
    pipeline.

    Parameters
    ----------
    pipeline : `.Pipeline` or `~collections.abc.Iterable` [ `.TaskDef` ]
        A Python object describing the tasks to run, along with their
        labels and configuration.
    where : `str`, optional
        Data ID query expression that constrains the quanta generated.
    bind : `~collections.abc.Mapping`, optional
        Literal values to inject into the ``where`` expression, keyed by
        the identifiers they replace.
    butler : `~lsst.daf.butler.Butler`
        Butler that manages all I/O.  `prep_butler` can be used to create
        one.
    resources : `.ExecutionResources`
        The resources available to each quantum being executed.
    raise_on_partial_outputs : `bool`, optional
        If `True` raise exceptions chained by
        `.AnnotatedPartialOutputsError` immediately, instead of considering
        the partial result a success and continuing to run downstream
        tasks.
    attach_datastore_records : `bool`, optional
        Whether to attach datastore records to the quantum graph; usually
        unnecessary outside datastore-record-dependent tests.
    output : `str`, optional
        Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
        collection combining both inputs and outputs.
    output_run : `str`, optional
        Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
        directly hold all output datasets.  If not provided, a name will be
        created from ``output`` and a timestamp.

    Returns
    -------
    executor : `SimplePipelineExecutor`
        An executor instance containing the constructed `.QuantumGraph` and
        `~lsst.daf.butler.Butler`, ready for `run` to be called.
    """
    # Convert to graph form and delegate; all other arguments pass through.
    return cls.from_pipeline_graph(
        pipeline.to_graph(),
        where=where,
        bind=bind,
        butler=butler,
        resources=resources,
        raise_on_partial_outputs=raise_on_partial_outputs,
        attach_datastore_records=attach_datastore_records,
        output=output,
        output_run=output_run,
    )
|
|
375
|
+
|
|
376
|
+
@classmethod
def from_pipeline_graph(
    cls,
    pipeline_graph: PipelineGraph,
    *,
    where: str = "",
    bind: Mapping[str, Any] | None = None,
    butler: Butler,
    resources: ExecutionResources | None = None,
    raise_on_partial_outputs: bool = True,
    attach_datastore_records: bool = False,
    output: str | None = None,
    output_run: str | None = None,
) -> SimplePipelineExecutor:
    """Create an executor by building a QuantumGraph from an in-memory
    pipeline graph.

    Parameters
    ----------
    pipeline_graph : `~.pipeline_graph.PipelineGraph`
        A Python object describing the tasks to run, along with their
        labels and configuration, in graph form.  Will be resolved against
        the given ``butler``, with any existing resolutions ignored.
    where : `str`, optional
        Data ID query expression that constrains the quanta generated.
    bind : `~collections.abc.Mapping`, optional
        Literal values to inject into the ``where`` expression, keyed by
        the identifiers they replace.
    butler : `~lsst.daf.butler.Butler`
        Butler that manages all I/O.  `prep_butler` can be used to create
        one.  Must have its `~lsst.daf.butler.Butler.run` and
        ``butler.collections.defaults`` not empty and not `None`.
    resources : `.ExecutionResources`
        The resources available to each quantum being executed.
    raise_on_partial_outputs : `bool`, optional
        If `True` raise exceptions chained by
        `.AnnotatedPartialOutputsError` immediately, instead of considering
        the partial result a success and continuing to run downstream
        tasks.
    attach_datastore_records : `bool`, optional
        Whether to attach datastore records to the quantum graph; usually
        unnecessary outside datastore-record-dependent tests.
    output : `str`, optional
        Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
        collection combining both inputs and outputs.
    output_run : `str`, optional
        Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
        directly hold all output datasets.  If not provided, a name will be
        created from ``output`` and a timestamp.

    Returns
    -------
    executor : `SimplePipelineExecutor`
        An executor instance containing the constructed `.QuantumGraph` and
        `~lsst.daf.butler.Butler`, ready for `run` to be called.

    Raises
    ------
    TypeError
        If neither ``output`` nor ``output_run`` can be determined.
    """
    # Resolve the output RUN collection: explicit argument wins, then the
    # butler's default run, then a timestamped child of ``output``.
    if output_run is None:
        output_run = butler.run
        if output_run is None:
            if output is None:
                raise TypeError("At least one of output or output_run must be provided.")
            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"

    graph_builder = AllDimensionsQuantumGraphBuilder(
        pipeline_graph, butler, where=where, bind=bind, output_run=output_run
    )
    # Provenance metadata mirroring what ``pipetask`` records.
    graph_metadata = {
        "input": list(butler.collections.defaults),
        "output": output,
        "output_run": output_run,
        "skip_existing_in": [],
        "skip_existing": False,
        "data_query": where,
        "user": getpass.getuser(),
        "time": str(datetime.datetime.now()),
    }
    built_graph = graph_builder.build(
        metadata=graph_metadata, attach_datastore_records=attach_datastore_records
    )
    return cls(
        quantum_graph=built_graph,
        butler=butler,
        resources=resources,
        raise_on_partial_outputs=raise_on_partial_outputs,
    )
|
|
463
|
+
|
|
464
|
+
def use_local_butler(
    self, root: str, register_dataset_types: bool = True, transfer_dimensions: bool = True
) -> Butler:
    """Transfer all inputs to a local data repository and set the executor
    to write outputs to it.

    Parameters
    ----------
    root : `str`
        Path to the local data repository; created if it does not exist.
    register_dataset_types : `bool`, optional
        Whether to register dataset types in the new repository.  If
        `False`, the local data repository must already exist and already
        have all input dataset types registered.
    transfer_dimensions : `bool`, optional
        Whether to transfer dimension records to the new repository.  If
        `False`, the local data repository must already exist and already
        have all needed dimension records.

    Returns
    -------
    butler : `lsst.daf.butler.Butler`
        Writeable butler for local data repository.

    Notes
    -----
    The input collection structure from the original data repository is not
    preserved by this method (it cannot be reconstructed from the quantum
    graph).  Instead, a `~lsst.daf.butler.CollectionType.TAGGED` collection
    is created to gather all inputs, and appended to the output
    `~lsst.daf.butler.CollectionType.CHAINED` collection after the output
    `~lsst.daf.butler.CollectionType.RUN` collection.  Calibration inputs
    with the same data ID but multiple validity ranges are *not* included
    in that `~lsst.daf.butler.CollectionType.TAGGED`; they are still
    transferred to the local data repository, but can only be found via the
    quantum graph or their original `~lsst.daf.butler.CollectionType.RUN`
    collections.
    """
    # Create the local repo on first use; otherwise reuse what's there.
    if not os.path.exists(root):
        Butler.makeRepo(root)
    out_butler = Butler.from_config(root, writeable=True)

    # Recreate the output RUN/CHAINED collection structure recorded in the
    # quantum graph's metadata.
    output_run = self.quantum_graph.metadata["output_run"]
    out_butler.collections.register(output_run, CollectionType.RUN)
    output = self.quantum_graph.metadata["output"]
    inputs: str | None = None
    if output is not None:
        inputs = f"{output}/inputs"
        out_butler.collections.register(output, CollectionType.CHAINED)
        out_butler.collections.register(inputs, CollectionType.TAGGED)
        # RUN first so outputs shadow the tagged inputs in searches.
        out_butler.collections.redefine_chain(output, [output_run, inputs])

    if transfer_dimensions:
        # We can't just let the transfer_from call below take care of this
        # because we need dimensions for outputs as well as inputs.  And if
        # we have to do the outputs explicitly, it's more efficient to do
        # the inputs at the same time since a lot of those dimensions will
        # be the same.
        self._transfer_qg_dimension_records(out_butler)

    # Extract overall-input DatasetRefs to transfer and possibly insert
    # into a TAGGED collection.
    refs: set[DatasetRef] = set()
    # Per dataset type: data ID -> ref, or None when the data ID is
    # ambiguous (multiple refs) and must be excluded from tagging.
    to_tag_by_type: dict[str, dict[DataCoordinate, DatasetRef | None]] = {}
    pipeline_graph = self.quantum_graph.pipeline_graph
    for name, dataset_type_node in pipeline_graph.iter_overall_inputs():
        assert dataset_type_node is not None, "PipelineGraph should be resolved."
        to_tag_for_type = to_tag_by_type.setdefault(name, {})
        for task_node in pipeline_graph.consumers_of(name):
            for quantum in self.quantum_graph.get_task_quanta(task_node.label).values():
                for ref in quantum.inputs[name]:
                    # Normalize component refs etc. to the parent type.
                    ref = dataset_type_node.generalize_ref(ref)
                    refs.add(ref)
                    if to_tag_for_type.setdefault(ref.dataId, ref) != ref:
                        # There is already a dataset with the same data ID
                        # and dataset type, but a different UUID/run.  This
                        # can only happen for calibrations found in
                        # calibration collections, and for now we have no
                        # choice but to leave them out of the TAGGED inputs
                        # collection in the local butler.
                        to_tag_for_type[ref.dataId] = None

    # Dimensions were (optionally) handled above, so skip them here.
    out_butler.transfer_from(
        self.butler,
        refs,
        register_dataset_types=register_dataset_types,
        transfer_dimensions=False,
    )

    if inputs is not None:
        # Flatten the unambiguous refs and tag them as the inputs
        # collection.
        to_tag_flat: list[DatasetRef] = []
        for ref_map in to_tag_by_type.values():
            for tag_ref in ref_map.values():
                if tag_ref is not None:
                    to_tag_flat.append(tag_ref)
        out_butler.registry.associate(inputs, to_tag_flat)

    # Point the local butler's defaults at the new output collections and
    # make it this executor's butler from now on.
    out_butler.registry.defaults = self.butler.registry.defaults.clone(collections=output, run=output_run)
    self.butler = out_butler
    return self.butler
|
|
564
|
+
|
|
565
|
+
def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
    """Run all the quanta in the `.QuantumGraph` in topological order.

    Use this method to run all quanta in the graph.  Use `as_generator` to
    get a generator to run the quanta one at a time.

    Parameters
    ----------
    register_dataset_types : `bool`, optional
        If `True`, register all output dataset types before executing any
        quanta.
    save_versions : `bool`, optional
        If `True` (default), save a package versions dataset.

    Returns
    -------
    quanta : `list` [ `~lsst.daf.butler.Quantum` ]
        Executed quanta.

    Notes
    -----
    A topological ordering is not in general unique, but no other
    guarantees are made about the order in which quanta are processed.
    """
    # Fully consume the lazy generator so every quantum executes.
    quanta_iter = self.as_generator(
        register_dataset_types=register_dataset_types, save_versions=save_versions
    )
    return [*quanta_iter]
|
|
593
|
+
|
|
594
|
+
def as_generator(
    self, register_dataset_types: bool = False, save_versions: bool = True
) -> Iterator[Quantum]:
    """Yield quanta in the `.QuantumGraph` in topological order.

    These quanta will be run as the returned iterator is iterated over.
    Use this method to run the quanta one at a time.  Use `run` to run all
    quanta in the graph.

    Parameters
    ----------
    register_dataset_types : `bool`, optional
        If `True`, register all output dataset types before executing any
        quanta.
    save_versions : `bool`, optional
        If `True` (default), save a package versions dataset.

    Returns
    -------
    quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
        Executed quanta.

    Notes
    -----
    Global initialization steps (see `.QuantumGraph.init_output_run`) are
    performed immediately when this method is called, but individual quanta
    are not actually executed until the returned iterator is iterated over.

    A topological ordering is not in general unique, but no other
    guarantees are made about the order in which quanta are processed.
    """
    # Run-level initialization happens eagerly, right now.
    if register_dataset_types:
        self.quantum_graph.pipeline_graph.register_dataset_types(self.butler)
    self.quantum_graph.write_configs(self.butler, compare_existing=False)
    self.quantum_graph.write_init_outputs(self.butler, skip_existing=False)
    if save_versions:
        self.quantum_graph.write_packages(self.butler, compare_existing=False)
    executor = SingleQuantumExecutor(
        butler=self.butler,
        task_factory=TaskFactory(),
        resources=self.resources,
        raise_on_partial_outputs=self.raise_on_partial_outputs,
    )
    # Return a lazy ``map`` rather than making this method itself a
    # generator function: the init writes above must happen when this
    # method is called, not when the first quantum is pulled, so callers
    # can inspect the repo state in between.
    return map(
        lambda node: executor.execute(node.task_node, node.quantum)[0],
        self.quantum_graph,
    )
|
|
646
|
+
|
|
647
|
+
def _transfer_qg_dimension_records(self, out_butler: Butler) -> None:
    """Transfer all dimension records from the quantum graph to a butler.

    Parameters
    ----------
    out_butler : `lsst.daf.butler.Butler`
        Butler to transfer records to.
    """
    pipeline_graph = self.quantum_graph.pipeline_graph
    # Union of the dimensions of every dataset type and task in the
    # pipeline, including prerequisites.
    all_dimensions = DimensionGroup.union(
        *pipeline_graph.group_by_dimensions(prerequisites=True).keys(),
        universe=self.butler.dimensions,
    )
    dimension_data_extractor = DimensionDataExtractor.from_dimension_group(all_dimensions)
    # Feed every data ID in the graph (quantum data IDs plus all input and
    # output dataset data IDs) into the extractor.
    for task_node in pipeline_graph.tasks.values():
        task_quanta = self.quantum_graph.get_task_quanta(task_node.label)
        for quantum in task_quanta.values():
            dimension_data_extractor.update([cast(DataCoordinate, quantum.dataId)])
            for refs in itertools.chain(quantum.inputs.values(), quantum.outputs.values()):
                dimension_data_extractor.update(ref.dataId for ref in refs)
    # Insert the accumulated records, skipping elements whose records live
    # in another element's table and any that already exist.
    for element_name in all_dimensions.elements:
        record_set = dimension_data_extractor.records.get(element_name)
        if record_set and record_set.element.has_own_table:
            out_butler.registry.insertDimensionData(
                record_set.element,
                *record_set,
                skip_existing=True,
            )