lsst-pipe-base 29.2025.3000-py3-none-any.whl → 29.2025.3200-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/__init__.py +0 -1
- lsst/pipe/base/_datasetQueryConstraints.py +1 -1
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +10 -46
- lsst/pipe/base/caching_limited_butler.py +8 -4
- lsst/pipe/base/connectionTypes.py +19 -19
- lsst/pipe/base/connections.py +2 -2
- lsst/pipe/base/exec_fixup_data_id.py +131 -0
- lsst/pipe/base/execution_graph_fixup.py +69 -0
- lsst/pipe/base/graph/graphSummary.py +4 -4
- lsst/pipe/base/log_capture.py +227 -0
- lsst/pipe/base/mp_graph_executor.py +786 -0
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +40 -10
- lsst/pipe/base/pipeline_graph/_tasks.py +106 -0
- lsst/pipe/base/pipeline_graph/io.py +1 -1
- lsst/pipe/base/quantum_graph_builder.py +85 -58
- lsst/pipe/base/quantum_graph_executor.py +125 -0
- lsst/pipe/base/quantum_graph_skeleton.py +60 -1
- lsst/pipe/base/quantum_reports.py +334 -0
- lsst/pipe/base/script/transfer_from_graph.py +4 -1
- lsst/pipe/base/separable_pipeline_executor.py +296 -0
- lsst/pipe/base/simple_pipeline_executor.py +674 -0
- lsst/pipe/base/single_quantum_executor.py +635 -0
- lsst/pipe/base/taskFactory.py +18 -12
- lsst/pipe/base/tests/in_memory_limited_butler.py +223 -0
- lsst/pipe/base/tests/mocks/__init__.py +1 -0
- lsst/pipe/base/tests/mocks/_in_memory_repo.py +357 -0
- lsst/pipe/base/tests/mocks/_pipeline_task.py +19 -2
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/RECORD +38 -28
- lsst/pipe/base/executionButlerBuilder.py +0 -493
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/zip-safe +0 -0
lsst/pipe/base/single_quantum_executor.py
ADDED
@@ -0,0 +1,635 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+__all__ = ["SingleQuantumExecutor"]
+
+import logging
+import time
+import uuid
+from collections import defaultdict
+from collections.abc import Callable, Mapping
+from itertools import chain
+from typing import Any, cast
+
+from lsst.daf.butler import (
+    Butler,
+    ButlerMetrics,
+    DatasetRef,
+    DatasetType,
+    LimitedButler,
+    NamedKeyDict,
+    Quantum,
+)
+from lsst.utils.timer import logInfo
+
+from ._instrument import Instrument
+from ._quantumContext import ExecutionResources, QuantumContext
+from ._status import AnnotatedPartialOutputsError, InvalidQuantumError, NoWorkFound, QuantumSuccessCaveats
+from .connections import AdjustQuantumHelper
+from .log_capture import LogCapture
+from .pipeline_graph import TaskNode
+from .pipelineTask import PipelineTask
+from .quantum_graph_executor import QuantumExecutor
+from .quantum_reports import QuantumReport
+from .task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
+from .taskFactory import TaskFactory
+
+_LOG = logging.getLogger(__name__)
+
+
+class SingleQuantumExecutor(QuantumExecutor):
+    """Executor class which runs one Quantum at a time.
+
+    Parameters
+    ----------
+    butler : `~lsst.daf.butler.Butler` or `None`, optional
+        Data butler, `None` means that a limited butler should be used instead.
+    task_factory : `.TaskFactory`, optional
+        Instance of a task factory. Defaults to a new instance of
+        `lsst.pipe.base.TaskFactory`.
+    skip_existing_in : `str` or `~collections.abc.Iterable` [ `str` ]
+        Expressions representing the collections to search for existing output
+        datasets. See :ref:`daf_butler_ordered_collection_searches` for allowed
+        types. This class only checks for the presence of butler output run in
+        the list of collections. If the output run is present in the list then
+        the quanta whose complete outputs exist in the output run will be
+        skipped. `None` or empty string/sequence disables skipping.
+    clobber_outputs : `bool`, optional
+        If `True`, then outputs from a quantum that exist in output run
+        collection will be removed prior to executing a quantum. If
+        ``skip_existing_in`` contains output run, then only partial outputs
+        from a quantum will be removed. Only used when ``butler`` is not
+        `None`.
+    enable_lsst_debug : `bool`, optional
+        Enable debugging with ``lsstDebug`` facility for a task.
+    limited_butler_factory : `Callable`, optional
+        A method that creates a `~lsst.daf.butler.LimitedButler` instance for a
+        given Quantum. This parameter must be defined if ``butler`` is `None`.
+        If ``butler`` is not `None` then this parameter is ignored.
+    resources : `.ExecutionResources`, optional
+        The resources available to this quantum when executing.
+    skip_existing : `bool`, optional
+        If `True`, skip quanta whose metadata datasets are already stored.
+        Unlike ``skip_existing_in``, this works with limited butlers as well as
+        full butlers. Always set to `True` if ``skip_existing_in`` matches
+        ``butler.run``.
+    assume_no_existing_outputs : `bool`, optional
+        If `True`, assume preexisting outputs are impossible (e.g. because this
+        is known by higher-level code to be a new ``RUN`` collection), and do
+        not look for them. This causes the ``skip_existing`` and
+        ``clobber_outputs`` options to be ignored, but unlike just setting both
+        of those to `False`, it also avoids all dataset existence checks.
+    raise_on_partial_outputs : `bool`, optional
+        If `True` raise exceptions chained by `.AnnotatedPartialOutputsError`
+        immediately, instead of considering the partial result a success and
+        continuing to run downstream tasks.
+    job_metadata : `~collections.abc.Mapping`
+        Mapping with extra metadata to embed within the quantum metadata under
+        the "job" key. This is intended to correspond to information common
+        to all quanta being executed in a single process, such as the time
+        taken to load the quantum graph in a BPS job.
+    """
+
+    def __init__(
+        self,
+        *,
+        butler: Butler | None = None,
+        task_factory: TaskFactory | None = None,
+        skip_existing_in: Any = None,
+        clobber_outputs: bool = False,
+        enable_lsst_debug: bool = False,
+        limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
+        resources: ExecutionResources | None = None,
+        skip_existing: bool = False,
+        assume_no_existing_outputs: bool = False,
+        raise_on_partial_outputs: bool = True,
+        job_metadata: Mapping[str, int | str | float] | None = None,
+    ):
+        self._butler = butler
+        self._task_factory = task_factory if task_factory is not None else TaskFactory()
+        self._clobber_outputs = clobber_outputs
+        self._enable_lsst_debug = enable_lsst_debug
+        self._limited_butler_factory = limited_butler_factory
+        self._resources = resources
+        self._assume_no_existing_outputs = assume_no_existing_outputs
+        self._raise_on_partial_outputs = raise_on_partial_outputs
+        self._job_metadata = job_metadata
+
+        if self._butler is None:
+            assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"
+
+        # Find whether output run is in skip_existing_in.
+        self._skip_existing = skip_existing
+        if self._butler is not None and skip_existing_in and not self._skip_existing:
+            self._skip_existing = self._butler.run in self._butler.collections.query(
+                skip_existing_in, flatten_chains=True
+            )
+
+    def execute(
+        self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
+    ) -> tuple[Quantum, QuantumReport | None]:
+        # Docstring inherited from QuantumExecutor.execute
+        assert quantum.dataId is not None, "Quantum DataId cannot be None"
+
+        if self._butler is not None:
+            self._butler.registry.refresh()
+
+        result = self._execute(task_node, quantum, quantum_id=quantum_id)
+        report = QuantumReport(dataId=quantum.dataId, taskLabel=task_node.label)
+        return result, report
+
+    def _execute(
+        self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
+    ) -> Quantum:
+        """Execute the quantum.
+
+        Internal implementation of `execute()`.
+        """
+        startTime = time.time()
+
+        # Make a limited butler instance if needed.
+        limited_butler: LimitedButler
+        if self._butler is not None:
+            limited_butler = self._butler
+        else:
+            # We check this in constructor, but mypy needs this check here.
+            assert self._limited_butler_factory is not None
+            limited_butler = self._limited_butler_factory(quantum)
+
+        if self._butler is not None:
+            log_capture = LogCapture.from_full(self._butler)
+        else:
+            log_capture = LogCapture.from_limited(limited_butler)
+        with log_capture.capture_logging(task_node, quantum) as captureLog:
+            # Save detailed resource usage before task start to metadata.
+            quantumMetadata = _TASK_METADATA_TYPE()
+            logInfo(None, "prep", metadata=quantumMetadata)  # type: ignore[arg-type]
+
+            _LOG.info(
+                "Preparing execution of quantum for label=%s dataId=%s.", task_node.label, quantum.dataId
+            )
+
+            # check whether to skip or delete old outputs, if it returns True
+            # or raises an exception do not try to store logs, as they may be
+            # already in butler.
+            captureLog.store = False
+            if self._check_existing_outputs(quantum, task_node, limited_butler):
+                _LOG.info(
+                    "Skipping already-successful quantum for label=%s dataId=%s.",
+                    task_node.label,
+                    quantum.dataId,
+                )
+                return quantum
+            captureLog.store = True
+
+            try:
+                quantum = self._updated_quantum_inputs(quantum, task_node, limited_butler)
+            except NoWorkFound as exc:
+                _LOG.info(
+                    "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
+                    task_node.label,
+                    quantum.dataId,
+                    str(exc),
+                )
+                quantumMetadata["caveats"] = QuantumSuccessCaveats.from_adjust_quantum_no_work().value
+                # Make empty metadata that looks something like what a
+                # do-nothing task would write (but we don't bother with empty
+                # nested PropertySets for subtasks). This is slightly
+                # duplicative with logic in pipe_base that we can't easily call
+                # from here; we'll fix this on DM-29761.
+                logInfo(None, "end", metadata=quantumMetadata)  # type: ignore[arg-type]
+                fullMetadata = _TASK_FULL_METADATA_TYPE()
+                fullMetadata[task_node.label] = _TASK_METADATA_TYPE()
+                fullMetadata["quantum"] = quantumMetadata
+                if self._job_metadata is not None:
+                    fullMetadata["job"] = self._job_metadata
+                self._write_metadata(quantum, fullMetadata, task_node, limited_butler)
+                return quantum
+
+            # enable lsstDebug debugging
+            if self._enable_lsst_debug:
+                try:
+                    _LOG.debug("Will try to import debug.py")
+                    import debug  # type: ignore # noqa:F401
+                except ImportError:
+                    _LOG.warning("No 'debug' module found.")
+
+            # initialize global state
+            self._init_globals(quantum)
+
+            # Ensure that we are executing a frozen config
+            task_node.config.freeze()
+            logInfo(None, "init", metadata=quantumMetadata)  # type: ignore[arg-type]
+            init_input_refs = list(quantum.initInputs.values())
+
+            _LOG.info(
+                "Constructing task and executing quantum for label=%s dataId=%s.",
+                task_node.label,
+                quantum.dataId,
+            )
+            task = self._task_factory.makeTask(task_node, limited_butler, init_input_refs)
+            logInfo(None, "start", metadata=quantumMetadata)  # type: ignore[arg-type]
+            try:
+                caveats, outputsPut, butler_metrics = self._run_quantum(
+                    task, quantum, task_node, limited_butler, quantum_id=quantum_id
+                )
+            except Exception as e:
+                _LOG.error(
+                    "Execution of task '%s' on quantum %s failed. Exception %s: %s",
+                    task_node.label,
+                    quantum.dataId,
+                    e.__class__.__name__,
+                    str(e),
+                )
+                raise
+            else:
+                quantumMetadata["butler_metrics"] = butler_metrics.model_dump()
+                quantumMetadata["caveats"] = caveats.value
+                # Stringify the UUID for easier compatibility with
+                # PropertyList.
+                quantumMetadata["outputs"] = [str(output) for output in outputsPut]
+            logInfo(None, "end", metadata=quantumMetadata)  # type: ignore[arg-type]
+            fullMetadata = task.getFullMetadata()
+            fullMetadata["quantum"] = quantumMetadata
+            if self._job_metadata is not None:
+                fullMetadata["job"] = self._job_metadata
+            self._write_metadata(quantum, fullMetadata, task_node, limited_butler)
+            stopTime = time.time()
+            _LOG.info(
+                "Execution of task '%s' on quantum %s took %.3f seconds",
+                task_node.label,
+                quantum.dataId,
+                stopTime - startTime,
+            )
+        return quantum
+
+    def _check_existing_outputs(
+        self, quantum: Quantum, task_node: TaskNode, /, limited_butler: LimitedButler
+    ) -> bool:
+        """Decide whether this quantum needs to be executed.
+
+        If only partial outputs exist then they are removed if
+        ``clobberOutputs`` is True, otherwise an exception is raised.
+
+        The ``LimitedButler`` is used for everything, and should be set to
+        ``self.butler`` if no separate ``LimitedButler`` is available.
+
+        Parameters
+        ----------
+        quantum : `~lsst.daf.butler.Quantum`
+            Quantum to check for existing outputs.
+        task_node : `~.pipeline_graph.TaskNode`
+            Task definition structure.
+        limited_butler : `~lsst.daf.butler.LimitedButler`
+            Butler to use for querying and clobbering.
+
+        Returns
+        -------
+        exist : `bool`
+            `True` if ``self.skipExisting`` is defined, and a previous
+            execution of this quanta appears to have completed successfully
+            (either because metadata was written or all datasets were written).
+            `False` otherwise.
+
+        Raises
+        ------
+        RuntimeError
+            Raised if some outputs exist and some not.
+        """
+        if self._assume_no_existing_outputs:
+            return False
+
+        if self._skip_existing:
+            _LOG.debug(
+                "Checking existence of metadata from previous execution of label=%s dataId=%s.",
+                task_node.label,
+                quantum.dataId,
+            )
+            # Metadata output exists; this is sufficient to assume the previous
+            # run was successful and should be skipped.
+            [metadata_ref] = quantum.outputs[task_node.metadata_output.dataset_type_name]
+            if metadata_ref is not None:
+                if limited_butler.stored(metadata_ref):
+                    return True
+
+        # Find and prune (partial) outputs if `self.clobberOutputs` is set.
+        _LOG.debug(
+            "Looking for existing outputs in the way for label=%s dataId=%s.", task_node.label, quantum.dataId
+        )
+        ref_dict = limited_butler.stored_many(chain.from_iterable(quantum.outputs.values()))
+        existingRefs = [ref for ref, exists in ref_dict.items() if exists]
+        missingRefs = [ref for ref, exists in ref_dict.items() if not exists]
+        if existingRefs:
+            if not missingRefs:
+                # Full outputs exist.
+                if self._skip_existing:
+                    return True
+                elif self._clobber_outputs:
+                    _LOG.info("Removing complete outputs for quantum %s: %s", quantum, existingRefs)
+                    limited_butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
+                else:
+                    raise RuntimeError(
+                        f"Complete outputs exists for a quantum {quantum} "
+                        "and neither clobberOutputs nor skipExisting is set: "
+                        f"existingRefs={existingRefs}"
+                    )
+            else:
+                # Partial outputs from a failed quantum.
+                _LOG.debug(
+                    "Partial outputs exist for quantum %s existingRefs=%s missingRefs=%s",
+                    quantum,
+                    existingRefs,
+                    missingRefs,
+                )
+                if self._clobber_outputs:
+                    # only prune
+                    _LOG.info("Removing partial outputs for task %s: %s", task_node.label, existingRefs)
+                    limited_butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
+                    return False
+                else:
+                    raise RuntimeError(
+                        "Registry inconsistency while checking for existing quantum outputs:"
+                        f" quantum={quantum} existingRefs={existingRefs}"
+                        f" missingRefs={missingRefs}"
+                    )
+
+        # By default always execute.
+        return False
+
+    def _updated_quantum_inputs(
+        self, quantum: Quantum, task_node: TaskNode, /, limited_butler: LimitedButler
+    ) -> Quantum:
+        """Update quantum with extra information, returns a new updated
+        Quantum.
+
+        Some methods may require input DatasetRefs to have non-None
+        ``dataset_id``, but in case of intermediate dataset it may not be
+        filled during QuantumGraph construction. This method will retrieve
+        missing info from registry.
+
+        Parameters
+        ----------
+        quantum : `~lsst.daf.butler.Quantum`
+            Single Quantum instance.
+        task_node : `~.pipeline_graph.TaskNode`
+            Task definition structure.
+        limited_butler : `~lsst.daf.butler.LimitedButler`
+            Butler to use for querying.
+
+        Returns
+        -------
+        update : `~lsst.daf.butler.Quantum`
+            Updated Quantum instance.
+        """
+        anyChanges = False
+        updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
+        for key, refsForDatasetType in quantum.inputs.items():
+            _LOG.debug(
+                "Checking existence of input '%s' for label=%s dataId=%s.",
+                key.name,
+                task_node.label,
+                quantum.dataId,
+            )
+            toCheck = []
+            newRefsForDatasetType = updatedInputs[key]
+            for ref in refsForDatasetType:
+                if self._should_assume_exists(quantum, ref):
+                    newRefsForDatasetType.append(ref)
+                else:
+                    toCheck.append(ref)
+            if not toCheck:
+                _LOG.debug(
+                    "Assuming overall input '%s' is present without checks for label=%s dataId=%s.",
+                    key.name,
+                    task_node.label,
+                    quantum.dataId,
+                )
+                continue
+            stored = limited_butler.stored_many(toCheck)
+            for ref in toCheck:
+                if stored[ref]:
+                    newRefsForDatasetType.append(ref)
+                else:
+                    # This should only happen if a predicted intermediate was
+                    # not actually produced upstream, but
+                    # datastore misconfigurations can unfortunately also land
+                    # us here.
+                    _LOG.info("No dataset artifact found for %s", ref)
+                    continue
+            if len(newRefsForDatasetType) != len(refsForDatasetType):
+                anyChanges = True
+        # If we removed any input datasets, let the task check if it has enough
+        # to proceed and/or prune related datasets that it also doesn't
+        # need/produce anymore. It will raise NoWorkFound if it can't run,
+        # which we'll let propagate up. This is exactly what we run during QG
+        # generation, because a task shouldn't care whether an input is missing
+        # because some previous task didn't produce it, or because it just
+        # wasn't there during QG generation.
+        namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
+        helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
+        if anyChanges:
+            _LOG.debug("Running adjustQuantum for label=%s dataId=%s.", task_node.label, quantum.dataId)
+            assert quantum.dataId is not None, "Quantum DataId cannot be None"
+            helper.adjust_in_place(task_node.get_connections(), label=task_node.label, data_id=quantum.dataId)
+        return Quantum(
+            taskName=quantum.taskName,
+            taskClass=quantum.taskClass,
+            dataId=quantum.dataId,
+            initInputs=quantum.initInputs,
+            inputs=helper.inputs,
+            outputs=helper.outputs,
+        )
+
+    def _run_quantum(
+        self,
+        task: PipelineTask,
+        quantum: Quantum,
+        task_node: TaskNode,
+        /,
+        limited_butler: LimitedButler,
+        quantum_id: uuid.UUID | None = None,
+    ) -> tuple[QuantumSuccessCaveats, list[uuid.UUID], ButlerMetrics]:
+        """Execute task on a single quantum.
+
+        Parameters
+        ----------
+        task : `PipelineTask`
+            Task object.
+        quantum : `~lsst.daf.butler.Quantum`
+            Single Quantum instance.
+        task_node : `~.pipeline_graph.TaskNode`
+            Task definition structure.
+        limited_butler : `~lsst.daf.butler.LimitedButler`
+            Butler to use for dataset I/O.
+        quantum_id : `uuid.UUID` or `None`, optional
+            ID of the quantum being executed.
+
+        Returns
+        -------
+        flags : `QuantumSuccessCaveats`
+            Flags that describe qualified successes.
+        ids_put : list[ `uuid.UUID` ]
+            Record of all the dataset IDs that were written by this quantum
+            being executed.
+        metrics : `lsst.daf.butler.ButlerMetrics`
+            Butler metrics recorded for this quantum.
+        """
+        flags = QuantumSuccessCaveats.NO_CAVEATS
+
+        # Create a butler that operates in the context of a quantum
+        butlerQC = QuantumContext(limited_butler, quantum, resources=self._resources, quantum_id=quantum_id)
+
+        # Get the input and output references for the task
+        inputRefs, outputRefs = task_node.get_connections().buildDatasetRefs(quantum)
+
+        # Call task runQuantum() method.
+        try:
+            with limited_butler.record_metrics() as butler_metrics:
+                task.runQuantum(butlerQC, inputRefs, outputRefs)
+        except NoWorkFound as err:
+            # Not an error, just an early exit.
+            _LOG.info(
+                "Task '%s' on quantum %s exited early with no work found: %s.",
+                task_node.label,
+                quantum.dataId,
+                str(err),
+            )
+            flags |= err.FLAGS
+        except AnnotatedPartialOutputsError as caught:
+            error: BaseException
+            if caught.__cause__ is None:
+                _LOG.error(
+                    "Incorrect use of AnnotatedPartialOutputsError: no chained exception found.",
+                    task_node.label,
+                    quantum.dataId,
+                )
+                error = caught
+            else:
+                error = caught.__cause__
+            if self._raise_on_partial_outputs:
+                # Note: this is a real edge case that required some
+                # experimentation: without 'from None' below, this raise would
+                # produce a "while one exception was being handled, another was
+                # raised" traceback involving AnnotatedPartialOutputsError.
+                # With the 'from None', we get just the error chained to it, as
+                # desired.
+                raise error from None
+            else:
+                _LOG.error(
+                    "Task '%s' on quantum %s exited with partial outputs; "
+                    "considering this a qualified success and proceeding.",
+                    task_node.label,
+                    quantum.dataId,
+                )
+                _LOG.error(error, exc_info=error)
+                flags |= caught.FLAGS
+        if not butlerQC.outputsPut:
+            flags |= QuantumSuccessCaveats.ALL_OUTPUTS_MISSING
+        if not butlerQC.outputsPut == butlerQC.allOutputs:
+            flags |= QuantumSuccessCaveats.ANY_OUTPUTS_MISSING
+        ids_put = [output[2] for output in butlerQC.outputsPut]
+        return flags, ids_put, butler_metrics
+
+    def _write_metadata(
+        self, quantum: Quantum, metadata: Any, task_node: TaskNode, /, limited_butler: LimitedButler
+    ) -> None:
+        # DatasetRef has to be in the Quantum outputs, can lookup by name
+        try:
+            [ref] = quantum.outputs[task_node.metadata_output.dataset_type_name]
+        except LookupError as exc:
+            raise InvalidQuantumError(
+                "Quantum outputs is missing metadata dataset type "
+                f"{task_node.metadata_output.dataset_type_name};"
+                " this could happen due to inconsistent options between QuantumGraph generation"
+                " and execution"
+            ) from exc
+        limited_butler.put(metadata, ref)
+
+    def _init_globals(self, quantum: Quantum) -> None:
+        """Initialize global state needed for task execution.
+
+        Parameters
+        ----------
+        quantum : `~lsst.daf.butler.Quantum`
+            Single Quantum instance.
+
+        Notes
+        -----
+        There is an issue with initializing filters singleton which is done
+        by instrument, to avoid requiring tasks to do it in runQuantum()
+        we do it here when any dataId has an instrument dimension. Also for
+        now we only allow single instrument, verify that all instrument
+        names in all dataIds are identical.
+
+        This will need revision when filter singleton disappears.
+        """
+        # can only work for full butler
+        if self._butler is None:
+            return
+        oneInstrument = None
+        for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
+            for datasetRef in datasetRefs:
+                dataId = datasetRef.dataId
+                instrument = cast(str, dataId.get("instrument"))
+                if instrument is not None:
+                    if oneInstrument is not None:
+                        assert (  # type: ignore
+                            instrument == oneInstrument
+                        ), "Currently require that only one instrument is used per graph"
+                    else:
+                        oneInstrument = instrument
+                        Instrument.fromName(instrument, self._butler.registry)
+
+    def _should_assume_exists(self, quantum: Quantum, ref: DatasetRef) -> bool | None:
+        """Report whether the given dataset can be assumed to exist because
+        some previous check reported that it did.
+
+        If this is `True` for a dataset does not in fact exist anymore, that's
+        an unexpected problem that we want to raise as an exception, and
+        definitely not a case where some predicted output just wasn't produced.
+        We can't always tell the difference, but in this case we can.
+
+        Parameters
+        ----------
+        quantum : `Quantum`
+            Quantum being processed.
+        ref : `lsst.daf.butler.DatasetRef`
+            Reference to the input dataset.
+
+        Returns
+        -------
+        exists : `bool` or `None`
+            `True` if this dataset is definitely an overall input, `False` if
+            some other quantum in the graph is expected to produce it, and
+            `None` if the answer could not be determined.
+        """
+        if quantum.datastore_records:
+            for datastore_record_data in quantum.datastore_records.values():
+                if ref.id in datastore_record_data.records:
+                    return True
+            return False
+        return None
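As a rough orientation for readers of this diff, the sketch below shows how the new SingleQuantumExecutor could be driven directly, using only the constructor and execute() signatures added above. The helper function name, the keyword-argument choices, and the assumption that task_node and quantum come from an already-built quantum graph are illustrative, not part of the package.

import uuid

from lsst.daf.butler import Butler, Quantum
from lsst.pipe.base.pipeline_graph import TaskNode
from lsst.pipe.base.single_quantum_executor import SingleQuantumExecutor


def run_one_quantum(butler: Butler, task_node: TaskNode, quantum: Quantum) -> Quantum:
    # Hypothetical helper: the caller is assumed to hold a writeable full
    # butler and a (task_node, quantum) pair taken from a predicted graph.
    executor = SingleQuantumExecutor(
        butler=butler,
        clobber_outputs=True,            # clear partial outputs left by an earlier failure
        raise_on_partial_outputs=False,  # treat annotated partial outputs as qualified success
        job_metadata={"job_source": "example"},
    )
    # execute() returns the (possibly adjusted) Quantum plus a QuantumReport.
    executed_quantum, report = executor.execute(task_node, quantum, quantum_id=uuid.uuid4())
    return executed_quantum

When no full Butler is available, the same class can instead be constructed with butler=None and a limited_butler_factory callable; the constructor asserts that the factory is provided in that case.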
lsst/pipe/base/taskFactory.py
CHANGED
@@ -25,15 +25,13 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-"""Module defining TaskFactory interface."""
-
 from __future__ import annotations
 
 __all__ = ["TaskFactory"]
 
-from abc import ABCMeta, abstractmethod
+from abc import ABCMeta
 from collections.abc import Iterable
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
     from lsst.daf.butler import DatasetRef, LimitedButler
@@ -43,13 +41,8 @@ if TYPE_CHECKING:
 
 
 class TaskFactory(metaclass=ABCMeta):
-    """
-
-    Task factory is responsible for creating instances of PipelineTask
-    subclasses.
-    """
+    """A helper class for creating instances of PipelineTask subclasses."""
 
-    @abstractmethod
     def makeTask(
         self,
         task_node: TaskNode,
@@ -57,7 +50,8 @@ class TaskFactory(metaclass=ABCMeta):
         butler: LimitedButler,
         initInputRefs: Iterable[DatasetRef] | None,
     ) -> PipelineTask:
-        """Create new PipelineTask instance from its
+        """Create new PipelineTask instance from its
+        `~.pipeline_graph.TaskNode`.
 
         Parameters
         ----------
@@ -79,4 +73,16 @@ class TaskFactory(metaclass=ABCMeta):
            Any exceptions that are raised by PipelineTask constructor or its
            configuration class are propagated back to caller.
        """
-
+        config = task_node.config
+        init_inputs: dict[str, Any] = {}
+        init_input_refs_by_dataset_type = {}
+        if initInputRefs is not None:
+            init_input_refs_by_dataset_type = {ref.datasetType.name: ref for ref in initInputRefs}
+        task_class = task_node.task_class
+        if init_input_refs_by_dataset_type:
+            for read_edge in task_node.init.inputs.values():
+                init_inputs[read_edge.connection_name] = butler.get(
+                    init_input_refs_by_dataset_type[read_edge.dataset_type_name]
+                )
+        task = task_class(config=config, initInputs=init_inputs, name=task_node.label)
+        return task
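The taskFactory.py hunks above turn makeTask from an abstract method into a concrete default: it resolves each init-input connection by dataset type name, fetches it through the supplied (limited) butler, and instantiates the task class with its config. A minimal sketch of calling it follows; the wrapper function name is illustrative, and the init-input refs would normally be taken from a quantum's initInputs, exactly as SingleQuantumExecutor does in _execute().

from collections.abc import Iterable

from lsst.daf.butler import DatasetRef, LimitedButler
from lsst.pipe.base.pipeline_graph import TaskNode
from lsst.pipe.base.taskFactory import TaskFactory


def make_task_for_node(
    task_node: TaskNode,
    butler: LimitedButler,
    init_input_refs: Iterable[DatasetRef] | None,
):
    # TaskFactory is now directly usable without subclassing, since its only
    # formerly-abstract method gained a default body in this release.
    factory = TaskFactory()
    return factory.makeTask(task_node, butler, init_input_refs)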