lsst-pipe-base 29.2025.3000-py3-none-any.whl → 29.2025.3100-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. lsst/pipe/base/_datasetQueryConstraints.py +1 -1
  2. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +6 -4
  3. lsst/pipe/base/connectionTypes.py +19 -19
  4. lsst/pipe/base/connections.py +2 -2
  5. lsst/pipe/base/exec_fixup_data_id.py +131 -0
  6. lsst/pipe/base/execution_graph_fixup.py +69 -0
  7. lsst/pipe/base/log_capture.py +227 -0
  8. lsst/pipe/base/mp_graph_executor.py +774 -0
  9. lsst/pipe/base/quantum_graph_builder.py +43 -42
  10. lsst/pipe/base/quantum_graph_executor.py +125 -0
  11. lsst/pipe/base/quantum_reports.py +334 -0
  12. lsst/pipe/base/script/transfer_from_graph.py +4 -1
  13. lsst/pipe/base/separable_pipeline_executor.py +296 -0
  14. lsst/pipe/base/simple_pipeline_executor.py +674 -0
  15. lsst/pipe/base/single_quantum_executor.py +636 -0
  16. lsst/pipe/base/taskFactory.py +18 -12
  17. lsst/pipe/base/version.py +1 -1
  18. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/METADATA +1 -1
  19. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/RECORD +27 -18
  20. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/WHEEL +0 -0
  21. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/entry_points.txt +0 -0
  22. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/COPYRIGHT +0 -0
  23. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/LICENSE +0 -0
  24. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/bsd_license.txt +0 -0
  25. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/gpl-v3.0.txt +0 -0
  26. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/top_level.txt +0 -0
  27. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/zip-safe +0 -0
lsst/pipe/base/single_quantum_executor.py (new file)
@@ -0,0 +1,636 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ __all__ = ["SingleQuantumExecutor"]
+
+ import logging
+ import time
+ import uuid
+ from collections import defaultdict
+ from collections.abc import Callable, Mapping
+ from itertools import chain
+ from typing import Any, cast
+
+ from lsst.daf.butler import (
+     Butler,
+     ButlerMetrics,
+     DatasetRef,
+     DatasetType,
+     LimitedButler,
+     NamedKeyDict,
+     Quantum,
+ )
+ from lsst.utils.timer import logInfo
+
+ from ._instrument import Instrument
+ from ._quantumContext import ExecutionResources, QuantumContext
+ from ._status import AnnotatedPartialOutputsError, InvalidQuantumError, NoWorkFound, QuantumSuccessCaveats
+ from .connections import AdjustQuantumHelper
+ from .log_capture import LogCapture
+ from .pipeline_graph import TaskNode
+ from .pipelineTask import PipelineTask
+ from .quantum_graph_executor import QuantumExecutor
+ from .quantum_reports import QuantumReport
+ from .task import _TASK_FULL_METADATA_TYPE, _TASK_METADATA_TYPE
+ from .taskFactory import TaskFactory
+
+ _LOG = logging.getLogger(__name__)
+
+
+ class SingleQuantumExecutor(QuantumExecutor):
+     """Executor class which runs one Quantum at a time.
+
+     Parameters
+     ----------
+     butler : `~lsst.daf.butler.Butler` or `None`
+         Data butler; `None` means that a quantum-backed butler should be
+         used instead.
+     task_factory : `.TaskFactory`
+         Instance of a task factory.
+     skip_existing_in : `~typing.Any`
+         Expressions representing the collections to search for existing
+         output datasets. See :ref:`daf_butler_ordered_collection_searches`
+         for allowed types. This class only checks for the presence of the
+         butler output run in the list of collections. If the output run is
+         present in the list, then quanta whose complete outputs exist in the
+         output run will be skipped. `None` or an empty string/sequence
+         disables skipping.
+     clobber_outputs : `bool`, optional
+         If `True`, outputs from a quantum that exist in the output run
+         collection will be removed prior to executing the quantum. If
+         ``skip_existing_in`` contains the output run, then only partial
+         outputs from a quantum will be removed. Only used when ``butler`` is
+         not `None`.
+     enable_lsst_debug : `bool`, optional
+         Enable debugging with the ``lsstDebug`` facility for a task.
+     limited_butler_factory : `Callable`, optional
+         A method that creates a `~lsst.daf.butler.LimitedButler` instance
+         for a given Quantum. This parameter must be defined if ``butler`` is
+         `None`. If ``butler`` is not `None` then this parameter is ignored.
+     resources : `.ExecutionResources`, optional
+         The resources available to this quantum when executing.
+     skip_existing : `bool`, optional
+         If `True`, skip quanta whose metadata datasets are already stored.
+         Unlike ``skip_existing_in``, this works with limited butlers as well
+         as full butlers. Always set to `True` if ``skip_existing_in``
+         matches ``butler.run``.
+     assume_no_existing_outputs : `bool`, optional
+         If `True`, assume preexisting outputs are impossible (e.g. because
+         this is known by higher-level code to be a new ``RUN`` collection),
+         and do not look for them. This causes the ``skip_existing`` and
+         ``clobber_outputs`` options to be ignored, but unlike just setting
+         both of those to `False`, it also avoids all dataset existence
+         checks.
+     raise_on_partial_outputs : `bool`, optional
+         If `True`, raise exceptions chained by
+         `.AnnotatedPartialOutputsError` immediately, instead of considering
+         the partial result a success and continuing to run downstream tasks.
+     job_metadata : `~collections.abc.Mapping`
+         Mapping with extra metadata to embed within the quantum metadata
+         under the "job" key. This is intended to correspond to information
+         common to all quanta being executed in a single process, such as the
+         time taken to load the quantum graph in a BPS job.
+     """
+
+     def __init__(
+         self,
+         *,
+         butler: Butler | None,
+         task_factory: TaskFactory,
+         skip_existing_in: Any = None,
+         clobber_outputs: bool = False,
+         enable_lsst_debug: bool = False,
+         limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
+         resources: ExecutionResources | None = None,
+         skip_existing: bool = False,
+         assume_no_existing_outputs: bool = False,
+         raise_on_partial_outputs: bool = True,
+         job_metadata: Mapping[str, int | str | float] | None = None,
+     ):
+         self._butler = butler
+         self._task_factory = task_factory
+         self._clobber_outputs = clobber_outputs
+         self._enable_lsst_debug = enable_lsst_debug
+         self._limited_butler_factory = limited_butler_factory
+         self._resources = resources
+         self._assume_no_existing_outputs = assume_no_existing_outputs
+         self._raise_on_partial_outputs = raise_on_partial_outputs
+         self._job_metadata = job_metadata
+
+         if self._butler is None:
+             assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"
+
+         # Find whether output run is in skip_existing_in.
+         self._skip_existing = skip_existing
+         if self._butler is not None and skip_existing_in and not self._skip_existing:
+             self._skip_existing = self._butler.run in self._butler.collections.query(
+                 skip_existing_in, flatten_chains=True
+             )
+
+     def execute(
+         self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
+     ) -> tuple[Quantum, QuantumReport | None]:
+         # Docstring inherited from QuantumExecutor.execute
+         assert quantum.dataId is not None, "Quantum DataId cannot be None"
+
+         if self._butler is not None:
+             self._butler.registry.refresh()
+
+         result = self._execute(task_node, quantum, quantum_id=quantum_id)
+         report = QuantumReport(dataId=quantum.dataId, taskLabel=task_node.label)
+         return result, report
+
+     def _execute(
+         self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
+     ) -> Quantum:
+         """Execute the quantum.
+
+         Internal implementation of `execute()`.
+         """
+         startTime = time.time()
+
+         # Make a limited butler instance if needed (this should be a
+         # quantum-backed butler if the full butler is not defined).
+         limited_butler: LimitedButler
+         if self._butler is not None:
+             limited_butler = self._butler
+         else:
+             # We check this in the constructor, but mypy needs this check here.
+             assert self._limited_butler_factory is not None
+             limited_butler = self._limited_butler_factory(quantum)
+
+         if self._butler is not None:
+             log_capture = LogCapture.from_full(self._butler)
+         else:
+             log_capture = LogCapture.from_limited(limited_butler)
+         with log_capture.capture_logging(task_node, quantum) as captureLog:
+             # Save detailed resource usage before task start to metadata.
+             quantumMetadata = _TASK_METADATA_TYPE()
+             logInfo(None, "prep", metadata=quantumMetadata)  # type: ignore[arg-type]
+
+             _LOG.info(
+                 "Preparing execution of quantum for label=%s dataId=%s.", task_node.label, quantum.dataId
+             )
+
+             # Check whether to skip or delete old outputs; if this returns
+             # `True` or raises an exception, do not try to store logs, as
+             # they may already be in the butler.
+             captureLog.store = False
+             if self._check_existing_outputs(quantum, task_node, limited_butler):
+                 _LOG.info(
+                     "Skipping already-successful quantum for label=%s dataId=%s.",
+                     task_node.label,
+                     quantum.dataId,
+                 )
+                 return quantum
+             captureLog.store = True
+
+             try:
+                 quantum = self._updated_quantum_inputs(quantum, task_node, limited_butler)
+             except NoWorkFound as exc:
+                 _LOG.info(
+                     "Nothing to do for task '%s' on quantum %s; saving metadata and skipping: %s",
+                     task_node.label,
+                     quantum.dataId,
+                     str(exc),
+                 )
+                 quantumMetadata["caveats"] = QuantumSuccessCaveats.from_adjust_quantum_no_work().value
+                 # Make empty metadata that looks something like what a
+                 # do-nothing task would write (but we don't bother with empty
+                 # nested PropertySets for subtasks). This is slightly
+                 # duplicative with logic in pipe_base that we can't easily
+                 # call from here; we'll fix this on DM-29761.
+                 logInfo(None, "end", metadata=quantumMetadata)  # type: ignore[arg-type]
+                 fullMetadata = _TASK_FULL_METADATA_TYPE()
+                 fullMetadata[task_node.label] = _TASK_METADATA_TYPE()
+                 fullMetadata["quantum"] = quantumMetadata
+                 if self._job_metadata is not None:
+                     fullMetadata["job"] = self._job_metadata
+                 self._write_metadata(quantum, fullMetadata, task_node, limited_butler)
+                 return quantum
+
+             # Enable lsstDebug debugging.
+             if self._enable_lsst_debug:
+                 try:
+                     _LOG.debug("Will try to import debug.py")
+                     import debug  # type: ignore # noqa:F401
+                 except ImportError:
+                     _LOG.warning("No 'debug' module found.")
+
+             # Initialize global state.
+             self._init_globals(quantum)
+
+             # Ensure that we are executing a frozen config.
+             task_node.config.freeze()
+             logInfo(None, "init", metadata=quantumMetadata)  # type: ignore[arg-type]
+             init_input_refs = list(quantum.initInputs.values())
+
+             _LOG.info(
+                 "Constructing task and executing quantum for label=%s dataId=%s.",
+                 task_node.label,
+                 quantum.dataId,
+             )
+             task = self._task_factory.makeTask(task_node, limited_butler, init_input_refs)
+             logInfo(None, "start", metadata=quantumMetadata)  # type: ignore[arg-type]
+             try:
+                 caveats, outputsPut, butler_metrics = self._run_quantum(
+                     task, quantum, task_node, limited_butler, quantum_id=quantum_id
+                 )
+             except Exception as e:
+                 _LOG.error(
+                     "Execution of task '%s' on quantum %s failed. Exception %s: %s",
+                     task_node.label,
+                     quantum.dataId,
+                     e.__class__.__name__,
+                     str(e),
+                 )
+                 raise
+             else:
+                 quantumMetadata["butler_metrics"] = butler_metrics.model_dump()
+                 quantumMetadata["caveats"] = caveats.value
+                 # Stringify the UUIDs for easier compatibility with
+                 # PropertyList.
+                 quantumMetadata["outputs"] = [str(output) for output in outputsPut]
+             logInfo(None, "end", metadata=quantumMetadata)  # type: ignore[arg-type]
+             fullMetadata = task.getFullMetadata()
+             fullMetadata["quantum"] = quantumMetadata
+             if self._job_metadata is not None:
+                 fullMetadata["job"] = self._job_metadata
+             self._write_metadata(quantum, fullMetadata, task_node, limited_butler)
+             stopTime = time.time()
+             _LOG.info(
+                 "Execution of task '%s' on quantum %s took %.3f seconds",
+                 task_node.label,
+                 quantum.dataId,
+                 stopTime - startTime,
+             )
+         return quantum
+
+     def _check_existing_outputs(
+         self, quantum: Quantum, task_node: TaskNode, /, limited_butler: LimitedButler
+     ) -> bool:
+         """Decide whether this quantum needs to be executed.
+
+         If only partial outputs exist, they are removed if
+         ``self._clobber_outputs`` is `True`; otherwise an exception is
+         raised.
+
+         The ``LimitedButler`` is used for everything, and should be set to
+         ``self._butler`` if no separate ``LimitedButler`` is available.
+
+         Parameters
+         ----------
+         quantum : `~lsst.daf.butler.Quantum`
+             Quantum to check for existing outputs.
+         task_node : `~.pipeline_graph.TaskNode`
+             Task definition structure.
+         limited_butler : `~lsst.daf.butler.LimitedButler`
+             Butler to use for querying and clobbering.
+
+         Returns
+         -------
+         exist : `bool`
+             `True` if ``self._skip_existing`` is set and a previous execution
+             of this quantum appears to have completed successfully (either
+             because metadata was written or all datasets were written).
+             `False` otherwise.
+
+         Raises
+         ------
+         RuntimeError
+             Raised if some outputs exist and some do not.
+         """
+         if self._assume_no_existing_outputs:
+             return False
+
+         if self._skip_existing:
+             _LOG.debug(
+                 "Checking existence of metadata from previous execution of label=%s dataId=%s.",
+                 task_node.label,
+                 quantum.dataId,
+             )
+             # If the metadata output exists, that is sufficient to assume the
+             # previous run was successful and should be skipped.
+             [metadata_ref] = quantum.outputs[task_node.metadata_output.dataset_type_name]
+             if metadata_ref is not None:
+                 if limited_butler.stored(metadata_ref):
+                     return True
+
+         # Find and prune (partial) outputs if ``self._clobber_outputs`` is
+         # set.
+         _LOG.debug(
+             "Looking for existing outputs in the way for label=%s dataId=%s.", task_node.label, quantum.dataId
+         )
+         ref_dict = limited_butler.stored_many(chain.from_iterable(quantum.outputs.values()))
+         existingRefs = [ref for ref, exists in ref_dict.items() if exists]
+         missingRefs = [ref for ref, exists in ref_dict.items() if not exists]
+         if existingRefs:
+             if not missingRefs:
+                 # Full outputs exist.
+                 if self._skip_existing:
+                     return True
+                 elif self._clobber_outputs:
+                     _LOG.info("Removing complete outputs for quantum %s: %s", quantum, existingRefs)
+                     limited_butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
+                 else:
+                     raise RuntimeError(
+                         f"Complete outputs exist for a quantum {quantum} "
+                         "and neither clobberOutputs nor skipExisting is set: "
+                         f"existingRefs={existingRefs}"
+                     )
+             else:
+                 # Partial outputs from a failed quantum.
+                 _LOG.debug(
+                     "Partial outputs exist for quantum %s existingRefs=%s missingRefs=%s",
+                     quantum,
+                     existingRefs,
+                     missingRefs,
+                 )
+                 if self._clobber_outputs:
+                     # Only prune.
+                     _LOG.info("Removing partial outputs for task %s: %s", task_node.label, existingRefs)
+                     limited_butler.pruneDatasets(existingRefs, disassociate=True, unstore=True, purge=True)
+                     return False
+                 else:
+                     raise RuntimeError(
+                         "Registry inconsistency while checking for existing quantum outputs:"
+                         f" quantum={quantum} existingRefs={existingRefs}"
+                         f" missingRefs={missingRefs}"
+                     )
+
+         # By default always execute.
+         return False
+
+     def _updated_quantum_inputs(
+         self, quantum: Quantum, task_node: TaskNode, /, limited_butler: LimitedButler
+     ) -> Quantum:
+         """Update the quantum with extra information, returning a new,
+         updated Quantum.
+
+         Some methods may require input DatasetRefs to have a non-None
+         ``dataset_id``, but in the case of intermediate datasets it may not
+         be filled in during QuantumGraph construction. This method retrieves
+         the missing info from the registry.
+
+         Parameters
+         ----------
+         quantum : `~lsst.daf.butler.Quantum`
+             Single Quantum instance.
+         task_node : `~.pipeline_graph.TaskNode`
+             Task definition structure.
+         limited_butler : `~lsst.daf.butler.LimitedButler`
+             Butler to use for querying.
+
+         Returns
+         -------
+         update : `~lsst.daf.butler.Quantum`
+             Updated Quantum instance.
+         """
+         anyChanges = False
+         updatedInputs: defaultdict[DatasetType, list] = defaultdict(list)
+         for key, refsForDatasetType in quantum.inputs.items():
+             _LOG.debug(
+                 "Checking existence of input '%s' for label=%s dataId=%s.",
+                 key.name,
+                 task_node.label,
+                 quantum.dataId,
+             )
+             toCheck = []
+             newRefsForDatasetType = updatedInputs[key]
+             for ref in refsForDatasetType:
+                 if self._should_assume_exists(quantum, ref):
+                     newRefsForDatasetType.append(ref)
+                 else:
+                     toCheck.append(ref)
+             if not toCheck:
+                 _LOG.debug(
+                     "Assuming overall input '%s' is present without checks for label=%s dataId=%s.",
+                     key.name,
+                     task_node.label,
+                     quantum.dataId,
+                 )
+                 continue
+             stored = limited_butler.stored_many(toCheck)
+             for ref in toCheck:
+                 if stored[ref]:
+                     newRefsForDatasetType.append(ref)
+                 else:
+                     # This should only happen if a predicted intermediate was
+                     # not actually produced upstream, but datastore
+                     # misconfigurations can unfortunately also land us here.
+                     _LOG.info("No dataset artifact found for %s", ref)
+                     continue
+             if len(newRefsForDatasetType) != len(refsForDatasetType):
+                 anyChanges = True
+         # If we removed any input datasets, let the task check if it has
+         # enough to proceed and/or prune related datasets that it also
+         # doesn't need/produce anymore. It will raise NoWorkFound if it
+         # can't run, which we'll let propagate up. This is exactly what we
+         # run during QG generation, because a task shouldn't care whether an
+         # input is missing because some previous task didn't produce it, or
+         # because it just wasn't there during QG generation.
+         namedUpdatedInputs = NamedKeyDict[DatasetType, list[DatasetRef]](updatedInputs.items())
+         helper = AdjustQuantumHelper(namedUpdatedInputs, quantum.outputs)
+         if anyChanges:
+             _LOG.debug("Running adjustQuantum for label=%s dataId=%s.", task_node.label, quantum.dataId)
+             assert quantum.dataId is not None, "Quantum DataId cannot be None"
+             helper.adjust_in_place(task_node.get_connections(), label=task_node.label, data_id=quantum.dataId)
+         return Quantum(
+             taskName=quantum.taskName,
+             taskClass=quantum.taskClass,
+             dataId=quantum.dataId,
+             initInputs=quantum.initInputs,
+             inputs=helper.inputs,
+             outputs=helper.outputs,
+         )
+
+     def _run_quantum(
+         self,
+         task: PipelineTask,
+         quantum: Quantum,
+         task_node: TaskNode,
+         /,
+         limited_butler: LimitedButler,
+         quantum_id: uuid.UUID | None = None,
+     ) -> tuple[QuantumSuccessCaveats, list[uuid.UUID], ButlerMetrics]:
+         """Execute the task on a single quantum.
+
+         Parameters
+         ----------
+         task : `PipelineTask`
+             Task object.
+         quantum : `~lsst.daf.butler.Quantum`
+             Single Quantum instance.
+         task_node : `~.pipeline_graph.TaskNode`
+             Task definition structure.
+         limited_butler : `~lsst.daf.butler.LimitedButler`
+             Butler to use for dataset I/O.
+         quantum_id : `uuid.UUID` or `None`, optional
+             ID of the quantum being executed.
+
+         Returns
+         -------
+         flags : `QuantumSuccessCaveats`
+             Flags that describe qualified successes.
+         ids_put : `list` [`uuid.UUID`]
+             Record of all the dataset IDs that were written by this quantum
+             being executed.
+         metrics : `lsst.daf.butler.ButlerMetrics`
+             Butler metrics recorded for this quantum.
+         """
+         flags = QuantumSuccessCaveats.NO_CAVEATS
+
+         # Create a butler that operates in the context of a quantum.
+         butlerQC = QuantumContext(limited_butler, quantum, resources=self._resources, quantum_id=quantum_id)
+
+         # Get the input and output references for the task.
+         inputRefs, outputRefs = task_node.get_connections().buildDatasetRefs(quantum)
+
+         # Call the task's runQuantum() method.
+         try:
+             with limited_butler.record_metrics() as butler_metrics:
+                 task.runQuantum(butlerQC, inputRefs, outputRefs)
+         except NoWorkFound as err:
+             # Not an error, just an early exit.
+             _LOG.info(
+                 "Task '%s' on quantum %s exited early with no work found: %s.",
+                 task_node.label,
+                 quantum.dataId,
+                 str(err),
+             )
+             flags |= err.FLAGS
+         except AnnotatedPartialOutputsError as caught:
+             error: BaseException
+             if caught.__cause__ is None:
+                 _LOG.error(
+                     "Incorrect use of AnnotatedPartialOutputsError by task '%s' on quantum %s:"
+                     " no chained exception found.",
+                     task_node.label,
+                     quantum.dataId,
+                 )
+                 error = caught
+             else:
+                 error = caught.__cause__
+             if self._raise_on_partial_outputs:
+                 # Note: this is a real edge case that required some
+                 # experimentation: without 'from None' below, this raise
+                 # would produce a "while one exception was being handled,
+                 # another was raised" traceback involving
+                 # AnnotatedPartialOutputsError. With the 'from None', we get
+                 # just the error chained to it, as desired.
+                 raise error from None
+             else:
+                 _LOG.error(
+                     "Task '%s' on quantum %s exited with partial outputs; "
+                     "considering this a qualified success and proceeding.",
+                     task_node.label,
+                     quantum.dataId,
+                 )
+                 _LOG.error(error, exc_info=error)
+                 flags |= caught.FLAGS
+         if not butlerQC.outputsPut:
+             flags |= QuantumSuccessCaveats.ALL_OUTPUTS_MISSING
+         if butlerQC.outputsPut != butlerQC.allOutputs:
+             flags |= QuantumSuccessCaveats.ANY_OUTPUTS_MISSING
+         ids_put = [output[2] for output in butlerQC.outputsPut]
+         return flags, ids_put, butler_metrics
+
+     def _write_metadata(
+         self, quantum: Quantum, metadata: Any, task_node: TaskNode, /, limited_butler: LimitedButler
+     ) -> None:
+         # The DatasetRef has to be in the Quantum outputs; we can look it up
+         # by name.
+         try:
+             [ref] = quantum.outputs[task_node.metadata_output.dataset_type_name]
+         except LookupError as exc:
+             raise InvalidQuantumError(
+                 "Quantum outputs is missing metadata dataset type "
+                 f"{task_node.metadata_output.dataset_type_name};"
+                 " this could happen due to inconsistent options between QuantumGraph generation"
+                 " and execution"
+             ) from exc
+         limited_butler.put(metadata, ref)
+
+     def _init_globals(self, quantum: Quantum) -> None:
+         """Initialize global state needed for task execution.
+
+         Parameters
+         ----------
+         quantum : `~lsst.daf.butler.Quantum`
+             Single Quantum instance.
+
+         Notes
+         -----
+         There is an issue with initializing the filters singleton, which is
+         done per instrument; to avoid requiring tasks to do it in
+         runQuantum() we do it here whenever any dataId has an instrument
+         dimension. Also, for now we only allow a single instrument: we
+         verify that the instrument names in all dataIds are identical.
+
+         This will need revision when the filter singleton disappears.
+         """
+         # This can only work for a full butler.
+         if self._butler is None:
+             return
+         oneInstrument = None
+         for datasetRefs in chain(quantum.inputs.values(), quantum.outputs.values()):
+             for datasetRef in datasetRefs:
+                 dataId = datasetRef.dataId
+                 instrument = cast(str, dataId.get("instrument"))
+                 if instrument is not None:
+                     if oneInstrument is not None:
+                         assert (  # type: ignore
+                             instrument == oneInstrument
+                         ), "Currently require that only one instrument is used per graph"
+                     else:
+                         oneInstrument = instrument
+                         Instrument.fromName(instrument, self._butler.registry)
+
+     def _should_assume_exists(self, quantum: Quantum, ref: DatasetRef) -> bool | None:
+         """Report whether the given dataset can be assumed to exist because
+         some previous check reported that it did.
+
+         If this is `True` for a dataset that does not in fact exist anymore,
+         that's an unexpected problem that we want to raise as an exception,
+         and definitely not a case where some predicted output just wasn't
+         produced. We can't always tell the difference, but in this case we
+         can.
+
+         Parameters
+         ----------
+         quantum : `Quantum`
+             Quantum being processed.
+         ref : `lsst.daf.butler.DatasetRef`
+             Reference to the input dataset.
+
+         Returns
+         -------
+         exists : `bool` or `None`
+             `True` if this dataset is definitely an overall input, `False`
+             if some other quantum in the graph is expected to produce it,
+             and `None` if the answer could not be determined.
+         """
+         if quantum.datastore_records:
+             for datastore_record_data in quantum.datastore_records.values():
+                 if ref.id in datastore_record_data.records:
+                     return True
+             return False
+         return None
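
A minimal usage sketch for the new executor, assuming a writeable full butler and a task_node/quantum pair already loaded from a QuantumGraph; the repository path, run name, and graph objects below are hypothetical placeholders, not part of this release:

from lsst.daf.butler import Butler
from lsst.pipe.base.single_quantum_executor import SingleQuantumExecutor
from lsst.pipe.base.taskFactory import TaskFactory

# Hypothetical repository path and output RUN collection.
butler = Butler("/path/to/repo", run="u/demo/attempt1", writeable=True)
executor = SingleQuantumExecutor(
    butler=butler,
    task_factory=TaskFactory(),
    clobber_outputs=True,  # remove partial outputs left by failed attempts
    raise_on_partial_outputs=False,  # treat annotated partial outputs as qualified successes
)
# task_node and quantum would come from the pipeline/quantum graph.
updated_quantum, report = executor.execute(task_node, quantum)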
lsst/pipe/base/taskFactory.py
@@ -25,15 +25,13 @@
  # You should have received a copy of the GNU General Public License
  # along with this program. If not, see <http://www.gnu.org/licenses/>.

- """Module defining TaskFactory interface."""
-
  from __future__ import annotations

  __all__ = ["TaskFactory"]

- from abc import ABCMeta, abstractmethod
+ from abc import ABCMeta
  from collections.abc import Iterable
- from typing import TYPE_CHECKING
+ from typing import TYPE_CHECKING, Any

  if TYPE_CHECKING:
      from lsst.daf.butler import DatasetRef, LimitedButler
@@ -43,13 +41,8 @@ if TYPE_CHECKING:


  class TaskFactory(metaclass=ABCMeta):
-     """Abstract base class for task factory.
-
-     Task factory is responsible for creating instances of PipelineTask
-     subclasses.
-     """
+     """A helper class for creating instances of PipelineTask subclasses."""

-     @abstractmethod
      def makeTask(
          self,
          task_node: TaskNode,
@@ -57,7 +50,8 @@ class TaskFactory(metaclass=ABCMeta):
          butler: LimitedButler,
          initInputRefs: Iterable[DatasetRef] | None,
      ) -> PipelineTask:
-         """Create new PipelineTask instance from its `~lsst.pipe.base.TaskDef`.
+         """Create new PipelineTask instance from its
+         `~.pipeline_graph.TaskNode`.

          Parameters
          ----------
@@ -79,4 +73,16 @@ class TaskFactory(metaclass=ABCMeta):
          Any exceptions that are raised by the PipelineTask constructor or
          its configuration class are propagated back to the caller.
          """
-         raise NotImplementedError()
+         config = task_node.config
+         init_inputs: dict[str, Any] = {}
+         init_input_refs_by_dataset_type = {}
+         if initInputRefs is not None:
+             init_input_refs_by_dataset_type = {ref.datasetType.name: ref for ref in initInputRefs}
+         task_class = task_node.task_class
+         if init_input_refs_by_dataset_type:
+             for read_edge in task_node.init.inputs.values():
+                 init_inputs[read_edge.connection_name] = butler.get(
+                     init_input_refs_by_dataset_type[read_edge.dataset_type_name]
+                 )
+         task = task_class(config=config, initInputs=init_inputs, name=task_node.label)
+         return task
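
With the @abstractmethod removed, the base TaskFactory is now usable directly: makeTask resolves each init-input connection to its DatasetRef by dataset type name, fetches the objects through the butler, and passes them to the task constructor. A brief sketch, assuming task_node, limited_butler, and init_input_refs are obtained from a pipeline graph and a quantum-backed butler:

from lsst.pipe.base.taskFactory import TaskFactory

factory = TaskFactory()  # no subclass needed now that makeTask is concrete
task = factory.makeTask(task_node, limited_butler, init_input_refs)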