lsst-pipe-base 29.2025.3000-py3-none-any.whl → 29.2025.3100-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. lsst/pipe/base/_datasetQueryConstraints.py +1 -1
  2. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +6 -4
  3. lsst/pipe/base/connectionTypes.py +19 -19
  4. lsst/pipe/base/connections.py +2 -2
  5. lsst/pipe/base/exec_fixup_data_id.py +131 -0
  6. lsst/pipe/base/execution_graph_fixup.py +69 -0
  7. lsst/pipe/base/log_capture.py +227 -0
  8. lsst/pipe/base/mp_graph_executor.py +774 -0
  9. lsst/pipe/base/quantum_graph_builder.py +43 -42
  10. lsst/pipe/base/quantum_graph_executor.py +125 -0
  11. lsst/pipe/base/quantum_reports.py +334 -0
  12. lsst/pipe/base/script/transfer_from_graph.py +4 -1
  13. lsst/pipe/base/separable_pipeline_executor.py +296 -0
  14. lsst/pipe/base/simple_pipeline_executor.py +674 -0
  15. lsst/pipe/base/single_quantum_executor.py +636 -0
  16. lsst/pipe/base/taskFactory.py +18 -12
  17. lsst/pipe/base/version.py +1 -1
  18. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/METADATA +1 -1
  19. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/RECORD +27 -18
  20. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/WHEEL +0 -0
  21. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/entry_points.txt +0 -0
  22. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/COPYRIGHT +0 -0
  23. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/LICENSE +0 -0
  24. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/bsd_license.txt +0 -0
  25. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/licenses/gpl-v3.0.txt +0 -0
  26. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/top_level.txt +0 -0
  27. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3100.dist-info}/zip-safe +0 -0
lsst/pipe/base/simple_pipeline_executor.py (new file)
@@ -0,0 +1,674 @@
+# This file is part of pipe_base.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (http://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This software is dual licensed under the GNU General Public License and also
+# under a 3-clause BSD license. Recipients may choose which of these licenses
+# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+# respectively. If you choose the GPL option then the following text applies
+# (but note that there is still no warranty even if you opt for BSD instead):
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+__all__ = ("SimplePipelineExecutor",)
+
+import datetime
+import getpass
+import itertools
+import os
+from collections.abc import Iterable, Iterator, Mapping
+from typing import Any, cast
+
+from lsst.daf.butler import (
+    Butler,
+    CollectionType,
+    DataCoordinate,
+    DatasetRef,
+    DimensionDataExtractor,
+    DimensionGroup,
+    Quantum,
+)
+from lsst.pex.config import Config
+
+from ._instrument import Instrument
+from ._quantumContext import ExecutionResources
+from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
+from .graph import QuantumGraph
+from .pipeline import Pipeline
+from .pipeline_graph import PipelineGraph
+from .pipelineTask import PipelineTask
+from .single_quantum_executor import SingleQuantumExecutor
+from .taskFactory import TaskFactory
+
+
+class SimplePipelineExecutor:
+    """A simple, high-level executor for pipelines.
+
+    Parameters
+    ----------
+    quantum_graph : `.QuantumGraph`
+        Graph to be executed.
+    butler : `~lsst.daf.butler.Butler`
+        Object that manages all I/O. Must be initialized with `collections`
+        and `run` properties that correspond to the input and output
+        collections, which must be consistent with those used to create
+        ``quantum_graph``.
+    resources : `.ExecutionResources`, optional
+        The resources available to each quantum being executed.
+    raise_on_partial_outputs : `bool`, optional
+        If `True`, raise exceptions chained by `.AnnotatedPartialOutputsError`
+        immediately, instead of considering the partial result a success and
+        continuing to run downstream tasks.
+
+    Notes
+    -----
+    Most callers should use one of the `classmethod` factory functions
+    (`from_pipeline_filename`, `from_task_class`, `from_pipeline`) instead of
+    invoking the constructor directly; these guarantee that the
+    `~lsst.daf.butler.Butler` and `.QuantumGraph` are created consistently.
+
+    This class is intended primarily to support unit testing and small-scale
+    integration testing of `.PipelineTask` classes. It deliberately lacks many
+    features present in the command-line-only ``pipetask`` tool in order to
+    keep the implementation simple. Python callers that need more
+    sophistication should call lower-level tools like
+    `~.quantum_graph_builder.QuantumGraphBuilder` and
+    `.single_quantum_executor.SingleQuantumExecutor` directly.
+    """
+
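For orientation, a usage sketch of the factory-based flow the notes describe; the repository path, collection names, pipeline filename, and data ID constraint below are all hypothetical:

    from lsst.pipe.base.simple_pipeline_executor import SimplePipelineExecutor

    # Hypothetical repository root and collection names.
    butler = SimplePipelineExecutor.prep_butler(
        "/path/to/repo",
        inputs=["HSC/defaults"],
        output="u/alice/demo",
    )
    executor = SimplePipelineExecutor.from_pipeline_filename(
        "my_pipeline.yaml",
        where="instrument = 'HSC' AND visit = 12345",  # hypothetical constraint
        butler=butler,
    )
    quanta = executor.run(register_dataset_types=True)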
+    def __init__(
+        self,
+        quantum_graph: QuantumGraph,
+        butler: Butler,
+        resources: ExecutionResources | None = None,
+        raise_on_partial_outputs: bool = True,
+    ):
+        self.quantum_graph = quantum_graph
+        self.butler = butler
+        self.resources = resources
+        self.raise_on_partial_outputs = raise_on_partial_outputs
+
+    @classmethod
+    def prep_butler(
+        cls,
+        root: str,
+        inputs: Iterable[str],
+        output: str,
+        output_run: str | None = None,
+    ) -> Butler:
+        """Return a configured `~lsst.daf.butler.Butler`.
+
+        Helper method for creating `~lsst.daf.butler.Butler` instances with
+        collections appropriate for processing.
+
+        Parameters
+        ----------
+        root : `str`
+            Root of the butler data repository; must already exist, with all
+            necessary input data.
+        inputs : `~collections.abc.Iterable` [ `str` ]
+            Collections to search for all input datasets, in search order.
+        output : `str`
+            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
+            collection to create that will combine both inputs and outputs.
+        output_run : `str`, optional
+            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
+            directly hold all output datasets. If not provided, a name will be
+            created from ``output`` and a timestamp.
+
+        Returns
+        -------
+        butler : `~lsst.daf.butler.Butler`
+            Butler client instance compatible with all `classmethod`
+            factories. Always writeable.
+        """
+        if output_run is None:
+            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
+        # Make initial butler with no collections, since we haven't created
+        # them yet.
+        butler = Butler.from_config(root, writeable=True)
+        butler.registry.registerCollection(output_run, CollectionType.RUN)
+        butler.registry.registerCollection(output, CollectionType.CHAINED)
+        collections = [output_run]
+        collections.extend(inputs)
+        butler.registry.setCollectionChain(output, collections)
+        # Remake butler to let it infer default data IDs from collections, now
+        # that those collections exist.
+        return Butler.from_config(butler=butler, collections=[output], run=output_run)
+
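To illustrate the collection layout this method builds (names hypothetical), note that the new RUN collection is placed first in the chain, ahead of the inputs in the order given:

    butler = SimplePipelineExecutor.prep_butler(
        "/path/to/repo",
        inputs=["HSC/raw/all", "HSC/calib"],
        output="u/alice/demo",
    )
    # The CHAINED collection "u/alice/demo" now resolves, in order, to:
    #   u/alice/demo/<timestamp>   (the new RUN collection; receives outputs)
    #   HSC/raw/all, HSC/calib     (the input collections, in the order given)
    # butler.run is the new RUN collection; reads search the whole chain.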
+    @classmethod
+    def from_pipeline_filename(
+        cls,
+        pipeline_filename: str,
+        *,
+        where: str = "",
+        bind: Mapping[str, Any] | None = None,
+        butler: Butler,
+        resources: ExecutionResources | None = None,
+        raise_on_partial_outputs: bool = True,
+        attach_datastore_records: bool = False,
+        output: str | None = None,
+        output_run: str | None = None,
+    ) -> SimplePipelineExecutor:
+        """Create an executor by building a QuantumGraph from an on-disk
+        pipeline YAML file.
+
+        Parameters
+        ----------
+        pipeline_filename : `str`
+            Name of the YAML file to load the pipeline definition from.
+        where : `str`, optional
+            Data ID query expression that constrains the quanta generated.
+        bind : `~collections.abc.Mapping`, optional
+            Mapping containing literal values that should be injected into the
+            ``where`` expression, keyed by the identifiers they replace.
+        butler : `~lsst.daf.butler.Butler`
+            Butler that manages all I/O. `prep_butler` can be used to create
+            one.
+        resources : `.ExecutionResources`, optional
+            The resources available to each quantum being executed.
+        raise_on_partial_outputs : `bool`, optional
+            If `True`, raise exceptions chained by
+            `.AnnotatedPartialOutputsError` immediately, instead of
+            considering the partial result a success and continuing to run
+            downstream tasks.
+        attach_datastore_records : `bool`, optional
+            Whether to attach datastore records to the quantum graph. This is
+            usually unnecessary, unless the executor is used to test behavior
+            that depends on datastore records.
+        output : `str`, optional
+            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
+            collection to create that will combine both inputs and outputs.
+        output_run : `str`, optional
+            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
+            directly hold all output datasets. If not provided, a name will be
+            created from ``output`` and a timestamp.
+
+        Returns
+        -------
+        executor : `SimplePipelineExecutor`
+            An executor instance containing the constructed `.QuantumGraph`
+            and `~lsst.daf.butler.Butler`, ready for `run` to be called.
+        """
+        pipeline = Pipeline.fromFile(pipeline_filename)
+        return cls.from_pipeline(
+            pipeline,
+            butler=butler,
+            where=where,
+            bind=bind,
+            resources=resources,
+            raise_on_partial_outputs=raise_on_partial_outputs,
+            attach_datastore_records=attach_datastore_records,
+            output=output,
+            output_run=output_run,
+        )
+
+    @classmethod
+    def from_task_class(
+        cls,
+        task_class: type[PipelineTask],
+        config: Config | None = None,
+        label: str | None = None,
+        *,
+        where: str = "",
+        bind: Mapping[str, Any] | None = None,
+        butler: Butler,
+        resources: ExecutionResources | None = None,
+        raise_on_partial_outputs: bool = True,
+        attach_datastore_records: bool = False,
+        output: str | None = None,
+        output_run: str | None = None,
+    ) -> SimplePipelineExecutor:
+        """Create an executor by building a QuantumGraph from a pipeline
+        containing a single task.
+
+        Parameters
+        ----------
+        task_class : `type`
+            A concrete `.PipelineTask` subclass.
+        config : `~lsst.pex.config.Config`, optional
+            Configuration for the task. If not provided, task-level defaults
+            will be used (no per-instrument overrides).
+        label : `str`, optional
+            Label for the task in its pipeline; defaults to
+            ``task_class._DefaultName``.
+        where : `str`, optional
+            Data ID query expression that constrains the quanta generated.
+        bind : `~collections.abc.Mapping`, optional
+            Mapping containing literal values that should be injected into the
+            ``where`` expression, keyed by the identifiers they replace.
+        butler : `~lsst.daf.butler.Butler`
+            Butler that manages all I/O. `prep_butler` can be used to create
+            one.
+        resources : `.ExecutionResources`, optional
+            The resources available to each quantum being executed.
+        raise_on_partial_outputs : `bool`, optional
+            If `True`, raise exceptions chained by
+            `.AnnotatedPartialOutputsError` immediately, instead of
+            considering the partial result a success and continuing to run
+            downstream tasks.
+        attach_datastore_records : `bool`, optional
+            Whether to attach datastore records to the quantum graph. This is
+            usually unnecessary, unless the executor is used to test behavior
+            that depends on datastore records.
+        output : `str`, optional
+            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
+            collection to create that will combine both inputs and outputs.
+        output_run : `str`, optional
+            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
+            directly hold all output datasets. If not provided, a name will be
+            created from ``output`` and a timestamp.
+
+        Returns
+        -------
+        executor : `SimplePipelineExecutor`
+            An executor instance containing the constructed `.QuantumGraph`
+            and `~lsst.daf.butler.Butler`, ready for `run` to be called.
+        """
+        if config is None:
+            config = task_class.ConfigClass()
+        if label is None:
+            label = task_class._DefaultName
+        if not isinstance(config, task_class.ConfigClass):
+            raise TypeError(
+                f"Invalid config class type: expected {task_class.ConfigClass.__name__}, "
+                f"got {type(config).__name__}."
+            )
+        pipeline_graph = PipelineGraph()
+        pipeline_graph.add_task(label=label, task_class=task_class, config=config)
+        return cls.from_pipeline_graph(
+            pipeline_graph,
+            butler=butler,
+            where=where,
+            bind=bind,
+            resources=resources,
+            raise_on_partial_outputs=raise_on_partial_outputs,
+            attach_datastore_records=attach_datastore_records,
+            output=output,
+            output_run=output_run,
+        )
+
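A minimal single-task test sketch, matching the unit-testing use case called out in the class notes; ``MyTask`` and its config field are hypothetical stand-ins for a real `.PipelineTask` subclass:

    from lsst.pipe.base.simple_pipeline_executor import SimplePipelineExecutor

    # MyTask: hypothetical PipelineTask subclass imported from your package.
    config = MyTask.ConfigClass()
    config.someOption = 2  # hypothetical config field
    executor = SimplePipelineExecutor.from_task_class(
        MyTask,
        config=config,
        butler=butler,  # e.g. from prep_butler, as above
    )
    executor.run(register_dataset_types=True)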
+    @classmethod
+    def from_pipeline(
+        cls,
+        pipeline: Pipeline,
+        *,
+        where: str = "",
+        bind: Mapping[str, Any] | None = None,
+        butler: Butler,
+        resources: ExecutionResources | None = None,
+        raise_on_partial_outputs: bool = True,
+        attach_datastore_records: bool = False,
+        output: str | None = None,
+        output_run: str | None = None,
+    ) -> SimplePipelineExecutor:
+        """Create an executor by building a QuantumGraph from an in-memory
+        pipeline.
+
+        Parameters
+        ----------
+        pipeline : `.Pipeline`
+            A Python object describing the tasks to run, along with their
+            labels and configuration.
+        where : `str`, optional
+            Data ID query expression that constrains the quanta generated.
+        bind : `~collections.abc.Mapping`, optional
+            Mapping containing literal values that should be injected into the
+            ``where`` expression, keyed by the identifiers they replace.
+        butler : `~lsst.daf.butler.Butler`
+            Butler that manages all I/O. `prep_butler` can be used to create
+            one.
+        resources : `.ExecutionResources`, optional
+            The resources available to each quantum being executed.
+        raise_on_partial_outputs : `bool`, optional
+            If `True`, raise exceptions chained by
+            `.AnnotatedPartialOutputsError` immediately, instead of
+            considering the partial result a success and continuing to run
+            downstream tasks.
+        attach_datastore_records : `bool`, optional
+            Whether to attach datastore records to the quantum graph. This is
+            usually unnecessary, unless the executor is used to test behavior
+            that depends on datastore records.
+        output : `str`, optional
+            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
+            collection to create that will combine both inputs and outputs.
+        output_run : `str`, optional
+            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
+            directly hold all output datasets. If not provided, a name will be
+            created from ``output`` and a timestamp.
+
+        Returns
+        -------
+        executor : `SimplePipelineExecutor`
+            An executor instance containing the constructed `.QuantumGraph`
+            and `~lsst.daf.butler.Butler`, ready for `run` to be called.
+        """
+        pipeline_graph = pipeline.to_graph()
+        return cls.from_pipeline_graph(
+            pipeline_graph,
+            where=where,
+            bind=bind,
+            butler=butler,
+            resources=resources,
+            raise_on_partial_outputs=raise_on_partial_outputs,
+            attach_datastore_records=attach_datastore_records,
+            output=output,
+            output_run=output_run,
+        )
+
+    @classmethod
+    def from_pipeline_graph(
+        cls,
+        pipeline_graph: PipelineGraph,
+        *,
+        where: str = "",
+        bind: Mapping[str, Any] | None = None,
+        butler: Butler,
+        resources: ExecutionResources | None = None,
+        raise_on_partial_outputs: bool = True,
+        attach_datastore_records: bool = False,
+        output: str | None = None,
+        output_run: str | None = None,
+    ) -> SimplePipelineExecutor:
+        """Create an executor by building a QuantumGraph from an in-memory
+        pipeline graph.
+
+        Parameters
+        ----------
+        pipeline_graph : `~.pipeline_graph.PipelineGraph`
+            A Python object describing the tasks to run, along with their
+            labels and configuration, in graph form. Will be resolved against
+            the given ``butler``, with any existing resolutions ignored.
+        where : `str`, optional
+            Data ID query expression that constrains the quanta generated.
+        bind : `~collections.abc.Mapping`, optional
+            Mapping containing literal values that should be injected into the
+            ``where`` expression, keyed by the identifiers they replace.
+        butler : `~lsst.daf.butler.Butler`
+            Butler that manages all I/O. `prep_butler` can be used to create
+            one. Must have its `~lsst.daf.butler.Butler.run` and
+            ``butler.collections.defaults`` set (neither empty nor `None`).
+        resources : `.ExecutionResources`, optional
+            The resources available to each quantum being executed.
+        raise_on_partial_outputs : `bool`, optional
+            If `True`, raise exceptions chained by
+            `.AnnotatedPartialOutputsError` immediately, instead of
+            considering the partial result a success and continuing to run
+            downstream tasks.
+        attach_datastore_records : `bool`, optional
+            Whether to attach datastore records to the quantum graph. This is
+            usually unnecessary, unless the executor is used to test behavior
+            that depends on datastore records.
+        output : `str`, optional
+            Name of a new output `~lsst.daf.butler.CollectionType.CHAINED`
+            collection to create that will combine both inputs and outputs.
+        output_run : `str`, optional
+            Name of the output `~lsst.daf.butler.CollectionType.RUN` that will
+            directly hold all output datasets. If not provided, a name will be
+            created from ``output`` and a timestamp.
+
+        Returns
+        -------
+        executor : `SimplePipelineExecutor`
+            An executor instance containing the constructed `.QuantumGraph`
+            and `~lsst.daf.butler.Butler`, ready for `run` to be called.
+        """
+        if output_run is None:
+            output_run = butler.run
+        if output_run is None:
+            if output is None:
+                raise TypeError("At least one of output or output_run must be provided.")
+            output_run = f"{output}/{Instrument.makeCollectionTimestamp()}"
+
+        quantum_graph_builder = AllDimensionsQuantumGraphBuilder(
+            pipeline_graph, butler, where=where, bind=bind, output_run=output_run
+        )
+        metadata = {
+            "input": list(butler.collections.defaults),
+            "output": output,
+            "output_run": output_run,
+            "skip_existing_in": [],
+            "skip_existing": False,
+            "data_query": where,
+            "user": getpass.getuser(),
+            "time": str(datetime.datetime.now()),
+        }
+        quantum_graph = quantum_graph_builder.build(
+            metadata=metadata, attach_datastore_records=attach_datastore_records
+        )
+        return cls(
+            quantum_graph=quantum_graph,
+            butler=butler,
+            resources=resources,
+            raise_on_partial_outputs=raise_on_partial_outputs,
+        )
+
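A sketch of driving this factory with a programmatically built graph, mirroring what `from_task_class` does for a single task; ``TaskA`` and ``TaskB`` are hypothetical `.PipelineTask` subclasses whose connections are assumed to form a valid pipeline:

    from lsst.pipe.base.pipeline_graph import PipelineGraph
    from lsst.pipe.base.simple_pipeline_executor import SimplePipelineExecutor

    pipeline_graph = PipelineGraph()
    pipeline_graph.add_task(label="taskA", task_class=TaskA, config=TaskA.ConfigClass())
    pipeline_graph.add_task(label="taskB", task_class=TaskB, config=TaskB.ConfigClass())
    executor = SimplePipelineExecutor.from_pipeline_graph(pipeline_graph, butler=butler)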
+    def use_local_butler(
+        self, root: str, register_dataset_types: bool = True, transfer_dimensions: bool = True
+    ) -> Butler:
+        """Transfer all inputs to a local data repository and set the executor
+        to write outputs to it.
+
+        Parameters
+        ----------
+        root : `str`
+            Path to the local data repository; created if it does not exist.
+        register_dataset_types : `bool`, optional
+            Whether to register dataset types in the new repository. If
+            `False`, the local data repository must already exist and already
+            have all input dataset types registered.
+        transfer_dimensions : `bool`, optional
+            Whether to transfer dimension records to the new repository. If
+            `False`, the local data repository must already exist and already
+            have all needed dimension records.
+
+        Returns
+        -------
+        butler : `lsst.daf.butler.Butler`
+            Writeable butler for the local data repository.
+
+        Notes
+        -----
+        The input collection structure from the original data repository is
+        not preserved by this method (it cannot be reconstructed from the
+        quantum graph). Instead, a `~lsst.daf.butler.CollectionType.TAGGED`
+        collection is created to gather all inputs, and appended to the output
+        `~lsst.daf.butler.CollectionType.CHAINED` collection after the output
+        `~lsst.daf.butler.CollectionType.RUN` collection. Calibration inputs
+        with the same data ID but multiple validity ranges are *not* included
+        in that `~lsst.daf.butler.CollectionType.TAGGED` collection; they are
+        still transferred to the local data repository, but can only be found
+        via the quantum graph or their original
+        `~lsst.daf.butler.CollectionType.RUN` collections.
+        """
+        if not os.path.exists(root):
+            Butler.makeRepo(root)
+        out_butler = Butler.from_config(root, writeable=True)
+
+        output_run = self.quantum_graph.metadata["output_run"]
+        out_butler.collections.register(output_run, CollectionType.RUN)
+        output = self.quantum_graph.metadata["output"]
+        inputs: str | None = None
+        if output is not None:
+            inputs = f"{output}/inputs"
+            out_butler.collections.register(output, CollectionType.CHAINED)
+            out_butler.collections.register(inputs, CollectionType.TAGGED)
+            out_butler.collections.redefine_chain(output, [output_run, inputs])
+
+        if transfer_dimensions:
+            # We can't just let the transfer_from call below take care of this
+            # because we need dimensions for outputs as well as inputs. And if
+            # we have to do the outputs explicitly, it's more efficient to do
+            # the inputs at the same time since a lot of those dimensions will
+            # be the same.
+            self._transfer_qg_dimension_records(out_butler)
+
+        # Extract overall-input DatasetRefs to transfer and possibly insert
+        # into a TAGGED collection.
+        refs: set[DatasetRef] = set()
+        to_tag_by_type: dict[str, dict[DataCoordinate, DatasetRef | None]] = {}
+        pipeline_graph = self.quantum_graph.pipeline_graph
+        for name, dataset_type_node in pipeline_graph.iter_overall_inputs():
+            assert dataset_type_node is not None, "PipelineGraph should be resolved."
+            to_tag_for_type = to_tag_by_type.setdefault(name, {})
+            for task_node in pipeline_graph.consumers_of(name):
+                for quantum in self.quantum_graph.get_task_quanta(task_node.label).values():
+                    for ref in quantum.inputs[name]:
+                        ref = dataset_type_node.generalize_ref(ref)
+                        refs.add(ref)
+                        if to_tag_for_type.setdefault(ref.dataId, ref) != ref:
+                            # There is already a dataset with the same data ID
+                            # and dataset type, but a different UUID/run. This
+                            # can only happen for calibrations found in
+                            # calibration collections, and for now we have no
+                            # choice but to leave them out of the TAGGED
+                            # inputs collection in the local butler.
+                            to_tag_for_type[ref.dataId] = None
+
+        out_butler.transfer_from(
+            self.butler,
+            refs,
+            register_dataset_types=register_dataset_types,
+            transfer_dimensions=False,
+        )
+
+        if inputs is not None:
+            to_tag_flat: list[DatasetRef] = []
+            for ref_map in to_tag_by_type.values():
+                for tag_ref in ref_map.values():
+                    if tag_ref is not None:
+                        to_tag_flat.append(tag_ref)
+            out_butler.registry.associate(inputs, to_tag_flat)
+
+        out_butler.registry.defaults = self.butler.registry.defaults.clone(
+            collections=output, run=output_run
+        )
+        self.butler = out_butler
+        return self.butler
+
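Usage sketch (the local path is hypothetical): build the executor against a shared repository, then redirect execution to a scratch repository:

    executor = SimplePipelineExecutor.from_pipeline_filename(
        "my_pipeline.yaml",
        butler=shared_butler,  # shared_butler: hypothetical, as built above
    )
    local_butler = executor.use_local_butler("/scratch/local_repo")
    # All subsequent reads and writes go through the local repository.
    executor.run(register_dataset_types=True)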
+    def run(self, register_dataset_types: bool = False, save_versions: bool = True) -> list[Quantum]:
+        """Run all the quanta in the `.QuantumGraph` in topological order.
+
+        Use this method to run all quanta in the graph. Use `as_generator` to
+        get a generator that runs the quanta one at a time.
+
+        Parameters
+        ----------
+        register_dataset_types : `bool`, optional
+            If `True`, register all output dataset types before executing any
+            quanta.
+        save_versions : `bool`, optional
+            If `True` (default), save a package versions dataset.
+
+        Returns
+        -------
+        quanta : `list` [ `~lsst.daf.butler.Quantum` ]
+            Executed quanta.
+
+        Notes
+        -----
+        A topological ordering is not in general unique, but no other
+        guarantees are made about the order in which quanta are processed.
+        """
+        return list(
+            self.as_generator(register_dataset_types=register_dataset_types, save_versions=save_versions)
+        )
+
+    def as_generator(
+        self, register_dataset_types: bool = False, save_versions: bool = True
+    ) -> Iterator[Quantum]:
+        """Yield quanta in the `.QuantumGraph` in topological order.
+
+        These quanta will be run as the returned generator is iterated over.
+        Use this method to run the quanta one at a time. Use `run` to run all
+        quanta in the graph.
+
+        Parameters
+        ----------
+        register_dataset_types : `bool`, optional
+            If `True`, register all output dataset types before executing any
+            quanta.
+        save_versions : `bool`, optional
+            If `True` (default), save a package versions dataset.
+
+        Returns
+        -------
+        quanta : `~collections.abc.Iterator` [ `~lsst.daf.butler.Quantum` ]
+            Executed quanta.
+
+        Notes
+        -----
+        Global initialization steps (see `.QuantumGraph.init_output_run`) are
+        performed immediately when this method is called, but individual
+        quanta are not actually executed until the returned iterator is
+        iterated over.
+
+        A topological ordering is not in general unique, but no other
+        guarantees are made about the order in which quanta are processed.
+        """
+        if register_dataset_types:
+            self.quantum_graph.pipeline_graph.register_dataset_types(self.butler)
+        self.quantum_graph.write_configs(self.butler, compare_existing=False)
+        self.quantum_graph.write_init_outputs(self.butler, skip_existing=False)
+        if save_versions:
+            self.quantum_graph.write_packages(self.butler, compare_existing=False)
+        task_factory = TaskFactory()
+        single_quantum_executor = SingleQuantumExecutor(
+            butler=self.butler,
+            task_factory=task_factory,
+            resources=self.resources,
+            raise_on_partial_outputs=self.raise_on_partial_outputs,
+        )
+        # Important that this returns a generator expression rather than being
+        # a generator itself; that is what makes the init steps above happen
+        # immediately instead of when the first quantum is executed, which
+        # might be useful for callers who want to check the state of the repo
+        # in between.
+        return (
+            single_quantum_executor.execute(qnode.task_node, qnode.quantum)[0]
+            for qnode in self.quantum_graph
+        )
+
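Because initialization happens eagerly while quanta run lazily, a caller can inspect the repository between quanta. A sketch, given an ``executor`` built by any of the factories above:

    for quantum in executor.as_generator(register_dataset_types=True):
        print("executed quantum with data ID", quantum.dataId)
        # The outputs of this quantum are now in executor.butler and can be
        # inspected before the next quantum runs.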
+    def _transfer_qg_dimension_records(self, out_butler: Butler) -> None:
+        """Transfer all dimension records from the quantum graph to a butler.
+
+        Parameters
+        ----------
+        out_butler : `lsst.daf.butler.Butler`
+            Butler to transfer records to.
+        """
+        pipeline_graph = self.quantum_graph.pipeline_graph
+        all_dimensions = DimensionGroup.union(
+            *pipeline_graph.group_by_dimensions(prerequisites=True).keys(),
+            universe=self.butler.dimensions,
+        )
+        dimension_data_extractor = DimensionDataExtractor.from_dimension_group(all_dimensions)
+        for task_node in pipeline_graph.tasks.values():
+            task_quanta = self.quantum_graph.get_task_quanta(task_node.label)
+            for quantum in task_quanta.values():
+                dimension_data_extractor.update([cast(DataCoordinate, quantum.dataId)])
+                for refs in itertools.chain(quantum.inputs.values(), quantum.outputs.values()):
+                    dimension_data_extractor.update(ref.dataId for ref in refs)
+        for element_name in all_dimensions.elements:
+            record_set = dimension_data_extractor.records.get(element_name)
+            if record_set and record_set.element.has_own_table:
+                out_butler.registry.insertDimensionData(
+                    record_set.element,
+                    *record_set,
+                    skip_existing=True,
+                )