lsst-pipe-base 29.2025.4800__py3-none-any.whl → 30.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. lsst/pipe/base/_instrument.py +6 -5
  2. lsst/pipe/base/caching_limited_butler.py +3 -0
  3. lsst/pipe/base/log_capture.py +39 -79
  4. lsst/pipe/base/log_on_close.py +79 -0
  5. lsst/pipe/base/mp_graph_executor.py +51 -15
  6. lsst/pipe/base/quantum_graph/_common.py +4 -3
  7. lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
  8. lsst/pipe/base/quantum_graph/_predicted.py +106 -12
  9. lsst/pipe/base/quantum_graph/_provenance.py +657 -6
  10. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +18 -50
  11. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +14 -3
  12. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +49 -232
  13. lsst/pipe/base/quantum_graph/aggregator/_structs.py +3 -113
  14. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +10 -5
  15. lsst/pipe/base/quantum_graph/aggregator/_writer.py +31 -348
  16. lsst/pipe/base/quantum_graph/formatter.py +101 -0
  17. lsst/pipe/base/quantum_graph_builder.py +12 -1
  18. lsst/pipe/base/quantum_graph_executor.py +116 -13
  19. lsst/pipe/base/quantum_graph_skeleton.py +1 -7
  20. lsst/pipe/base/script/register_instrument.py +4 -4
  21. lsst/pipe/base/script/retrieve_artifacts_for_quanta.py +5 -6
  22. lsst/pipe/base/script/transfer_from_graph.py +42 -42
  23. lsst/pipe/base/script/zip_from_graph.py +7 -8
  24. lsst/pipe/base/separable_pipeline_executor.py +18 -2
  25. lsst/pipe/base/simple_pipeline_executor.py +4 -3
  26. lsst/pipe/base/single_quantum_executor.py +70 -34
  27. lsst/pipe/base/tests/mocks/_repo.py +44 -16
  28. lsst/pipe/base/tests/simpleQGraph.py +43 -35
  29. lsst/pipe/base/version.py +1 -1
  30. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/METADATA +1 -1
  31. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/RECORD +39 -37
  32. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/WHEEL +1 -1
  33. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/entry_points.txt +0 -0
  34. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/COPYRIGHT +0 -0
  35. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/LICENSE +0 -0
  36. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/bsd_license.txt +0 -0
  37. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/licenses/gpl-v3.0.txt +0 -0
  38. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/top_level.txt +0 -0
  39. {lsst_pipe_base-29.2025.4800.dist-info → lsst_pipe_base-30.0.0.dist-info}/zip-safe +0 -0
lsst/pipe/base/quantum_graph/formatter.py

@@ -0,0 +1,101 @@
+ # This file is part of pipe_base.
+ #
+ # Developed for the LSST Data Management System.
+ # This product includes software developed by the LSST Project
+ # (http://www.lsst.org).
+ # See the COPYRIGHT file at the top-level directory of this distribution
+ # for details of code ownership.
+ #
+ # This software is dual licensed under the GNU General Public License and also
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
+ # respectively. If you choose the GPL option then the following text applies
+ # (but note that there is still no warranty even if you opt for BSD instead):
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ from __future__ import annotations
+
+ __all__ = ("ProvenanceFormatter",)
+
+ import uuid
+ from typing import Any, ClassVar
+
+ import pydantic
+
+ from lsst.daf.butler import FormatterV2
+ from lsst.resources import ResourcePath
+ from lsst.utils.logging import getLogger
+
+ from ..pipeline_graph import TaskImportMode
+ from ._provenance import ProvenanceQuantumGraphReader
+
+ _LOG = getLogger(__file__)
+
+
+ class _ProvenanceFormatterParameters(pydantic.BaseModel):
+     """A Pydantic model for validating and applying defaults to the
+     read parameters of `ProvenanceFormatter`.
+     """
+
+     import_mode: TaskImportMode = TaskImportMode.DO_NOT_IMPORT
+     quanta: list[uuid.UUID] | None = None
+     datasets: list[uuid.UUID] | None = None
+     read_init_quanta: bool = True
+
+     @pydantic.field_validator("quanta", mode="before")
+     @classmethod
+     def quanta_to_list(cls, v: Any) -> list[uuid.UUID]:
+         return list(v)
+
+     @pydantic.field_validator("datasets", mode="before")
+     @classmethod
+     def datasets_to_list(cls, v: Any) -> list[uuid.UUID]:
+         return list(v)
+
+     @property
+     def nodes(self) -> list[uuid.UUID]:
+         if self.quanta is not None:
+             if self.datasets is not None:
+                 return self.quanta + self.datasets
+             else:
+                 return self.quanta
+         elif self.datasets is not None:
+             return self.datasets
+         raise ValueError("'datasets' and/or 'quanta' parameters are required for this component")
+
+
+ class ProvenanceFormatter(FormatterV2):
+     """Butler interface for reading `ProvenanceQuantumGraph` objects."""
+
+     default_extension: ClassVar[str] = ".qg"
+     can_read_from_uri: ClassVar[bool] = True
+
+     def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
+         parameters = _ProvenanceFormatterParameters.model_validate(self.file_descriptor.parameters or {})
+         with ProvenanceQuantumGraphReader.open(uri, import_mode=parameters.import_mode) as reader:
+             match component:
+                 case None:
+                     if parameters.read_init_quanta:
+                         reader.read_init_quanta()
+                     reader.read_quanta(parameters.quanta)
+                     reader.read_datasets(parameters.datasets)
+                     return reader.graph
+                 case "metadata":
+                     return reader.fetch_metadata(parameters.nodes)
+                 case "logs":
+                     return reader.fetch_logs(parameters.nodes)
+                 case "packages":
+                     return reader.fetch_packages()
+         raise AssertionError(f"Unexpected component {component!r}.")
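
Taken together, the parameters model and the `match` on `component` imply a Butler read path along these lines. This is a minimal sketch, not part of the diff: it assumes a repository whose provenance-graph dataset type uses this formatter, an already-resolved `DatasetRef` named `provenance_ref`, and a `list[uuid.UUID]` of quantum IDs `quantum_ids` obtained elsewhere; `DatasetRef.makeComponentRef` is the usual daf_butler way to address a component.

from lsst.daf.butler import Butler

with Butler.from_config("/repo/example") as butler:  # repo path is illustrative
    # Full read: init quanta plus any requested quanta/datasets.
    graph = butler.get(provenance_ref, parameters={"read_init_quanta": True})

    # Component reads ("metadata", "logs", "packages") go through the same
    # formatter; "metadata" and "logs" need 'quanta' and/or 'datasets' so
    # that _ProvenanceFormatterParameters.nodes is non-empty.
    logs = butler.get(
        provenance_ref.makeComponentRef("logs"),
        parameters={"quanta": quantum_ids},
    )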
lsst/pipe/base/quantum_graph_builder.py

@@ -1095,11 +1095,13 @@ class QuantumGraphBuilder(ABC):
              to `lsst.daf.butler.DatastoreRecordData`, as used by
              `lsst.daf.butler.Quantum`.
          """
+         self.log.info("Fetching and attaching datastore records for all overall inputs.")
          overall_inputs = skeleton.extract_overall_inputs()
          exported_records = self.butler._datastore.export_records(overall_inputs.values())
          for task_label in self._pipeline_graph.tasks:
              if not skeleton.has_task(task_label):
                  continue
+             self.log.verbose("Fetching and attaching datastore records for task %s.", task_label)
              task_init_key = skeleton.get_task_init_node(task_label)
              init_input_ids = {
                  ref.id
@@ -1152,12 +1154,14 @@ class QuantumGraphBuilder(ABC):
          """
          from .graph import QuantumGraph

+         self.log.info("Transforming graph skeleton into a QuantumGraph instance.")
          quanta: dict[TaskDef, set[Quantum]] = {}
          init_inputs: dict[TaskDef, Iterable[DatasetRef]] = {}
          init_outputs: dict[TaskDef, Iterable[DatasetRef]] = {}
          for task_def in self._pipeline_graph._iter_task_defs():
              if not skeleton.has_task(task_def.label):
                  continue
+             self.log.verbose("Transforming graph skeleton nodes for task %s.", task_def.label)
              task_node = self._pipeline_graph.tasks[task_def.label]
              task_init_key = skeleton.get_task_init_node(task_def.label)
              task_init_state = skeleton[task_init_key]
@@ -1198,7 +1202,8 @@ class QuantumGraphBuilder(ABC):
              ref = skeleton.get_dataset_ref(dataset_key)
              assert ref is not None, "Global init input refs should be resolved already."
              global_init_outputs.append(ref)
-         return QuantumGraph(
+         self.log.verbose("Invoking QuantumGraph class constructor.")
+         result = QuantumGraph(
              quanta,
              metadata=all_metadata,
              universe=self.universe,
@@ -1207,6 +1212,8 @@ class QuantumGraphBuilder(ABC):
              globalInitOutputs=global_init_outputs,
              registryDatasetTypes=registry_dataset_types,
          )
+         self.log.info("Graph build complete.")
+         return result

      @final
      @timeMethod
@@ -1243,6 +1250,7 @@ class QuantumGraphBuilder(ABC):
              PredictedQuantumGraphComponents,
          )

+         self.log.info("Transforming graph skeleton into PredictedQuantumGraph components.")
          components = PredictedQuantumGraphComponents(pipeline_graph=self._pipeline_graph)
          components.header.inputs = list(self.input_collections)
          components.header.output_run = self.output_run
@@ -1270,6 +1278,7 @@ class QuantumGraphBuilder(ABC):
          for task_node in self._pipeline_graph.tasks.values():
              if not skeleton.has_task(task_node.label):
                  continue
+             self.log.verbose("Transforming graph skeleton nodes for task %s.", task_node.label)
              task_init_key = TaskInitKey(task_node.label)
              init_quantum_datasets = PredictedQuantumDatasetsModel.model_construct(
                  quantum_id=generate_uuidv7(),
@@ -1315,8 +1324,10 @@ class QuantumGraphBuilder(ABC):
                  },
              )
              components.quantum_datasets[quantum_datasets.quantum_id] = quantum_datasets
+         self.log.verbose("Building the thin summary graph.")
          components.set_thin_graph()
          components.set_header_counts()
+         self.log.info("Graph build complete.")
          return components

      @staticmethod
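
The new progress messages are split between `info` and the extra VERBOSE level that `lsst.utils.logging` inserts between INFO and DEBUG, so the per-task chatter stays hidden at default verbosity. A minimal sketch of that behavior outside the builder (the logger name is illustrative):

from lsst.utils.logging import VERBOSE, getLogger

log = getLogger("lsst.pipe.base.quantumGraphBuilder")
log.info("Graph build complete.")  # visible at the default INFO level
log.verbose("Transforming graph skeleton nodes for task %s.", "isr")  # hidden by default
log.setLevel(VERBOSE)  # opt in to the per-task messages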
lsst/pipe/base/quantum_graph_executor.py

@@ -27,23 +27,113 @@

  from __future__ import annotations

- __all__ = ["QuantumExecutor", "QuantumGraphExecutor"]
+ __all__ = ["QuantumExecutionResult", "QuantumExecutor", "QuantumGraphExecutor"]

  from abc import ABC, abstractmethod
- from typing import TYPE_CHECKING
+ from typing import TYPE_CHECKING, Self
+
+ from lsst.daf.butler import Quantum

  from .quantum_reports import QuantumReport, Report

  if TYPE_CHECKING:
      import uuid

-     from lsst.daf.butler import Quantum
+     from lsst.daf.butler.logging import ButlerLogRecords

+     from ._task_metadata import TaskMetadata
      from .graph import QuantumGraph
      from .pipeline_graph import TaskNode
      from .quantum_graph import PredictedQuantumGraph


+ class QuantumExecutionResult(tuple[Quantum, QuantumReport | None]):
+     """A result struct that captures information about a single quantum's
+     execution.
+
+     Parameters
+     ----------
+     quantum : `lsst.daf.butler.Quantum`
+         Quantum that was executed.
+     report : `.quantum_reports.QuantumReport`
+         Report with basic information about the execution.
+     task_metadata : `TaskMetadata`, optional
+         Metadata saved by the task and executor during execution.
+     skipped_existing : `bool`, optional
+         If `True`, this quantum was not executed because it appeared to have
+         already been executed successfully.
+     adjusted_no_work : `bool`, optional
+         If `True`, this quantum was not executed because the
+         `PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
+
+     Notes
+     -----
+     For backwards compatibility, this class is a two-element tuple that allows
+     the ``quantum`` and ``report`` attributes to be unpacked. Additional
+     regular attributes may be added by executors (but the tuple must remain
+     only two elements to enable the current unpacking interface).
+     """
+
+     def __new__(
+         cls,
+         quantum: Quantum,
+         report: QuantumReport | None,
+         *,
+         task_metadata: TaskMetadata | None = None,
+         skipped_existing: bool | None = None,
+         adjusted_no_work: bool | None = None,
+     ) -> Self:
+         return super().__new__(cls, (quantum, report))
+
+     # We need to define both __init__ and __new__ because tuple inheritance
+     # requires __new__ and numpydoc requires __init__.
+
+     def __init__(
+         self,
+         quantum: Quantum,
+         report: QuantumReport | None,
+         *,
+         task_metadata: TaskMetadata | None = None,
+         skipped_existing: bool | None = None,
+         adjusted_no_work: bool | None = None,
+     ):
+         self._task_metadata = task_metadata
+         self._skipped_existing = skipped_existing
+         self._adjusted_no_work = adjusted_no_work
+
+     @property
+     def quantum(self) -> Quantum:
+         """The quantum actually executed."""
+         return self[0]
+
+     @property
+     def report(self) -> QuantumReport | None:
+         """Structure describing the status of the execution of a quantum.
+
+         This is `None` if the implementation does not support this feature.
+         """
+         return self[1]
+
+     @property
+     def task_metadata(self) -> TaskMetadata | None:
+         """Metadata saved by the task and executor during execution."""
+         return self._task_metadata
+
+     @property
+     def skipped_existing(self) -> bool | None:
+         """If `True`, this quantum was not executed because it appeared to have
+         already been executed successfully.
+         """
+         return self._skipped_existing
+
+     @property
+     def adjusted_no_work(self) -> bool | None:
+         """If `True`, this quantum was not executed because the
+         `PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
+         """
+         return self._adjusted_no_work
+
+
  class QuantumExecutor(ABC):
      """Class which abstracts execution of a single Quantum.

@@ -55,8 +145,14 @@ class QuantumExecutor(ABC):

      @abstractmethod
      def execute(
-         self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
-     ) -> tuple[Quantum, QuantumReport | None]:
+         self,
+         task_node: TaskNode,
+         /,
+         quantum: Quantum,
+         quantum_id: uuid.UUID | None = None,
+         *,
+         log_records: ButlerLogRecords | None = None,
+     ) -> QuantumExecutionResult:
          """Execute single quantum.

          Parameters
@@ -67,15 +163,18 @@
              Quantum for this execution.
          quantum_id : `uuid.UUID` or `None`, optional
              The ID of the quantum to be executed.
+         log_records : `lsst.daf.butler.ButlerLogRecords`, optional
+             Container that should be used to store logs in memory before
+             writing them to the butler. This disables streaming log (since
+             we'd have to store them in memory anyway), but it permits the
+             caller to prepend logs to be stored in the butler and allows task
+             logs to be inspected by the caller after execution is complete.

          Returns
          -------
-         quantum : `~lsst.daf.butler.Quantum`
-             The quantum actually executed.
-         report : `~.quantum_reports.QuantumReport`
-             Structure describing the status of the execution of a quantum.
-             `None` is returned if implementation does not support this
-             feature.
+         result : `QuantumExecutionResult`
+             Result struct. May also be unpacked as a 2-tuple (see type
+             documentation).

          Notes
          -----
@@ -93,7 +192,9 @@ class QuantumGraphExecutor(ABC):
      """

      @abstractmethod
-     def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
+     def execute(
+         self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
+     ) -> None:
          """Execute whole graph.

          Implementation of this method depends on particular execution model
@@ -103,8 +204,10 @@ class QuantumGraphExecutor(ABC):

          Parameters
          ----------
-         graph : `.QuantumGraph`
+         graph : `.QuantumGraph` or `.quantum_graph.PredictedQuantumGraph`
              Execution graph.
+         provenance_graph_file : `str`, optional
+             A filename to write provenance to.
          """
          raise NotImplementedError()

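Because `QuantumExecutionResult` is itself a two-element tuple, pre-30.0.0 call sites that unpack the old return value keep working, while new call sites can read the extra attributes. A minimal sketch, assuming a concrete `executor` plus `task_node`, `quantum`, and `quantum_id` are already in hand:

result = executor.execute(task_node, quantum, quantum_id=quantum_id)

# Old interface: plain 2-tuple unpacking still works.
executed_quantum, report = result

# New interface: optional attributes populated by the executor.
if result.skipped_existing:
    print(f"quantum {quantum_id} skipped: outputs already exist")
elif result.adjusted_no_work:
    print(f"quantum {quantum_id} skipped: adjustQuanta raised NoWorkFound")
elif result.task_metadata is not None:
    print("task metadata was captured during execution")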
lsst/pipe/base/quantum_graph_skeleton.py

@@ -383,12 +383,6 @@ class QuantumGraphSkeleton:
              The dataset ref of the prerequisite.
          **attrs : `~typing.Any`
              Additional attributes for the node.
-
-         Notes
-         -----
-         This automatically sets the 'existing_input' ref attribute (see
-         `set_existing_input_ref`), since prerequisites are always overall
-         inputs.
          """
          key = PrerequisiteDatasetKey(ref.datasetType.name, ref.id.bytes)
          self._xgraph.add_node(key, data_id=ref.dataId, ref=ref, **attrs)
@@ -606,7 +600,7 @@ class QuantumGraphSkeleton:

      def set_output_in_the_way(self, ref: DatasetRef) -> None:
          """Associate a dataset node with a `DatasetRef` that represents an
-         existing output in the output RUN collectoin.
+         existing output in the output RUN collection.

          Parameters
          ----------
lsst/pipe/base/script/register_instrument.py

@@ -53,7 +53,7 @@ def register_instrument(repo: str, instrument: list[str], update: bool = False)
          Raised iff the instrument is not a subclass of
          `lsst.pipe.base.Instrument`.
      """
-     butler = Butler.from_config(repo, writeable=True)
-     for string in instrument:
-         instrument_instance = Instrument.from_string(string, butler.registry)
-         instrument_instance.register(butler.registry, update=update)
+     with Butler.from_config(repo, writeable=True) as butler:
+         for string in instrument:
+             instrument_instance = Instrument.from_string(string, butler.registry)
+             instrument_instance.register(butler.registry, update=update)
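
This is one instance of a pattern repeated across the script changes below: `Butler` and `QuantumBackedButler` instances are now used as context managers so that registry connections and datastore resources are released deterministically, even when an exception escapes the block. The shape of the pattern in isolation (repo path is illustrative):

from lsst.daf.butler import Butler

with Butler.from_config("/repo/main", writeable=True) as butler:
    ...  # registry/datastore work happens here
# On exit the butler's resources are closed, including on error, rather
# than being left to garbage collection.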
lsst/pipe/base/script/retrieve_artifacts_for_quanta.py

@@ -93,16 +93,15 @@ def retrieve_artifacts_for_quanta(
      dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

      # Make QBB, its config is the same as output Butler.
-     qbb = QuantumBackedButler.from_predicted(
+     with QuantumBackedButler.from_predicted(
          config=repo,
          predicted_inputs=[ref.id for ref in refs],
          predicted_outputs=[],
          dimensions=qgraph.universe,
          datastore_records=datastore_records,
          dataset_types=dataset_types,
-     )
-
-     paths = qbb.retrieve_artifacts(
-         refs, dest, transfer=transfer, overwrite=clobber, preserve_path=preserve_path
-     )
+     ) as qbb:
+         paths = qbb.retrieve_artifacts(
+             refs, dest, transfer=transfer, overwrite=clobber, preserve_path=preserve_path
+         )
      return paths
lsst/pipe/base/script/transfer_from_graph.py

@@ -85,52 +85,52 @@ def transfer_from_graph(
      # Get data repository dataset type definitions from the QuantumGraph.
      dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

-     # Make QBB, its config is the same as output Butler.
-     qbb = QuantumBackedButler.from_predicted(
-         config=dest,
-         predicted_inputs=[ref.id for ref in output_refs],
-         predicted_outputs=[],
-         dimensions=qgraph.universe,
-         datastore_records={},
-         dataset_types=dataset_types,
-     )
-
      # Filter the refs based on requested dataset types.
      filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
      _LOG.verbose("After filtering by dataset_type, number of datasets to transfer: %d", len(filtered_refs))

-     dest_butler = Butler.from_config(dest, writeable=True)
-
-     # For faster restarts, filter out those the destination already knows.
-     filtered_refs = filter_by_existence(dest_butler, filtered_refs)
-
-     # Transfer in chunks
-     chunk_size = 50_000
-     n_chunks = math.ceil(len(filtered_refs) / chunk_size)
-     chunk_num = 0
-     count = 0
-     for chunk in chunk_iterable(filtered_refs, chunk_size=chunk_size):
-         chunk_num += 1
-         if n_chunks > 1:
-             _LOG.verbose("Transferring %d datasets in chunk %d/%d", len(chunk), chunk_num, n_chunks)
-         transferred = dest_butler.transfer_from(
-             qbb,
-             chunk,
-             transfer="auto",
-             register_dataset_types=register_dataset_types,
-             transfer_dimensions=transfer_dimensions,
-             dry_run=dry_run,
-         )
-         count += len(transferred)
-
-     # If asked to do so, update output chain definition.
-     if update_output_chain and (metadata := qgraph.metadata) is not None:
-         # These are defined in CmdLineFwk.
-         output_run = metadata.get("output_run")
-         output = metadata.get("output")
-         input = metadata.get("input")
-         if output_run is not None and output is not None:
-             _update_chain(dest_butler, output, output_run, input)
+     # Make QBB, its config is the same as output Butler.
+     with (
+         QuantumBackedButler.from_predicted(
+             config=dest,
+             predicted_inputs=[ref.id for ref in output_refs],
+             predicted_outputs=[],
+             dimensions=qgraph.universe,
+             datastore_records={},
+             dataset_types=dataset_types,
+         ) as qbb,
+         Butler.from_config(dest, writeable=True) as dest_butler,
+     ):
+         # For faster restarts, filter out those the destination already knows.
+         filtered_refs = filter_by_existence(dest_butler, filtered_refs)
+
+         # Transfer in chunks
+         chunk_size = 50_000
+         n_chunks = math.ceil(len(filtered_refs) / chunk_size)
+         chunk_num = 0
+         count = 0
+         for chunk in chunk_iterable(filtered_refs, chunk_size=chunk_size):
+             chunk_num += 1
+             if n_chunks > 1:
+                 _LOG.verbose("Transferring %d datasets in chunk %d/%d", len(chunk), chunk_num, n_chunks)
+             transferred = dest_butler.transfer_from(
+                 qbb,
+                 chunk,
+                 transfer="auto",
+                 register_dataset_types=register_dataset_types,
+                 transfer_dimensions=transfer_dimensions,
+                 dry_run=dry_run,
+             )
+             count += len(transferred)
+
+         # If asked to do so, update output chain definition.
+         if update_output_chain and (metadata := qgraph.metadata) is not None:
+             # These are defined in CmdLineFwk.
+             output_run = metadata.get("output_run")
+             output = metadata.get("output")
+             input = metadata.get("input")
+             if output_run is not None and output is not None:
+                 _update_chain(dest_butler, output, output_run, input)

      return count

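The chunk accounting above is easy to misread at a glance; here it is in isolation with small numbers. A minimal sketch using `chunk_iterable` from `lsst.utils.iteration`, the helper the script calls:

import math

from lsst.utils.iteration import chunk_iterable

refs = list(range(7))  # stand-in for filtered_refs
chunk_size = 3
n_chunks = math.ceil(len(refs) / chunk_size)  # ceil(7 / 3) == 3
for chunk_num, chunk in enumerate(chunk_iterable(refs, chunk_size=chunk_size), start=1):
    print(f"chunk {chunk_num}/{n_chunks}: {len(chunk)} refs")  # 3, 3, then 1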
lsst/pipe/base/script/zip_from_graph.py

@@ -72,19 +72,18 @@ def zip_from_graph(
      # Get data repository dataset type definitions from the QuantumGraph.
      dataset_types = {dstype.name: dstype for dstype in qgraph.registryDatasetTypes()}

+     # Filter the refs based on requested dataset types.
+     filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
+
      # Make QBB, its config is the same as output Butler.
-     qbb = QuantumBackedButler.from_predicted(
+     with QuantumBackedButler.from_predicted(
          config=repo,
          predicted_inputs=[ref.id for ref in output_refs],
          predicted_outputs=[],
          dimensions=qgraph.universe,
          datastore_records={},
          dataset_types=dataset_types,
-     )
-
-     # Filter the refs based on requested dataset types.
-     filtered_refs = filter_by_dataset_type_glob(output_refs, dataset_type)
-
-     _LOG.info("Retrieving artifacts for %d datasets and storing in Zip file.", len(filtered_refs))
-     zip = qbb.retrieve_artifacts_zip(filtered_refs, dest)
+     ) as qbb:
+         _LOG.info("Retrieving artifacts for %d datasets and storing in Zip file.", len(filtered_refs))
+         zip = qbb.retrieve_artifacts_zip(filtered_refs, dest)
      return zip
lsst/pipe/base/separable_pipeline_executor.py

@@ -40,7 +40,8 @@ from collections.abc import Iterable
  from typing import Any

  import lsst.resources
- from lsst.daf.butler import Butler
+ from lsst.daf.butler import Butler, DatasetRef
+ from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest

  from ._quantumContext import ExecutionResources
  from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
@@ -362,6 +363,8 @@ class SeparablePipelineExecutor:
          fail_fast: bool = False,
          graph_executor: QuantumGraphExecutor | None = None,
          num_proc: int = 1,
+         *,
+         provenance_dataset_ref: DatasetRef | None = None,
      ) -> None:
          """Run a pipeline in the form of a prepared quantum graph.

@@ -384,6 +387,14 @@ class SeparablePipelineExecutor:
              The number of processes that can be used to run the pipeline. The
              default value ensures that no subprocess is created. Only used with
              the default graph executor.
+         provenance_dataset_ref : `lsst.daf.butler.DatasetRef`, optional
+             Dataset that should be used to save provenance. Provenance is only
+             supported when running in a single process (at least for the
+             default quantum executor), and should not be used with
+             ``skip_existing_in=[output_run]`` when retrying a previous
+             execution attempt. The caller is responsible for registering the
+             dataset type and for ensuring that the dimensions of this dataset
+             do not lead to uniqueness conflicts.
          """
          if not graph_executor:
              quantum_executor = SingleQuantumExecutor(
@@ -404,4 +415,9 @@ class SeparablePipelineExecutor:
          # forked processes.
          self._butler.registry.resetConnectionPool()

-         graph_executor.execute(graph)
+         if provenance_dataset_ref is not None:
+             with TemporaryForIngest(self._butler, provenance_dataset_ref) as temporary:
+                 graph_executor.execute(graph, provenance_graph_file=temporary.ospath)
+                 temporary.ingest()
+         else:
+             graph_executor.execute(graph)
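
From the caller's side, the new keyword threads through to `TemporaryForIngest` as shown above. A minimal sketch, assuming the method being modified here is `SeparablePipelineExecutor.run_pipeline`, that `executor` and `graph` already exist, and that the caller has registered the dataset type and resolved `provenance_ref` (per the docstring's requirements):

# Provenance is only supported single-process for the default quantum
# executor, so keep num_proc=1 when requesting it.
executor.run_pipeline(
    graph,
    num_proc=1,
    provenance_dataset_ref=provenance_ref,
)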
lsst/pipe/base/simple_pipeline_executor.py

@@ -40,6 +40,7 @@ from lsst.daf.butler import (
      DatasetRef,
      Quantum,
  )
+ from lsst.daf.butler.registry import RegistryDefaults
  from lsst.pex.config import Config

  from ._instrument import Instrument
@@ -152,9 +153,9 @@ class SimplePipelineExecutor:
          collections = [output_run]
          collections.extend(inputs)
          butler.registry.setCollectionChain(output, collections)
-         # Remake butler to let it infer default data IDs from collections, now
-         # that those collections exist.
-         return Butler.from_config(butler=butler, collections=[output], run=output_run)
+         # Override the registry defaults. No need to clone.
+         butler.registry.defaults = RegistryDefaults(collections=[output], run=output_run)
+         return butler

      @classmethod
      def from_pipeline_filename(
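
The replaced comment explains the motivation: the old code rebuilt the whole `Butler` just to pick up new default collections, while the new code assigns fresh `RegistryDefaults` to the live registry. A minimal sketch of the same move outside the executor (collection names are illustrative):

from lsst.daf.butler.registry import RegistryDefaults

# Re-point the default input collections and output run on an existing
# butler instead of constructing a second one.
butler.registry.defaults = RegistryDefaults(collections=["u/me/output"], run="u/me/output/run1")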