lsst-pipe-base 30.2026.200__py3-none-any.whl → 30.2026.400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. lsst/pipe/base/_instrument.py +10 -12
  2. lsst/pipe/base/_status.py +29 -10
  3. lsst/pipe/base/automatic_connection_constants.py +9 -1
  4. lsst/pipe/base/cli/cmd/__init__.py +16 -2
  5. lsst/pipe/base/cli/cmd/commands.py +42 -4
  6. lsst/pipe/base/connectionTypes.py +72 -160
  7. lsst/pipe/base/connections.py +3 -6
  8. lsst/pipe/base/execution_reports.py +0 -5
  9. lsst/pipe/base/log_capture.py +8 -4
  10. lsst/pipe/base/log_on_close.py +79 -0
  11. lsst/pipe/base/mp_graph_executor.py +51 -15
  12. lsst/pipe/base/pipeline.py +3 -4
  13. lsst/pipe/base/pipelineIR.py +0 -6
  14. lsst/pipe/base/pipelineTask.py +5 -7
  15. lsst/pipe/base/pipeline_graph/_edges.py +19 -7
  16. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +8 -0
  17. lsst/pipe/base/quantum_graph/_common.py +7 -4
  18. lsst/pipe/base/quantum_graph/_multiblock.py +6 -16
  19. lsst/pipe/base/quantum_graph/_predicted.py +111 -10
  20. lsst/pipe/base/quantum_graph/_provenance.py +727 -26
  21. lsst/pipe/base/quantum_graph/aggregator/_communicators.py +26 -50
  22. lsst/pipe/base/quantum_graph/aggregator/_config.py +78 -9
  23. lsst/pipe/base/quantum_graph/aggregator/_ingester.py +12 -11
  24. lsst/pipe/base/quantum_graph/aggregator/_scanner.py +48 -234
  25. lsst/pipe/base/quantum_graph/aggregator/_structs.py +6 -116
  26. lsst/pipe/base/quantum_graph/aggregator/_supervisor.py +24 -18
  27. lsst/pipe/base/quantum_graph/aggregator/_writer.py +33 -350
  28. lsst/pipe/base/quantum_graph/formatter.py +171 -0
  29. lsst/pipe/base/quantum_graph/ingest_graph.py +356 -0
  30. lsst/pipe/base/quantum_graph_executor.py +116 -13
  31. lsst/pipe/base/quantum_provenance_graph.py +17 -2
  32. lsst/pipe/base/separable_pipeline_executor.py +18 -2
  33. lsst/pipe/base/single_quantum_executor.py +59 -41
  34. lsst/pipe/base/struct.py +4 -0
  35. lsst/pipe/base/version.py +1 -1
  36. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/METADATA +2 -1
  37. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/RECORD +45 -42
  38. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/WHEEL +1 -1
  39. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/entry_points.txt +0 -0
  40. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/COPYRIGHT +0 -0
  41. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/LICENSE +0 -0
  42. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/bsd_license.txt +0 -0
  43. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/licenses/gpl-v3.0.txt +0 -0
  44. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/top_level.txt +0 -0
  45. {lsst_pipe_base-30.2026.200.dist-info → lsst_pipe_base-30.2026.400.dist-info}/zip-safe +0 -0
@@ -0,0 +1,356 @@
1
+ # This file is part of pipe_base.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (http://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # This software is dual licensed under the GNU General Public License and also
10
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
11
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12
+ # respectively. If you choose the GPL option then the following text applies
13
+ # (but note that there is still no warranty even if you opt for BSD instead):
14
+ #
15
+ # This program is free software: you can redistribute it and/or modify
16
+ # it under the terms of the GNU General Public License as published by
17
+ # the Free Software Foundation, either version 3 of the License, or
18
+ # (at your option) any later version.
19
+ #
20
+ # This program is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ # GNU General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU General Public License
26
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
+
28
+ """A tool for ingesting provenance quantum graphs (written by the `aggregator`
29
+ module) and [re-]ingesting other datasets (metadata/logs/configs) backed by the
30
+ same file. This "finalizes" the RUN collection, prohibiting (at least
31
+ conceptually) further processing.
32
+
33
+ This always proceeds in three steps, so we can resume efficiently:
34
+
35
+ 1. First we ask the butler to "forget" any metadata/log/config datasets that
36
+ exist in the output RUN collection, removing any record of them from the
37
+ butler database while preserving their files.
38
+
39
+ 2. Next we ingest the ``run_provenance`` graph dataset itself.
40
+
41
+ 3. Finally, in batches of quanta, we use a
42
+ `~lsst.daf.butler.QuantumBackedButler` to delete the original
43
+ metadata/log/config files and ingest new versions of those datasets into the
44
+ butler.
45
+
46
+ Thus, at any point, if the ``run_provenance`` dataset has not been ingested,
47
+ we know any metadata/log/config datasets that have been ingested are backed by
48
+ the original files.
49
+
50
+ Moreover, if the ``run_provenance`` dataset has been ingested, any existing
51
+ metadata/log/config datasets must be backed by the graph file, and the original
52
+ files for those datasets will have been deleted.
53
+
54
+ We also know that at all times the metadata/log/config *content* is safely
55
+ present in either the original files in the butler storage or in an
56
+ already-ingested ``run_provenance`` dataset.
57
+ """
58
+
59
+ from __future__ import annotations
60
+
61
+ __all__ = ("ingest_graph",)
62
+
63
+ import dataclasses
64
+ import itertools
65
+ import uuid
66
+ from collections.abc import Iterator
67
+ from contextlib import contextmanager
68
+
69
+ from lsst.daf.butler import (
70
+ Butler,
71
+ Config,
72
+ DataCoordinate,
73
+ DatasetRef,
74
+ DatasetType,
75
+ FileDataset,
76
+ QuantumBackedButler,
77
+ )
78
+ from lsst.daf.butler.registry.sql_registry import SqlRegistry
79
+ from lsst.resources import ResourcePath, ResourcePathExpression
80
+ from lsst.utils.logging import getLogger
81
+
82
+ from ..automatic_connection_constants import PROVENANCE_DATASET_TYPE_NAME, PROVENANCE_STORAGE_CLASS
83
+ from ._provenance import (
84
+ ProvenanceDatasetInfo,
85
+ ProvenanceInitQuantumInfo,
86
+ ProvenanceQuantumGraph,
87
+ ProvenanceQuantumGraphReader,
88
+ ProvenanceQuantumInfo,
89
+ )
90
+ from .formatter import ProvenanceFormatter
91
+
92
+ _LOG = getLogger(__name__)
93
+
94
+
95
def ingest_graph(
    butler_config: str | Config,
    uri: ResourcePathExpression | None = None,
    *,
    transfer: str | None = "move",
    batch_size: int = 10000,
    output_run: str | None = None,
) -> None:
    """Ingest a provenance graph into a butler repository.

    Parameters
    ----------
    butler_config : `str` or `lsst.daf.butler.Config`
        Path or alias for the butler repository, or a butler repository
        config object.
    uri : convertible to `lsst.resources.ResourcePath` or `None`, optional
        Location of the provenance quantum graph to ingest. `None` indicates
        that the quantum graph has already been ingested, but other ingests
        and/or deletions failed and need to be resumed.
    transfer : `str` or `None`, optional
        Transfer mode for ingesting the graph file itself into the datastore
        (forwarded to `lsst.daf.butler.Butler.ingest`). Defaults to
        ``"move"``.
    batch_size : `int`, optional
        Number of datasets to process in each transaction.
    output_run : `str`, optional
        Output `~lsst.daf.butler.CollectionType.RUN` collection name. Only
        needs to be provided if ``uri`` is `None`. If it is provided the
        output run in the graph is checked against it.

    Notes
    -----
    After this operation, no further processing may be done in the
    `~lsst.daf.butler.CollectionType.RUN` collection.

    If this process is interrupted, it can pick up where it left off if run
    again (at the cost of some duplicate work to figure out how much progress
    it had made).
    """
    with _GraphIngester.open(butler_config, uri, output_run) as helper:
        # Learn what survived any previous (interrupted) attempt so we can
        # resume without duplicating work.
        helper.fetch_already_ingested_datasets()
        if not helper.graph_already_ingested:
            # The graph dataset is the commit point: datasets ingested
            # before it are backed by their original files and must be
            # forgotten first so they can be re-ingested backed by the
            # graph file.
            assert uri is not None
            helper.forget_ingested_datasets(batch_size=batch_size)
            helper.ingest_graph_dataset(uri, transfer=transfer)
        helper.clean_and_reingest_datasets(batch_size=batch_size)
137
+
138
+
139
@dataclasses.dataclass
class _GraphIngester:
    """Helper that implements the three-step resumable ingest described in
    the module docstring.
    """

    # Original repo config; needed again later to construct a
    # QuantumBackedButler in `make_qbb`.
    butler_config: str | Config
    # Writeable full butler opened on the output run.
    butler: Butler
    # The provenance graph being ingested.
    graph: ProvenanceQuantumGraph
    # Whether the graph dataset itself is already present in the registry.
    graph_already_ingested: bool
    # Total number of metadata/log/config datasets (for progress reporting).
    n_datasets: int
    # IDs of datasets currently present in the output run's registry.
    datasets_already_ingested: set[uuid.UUID] = dataclasses.field(default_factory=set)

    @property
    def output_run(self) -> str:
        """The output RUN collection name recorded in the graph header."""
        return self.graph.header.output_run

    @classmethod
    @contextmanager
    def open(
        cls,
        butler_config: str | Config,
        uri: ResourcePathExpression | None,
        output_run: str | None,
    ) -> Iterator[_GraphIngester]:
        """Construct an ingester as a context manager, closing the butler on
        exit.

        Parameters
        ----------
        butler_config : `str` or `lsst.daf.butler.Config`
            Path, alias, or config object for the butler repository.
        uri : convertible to `lsst.resources.ResourcePath` or `None`
            Location of the graph file, or `None` to resume after the graph
            dataset has already been ingested.
        output_run : `str` or `None`
            Expected output RUN collection; checked against the graph header
            when provided.
        """
        with Butler.from_config(butler_config, collections=output_run, writeable=True) as butler:
            butler.registry.registerDatasetType(
                DatasetType(PROVENANCE_DATASET_TYPE_NAME, butler.dimensions.empty, PROVENANCE_STORAGE_CLASS)
            )
            graph, graph_already_ingested = cls.read_graph(butler, uri)
            if output_run is not None and graph.header.output_run != output_run:
                raise ValueError(
                    f"Given output run {output_run!r} does not match the graph "
                    f"header {graph.header.output_run!r}."
                )
            # One metadata + one log dataset per quantum, plus one config
            # dataset per init quantum.
            n_datasets = 2 * len(graph.quantum_only_xgraph) + len(graph.init_quanta)
            yield cls(
                butler_config=butler_config,
                butler=butler,
                graph=graph,
                graph_already_ingested=graph_already_ingested,
                n_datasets=n_datasets,
            )

    @staticmethod
    def read_graph(
        butler: Butler,
        uri: ResourcePathExpression | None,
    ) -> tuple[ProvenanceQuantumGraph, bool]:
        """Read the provenance graph, either from ``uri`` or (when resuming
        with ``uri=None``) from the butler itself.

        Returns
        -------
        graph : `ProvenanceQuantumGraph`
            The provenance graph.
        already_ingested : `bool`
            Whether the graph dataset is already present in the output run.
        """
        if uri is not None:
            _LOG.info("Reading the pre-ingest provenance graph.")
            with ProvenanceQuantumGraphReader.open(uri) as reader:
                reader.read_quanta()
                reader.read_init_quanta()
                graph = reader.graph
            already_ingested = (
                butler.find_dataset(PROVENANCE_DATASET_TYPE_NAME, collections=[graph.header.output_run])
                is not None
            )
            return graph, already_ingested
        else:
            _LOG.info("Reading the already-ingested provenance graph.")
            # We don't need any regular dataset content, but we do need the
            # init quanta to find the config datasets.
            parameters = {"datasets": [], "read_init_quanta": True}
            return butler.get(PROVENANCE_DATASET_TYPE_NAME, parameters=parameters), True

    def fetch_already_ingested_datasets(self) -> None:
        """Populate `datasets_already_ingested` from the registry."""
        _LOG.info("Querying for existing datasets in %r.", self.output_run)
        self.datasets_already_ingested.update(self.butler.registry._fetch_run_dataset_ids(self.output_run))

    def iter_datasets(self) -> Iterator[tuple[uuid.UUID, ProvenanceDatasetInfo]]:
        """Iterate over all metadata, log, and config datasets in the graph,
        yielding ``(dataset_id, dataset_info)`` pairs.
        """
        xgraph = self.graph.bipartite_xgraph
        for task_label, quanta_for_task in self.graph.quanta_by_task.items():
            _LOG.verbose(
                "Batching up metadata and log datasets from %d %s quanta.", len(quanta_for_task), task_label
            )
            for quantum_id in quanta_for_task.values():
                quantum_info: ProvenanceQuantumInfo = xgraph.nodes[quantum_id]
                metadata_id = quantum_info["metadata_id"]
                yield metadata_id, xgraph.nodes[metadata_id]
                log_id = quantum_info["log_id"]
                yield log_id, xgraph.nodes[log_id]
        _LOG.verbose("Batching up config datasets from %d tasks.", len(self.graph.init_quanta))
        for task_label, quantum_id in self.graph.init_quanta.items():
            init_quantum_info: ProvenanceInitQuantumInfo = xgraph.nodes[quantum_id]
            config_id = init_quantum_info["config_id"]
            yield config_id, xgraph.nodes[config_id]

    def forget_ingested_datasets(self, batch_size: int) -> None:
        """Step 1: drop registry/datastore records (but not files) for
        datasets still backed by their original files.
        """
        _LOG.info(
            "Dropping database records for metadata/log/config datasets backed by their original files."
        )
        to_forget: list[DatasetRef] = []
        n_forgotten: int = 0
        n_skipped: int = 0
        for dataset_id, dataset_info in self.iter_datasets():
            if dataset_info["produced"] and dataset_id in self.datasets_already_ingested:
                to_forget.append(self._make_ref_from_info(dataset_id, dataset_info))
                self.datasets_already_ingested.remove(dataset_id)
                if len(to_forget) >= batch_size:
                    n_forgotten += self._run_forget(to_forget, n_forgotten + n_skipped)
            else:
                n_skipped += 1
        # Flush any final partial batch.
        n_forgotten += self._run_forget(to_forget, n_forgotten + n_skipped)
        _LOG.info(
            "Removed database records for %d metadata/log/config datasets, while %d were already absent.",
            n_forgotten,
            n_skipped,
        )

    def _run_forget(self, to_forget: list[DatasetRef], n_current: int) -> int:
        """Forget one batch of datasets in a single transaction, clearing
        ``to_forget`` and returning the number forgotten.
        """
        if to_forget:
            _LOG.verbose(
                "Forgetting a %d-dataset batch; %d/%d forgotten so far or already absent.",
                len(to_forget),
                n_current,
                self.n_datasets,
            )
            with self.butler.registry.transaction():
                self.butler._datastore.forget(to_forget)
                self.butler.registry.removeDatasets(to_forget)
        n = len(to_forget)
        to_forget.clear()
        return n

    def ingest_graph_dataset(self, uri: ResourcePathExpression, transfer: str | None) -> None:
        """Step 2: ingest the ``run_provenance`` graph dataset itself."""
        _LOG.info("Ingesting the provenance quantum graph.")
        dataset_type = DatasetType(
            PROVENANCE_DATASET_TYPE_NAME, self.butler.dimensions.empty, PROVENANCE_STORAGE_CLASS
        )
        # Registration is idempotent; `open` has already registered this
        # type, but re-registering is harmless.
        self.butler.registry.registerDatasetType(dataset_type)
        ref = DatasetRef(dataset_type, DataCoordinate.make_empty(self.butler.dimensions), run=self.output_run)
        uri = ResourcePath(uri)
        self.butler.ingest(
            # We use .abspath() since butler assumes paths are relative to the
            # repo root, while users expect them to be relative to the CWD in
            # this context.
            FileDataset(refs=[ref], path=uri.abspath(), formatter=ProvenanceFormatter),
            transfer=transfer,
        )

    def clean_and_reingest_datasets(self, batch_size: int) -> None:
        """Step 3: delete original metadata/log/config files and re-ingest
        those datasets backed by the already-ingested graph file.
        """
        _LOG.info(
            "Deleting original metadata/log/config files and re-ingesting them with provenance graph backing."
        )
        direct_uri = self.butler.getURI(PROVENANCE_DATASET_TYPE_NAME, collections=[self.output_run])
        qbb = self.make_qbb()
        to_process: list[DatasetRef] = []
        n_processed: int = 0
        n_skipped: int = 0
        n_not_produced: int = 0
        for dataset_id, dataset_info in self.iter_datasets():
            if not dataset_info["produced"]:
                n_not_produced += 1
            elif dataset_id not in self.datasets_already_ingested:
                to_process.append(self._make_ref_from_info(dataset_id, dataset_info))
                if len(to_process) >= batch_size:
                    n_processed += self._run_clean_and_ingest(
                        qbb, direct_uri, to_process, n_processed + n_skipped
                    )
            else:
                n_skipped += 1
        # Flush any final partial batch.
        n_processed += self._run_clean_and_ingest(qbb, direct_uri, to_process, n_processed + n_skipped)
        _LOG.info(
            "Deleted and re-ingested %d metadata/log/config datasets "
            "(%d had already been processed, %d were not produced).",
            n_processed,
            n_skipped,
            n_not_produced,
        )

    def _run_clean_and_ingest(
        self, qbb: QuantumBackedButler, direct_uri: ResourcePath, to_process: list[DatasetRef], n_current: int
    ) -> int:
        """Delete original files for one batch and ingest new graph-backed
        datasets, clearing ``to_process`` and returning the number processed.
        """
        if not to_process:
            return 0
        _LOG.verbose(
            "Deleting and re-ingesting a %d-dataset batch; %d/%d complete.",
            len(to_process),
            n_current,
            self.n_datasets,
        )
        sql_registry: SqlRegistry = self.butler._registry  # type: ignore[attr-defined]
        expanded_refs = sql_registry.expand_refs(to_process)
        # We need to pass predict=True to keep QBB/FileDatastore from wasting
        # time doing existence checks, since ResourcePath.mremove will ignore
        # nonexistent files anyway.
        original_uris = list(
            itertools.chain.from_iterable(
                ref_uris.iter_all() for ref_uris in qbb.get_many_uris(expanded_refs, predict=True).values()
            )
        )
        removal_status = ResourcePath.mremove(original_uris, do_raise=False)
        for path, status in removal_status.items():
            # Missing files are expected when resuming an interrupted run.
            if not status.success and not isinstance(status.exception, FileNotFoundError):
                assert status.exception is not None, "Exception should be set if success=False."
                status.exception.add_note(f"Attempting to delete original file at {path}.")
                raise status.exception
        file_dataset = FileDataset(refs=expanded_refs, path=direct_uri, formatter=ProvenanceFormatter)
        self.butler.ingest(file_dataset, transfer=None)
        n = len(to_process)
        to_process.clear()
        return n

    @staticmethod
    def _make_ref_from_info(dataset_id: uuid.UUID, dataset_info: ProvenanceDatasetInfo) -> DatasetRef:
        """Reconstruct a `~lsst.daf.butler.DatasetRef` from a graph node's
        dataset info, preserving the original dataset ID.
        """
        return DatasetRef(
            dataset_info["pipeline_node"].dataset_type,
            dataset_info["data_id"],
            run=dataset_info["run"],
            id=dataset_id,
        )

    def make_qbb(self) -> QuantumBackedButler:
        """Make a quantum-backed butler for locating the original files
        without going through the registry.
        """
        dataset_types = {d.name: d.dataset_type for d in self.graph.pipeline_graph.dataset_types.values()}
        return QuantumBackedButler.from_predicted(
            config=self.butler_config,
            predicted_inputs=(),
            predicted_outputs=(),
            dimensions=self.butler.dimensions,
            datastore_records={},
            dataset_types=dataset_types,
        )
@@ -27,23 +27,113 @@
27
27
 
28
28
  from __future__ import annotations
29
29
 
30
- __all__ = ["QuantumExecutor", "QuantumGraphExecutor"]
30
+ __all__ = ["QuantumExecutionResult", "QuantumExecutor", "QuantumGraphExecutor"]
31
31
 
32
32
  from abc import ABC, abstractmethod
33
- from typing import TYPE_CHECKING
33
+ from typing import TYPE_CHECKING, Self
34
+
35
+ from lsst.daf.butler import Quantum
34
36
 
35
37
  from .quantum_reports import QuantumReport, Report
36
38
 
37
39
  if TYPE_CHECKING:
38
40
  import uuid
39
41
 
40
- from lsst.daf.butler import Quantum
42
+ from lsst.daf.butler.logging import ButlerLogRecords
41
43
 
44
+ from ._task_metadata import TaskMetadata
42
45
  from .graph import QuantumGraph
43
46
  from .pipeline_graph import TaskNode
44
47
  from .quantum_graph import PredictedQuantumGraph
45
48
 
46
49
 
50
class QuantumExecutionResult(tuple[Quantum, QuantumReport | None]):
    """Information about the execution of a single quantum.

    Parameters
    ----------
    quantum : `lsst.daf.butler.Quantum`
        Quantum that was executed.
    report : `.quantum_reports.QuantumReport`
        Report with basic information about the execution.
    task_metadata : `TaskMetadata`, optional
        Metadata saved by the task and executor during execution.
    skipped_existing : `bool`, optional
        If `True`, this quantum was not executed because it appeared to have
        already been executed successfully.
    adjusted_no_work : `bool`, optional
        If `True`, this quantum was not executed because the
        `PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.

    Notes
    -----
    For backwards compatibility, this class is a two-element tuple that allows
    the ``quantum`` and ``report`` attributes to be unpacked. Additional
    regular attributes may be added by executors (but the tuple must remain
    only two elements to enable the current unpacking interface).
    """

    # Tuple subclasses receive constructor arguments in __new__; only the
    # (quantum, report) pair participates in the tuple itself, while the
    # keyword-only extras are attached as regular attributes in __init__.
    def __new__(
        cls,
        quantum: Quantum,
        report: QuantumReport | None,
        *,
        task_metadata: TaskMetadata | None = None,
        skipped_existing: bool | None = None,
        adjusted_no_work: bool | None = None,
    ) -> Self:
        return super().__new__(cls, (quantum, report))

    # __init__ exists alongside __new__ so numpydoc has a conventional
    # signature to document.
    def __init__(
        self,
        quantum: Quantum,
        report: QuantumReport | None,
        *,
        task_metadata: TaskMetadata | None = None,
        skipped_existing: bool | None = None,
        adjusted_no_work: bool | None = None,
    ):
        self._extras = (task_metadata, skipped_existing, adjusted_no_work)

    @property
    def quantum(self) -> Quantum:
        """The quantum actually executed."""
        executed_quantum, _ = self
        return executed_quantum

    @property
    def report(self) -> QuantumReport | None:
        """Structure describing the status of the execution of a quantum.

        This is `None` if the implementation does not support this feature.
        """
        _, execution_report = self
        return execution_report

    @property
    def task_metadata(self) -> TaskMetadata | None:
        """Metadata saved by the task and executor during execution."""
        return self._extras[0]

    @property
    def skipped_existing(self) -> bool | None:
        """If `True`, this quantum was not executed because it appeared to have
        already been executed successfully.
        """
        return self._extras[1]

    @property
    def adjusted_no_work(self) -> bool | None:
        """If `True`, this quantum was not executed because the
        `PipelineTaskConnections.adjustQuanta` hook raised `NoWorkFound`.
        """
        return self._extras[2]
135
+
136
+
47
137
  class QuantumExecutor(ABC):
48
138
  """Class which abstracts execution of a single Quantum.
49
139
 
@@ -55,8 +145,14 @@ class QuantumExecutor(ABC):
55
145
 
56
146
  @abstractmethod
57
147
  def execute(
58
- self, task_node: TaskNode, /, quantum: Quantum, quantum_id: uuid.UUID | None = None
59
- ) -> tuple[Quantum, QuantumReport | None]:
148
+ self,
149
+ task_node: TaskNode,
150
+ /,
151
+ quantum: Quantum,
152
+ quantum_id: uuid.UUID | None = None,
153
+ *,
154
+ log_records: ButlerLogRecords | None = None,
155
+ ) -> QuantumExecutionResult:
60
156
  """Execute single quantum.
61
157
 
62
158
  Parameters
@@ -67,15 +163,18 @@ class QuantumExecutor(ABC):
67
163
  Quantum for this execution.
68
164
  quantum_id : `uuid.UUID` or `None`, optional
69
165
  The ID of the quantum to be executed.
166
+ log_records : `lsst.daf.butler.ButlerLogRecords`, optional
167
+ Container that should be used to store logs in memory before
168
+ writing them to the butler. This disables log streaming (since
169
+ we'd have to store them in memory anyway), but it permits the
170
+ caller to prepend logs to be stored in the butler and allows task
171
+ logs to be inspected by the caller after execution is complete.
70
172
 
71
173
  Returns
72
174
  -------
73
- quantum : `~lsst.daf.butler.Quantum`
74
- The quantum actually executed.
75
- report : `~.quantum_reports.QuantumReport`
76
- Structure describing the status of the execution of a quantum.
77
- `None` is returned if implementation does not support this
78
- feature.
175
+ result : `QuantumExecutionResult`
176
+ Result struct. May also be unpacked as a 2-tuple (see type
177
+ documentation).
79
178
 
80
179
  Notes
81
180
  -----
@@ -93,7 +192,9 @@ class QuantumGraphExecutor(ABC):
93
192
  """
94
193
 
95
194
  @abstractmethod
96
- def execute(self, graph: QuantumGraph | PredictedQuantumGraph) -> None:
195
+ def execute(
196
+ self, graph: QuantumGraph | PredictedQuantumGraph, *, provenance_graph_file: str | None = None
197
+ ) -> None:
97
198
  """Execute whole graph.
98
199
 
99
200
  Implementation of this method depends on particular execution model
@@ -103,8 +204,10 @@ class QuantumGraphExecutor(ABC):
103
204
 
104
205
  Parameters
105
206
  ----------
106
- graph : `.QuantumGraph`
207
+ graph : `.QuantumGraph` or `.quantum_graph.PredictedQuantumGraph`
107
208
  Execution graph.
209
+ provenance_graph_file : `str`, optional
210
+ A filename to write provenance to.
108
211
  """
109
212
  raise NotImplementedError()
110
213
 
@@ -79,6 +79,7 @@ from .automatic_connection_constants import (
79
79
  METADATA_OUTPUT_CONNECTION_NAME,
80
80
  METADATA_OUTPUT_STORAGE_CLASS,
81
81
  METADATA_OUTPUT_TEMPLATE,
82
+ PROVENANCE_DATASET_TYPE_NAME,
82
83
  )
83
84
  from .graph import QuantumGraph, QuantumNode
84
85
 
@@ -1513,8 +1514,22 @@ class QuantumProvenanceGraph:
1513
1514
  len(self._datasets.keys()),
1514
1515
  )
1515
1516
  if use_qbb:
1516
- _LOG.verbose("Using quantum-backed butler for metadata loads.")
1517
- self._butler_wrappers[output_run] = _ThreadLocalButlerWrapper.wrap_qbb(butler, qgraph)
1517
+ provenance_graph_ref: DatasetRef | None = None
1518
+ try:
1519
+ provenance_graph_ref = butler.find_dataset(
1520
+ PROVENANCE_DATASET_TYPE_NAME, collections=output_run
1521
+ )
1522
+ except MissingDatasetTypeError:
1523
+ pass
1524
+ if provenance_graph_ref is not None:
1525
+ _LOG.warning(
1526
+ "Cannot use QBB for metadata/log reads after provenance has been ingested; "
1527
+ "falling back to full butler."
1528
+ )
1529
+ self._butler_wrappers[output_run] = _ThreadLocalButlerWrapper.wrap_full(butler)
1530
+ else:
1531
+ _LOG.verbose("Using quantum-backed butler for metadata loads.")
1532
+ self._butler_wrappers[output_run] = _ThreadLocalButlerWrapper.wrap_qbb(butler, qgraph)
1518
1533
  else:
1519
1534
  _LOG.verbose("Using full butler for metadata loads.")
1520
1535
  self._butler_wrappers[output_run] = _ThreadLocalButlerWrapper.wrap_full(butler)
@@ -40,7 +40,8 @@ from collections.abc import Iterable
40
40
  from typing import Any
41
41
 
42
42
  import lsst.resources
43
- from lsst.daf.butler import Butler
43
+ from lsst.daf.butler import Butler, DatasetRef
44
+ from lsst.daf.butler._rubin.temporary_for_ingest import TemporaryForIngest
44
45
 
45
46
  from ._quantumContext import ExecutionResources
46
47
  from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
@@ -362,6 +363,8 @@ class SeparablePipelineExecutor:
362
363
  fail_fast: bool = False,
363
364
  graph_executor: QuantumGraphExecutor | None = None,
364
365
  num_proc: int = 1,
366
+ *,
367
+ provenance_dataset_ref: DatasetRef | None = None,
365
368
  ) -> None:
366
369
  """Run a pipeline in the form of a prepared quantum graph.
367
370
 
@@ -384,6 +387,14 @@ class SeparablePipelineExecutor:
384
387
  The number of processes that can be used to run the pipeline. The
385
388
  default value ensures that no subprocess is created. Only used with
386
389
  the default graph executor.
390
+ provenance_dataset_ref : `lsst.daf.butler.DatasetRef`, optional
391
+ Dataset that should be used to save provenance. Provenance is only
392
+ supported when running in a single process (at least for the
393
+ default quantum executor), and should not be used with
394
+ ``skip_existing_in=[output_run]`` when retrying a previous
395
+ execution attempt. The caller is responsible for registering the
396
+ dataset type and for ensuring that the dimensions of this dataset
397
+ do not lead to uniqueness conflicts.
387
398
  """
388
399
  if not graph_executor:
389
400
  quantum_executor = SingleQuantumExecutor(
@@ -404,4 +415,9 @@ class SeparablePipelineExecutor:
404
415
  # forked processes.
405
416
  self._butler.registry.resetConnectionPool()
406
417
 
407
- graph_executor.execute(graph)
418
+ if provenance_dataset_ref is not None:
419
+ with TemporaryForIngest(self._butler, provenance_dataset_ref) as temporary:
420
+ graph_executor.execute(graph, provenance_graph_file=temporary.ospath)
421
+ temporary.ingest()
422
+ else:
423
+ graph_executor.execute(graph)