lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. lsst/pipe/base/__init__.py +0 -1
  2. lsst/pipe/base/_datasetQueryConstraints.py +1 -1
  3. lsst/pipe/base/all_dimensions_quantum_graph_builder.py +10 -46
  4. lsst/pipe/base/caching_limited_butler.py +8 -4
  5. lsst/pipe/base/connectionTypes.py +19 -19
  6. lsst/pipe/base/connections.py +2 -2
  7. lsst/pipe/base/exec_fixup_data_id.py +131 -0
  8. lsst/pipe/base/execution_graph_fixup.py +69 -0
  9. lsst/pipe/base/graph/graphSummary.py +4 -4
  10. lsst/pipe/base/log_capture.py +227 -0
  11. lsst/pipe/base/mp_graph_executor.py +786 -0
  12. lsst/pipe/base/pipeline_graph/_pipeline_graph.py +40 -10
  13. lsst/pipe/base/pipeline_graph/_tasks.py +106 -0
  14. lsst/pipe/base/pipeline_graph/io.py +1 -1
  15. lsst/pipe/base/quantum_graph_builder.py +85 -58
  16. lsst/pipe/base/quantum_graph_executor.py +125 -0
  17. lsst/pipe/base/quantum_graph_skeleton.py +60 -1
  18. lsst/pipe/base/quantum_reports.py +334 -0
  19. lsst/pipe/base/script/transfer_from_graph.py +4 -1
  20. lsst/pipe/base/separable_pipeline_executor.py +296 -0
  21. lsst/pipe/base/simple_pipeline_executor.py +674 -0
  22. lsst/pipe/base/single_quantum_executor.py +635 -0
  23. lsst/pipe/base/taskFactory.py +18 -12
  24. lsst/pipe/base/tests/in_memory_limited_butler.py +223 -0
  25. lsst/pipe/base/tests/mocks/__init__.py +1 -0
  26. lsst/pipe/base/tests/mocks/_in_memory_repo.py +357 -0
  27. lsst/pipe/base/tests/mocks/_pipeline_task.py +19 -2
  28. lsst/pipe/base/version.py +1 -1
  29. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/METADATA +1 -1
  30. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/RECORD +38 -28
  31. lsst/pipe/base/executionButlerBuilder.py +0 -493
  32. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/WHEEL +0 -0
  33. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/entry_points.txt +0 -0
  34. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/COPYRIGHT +0 -0
  35. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/LICENSE +0 -0
  36. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/bsd_license.txt +0 -0
  37. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/gpl-v3.0.txt +0 -0
  38. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/top_level.txt +0 -0
  39. {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/zip-safe +0 -0
@@ -0,0 +1,296 @@
1
+ # This file is part of pipe_base.
2
+ #
3
+ # Developed for the LSST Data Management System.
4
+ # This product includes software developed by the LSST Project
5
+ # (http://www.lsst.org).
6
+ # See the COPYRIGHT file at the top-level directory of this distribution
7
+ # for details of code ownership.
8
+ #
9
+ # This software is dual licensed under the GNU General Public License and also
10
+ # under a 3-clause BSD license. Recipients may choose which of these licenses
11
+ # to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12
+ # respectively. If you choose the GPL option then the following text applies
13
+ # (but note that there is still no warranty even if you opt for BSD instead):
14
+ #
15
+ # This program is free software: you can redistribute it and/or modify
16
+ # it under the terms of the GNU General Public License as published by
17
+ # the Free Software Foundation, either version 3 of the License, or
18
+ # (at your option) any later version.
19
+ #
20
+ # This program is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ # GNU General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU General Public License
26
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
27
+
28
+
29
+ from __future__ import annotations
30
+
31
+ __all__ = [
32
+ "SeparablePipelineExecutor",
33
+ ]
34
+
35
+
36
+ import datetime
37
+ import getpass
38
+ import logging
39
+ from collections.abc import Iterable
40
+ from typing import Any
41
+
42
+ import lsst.resources
43
+ from lsst.daf.butler import Butler
44
+
45
+ from ._quantumContext import ExecutionResources
46
+ from .all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
47
+ from .graph import QuantumGraph
48
+ from .mp_graph_executor import MPGraphExecutor
49
+ from .pipeline import Pipeline
50
+ from .quantum_graph_builder import QuantumGraphBuilder
51
+ from .quantum_graph_executor import QuantumGraphExecutor
52
+ from .single_quantum_executor import SingleQuantumExecutor
53
+ from .taskFactory import TaskFactory
54
+
55
+ _LOG = logging.getLogger(__name__)
56
+
57
+
58
class SeparablePipelineExecutor:
    """An executor that allows each step of pipeline execution to be
    run independently.

    The executor can run any or all of the following steps:

    * pre-execution initialization
    * pipeline building
    * quantum graph generation
    * quantum graph execution

    Any of these steps can also be handed off to external code without
    compromising the remaining ones.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        A Butler whose ``collections`` and ``run`` attributes contain the input
        and output collections to use for processing.
    clobber_output : `bool`, optional
        If set, the pipeline execution overwrites existing output files.
        Otherwise, any conflict between existing and new outputs is an error.
    skip_existing_in : iterable [`str`], optional
        If not empty, the pipeline execution searches the listed collections
        for existing outputs, and skips any quanta that have run to completion
        (or have no work to do). Otherwise, all tasks are attempted (subject to
        ``clobber_output``). A single `str` is interpreted as one collection
        name.
    task_factory : `.TaskFactory`, optional
        A custom task factory for use in pre-execution and execution. By
        default, a new instance of `.TaskFactory` is used.
    resources : `.ExecutionResources`, optional
        The resources available to each quantum being executed.
    raise_on_partial_outputs : `bool`, optional
        If `True` raise exceptions chained by
        `.AnnotatedPartialOutputsError` immediately, instead of
        considering the partial result a success and continuing to run
        downstream tasks.

    Raises
    ------
    ValueError
        Raised if ``butler`` has no default input collections or no output
        run.
    """

    def __init__(
        self,
        butler: Butler,
        clobber_output: bool = False,
        skip_existing_in: Iterable[str] | None = None,
        task_factory: TaskFactory | None = None,
        resources: ExecutionResources | None = None,
        raise_on_partial_outputs: bool = True,
    ):
        self._butler = Butler.from_config(
            butler=butler, collections=butler.collections.defaults, run=butler.run
        )
        if not self._butler.collections.defaults:
            raise ValueError("Butler must specify input collections for pipeline.")
        if not self._butler.run:
            raise ValueError("Butler must specify output run for pipeline.")

        self._clobber_output = clobber_output
        if isinstance(skip_existing_in, str):
            # A bare string satisfies Iterable[str], but list() would split it
            # into single characters; treat it as one collection name instead.
            self._skip_existing_in = [skip_existing_in]
        else:
            self._skip_existing_in = list(skip_existing_in) if skip_existing_in else []

        # Compare against None rather than relying on truthiness, so that a
        # caller-supplied factory is never silently replaced.
        self._task_factory = task_factory if task_factory is not None else TaskFactory()
        self.resources = resources
        self.raise_on_partial_outputs = raise_on_partial_outputs

    def pre_execute_qgraph(
        self,
        graph: QuantumGraph,
        register_dataset_types: bool = False,
        save_init_outputs: bool = True,
        save_versions: bool = True,
    ) -> None:
        """Run pre-execution initialization.

        This method will be deprecated after DM-38041, to be replaced with a
        method that takes either a `.Pipeline` or a
        resolved `.pipeline_graph.PipelineGraph` instead of a `.QuantumGraph`.

        Parameters
        ----------
        graph : `.QuantumGraph`
            The quantum graph defining the pipeline and datasets to
            be initialized.
        register_dataset_types : `bool`, optional
            If `True`, register all output dataset types from the pipeline
            represented by ``graph``.
        save_init_outputs : `bool`, optional
            If `True`, create init-output datasets in this object's output run.
        save_versions : `bool`, optional
            If `True`, save a package versions dataset.
        """
        if register_dataset_types:
            graph.pipeline_graph.register_dataset_types(self._butler, include_packages=save_versions)
        if save_init_outputs:
            # Existing init-outputs are only tolerated when the output run is
            # itself one of the collections searched for existing outputs.
            graph.write_init_outputs(self._butler, skip_existing=(self._butler.run in self._skip_existing_in))
            graph.write_configs(self._butler)
        if save_versions:
            graph.write_packages(self._butler)

    def make_pipeline(self, pipeline_uri: str | lsst.resources.ResourcePath) -> Pipeline:
        """Build a pipeline from pipeline and configuration information.

        Parameters
        ----------
        pipeline_uri : `str` or `lsst.resources.ResourcePath`
            URI to a file containing a pipeline definition. A URI fragment may
            be used to specify a subset of the pipeline, as described in
            :ref:`pipeline-running-intro`.

        Returns
        -------
        pipeline : `.Pipeline`
            The fully-built pipeline.
        """
        return Pipeline.from_uri(pipeline_uri)

    def make_quantum_graph(
        self,
        pipeline: Pipeline,
        where: str = "",
        *,
        builder_class: type[QuantumGraphBuilder] = AllDimensionsQuantumGraphBuilder,
        attach_datastore_records: bool = False,
        **kwargs: Any,
    ) -> QuantumGraph:
        """Build a quantum graph from a pipeline and input datasets.

        Parameters
        ----------
        pipeline : `.Pipeline`
            The pipeline for which to generate a quantum graph.
        where : `str`, optional
            A data ID query that constrains the quanta generated. Must not be
            provided if a custom ``builder_class`` is given and that class does
            not accept ``where`` as a construction argument.
        builder_class : `type` [ \
                `.quantum_graph_builder.QuantumGraphBuilder` ], optional
            Quantum graph builder implementation. It is constructed with the
            pipeline graph, this executor's butler, ``skip_existing_in``,
            ``clobber``, and any extra ``**kwargs``.
        attach_datastore_records : `bool`, optional
            Whether to attach datastore records. These are currently used only
            by `lsst.daf.butler.QuantumBackedButler`, which is not used by
            `SeparablePipelineExecutor` for execution.
        **kwargs
            Additional keyword arguments are forwarded to ``builder_class``
            when a quantum graph builder instance is constructed. All
            arguments accepted by the
            `~.quantum_graph_builder.QuantumGraphBuilder` base
            class are provided automatically (from explicit arguments to this
            method and executor attributes) and do not need to be included
            as keyword arguments.

        Returns
        -------
        graph : `.QuantumGraph`
            The quantum graph for ``pipeline`` as run on the datasets
            identified by ``where``.

        Notes
        -----
        This method does no special handling of empty quantum graphs. If
        needed, clients can use `len` to test if the returned graph is empty.
        """
        metadata = {
            "input": self._butler.collections.defaults,
            "output_run": self._butler.run,
            "skip_existing_in": self._skip_existing_in,
            "skip_existing": bool(self._skip_existing_in),
            "data_query": where,
            "user": getpass.getuser(),
            "time": str(datetime.datetime.now()),
        }
        if where:
            # Only pass 'where' if it's actually provided, since some
            # QuantumGraphBuilder subclasses may not accept it.
            kwargs["where"] = where
        qg_builder = builder_class(
            pipeline.to_graph(),
            self._butler,
            skip_existing_in=self._skip_existing_in,
            clobber=self._clobber_output,
            **kwargs,
        )
        graph = qg_builder.build(metadata=metadata, attach_datastore_records=attach_datastore_records)
        _LOG.info(
            "QuantumGraph contains %d quanta for %d tasks, graph ID: %r",
            len(graph),
            len(graph.taskGraph),
            graph.graphID,
        )
        return graph

    def run_pipeline(
        self,
        graph: QuantumGraph,
        fail_fast: bool = False,
        graph_executor: QuantumGraphExecutor | None = None,
        num_proc: int = 1,
    ) -> None:
        """Run a pipeline in the form of a prepared quantum graph.

        Pre-execution initialization must have already been run;
        see `pre_execute_qgraph`.

        Parameters
        ----------
        graph : `.QuantumGraph`
            The pipeline and datasets to execute.
        fail_fast : `bool`, optional
            If `True`, abort all execution if any task fails when
            running with multiple processes. Only used with the default graph
            executor.
        graph_executor : `.quantum_graph_executor.QuantumGraphExecutor`,\
                optional
            A custom graph executor. By default, a new instance of
            `.mp_graph_executor.MPGraphExecutor` is used.
        num_proc : `int`, optional
            The number of processes that can be used to run the pipeline. The
            default value ensures that no subprocess is created. Only used with
            the default graph executor.
        """
        # Test for None explicitly: a custom executor must be honored even if
        # it happens to evaluate as falsy.
        if graph_executor is None:
            quantum_executor = SingleQuantumExecutor(
                butler=self._butler,
                task_factory=self._task_factory,
                skip_existing_in=self._skip_existing_in,
                clobber_outputs=self._clobber_output,
                resources=self.resources,
                raise_on_partial_outputs=self.raise_on_partial_outputs,
            )
            graph_executor = MPGraphExecutor(
                num_proc=num_proc,
                timeout=2_592_000.0,  # In practice, timeout is never helpful; set to 30 days.
                quantum_executor=quantum_executor,
                fail_fast=fail_fast,
            )
            # Have to reset connection pool to avoid sharing connections with
            # forked processes.
            self._butler.registry.resetConnectionPool()

        graph_executor.execute(graph)