lsst-pipe-base 29.2025.3100-py3-none-any.whl → 29.2025.3300-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/__init__.py +0 -1
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +4 -42
- lsst/pipe/base/caching_limited_butler.py +8 -4
- lsst/pipe/base/graph/graphSummary.py +4 -4
- lsst/pipe/base/mp_graph_executor.py +21 -9
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +40 -10
- lsst/pipe/base/pipeline_graph/_tasks.py +106 -0
- lsst/pipe/base/pipeline_graph/io.py +1 -1
- lsst/pipe/base/quantum_graph_builder.py +42 -16
- lsst/pipe/base/quantum_graph_skeleton.py +60 -1
- lsst/pipe/base/single_quantum_executor.py +10 -11
- lsst/pipe/base/tests/in_memory_limited_butler.py +223 -0
- lsst/pipe/base/tests/mocks/__init__.py +1 -0
- lsst/pipe/base/tests/mocks/_in_memory_repo.py +357 -0
- lsst/pipe/base/tests/mocks/_pipeline_task.py +19 -2
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/RECORD +26 -25
- lsst/pipe/base/executionButlerBuilder.py +0 -493
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3100.dist-info → lsst_pipe_base-29.2025.3300.dist-info}/zip-safe +0 -0

lsst/pipe/base/single_quantum_executor.py (modified):

```diff
@@ -66,12 +66,12 @@ class SingleQuantumExecutor(QuantumExecutor):
 
     Parameters
     ----------
-    butler : `~lsst.daf.butler.Butler` or `None`
-        Data butler, `None` means that
-
-
-
-    skip_existing_in : `~
+    butler : `~lsst.daf.butler.Butler` or `None`, optional
+        Data butler, `None` means that a limited butler should be used instead.
+    task_factory : `.TaskFactory`, optional
+        Instance of a task factory. Defaults to a new instance of
+        `lsst.pipe.base.TaskFactory`.
+    skip_existing_in : `str` or `~collections.abc.Iterable` [ `str` ]
         Expressions representing the collections to search for existing output
         datasets. See :ref:`daf_butler_ordered_collection_searches` for allowed
         types. This class only checks for the presence of butler output run in
@@ -117,8 +117,8 @@ class SingleQuantumExecutor(QuantumExecutor):
     def __init__(
         self,
         *,
-        butler: Butler | None,
-        task_factory: TaskFactory,
+        butler: Butler | None = None,
+        task_factory: TaskFactory | None = None,
         skip_existing_in: Any = None,
         clobber_outputs: bool = False,
         enable_lsst_debug: bool = False,
@@ -130,7 +130,7 @@ class SingleQuantumExecutor(QuantumExecutor):
         job_metadata: Mapping[str, int | str | float] | None = None,
     ):
         self._butler = butler
-        self._task_factory = task_factory
+        self._task_factory = task_factory if task_factory is not None else TaskFactory()
         self._clobber_outputs = clobber_outputs
         self._enable_lsst_debug = enable_lsst_debug
         self._limited_butler_factory = limited_butler_factory
@@ -171,8 +171,7 @@ class SingleQuantumExecutor(QuantumExecutor):
         """
         startTime = time.time()
 
-        # Make a limited butler instance if needed
-        # butler is not defined).
+        # Make a limited butler instance if needed.
         limited_butler: LimitedButler
         if self._butler is not None:
             limited_butler = self._butler
```
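
Taken together, these hunks make `SingleQuantumExecutor` constructible without an explicit butler or task factory. A minimal sketch of the new calling convention (not part of the diff itself; it borrows the `InMemoryLimitedButler` added below and assumes the default daf_butler dimension universe):

```python
from lsst.daf.butler import DimensionUniverse
from lsst.pipe.base.single_quantum_executor import SingleQuantumExecutor
from lsst.pipe.base.tests.in_memory_limited_butler import InMemoryLimitedButler

# Both `butler` and `task_factory` may now be omitted: a `None` butler means
# a limited butler is used instead, and a `None` task factory falls back to
# a fresh `TaskFactory()` inside __init__.
limited = InMemoryLimitedButler(DimensionUniverse())
executor = SingleQuantumExecutor(limited_butler_factory=limited.factory)
```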
lsst/pipe/base/tests/in_memory_limited_butler.py (new file, +223 lines):

```python
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["InMemoryLimitedButler"]

import logging
import uuid
from collections.abc import Iterable
from typing import Any

from lsst.daf.butler import (
    ButlerMetrics,
    DatasetProvenance,
    DatasetRef,
    DatasetType,
    DimensionUniverse,
    LimitedButler,
    MissingDatasetTypeError,
    Quantum,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.registry import ConflictingDefinitionError

from .._dataset_handle import InMemoryDatasetHandle

_LOG = logging.getLogger(__name__)


class InMemoryLimitedButler(LimitedButler):
    """A `LimitedButler` that just stores datasets in an in-memory mapping.

    Parameters
    ----------
    universe : `lsst.daf.butler.DimensionUniverse`
        Definitions for all dimensions.
    dataset_types : `~collections.abc.Iterable` [ \
            `lsst.daf.butler.DatasetType` ]
        Definitions of all dataset types.

    Notes
    -----
    This is an incomplete implementation of the `LimitedButler` interface
    intended only for tests. It supports all methods required by
    `SingleQuantumExecutor`, but not transfers or URI retrieval.

    While this class supports storage class conversions in `get` and `put`, it
    uses different code paths from real butlers, and should not be used in
    tests in which storage class correctness is part of what is being tested.

    Objects are always copied (via storage class machinery) by `get`.

    Pickling this class will pickle all datasets already `put` (which must be
    pickleable). This generally allows a central butler to be initialized with
    input datasets in one process and distributed to worker processes that run
    quanta *once*, but it does not allow outputs from a worker process to be
    distributed to others or the originating process. This can be hard to
    notice because quanta will usually be skipped with
    `lsst.pipe.base.NoWorkFound` (a success!) when all of their inputs are
    missing.
    """

    def __init__(self, universe: DimensionUniverse, dataset_types: Iterable[DatasetType] = ()):
        self.storageClasses = StorageClassFactory()
        self._universe = universe
        self._datasets: dict[uuid.UUID, tuple[DatasetRef, InMemoryDatasetHandle]] = {}
        self._metrics = ButlerMetrics()
        self._dataset_types = {dt.name: dt for dt in dataset_types}
        assert not any(dt.component() for dt in self._dataset_types.values()), (
            "Dataset type definitions must not be components."
        )

    def __getstate__(self) -> dict[str, Any]:
        # Pickle customization is needed because StorageClassFactory is not
        # pickleable.
        return {
            "universe": self._universe,
            "datasets": self._datasets,
            "dataset_types": self._dataset_types,
        }

    def __setstate__(self, state: dict[str, Any]) -> None:
        self.storageClasses = StorageClassFactory()
        self._universe = state["universe"]
        self._datasets = state["datasets"]
        self._metrics = ButlerMetrics()
        self._dataset_types = state["dataset_types"]

    def get_datasets(self, dataset_type: str | None = None) -> dict[DatasetRef, object]:
        """Return datasets that have been `put` to this butler.

        Storage classes and corresponding Python types will match the dataset
        type definitions provided at butler construction, which may not be the
        same as what was `put`.

        Parameters
        ----------
        dataset_type : `str`, optional
            Dataset type name used to filter results.

        Returns
        -------
        refs : `dict` [ `lsst.daf.butler.DatasetRef`, `object` ]
            Datasets held by this butler.
        """
        return {
            ref: handle.get()
            for ref, handle in self._datasets.values()
            if dataset_type is None or dataset_type == ref.datasetType.name
        }

    def isWriteable(self) -> bool:
        return True

    def put(self, obj: Any, ref: DatasetRef, /, *, provenance: DatasetProvenance | None = None) -> DatasetRef:
        with self._metrics.instrument_put():
            assert not ref.isComponent(), "Component dataset types cannot be put."
            if ref.id in self._datasets:
                # Some butlers may not raise reliably when a dataset already
                # exists (it's hard to be rigorous in parallel given different
                # guarantees provided by storage), but we don't want code to
                # rely on it not being an error, so we want a test butler to
                # always complain.
                raise ConflictingDefinitionError(f"Dataset {ref} already exists.")
            if (repo_dataset_type := self._dataset_types.get(ref.datasetType.name)) is None:
                raise MissingDatasetTypeError(f"Dataset type {ref.datasetType.name!r} not recognized.")
            repo_dataset_type.storageClass.coerce_type(obj)
            self._datasets[ref.id] = (
                ref.overrideStorageClass(repo_dataset_type.storageClass),
                InMemoryDatasetHandle(
                    obj,
                    storageClass=repo_dataset_type.storageClass,
                    dataId=ref.dataId,
                    copy=True,
                ),
            )
            return ref

    def get(
        self,
        ref: DatasetRef,
        /,
        *,
        parameters: dict[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        with self._metrics.instrument_get():
            if storageClass is None:
                storageClass = ref.datasetType.storageClass
            elif isinstance(storageClass, str):
                storageClass = self.storageClasses.getStorageClass(storageClass)
            if entry := self._datasets.get(ref.id):
                (ref, handle) = entry
                return handle.get(
                    component=ref.datasetType.component(), parameters=parameters, storageClass=storageClass
                )
            raise FileNotFoundError(f"Dataset {ref} does not exist.")

    def stored_many(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
        return {ref: ref.id in self._datasets for ref in refs}

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        for ref in refs:
            self._datasets.pop(ref.id, None)

    @property
    def _datastore(self) -> Any:
        raise NotImplementedError("This test butler does not have a datastore.")

    @_datastore.setter  # demanded by MyPy since we declare it to be an instance attribute in LimitedButler.
    def _datastore(self, value: Any) -> None:
        raise NotImplementedError("This test butler does not have a datastore.")

    @property
    def dimensions(self) -> DimensionUniverse:
        return self._universe

    def factory(self, quantum: Quantum) -> InMemoryLimitedButler:
        """Return ``self``.

        This method can be used as the ``limited_butler_factory`` argument to
        `.single_quantum_executor.SingleQuantumExecutor`.

        Parameters
        ----------
        quantum : `lsst.daf.butler.Quantum`
            Ignored.
        """
        return self
```
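
A minimal round-trip through the new test butler might look like the sketch below. The dataset type name, run name, use of the stock `StructuredDataDict` storage class, and the empty data ID are illustrative assumptions, not part of the diff:

```python
from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionUniverse
from lsst.pipe.base.tests.in_memory_limited_butler import InMemoryLimitedButler

universe = DimensionUniverse()  # default daf_butler dimension configuration
# A dimensionless dataset type using a stock storage class (assumed here).
dataset_type = DatasetType("test_summary", [], "StructuredDataDict", universe=universe)
butler = InMemoryLimitedButler(universe, [dataset_type])

ref = DatasetRef(dataset_type, DataCoordinate.make_empty(universe), run="test_run")
butler.put({"value": 42}, ref)
assert butler.get(ref) == {"value": 42}  # returns a copy, per the class notes
assert butler.stored_many([ref])[ref]    # the presence check executors rely on
# A second put of the same ref raises ConflictingDefinitionError by design.
```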
lsst/pipe/base/tests/mocks/_in_memory_repo.py (new file, +357 lines):

```python
# This file is part of pipe_base.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("InMemoryRepo",)

from collections.abc import Iterable, Mapping
from typing import Any

from lsst.daf.butler import CollectionType, DataCoordinate, DatasetRef, DatasetType, RegistryConfig
from lsst.daf.butler.tests.utils import create_populated_sqlite_registry
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.sphgeom import RangeSet

from ...all_dimensions_quantum_graph_builder import AllDimensionsQuantumGraphBuilder
from ...pipeline_graph import PipelineGraph
from ...single_quantum_executor import SingleQuantumExecutor
from ..in_memory_limited_butler import InMemoryLimitedButler
from ._pipeline_task import (
    DynamicConnectionConfig,
    DynamicTestPipelineTask,
    DynamicTestPipelineTaskConfig,
)
from ._storage_class import MockDataset, is_mock_name


class InMemoryRepo:
    """A test helper that simulates a butler repository for task execution
    without any disk I/O.

    Parameters
    ----------
    *args : `str` or `lsst.resources.ResourcePath`
        Butler YAML import files to load into the test repository.
    registry_config : `lsst.daf.butler.RegistryConfig`, optional
        Registry configuration for the repository.
    input_run : `str`, optional
        Name of a `~lsst.daf.butler.CollectionType.RUN` collection that will be
        used as an input to quantum graph generation. Input datasets created
        by the helper are added to this collection.
    input_chain : `str`, optional
        Name of a `~lsst.daf.butler.CollectionType.CHAINED` collection that
        will be the direct input to quantum graph generation. This always
        includes ``input_run``.
    output_run : `str`, optional
        Name of a `~lsst.daf.butler.CollectionType.RUN` collection for
        execution outputs.
    use_import_collections_as_input : `bool`, `str`, or \
            `~collections.abc.Iterable` [ `str` ], optional
        Additional collections from YAML import files to include in
        ``input_chain``, or `True` to include all such collections (in
        chain-flattened lexicographical order).
    data_root : convertible to `lsst.resources.ResourcePath`, optional
        Root directory to join to each element in ``*args``. Defaults to
        the `lsst.daf.butler.tests.registry_data` package.

    Notes
    -----
    This helper maintains a `..pipeline_graph.PipelineGraph` and a
    no-datastore butler backed by an in-memory SQLite database for use in
    quantum graph generation.
    """

    def __init__(
        self,
        *args: str | ResourcePath,
        registry_config: RegistryConfig | None = None,
        input_run: str = "input_run",
        input_chain: str = "input_chain",
        output_run: str = "output_run",
        use_import_collections_as_input: bool | str | Iterable[str] = True,
        data_root: ResourcePathExpression | None = "resource://lsst.daf.butler/tests/registry_data",
    ):
        if data_root is not None:
            data_root = ResourcePath(data_root, forceDirectory=True)
            args = tuple(data_root.join(arg) for arg in args)
        self.butler = create_populated_sqlite_registry(*args, registry_config=registry_config)
        input_chain_definition = [input_run]
        if use_import_collections_as_input:
            if use_import_collections_as_input is True:
                use_import_collections_as_input = sorted(
                    self.butler.collections.query("*", flatten_chains=True)
                )
            input_chain_definition += list(use_import_collections_as_input)
        self.input_run = input_run
        self.input_chain = input_chain
        self.output_run = output_run
        self.butler.collections.register(self.input_run)
        self.butler.collections.register(self.input_chain, CollectionType.CHAINED)
        self.butler.collections.redefine_chain(self.input_chain, input_chain_definition)
        self.pipeline_graph = PipelineGraph()
        self.last_auto_dataset_type_index = 0
        self.last_auto_task_index = 0

    def add_task(
        self,
        label: str | None = None,
        *,
        task_class: type[DynamicTestPipelineTask] = DynamicTestPipelineTask,
        config: DynamicTestPipelineTaskConfig | None = None,
        dimensions: Iterable[str] | None = None,
        inputs: Mapping[str, DynamicConnectionConfig] | None = None,
        outputs: Mapping[str, DynamicConnectionConfig] | None = None,
        prerequisite_inputs: Mapping[str, DynamicConnectionConfig] | None = None,
        init_inputs: Mapping[str, DynamicConnectionConfig] | None = None,
        init_outputs: Mapping[str, DynamicConnectionConfig] | None = None,
    ) -> None:
        """Add a task to the helper's pipeline graph.

        Parameters
        ----------
        label : `str`, optional
            Label for the task. If not provided, the task name will be
            ``task_auto{self.last_auto_task_index}``, with that variable
            incremented.
        task_class : `type`, optional
            Subclass of `DynamicTestPipelineTask` to use.
        config : `DynamicTestPipelineTaskConfig`, optional
            Task configuration to use. Note that the dimensions are always
            overridden by the ``dimensions`` argument and ``inputs`` and
            ``outputs`` are updated by those arguments unless they are
            explicitly set to empty dictionaries.
        dimensions : `~collections.abc.Iterable` [ `str` ], optional
            Dimensions of the task and any automatically-added input or output
            connection.
        inputs : `~collections.abc.Mapping` [ `str`, \
                `DynamicConnectionConfig` ], optional
            Input connections to add. If not provided, a single connection is
            added with the same dimensions as the task and dataset type name
            ``dataset_auto{self.last_auto_dataset_type_index}``.
        outputs : `~collections.abc.Mapping` [ `str`, \
                `DynamicConnectionConfig` ], optional
            Output connections to add. If not provided, a single connection is
            added with the same dimensions as the task and dataset type name
            ``dataset_auto{self.last_auto_dataset_type_index}``, with that
            variable incremented first.
        prerequisite_inputs : `~collections.abc.Mapping` [ `str`, \
                `DynamicConnectionConfig` ], optional
            Prerequisite input connections to add. Defaults to an empty
            mapping.
        init_inputs : `~collections.abc.Mapping` [ `str`, \
                `DynamicConnectionConfig` ], optional
            Init input connections to add. Defaults to an empty mapping.
        init_outputs : `~collections.abc.Mapping` [ `str`, \
                `DynamicConnectionConfig` ], optional
            Init output connections to add. Defaults to an empty mapping.

        Notes
        -----
        The defaults for this method's arguments are designed to allow it to be
        called in succession to create a sequence of "one-to-one" tasks in
        which each consumes the output of the last.
        """
        if config is None:
            config = DynamicTestPipelineTaskConfig()
        if dimensions is not None:
            config.dimensions = list(dimensions)
        if inputs is not None:
            config.inputs.update(inputs)
        else:
            config.inputs["input_connection"] = DynamicConnectionConfig(
                dataset_type_name=f"dataset_auto{self.last_auto_dataset_type_index}",
                dimensions=list(config.dimensions),
            )
        if outputs is not None:
            config.outputs.update(outputs)
        else:
            self.last_auto_dataset_type_index += 1
            config.outputs["output_connection"] = DynamicConnectionConfig(
                dataset_type_name=f"dataset_auto{self.last_auto_dataset_type_index}",
                dimensions=list(config.dimensions),
            )
        if prerequisite_inputs is not None:
            config.prerequisite_inputs.update(prerequisite_inputs)
        if init_inputs is not None:
            config.init_inputs.update(init_inputs)
        if init_outputs is not None:
            config.init_outputs.update(init_outputs)
        if label is None:
            self.last_auto_task_index += 1
            label = f"task_auto{self.last_auto_task_index}"
        self.pipeline_graph.add_task(label, task_class=task_class, config=config)

    def make_quantum_graph_builder(
        self,
        *,
        insert_mocked_inputs: bool = True,
        register_output_dataset_types: bool = True,
    ) -> AllDimensionsQuantumGraphBuilder:
        """Make a quantum graph builder from the pipeline graph and internal
        data repository.

        Parameters
        ----------
        insert_mocked_inputs : `bool`, optional
            Whether to automatically insert datasets for all overall inputs to
            the pipeline graph whose dataset types have not already been
            registered. If set to `False`, inputs must be provided by imported
            YAML files or explicit calls to `insert_datasets`, which provides
            more fine-grained control over the data IDs of the datasets.
        register_output_dataset_types : `bool`, optional
            If `True`, register all output dataset types.

        Returns
        -------
        builder : \
                `..all_dimensions_quantum_graph_builder.AllDimensionsQuantumGraphBuilder`
            Quantum graph builder. Note that
            ``attach_datastore_records=False`` must be passed to `build`, since
            the helper's butler does not have a datastore.
        """
        if insert_mocked_inputs:
            self.pipeline_graph.resolve(self.butler.registry)
            for _, dataset_type_node in self.pipeline_graph.iter_overall_inputs():
                assert dataset_type_node is not None, "pipeline graph is resolved."
                if self.butler.registry.registerDatasetType(dataset_type_node.dataset_type):
                    self.insert_datasets(dataset_type_node.dataset_type, register=False)
        builder = AllDimensionsQuantumGraphBuilder(
            self.pipeline_graph,
            self.butler,
            input_collections=self.input_chain,
            output_run=self.output_run,
        )
        if register_output_dataset_types:
            self.pipeline_graph.register_dataset_types(self.butler)
        return builder

    def insert_datasets(
        self, dataset_type: DatasetType | str, register: bool = True, *args: Any, **kwargs: Any
    ) -> list[DatasetRef]:
        """Insert input datasets into the test repository.

        Parameters
        ----------
        dataset_type : `~lsst.daf.butler.DatasetType` or `str`
            Dataset type or name. If a name, it must be included in the
            pipeline graph.
        register : `bool`, optional
            Whether to register the dataset type. If `False`, the dataset type
            must already be registered.
        *args : `object`
            Forwarded to `~lsst.daf.butler.query_data_ids`.
        **kwargs : `object`
            Forwarded to `~lsst.daf.butler.query_data_ids`.

        Returns
        -------
        refs : `list` [ `lsst.daf.butler.DatasetRef` ]
            References to the inserted datasets.

        Notes
        -----
        For dataset types with dimensions that are queryable, this queries for
        all data IDs in the repository (forwarding ``*args`` and ``**kwargs``
        for e.g. ``where`` strings). For skypix dimensions, this queries for
        both patches and visit-detector regions (forwarding ``*args`` and
        ``**kwargs`` to both) and uses all overlapping sky pixels. Dataset
        types with a mix of skypix and queryable dimensions are not supported.
        """
        if isinstance(dataset_type, str):
            self.pipeline_graph.resolve(self.butler.registry)
            dataset_type = self.pipeline_graph.dataset_types[dataset_type].dataset_type
        if register:
            self.butler.registry.registerDatasetType(dataset_type)
        dimensions = dataset_type.dimensions
        if dataset_type.dimensions.skypix:
            if len(dimensions) == 1:
                (skypix_name,) = dimensions.skypix
                pixelization = dimensions.universe.skypix_dimensions[skypix_name].pixelization
                ranges = RangeSet()
                for patch_record in self.butler.query_dimension_records(
                    "patch", *args, **kwargs, explain=False
                ):
                    ranges |= pixelization.envelope(patch_record.region)
                for vdr_record in self.butler.query_dimension_records(
                    "visit_detector_region", *args, **kwargs, explain=False
                ):
                    ranges |= pixelization.envelope(vdr_record.region)
                data_ids = []
                for begin, end in ranges:
                    for index in range(begin, end):
                        data_ids.append(DataCoordinate.from_required_values(dimensions, (index,)))
            else:
                raise NotImplementedError(
                    "Can only generate data IDs for queryable dimensions and isolated skypix."
                )
        else:
            data_ids = self.butler.query_data_ids(dimensions, *args, **kwargs, explain=False)
        return self.butler.registry.insertDatasets(dataset_type, data_ids, run=self.input_run)

    def make_limited_butler(self) -> InMemoryLimitedButler:
        """Make a test limited butler for execution.

        Returns
        -------
        limited_butler : `.InMemoryLimitedButler`
            A limited butler that can be used for task execution.

        Notes
        -----
        This queries the database-only butler used for quantum-graph generation
        for all datasets in the ``input_chain`` collection, and populates the
        limited butler with those that have a mock storage class. Other
        datasets are ignored, so they will appear as though they were present
        during quantum graph generation but absent during execution.
        """
        butler = InMemoryLimitedButler(self.butler.dimensions, self.butler.registry.queryDatasetTypes())
        for ref in self.butler._query_all_datasets(self.input_chain):
            if is_mock_name(ref.datasetType.storageClass_name):
                butler.put(
                    MockDataset(
                        dataset_id=ref.id,
                        dataset_type=ref.datasetType.to_simple(),
                        data_id=dict(ref.dataId.mapping),
                        run=ref.run,
                    ),
                    ref,
                )
        return butler

    def make_single_quantum_executor(self) -> tuple[SingleQuantumExecutor, InMemoryLimitedButler]:
        """Make a single-quantum executor backed by a new limited butler.

        Returns
        -------
        executor : `..single_quantum_executor.SingleQuantumExecutor`
            An executor for a single quantum.
        butler : `.InMemoryLimitedButler`
            The butler that the executor will write to.
        """
        butler = self.make_limited_butler()
        return SingleQuantumExecutor(limited_butler_factory=butler.factory), butler
```
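
Putting the two new files together, an end-to-end test might look like the following sketch. The import path follows the updated `mocks/__init__.py` in the file list; the YAML file name is hypothetical (any dimension-record export under the default `data_root` works), and the final execution step is only indicated in comments:

```python
from lsst.pipe.base.tests.mocks import InMemoryRepo

repo = InMemoryRepo("base.yaml")  # hypothetical import file under the default data_root
repo.add_task()  # task_auto1: dataset_auto0 -> dataset_auto1
repo.add_task()  # task_auto2: dataset_auto1 -> dataset_auto2

# Overall inputs are mocked and inserted automatically; the helper's butler
# has no datastore, so datastore records must not be attached.
builder = repo.make_quantum_graph_builder()
quantum_graph = builder.build(attach_datastore_records=False)

executor, limited_butler = repo.make_single_quantum_executor()
# Quanta from `quantum_graph` can now be run through `executor`, with their
# outputs inspectable via limited_butler.get_datasets().
```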