lsst-pipe-base 29.2025.3000__py3-none-any.whl → 29.2025.3200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/__init__.py +0 -1
- lsst/pipe/base/_datasetQueryConstraints.py +1 -1
- lsst/pipe/base/all_dimensions_quantum_graph_builder.py +10 -46
- lsst/pipe/base/caching_limited_butler.py +8 -4
- lsst/pipe/base/connectionTypes.py +19 -19
- lsst/pipe/base/connections.py +2 -2
- lsst/pipe/base/exec_fixup_data_id.py +131 -0
- lsst/pipe/base/execution_graph_fixup.py +69 -0
- lsst/pipe/base/graph/graphSummary.py +4 -4
- lsst/pipe/base/log_capture.py +227 -0
- lsst/pipe/base/mp_graph_executor.py +786 -0
- lsst/pipe/base/pipeline_graph/_pipeline_graph.py +40 -10
- lsst/pipe/base/pipeline_graph/_tasks.py +106 -0
- lsst/pipe/base/pipeline_graph/io.py +1 -1
- lsst/pipe/base/quantum_graph_builder.py +85 -58
- lsst/pipe/base/quantum_graph_executor.py +125 -0
- lsst/pipe/base/quantum_graph_skeleton.py +60 -1
- lsst/pipe/base/quantum_reports.py +334 -0
- lsst/pipe/base/script/transfer_from_graph.py +4 -1
- lsst/pipe/base/separable_pipeline_executor.py +296 -0
- lsst/pipe/base/simple_pipeline_executor.py +674 -0
- lsst/pipe/base/single_quantum_executor.py +635 -0
- lsst/pipe/base/taskFactory.py +18 -12
- lsst/pipe/base/tests/in_memory_limited_butler.py +223 -0
- lsst/pipe/base/tests/mocks/__init__.py +1 -0
- lsst/pipe/base/tests/mocks/_in_memory_repo.py +357 -0
- lsst/pipe/base/tests/mocks/_pipeline_task.py +19 -2
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/METADATA +1 -1
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/RECORD +38 -28
- lsst/pipe/base/executionButlerBuilder.py +0 -493
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3000.dist-info → lsst_pipe_base-29.2025.3200.dist-info}/zip-safe +0 -0
lsst/pipe/base/__init__.py
CHANGED
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
27
27
|
|
|
28
28
|
"""Symbols defined in this package should be imported from
|
|
29
|
-
|
|
29
|
+
`.all_dimensions_quantum_graph_builder` instead; it only appears in the docs
|
|
30
30
|
due to limitations in Sphinx.
|
|
31
31
|
"""
|
|
32
32
|
|
|
@@ -44,7 +44,6 @@ import astropy.table
|
|
|
44
44
|
from lsst.daf.butler import (
|
|
45
45
|
Butler,
|
|
46
46
|
DataCoordinate,
|
|
47
|
-
DimensionDataAttacher,
|
|
48
47
|
DimensionElement,
|
|
49
48
|
DimensionGroup,
|
|
50
49
|
DimensionRecordSet,
|
|
@@ -57,7 +56,7 @@ from lsst.utils.timer import timeMethod
|
|
|
57
56
|
|
|
58
57
|
from ._datasetQueryConstraints import DatasetQueryConstraintVariant
|
|
59
58
|
from .quantum_graph_builder import QuantumGraphBuilder, QuantumGraphBuilderError
|
|
60
|
-
from .quantum_graph_skeleton import DatasetKey,
|
|
59
|
+
from .quantum_graph_skeleton import DatasetKey, PrerequisiteDatasetKey, QuantumGraphSkeleton, QuantumKey
|
|
61
60
|
|
|
62
61
|
if TYPE_CHECKING:
|
|
63
62
|
from .pipeline_graph import DatasetTypeNode, PipelineGraph, TaskNode
|
|
@@ -65,13 +64,14 @@ if TYPE_CHECKING:
|
|
|
65
64
|
|
|
66
65
|
@final
|
|
67
66
|
class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
|
|
68
|
-
"""An implementation of
|
|
69
|
-
query for data IDs covering all dimensions in the
|
|
67
|
+
"""An implementation of `.quantum_graph_builder.QuantumGraphBuilder` that
|
|
68
|
+
uses a single large query for data IDs covering all dimensions in the
|
|
69
|
+
pipeline.
|
|
70
70
|
|
|
71
71
|
Parameters
|
|
72
72
|
----------
|
|
73
73
|
pipeline_graph : `.pipeline_graph.PipelineGraph`
|
|
74
|
-
Pipeline to build a
|
|
74
|
+
Pipeline to build a `.QuantumGraph` from, as a graph. Will be resolved
|
|
75
75
|
in-place with the given butler (any existing resolution is ignored).
|
|
76
76
|
butler : `lsst.daf.butler.Butler`
|
|
77
77
|
Client for the data repository. Should be read-only.
|
|
@@ -92,7 +92,8 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
|
|
|
92
92
|
are constrained by the ``where`` argument or pipeline data ID will be
|
|
93
93
|
filled in automatically.
|
|
94
94
|
**kwargs
|
|
95
|
-
Additional keyword arguments forwarded to
|
|
95
|
+
Additional keyword arguments forwarded to
|
|
96
|
+
`.quantum_graph_builder.QuantumGraphBuilder`.
|
|
96
97
|
|
|
97
98
|
Notes
|
|
98
99
|
-----
|
|
@@ -141,13 +142,14 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
|
|
|
141
142
|
self._query_for_data_ids(tree)
|
|
142
143
|
dimension_records = self._fetch_most_dimension_records(tree)
|
|
143
144
|
tree.generate_data_ids(self.log)
|
|
144
|
-
skeleton = self._make_subgraph_skeleton(tree)
|
|
145
|
+
skeleton: QuantumGraphSkeleton = self._make_subgraph_skeleton(tree)
|
|
145
146
|
if not skeleton.has_any_quanta:
|
|
146
147
|
# QG is going to be empty; exit early not just for efficiency, but
|
|
147
148
|
# also so downstream code doesn't have to guard against this case.
|
|
148
149
|
return skeleton
|
|
149
150
|
self._find_followup_datasets(tree, skeleton)
|
|
150
|
-
|
|
151
|
+
all_data_id_dimensions = subgraph.get_all_dimensions()
|
|
152
|
+
skeleton.attach_dimension_records(self.butler, all_data_id_dimensions, dimension_records)
|
|
151
153
|
return skeleton
|
|
152
154
|
|
|
153
155
|
def _query_for_data_ids(self, tree: _DimensionGroupTree) -> None:
|
|
@@ -484,44 +486,6 @@ class AllDimensionsQuantumGraphBuilder(QuantumGraphBuilder):
|
|
|
484
486
|
result.append(record_set)
|
|
485
487
|
return result
|
|
486
488
|
|
|
487
|
-
@timeMethod
|
|
488
|
-
def _attach_dimension_records(
|
|
489
|
-
self, skeleton: QuantumGraphSkeleton, dimension_records: Iterable[DimensionRecordSet]
|
|
490
|
-
) -> None:
|
|
491
|
-
"""Attach dimension records to most data IDs in the in-progress graph,
|
|
492
|
-
and return a data structure that records the rest.
|
|
493
|
-
|
|
494
|
-
Parameters
|
|
495
|
-
----------
|
|
496
|
-
skeleton : `.quantum_graph_skeleton.QuantumGraphSkeleton`
|
|
497
|
-
In-progress quantum graph to modify in place.
|
|
498
|
-
dimension_records : `~collections.abc.Iterable` [ \
|
|
499
|
-
`lsst.daf.butler.DimensionRecordSet` ]
|
|
500
|
-
Iterable of sets of dimension records.
|
|
501
|
-
"""
|
|
502
|
-
# Group all nodes by data ID (and dimensions of data ID).
|
|
503
|
-
data_ids_to_expand: defaultdict[DimensionGroup, defaultdict[DataCoordinate, list[Key]]] = defaultdict(
|
|
504
|
-
lambda: defaultdict(list)
|
|
505
|
-
)
|
|
506
|
-
data_id: DataCoordinate | None
|
|
507
|
-
for node_key in skeleton:
|
|
508
|
-
if data_id := skeleton[node_key].get("data_id"):
|
|
509
|
-
data_ids_to_expand[data_id.dimensions][data_id].append(node_key)
|
|
510
|
-
attacher = DimensionDataAttacher(
|
|
511
|
-
records=dimension_records,
|
|
512
|
-
dimensions=DimensionGroup.union(*data_ids_to_expand.keys(), universe=self.universe),
|
|
513
|
-
)
|
|
514
|
-
for dimensions, data_ids in data_ids_to_expand.items():
|
|
515
|
-
with self.butler.query() as query:
|
|
516
|
-
# Butler query will be used as-needed to get dimension records
|
|
517
|
-
# (from prerequisites) we didn't fetch in advance. These are
|
|
518
|
-
# cached in the attacher so we don't look them up multiple
|
|
519
|
-
# times.
|
|
520
|
-
expanded_data_ids = attacher.attach(dimensions, data_ids.keys(), query=query)
|
|
521
|
-
for expanded_data_id, node_keys in zip(expanded_data_ids, data_ids.values()):
|
|
522
|
-
for node_key in node_keys:
|
|
523
|
-
skeleton.set_data_id(node_key, expanded_data_id)
|
|
524
|
-
|
|
525
489
|
|
|
526
490
|
@dataclasses.dataclass(eq=False, repr=False, slots=True)
|
|
527
491
|
class _DimensionGroupTwig:
|
|
@@ -84,7 +84,6 @@ class CachingLimitedButler(LimitedButler):
|
|
|
84
84
|
no_copy_on_cache: Set[str] = frozenset(),
|
|
85
85
|
):
|
|
86
86
|
self._wrapped = wrapped
|
|
87
|
-
self._datastore = self._wrapped._datastore
|
|
88
87
|
self.storageClasses = self._wrapped.storageClasses
|
|
89
88
|
self._cache_on_put = cache_on_put
|
|
90
89
|
self._cache_on_get = cache_on_get
|
|
@@ -148,9 +147,6 @@ class CachingLimitedButler(LimitedButler):
|
|
|
148
147
|
# note that this does not use the cache at all
|
|
149
148
|
return self._wrapped.getDeferred(ref, parameters=parameters, storageClass=storageClass)
|
|
150
149
|
|
|
151
|
-
def stored(self, ref: DatasetRef) -> bool:
|
|
152
|
-
return self.stored_many([ref])[ref] # TODO: remove this once DM-43086 is done.
|
|
153
|
-
|
|
154
150
|
def stored_many(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
|
|
155
151
|
result = {}
|
|
156
152
|
unknown_refs = []
|
|
@@ -205,3 +201,11 @@ class CachingLimitedButler(LimitedButler):
|
|
|
205
201
|
@property
|
|
206
202
|
def dimensions(self) -> DimensionUniverse:
|
|
207
203
|
return self._wrapped.dimensions
|
|
204
|
+
|
|
205
|
+
@property
|
|
206
|
+
def _datastore(self) -> Any:
|
|
207
|
+
return self._wrapped._datastore
|
|
208
|
+
|
|
209
|
+
@_datastore.setter # demanded by MyPy since we declare it to be an instance attribute in LimitedButler.
|
|
210
|
+
def _datastore(self, value: Any) -> None:
|
|
211
|
+
self._wrapped._datastore = value
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
27
27
|
|
|
28
28
|
"""Module defining connection types to be used within a
|
|
29
|
-
|
|
29
|
+
`.PipelineTaskConnections` class.
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
__all__ = ["BaseConnection", "InitInput", "InitOutput", "Input", "Output", "PrerequisiteInput"]
|
|
@@ -53,7 +53,7 @@ class BaseConnection:
|
|
|
53
53
|
Indicates if this connection should expect to contain multiple objects
|
|
54
54
|
of the given dataset type. Tasks with more than one connection with
|
|
55
55
|
``multiple=True`` with the same dimensions may want to implement
|
|
56
|
-
|
|
56
|
+
`.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
|
|
57
57
|
consistent (i.e. zip-iterable) in `PipelineTask.runQuantum()` and
|
|
58
58
|
notify the execution system as early as possible of outputs that will
|
|
59
59
|
not be produced because the corresponding input is missing.
|
|
@@ -121,7 +121,7 @@ class DimensionedConnection(BaseConnection):
|
|
|
121
121
|
Indicates if this connection should expect to contain multiple objects
|
|
122
122
|
of the given dataset type. Tasks with more than one connection with
|
|
123
123
|
``multiple=True`` with the same dimensions may want to implement
|
|
124
|
-
|
|
124
|
+
`.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
|
|
125
125
|
consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
|
|
126
126
|
the execution system as early as possible of outputs that will not be
|
|
127
127
|
produced because the corresponding input is missing.
|
|
@@ -161,7 +161,7 @@ class BaseInput(DimensionedConnection):
|
|
|
161
161
|
Indicates if this connection should expect to contain multiple objects
|
|
162
162
|
of the given dataset type. Tasks with more than one connection with
|
|
163
163
|
``multiple=True`` with the same dimensions may want to implement
|
|
164
|
-
|
|
164
|
+
`.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
|
|
165
165
|
consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
|
|
166
166
|
the execution system as early as possible of outputs that will not be
|
|
167
167
|
produced because the corresponding input is missing.
|
|
@@ -175,14 +175,14 @@ class BaseInput(DimensionedConnection):
|
|
|
175
175
|
minimum : `bool`
|
|
176
176
|
Minimum number of datasets required for this connection, per quantum.
|
|
177
177
|
This is checked in the base implementation of
|
|
178
|
-
|
|
178
|
+
`.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
|
|
179
179
|
the minimum is not met for `Input` connections (causing the quantum to
|
|
180
180
|
be pruned, skipped, or never created, depending on the context), and
|
|
181
181
|
`FileNotFoundError` for `PrerequisiteInput` connections (causing
|
|
182
182
|
QuantumGraph generation to fail). `PipelineTask` implementations may
|
|
183
|
-
provide custom
|
|
184
|
-
for more fine-grained or configuration-driven
|
|
185
|
-
they are compatible with this minium.
|
|
183
|
+
provide custom `~.PipelineTaskConnections.adjustQuantum`
|
|
184
|
+
implementations for more fine-grained or configuration-driven
|
|
185
|
+
constraints, as long as they are compatible with this minium.
|
|
186
186
|
|
|
187
187
|
Raises
|
|
188
188
|
------
|
|
@@ -216,7 +216,7 @@ class Input(BaseInput):
|
|
|
216
216
|
Indicates if this connection should expect to contain multiple objects
|
|
217
217
|
of the given dataset type. Tasks with more than one connection with
|
|
218
218
|
``multiple=True`` with the same dimensions may want to implement
|
|
219
|
-
|
|
219
|
+
`.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
|
|
220
220
|
consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
|
|
221
221
|
the execution system as early as possible of outputs that will not be
|
|
222
222
|
produced because the corresponding input is missing.
|
|
@@ -230,14 +230,14 @@ class Input(BaseInput):
|
|
|
230
230
|
minimum : `bool`
|
|
231
231
|
Minimum number of datasets required for this connection, per quantum.
|
|
232
232
|
This is checked in the base implementation of
|
|
233
|
-
|
|
233
|
+
`.PipelineTaskConnections.adjustQuantum`, which raises `NoWorkFound` if
|
|
234
234
|
the minimum is not met for `Input` connections (causing the quantum to
|
|
235
235
|
be pruned, skipped, or never created, depending on the context), and
|
|
236
236
|
`FileNotFoundError` for `PrerequisiteInput` connections (causing
|
|
237
237
|
QuantumGraph generation to fail). `PipelineTask` implementations may
|
|
238
|
-
provide custom
|
|
239
|
-
for more fine-grained or configuration-driven
|
|
240
|
-
they are compatible with this minium.
|
|
238
|
+
provide custom `~.PipelineTaskConnections.adjustQuantum`
|
|
239
|
+
implementations for more fine-grained or configuration-driven
|
|
240
|
+
constraints, as long as they are compatible with this minium.
|
|
241
241
|
deferGraphConstraint : `bool`, optional
|
|
242
242
|
If `True`, do not include this dataset type's existence in the initial
|
|
243
243
|
query that starts the QuantumGraph generation process. This can be
|
|
@@ -286,7 +286,7 @@ class PrerequisiteInput(BaseInput):
|
|
|
286
286
|
Indicates if this connection should expect to contain multiple objects
|
|
287
287
|
of the given dataset type. Tasks with more than one connection with
|
|
288
288
|
``multiple=True`` with the same dimensions may want to implement
|
|
289
|
-
|
|
289
|
+
`.PipelineTaskConnections.adjustQuantum` to ensure those datasets are
|
|
290
290
|
consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify
|
|
291
291
|
the execution system as early as possible of outputs that will not be
|
|
292
292
|
produced because the corresponding input is missing.
|
|
@@ -296,12 +296,12 @@ class PrerequisiteInput(BaseInput):
|
|
|
296
296
|
minimum : `bool`
|
|
297
297
|
Minimum number of datasets required for this connection, per quantum.
|
|
298
298
|
This is checked in the base implementation of
|
|
299
|
-
|
|
299
|
+
`.PipelineTaskConnections.adjustQuantum`, which raises
|
|
300
300
|
`FileNotFoundError` (causing QuantumGraph generation to fail).
|
|
301
|
-
`PipelineTask` implementations may
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
301
|
+
`PipelineTask` implementations may provide custom
|
|
302
|
+
`~.PipelineTaskConnections.adjustQuantum` implementations for more
|
|
303
|
+
fine-grained or configuration-driven constraints, as long as they are
|
|
304
|
+
compatible with this minium.
|
|
305
305
|
lookupFunction : `typing.Callable`, optional
|
|
306
306
|
An optional callable function that will look up PrerequisiteInputs
|
|
307
307
|
using the DatasetType, registry, quantum dataId, and input collections
|
lsst/pipe/base/connections.py
CHANGED
|
@@ -1063,8 +1063,8 @@ def iterConnections(
|
|
|
1063
1063
|
class AdjustQuantumHelper:
|
|
1064
1064
|
"""Helper class for calling `PipelineTaskConnections.adjustQuantum`.
|
|
1065
1065
|
|
|
1066
|
-
This class holds `
|
|
1067
|
-
`Quantum` and execution harness code, i.e. with
|
|
1066
|
+
This class holds `inputs` and `outputs` mappings in the form used by
|
|
1067
|
+
`lsst.daf.butler.Quantum` and execution harness code, i.e. with
|
|
1068
1068
|
`~lsst.daf.butler.DatasetType` keys, translating them to and from the
|
|
1069
1069
|
connection-oriented mappings used inside `PipelineTaskConnections`.
|
|
1070
1070
|
"""
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# This file is part of pipe_base.
|
|
2
|
+
#
|
|
3
|
+
# Developed for the LSST Data Management System.
|
|
4
|
+
# This product includes software developed by the LSST Project
|
|
5
|
+
# (http://www.lsst.org).
|
|
6
|
+
# See the COPYRIGHT file at the top-level directory of this distribution
|
|
7
|
+
# for details of code ownership.
|
|
8
|
+
#
|
|
9
|
+
# This software is dual licensed under the GNU General Public License and also
|
|
10
|
+
# under a 3-clause BSD license. Recipients may choose which of these licenses
|
|
11
|
+
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
|
|
12
|
+
# respectively. If you choose the GPL option then the following text applies
|
|
13
|
+
# (but note that there is still no warranty even if you opt for BSD instead):
|
|
14
|
+
#
|
|
15
|
+
# This program is free software: you can redistribute it and/or modify
|
|
16
|
+
# it under the terms of the GNU General Public License as published by
|
|
17
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
18
|
+
# (at your option) any later version.
|
|
19
|
+
#
|
|
20
|
+
# This program is distributed in the hope that it will be useful,
|
|
21
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
22
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
23
|
+
# GNU General Public License for more details.
|
|
24
|
+
#
|
|
25
|
+
# You should have received a copy of the GNU General Public License
|
|
26
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
27
|
+
|
|
28
|
+
__all__ = ["ExecutionGraphFixup"]
|
|
29
|
+
|
|
30
|
+
import contextlib
|
|
31
|
+
import itertools
|
|
32
|
+
from collections import defaultdict
|
|
33
|
+
from collections.abc import Sequence
|
|
34
|
+
from typing import Any
|
|
35
|
+
|
|
36
|
+
import networkx as nx
|
|
37
|
+
|
|
38
|
+
from .execution_graph_fixup import ExecutionGraphFixup
|
|
39
|
+
from .graph import QuantumGraph, QuantumNode
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ExecFixupDataId(ExecutionGraphFixup):
|
|
43
|
+
"""Implementation of ExecutionGraphFixup for ordering of tasks based
|
|
44
|
+
on DataId values.
|
|
45
|
+
|
|
46
|
+
This class is a trivial implementation mostly useful as an example,
|
|
47
|
+
though it can be used to make actual fixup instances by defining
|
|
48
|
+
a method that instantiates it, e.g.::
|
|
49
|
+
|
|
50
|
+
# lsst/ap/verify/ci_fixup.py
|
|
51
|
+
|
|
52
|
+
from lsst.pipe.base.exec_fixup_data_id import ExecFixupDataId
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def assoc_fixup():
|
|
56
|
+
return ExecFixupDataId(
|
|
57
|
+
taskLabel="ap_assoc", dimensions=("visit", "detector")
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
and then executing pipetask::
|
|
61
|
+
|
|
62
|
+
pipetask run --graph-fixup=lsst.ap.verify.ci_fixup.assoc_fixup ...
|
|
63
|
+
|
|
64
|
+
This will add new dependencies between quanta executed by the task with
|
|
65
|
+
label "ap_assoc". Quanta with higher visit number will depend on quanta
|
|
66
|
+
with lower visit number and their execution will wait until lower visit
|
|
67
|
+
number finishes.
|
|
68
|
+
|
|
69
|
+
Parameters
|
|
70
|
+
----------
|
|
71
|
+
taskLabel : `str`
|
|
72
|
+
The label of the task for which to add dependencies.
|
|
73
|
+
dimensions : `str` or sequence [`str`]
|
|
74
|
+
One or more dimension names, quanta execution will be ordered
|
|
75
|
+
according to values of these dimensions.
|
|
76
|
+
reverse : `bool`, optional
|
|
77
|
+
If `False` (default) then quanta with higher values of dimensions
|
|
78
|
+
will be executed after quanta with lower values, otherwise the order
|
|
79
|
+
is reversed.
|
|
80
|
+
"""
|
|
81
|
+
|
|
82
|
+
def __init__(self, taskLabel: str, dimensions: str | Sequence[str], reverse: bool = False):
|
|
83
|
+
self.taskLabel = taskLabel
|
|
84
|
+
self.dimensions = dimensions
|
|
85
|
+
self.reverse = reverse
|
|
86
|
+
if isinstance(self.dimensions, str):
|
|
87
|
+
self.dimensions = (self.dimensions,)
|
|
88
|
+
else:
|
|
89
|
+
self.dimensions = tuple(self.dimensions)
|
|
90
|
+
|
|
91
|
+
def _key(self, qnode: QuantumNode) -> tuple[Any, ...]:
|
|
92
|
+
"""Produce comparison key for quantum data.
|
|
93
|
+
|
|
94
|
+
Parameters
|
|
95
|
+
----------
|
|
96
|
+
qnode : `QuantumNode`
|
|
97
|
+
An individual node in a `~lsst.pipe.base.QuantumGraph`
|
|
98
|
+
|
|
99
|
+
Returns
|
|
100
|
+
-------
|
|
101
|
+
key : `tuple`
|
|
102
|
+
"""
|
|
103
|
+
dataId = qnode.quantum.dataId
|
|
104
|
+
assert dataId is not None, "Quantum DataId cannot be None"
|
|
105
|
+
key = tuple(dataId[dim] for dim in self.dimensions)
|
|
106
|
+
return key
|
|
107
|
+
|
|
108
|
+
def fixupQuanta(self, graph: QuantumGraph) -> QuantumGraph:
|
|
109
|
+
taskDef = graph.findTaskDefByLabel(self.taskLabel)
|
|
110
|
+
if taskDef is None:
|
|
111
|
+
raise ValueError(f"Cannot find task with label {self.taskLabel}")
|
|
112
|
+
quanta = list(graph.getNodesForTask(taskDef))
|
|
113
|
+
keyQuanta = defaultdict(list)
|
|
114
|
+
for q in quanta:
|
|
115
|
+
key = self._key(q)
|
|
116
|
+
keyQuanta[key].append(q)
|
|
117
|
+
keys = sorted(keyQuanta.keys(), reverse=self.reverse)
|
|
118
|
+
networkGraph = graph.graph
|
|
119
|
+
|
|
120
|
+
for prev_key, key in itertools.pairwise(keys):
|
|
121
|
+
for prev_node in keyQuanta[prev_key]:
|
|
122
|
+
for node in keyQuanta[key]:
|
|
123
|
+
# remove any existing edges between the two nodes, but
|
|
124
|
+
# don't fail if there are not any. Both directions need
|
|
125
|
+
# tried because in a directed graph, order maters
|
|
126
|
+
for edge in ((node, prev_node), (prev_node, node)):
|
|
127
|
+
with contextlib.suppress(nx.NetworkXException):
|
|
128
|
+
networkGraph.remove_edge(*edge)
|
|
129
|
+
|
|
130
|
+
networkGraph.add_edge(prev_node, node)
|
|
131
|
+
return graph
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# This file is part of pipe_base.
|
|
2
|
+
#
|
|
3
|
+
# Developed for the LSST Data Management System.
|
|
4
|
+
# This product includes software developed by the LSST Project
|
|
5
|
+
# (http://www.lsst.org).
|
|
6
|
+
# See the COPYRIGHT file at the top-level directory of this distribution
|
|
7
|
+
# for details of code ownership.
|
|
8
|
+
#
|
|
9
|
+
# This software is dual licensed under the GNU General Public License and also
|
|
10
|
+
# under a 3-clause BSD license. Recipients may choose which of these licenses
|
|
11
|
+
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
|
|
12
|
+
# respectively. If you choose the GPL option then the following text applies
|
|
13
|
+
# (but note that there is still no warranty even if you opt for BSD instead):
|
|
14
|
+
#
|
|
15
|
+
# This program is free software: you can redistribute it and/or modify
|
|
16
|
+
# it under the terms of the GNU General Public License as published by
|
|
17
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
18
|
+
# (at your option) any later version.
|
|
19
|
+
#
|
|
20
|
+
# This program is distributed in the hope that it will be useful,
|
|
21
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
22
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
23
|
+
# GNU General Public License for more details.
|
|
24
|
+
#
|
|
25
|
+
# You should have received a copy of the GNU General Public License
|
|
26
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
27
|
+
|
|
28
|
+
__all__ = ["ExecutionGraphFixup"]
|
|
29
|
+
|
|
30
|
+
from abc import ABC, abstractmethod
|
|
31
|
+
|
|
32
|
+
from .graph import QuantumGraph
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ExecutionGraphFixup(ABC):
|
|
36
|
+
"""Interface for classes which update quantum graphs before execution.
|
|
37
|
+
|
|
38
|
+
Primary goal of this class is to modify quanta dependencies which may not
|
|
39
|
+
be possible to reflect in a quantum graph using standard tools. One known
|
|
40
|
+
use case for that is to guarantee particular execution order of visits in
|
|
41
|
+
CI jobs for cases when outcome depends on the processing order of visits
|
|
42
|
+
(e.g. AP association pipeline).
|
|
43
|
+
|
|
44
|
+
Instances of this class receive pre-ordered sequence of quanta
|
|
45
|
+
(`.QuantumGraph` instances) and they are allowed to modify quanta data in
|
|
46
|
+
place, for example update ``dependencies`` field to add additional
|
|
47
|
+
dependencies. Returned list of quanta will be re-ordered once again by the
|
|
48
|
+
graph executor to reflect new dependencies.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
@abstractmethod
|
|
52
|
+
def fixupQuanta(self, graph: QuantumGraph) -> QuantumGraph:
|
|
53
|
+
"""Update quanta in a graph.
|
|
54
|
+
|
|
55
|
+
Potentially anything in the graph could be changed if it does not
|
|
56
|
+
break executor assumptions. If modifications result in a dependency
|
|
57
|
+
cycle the executor will raise an exception.
|
|
58
|
+
|
|
59
|
+
Parameters
|
|
60
|
+
----------
|
|
61
|
+
graph : `.QuantumGraph`
|
|
62
|
+
Quantum Graph that will be executed by the executor.
|
|
63
|
+
|
|
64
|
+
Returns
|
|
65
|
+
-------
|
|
66
|
+
graph : `.QuantumGraph`
|
|
67
|
+
Modified graph.
|
|
68
|
+
"""
|
|
69
|
+
raise NotImplementedError
|
|
@@ -39,10 +39,10 @@ class QgraphTaskSummary(pydantic.BaseModel):
|
|
|
39
39
|
numQuanta: int = 0
|
|
40
40
|
"""Number of Quanta for this PipelineTask in this QuantumGraph."""
|
|
41
41
|
|
|
42
|
-
numInputs: dict[str, int] = Counter
|
|
42
|
+
numInputs: dict[str, int] = pydantic.Field(default_factory=Counter)
|
|
43
43
|
"""Total number of inputs per dataset type name for this PipelineTask."""
|
|
44
44
|
|
|
45
|
-
numOutputs: dict[str, int] = Counter
|
|
45
|
+
numOutputs: dict[str, int] = pydantic.Field(default_factory=Counter)
|
|
46
46
|
"""Total number of outputs per dataset type name for this PipelineTask."""
|
|
47
47
|
|
|
48
48
|
# Work around the fact that Sphinx chokes on Pydantic docstring formatting,
|
|
@@ -79,7 +79,7 @@ class QgraphTaskSummary(pydantic.BaseModel):
|
|
|
79
79
|
class QgraphSummary(pydantic.BaseModel):
|
|
80
80
|
"""Report for the QuantumGraph creation or reading."""
|
|
81
81
|
|
|
82
|
-
graphID: BuildId
|
|
82
|
+
graphID: BuildId | None = None
|
|
83
83
|
"""QuantumGraph ID."""
|
|
84
84
|
|
|
85
85
|
cmdLine: str | None = None
|
|
@@ -97,7 +97,7 @@ class QgraphSummary(pydantic.BaseModel):
|
|
|
97
97
|
outputRun: str | None = None
|
|
98
98
|
"""Output run collection."""
|
|
99
99
|
|
|
100
|
-
qgraphTaskSummaries: dict[str, QgraphTaskSummary] =
|
|
100
|
+
qgraphTaskSummaries: dict[str, QgraphTaskSummary] = pydantic.Field(default_factory=dict)
|
|
101
101
|
"""Quanta information summarized per PipelineTask."""
|
|
102
102
|
|
|
103
103
|
# Work around the fact that Sphinx chokes on Pydantic docstring formatting,
|