lsst-pipe-base 29.2025.3900__py3-none-any.whl → 29.2025.4000__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsst/pipe/base/dot_tools.py +14 -152
- lsst/pipe/base/exec_fixup_data_id.py +17 -44
- lsst/pipe/base/execution_graph_fixup.py +49 -18
- lsst/pipe/base/graph/graph.py +28 -9
- lsst/pipe/base/graph_walker.py +119 -0
- lsst/pipe/base/log_capture.py +5 -2
- lsst/pipe/base/mermaid_tools.py +11 -64
- lsst/pipe/base/mp_graph_executor.py +298 -236
- lsst/pipe/base/quantum_graph/__init__.py +32 -0
- lsst/pipe/base/quantum_graph/_common.py +610 -0
- lsst/pipe/base/quantum_graph/_multiblock.py +737 -0
- lsst/pipe/base/quantum_graph/_predicted.py +1874 -0
- lsst/pipe/base/quantum_graph/visualization.py +302 -0
- lsst/pipe/base/quantum_graph_builder.py +292 -34
- lsst/pipe/base/quantum_graph_executor.py +2 -1
- lsst/pipe/base/quantum_provenance_graph.py +16 -7
- lsst/pipe/base/separable_pipeline_executor.py +126 -15
- lsst/pipe/base/simple_pipeline_executor.py +44 -43
- lsst/pipe/base/single_quantum_executor.py +1 -40
- lsst/pipe/base/tests/mocks/__init__.py +1 -1
- lsst/pipe/base/tests/mocks/_pipeline_task.py +16 -1
- lsst/pipe/base/tests/mocks/{_in_memory_repo.py → _repo.py} +324 -45
- lsst/pipe/base/tests/mocks/_storage_class.py +6 -0
- lsst/pipe/base/tests/simpleQGraph.py +11 -5
- lsst/pipe/base/version.py +1 -1
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/METADATA +2 -1
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/RECORD +35 -29
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/WHEEL +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/entry_points.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/COPYRIGHT +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/LICENSE +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/bsd_license.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/licenses/gpl-v3.0.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/top_level.txt +0 -0
- {lsst_pipe_base-29.2025.3900.dist-info → lsst_pipe_base-29.2025.4000.dist-info}/zip-safe +0 -0
lsst/pipe/base/dot_tools.py
CHANGED
|
@@ -33,147 +33,27 @@ from __future__ import annotations
|
|
|
33
33
|
|
|
34
34
|
__all__ = ["graph2dot", "pipeline2dot"]
|
|
35
35
|
|
|
36
|
-
# -------------------------------
|
|
37
|
-
# Imports of standard modules --
|
|
38
|
-
# -------------------------------
|
|
39
|
-
import html
|
|
40
|
-
import io
|
|
41
36
|
from collections.abc import Iterable
|
|
42
37
|
from typing import TYPE_CHECKING, Any
|
|
43
38
|
|
|
44
|
-
# -----------------------------
|
|
45
|
-
# Imports for other modules --
|
|
46
|
-
# -----------------------------
|
|
47
39
|
from .pipeline import Pipeline
|
|
48
40
|
|
|
49
41
|
if TYPE_CHECKING:
|
|
50
|
-
from
|
|
51
|
-
from
|
|
42
|
+
from .graph import QuantumGraph
|
|
43
|
+
from .pipeline import TaskDef
|
|
44
|
+
from .quantum_graph import PredictedQuantumGraph
|
|
52
45
|
|
|
53
|
-
# ----------------------------------
|
|
54
|
-
# Local non-exported definitions --
|
|
55
|
-
# ----------------------------------
|
|
56
46
|
|
|
57
|
-
|
|
58
|
-
_NODELABELPOINTSIZE = "18"
|
|
59
|
-
_ATTRIBS = dict(
|
|
60
|
-
defaultGraph=dict(splines="ortho", nodesep="0.5", ranksep="0.75", pad="0.5"),
|
|
61
|
-
defaultNode=dict(shape="box", fontname="Monospace", fontsize="14", margin="0.2,0.1", penwidth="3"),
|
|
62
|
-
defaultEdge=dict(color="black", arrowsize="1.5", penwidth="1.5"),
|
|
63
|
-
task=dict(style="filled", color="black", fillcolor="#B1F2EF"),
|
|
64
|
-
quantum=dict(style="filled", color="black", fillcolor="#B1F2EF"),
|
|
65
|
-
dsType=dict(style="rounded,filled,bold", color="#00BABC", fillcolor="#F5F5F5"),
|
|
66
|
-
dataset=dict(style="rounded,filled,bold", color="#00BABC", fillcolor="#F5F5F5"),
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def _renderDefault(type: str, attribs: dict[str, str], file: io.TextIOBase) -> None:
|
|
71
|
-
"""Set default attributes for a given type."""
|
|
72
|
-
default_attribs = ", ".join([f'{key}="{val}"' for key, val in attribs.items()])
|
|
73
|
-
print(f"{type} [{default_attribs}];", file=file)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def _renderNode(file: io.TextIOBase, nodeName: str, style: str, labels: list[str]) -> None:
|
|
77
|
-
"""Render GV node"""
|
|
78
|
-
label = r"</TD></TR><TR><TD>".join(labels)
|
|
79
|
-
attrib_dict = dict(_ATTRIBS[style], label=label)
|
|
80
|
-
pre = '<<TABLE BORDER="0" CELLPADDING="5"><TR><TD>'
|
|
81
|
-
post = "</TD></TR></TABLE>>"
|
|
82
|
-
attrib = ", ".join(
|
|
83
|
-
[
|
|
84
|
-
f'{key}="{val}"' if key != "label" else f"{key}={pre}{val}{post}"
|
|
85
|
-
for key, val in attrib_dict.items()
|
|
86
|
-
]
|
|
87
|
-
)
|
|
88
|
-
print(f'"{nodeName}" [{attrib}];', file=file)
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def _renderTaskNode(nodeName: str, taskDef: TaskDef, file: io.TextIOBase, idx: Any = None) -> None:
|
|
92
|
-
"""Render GV node for a task"""
|
|
93
|
-
labels = [
|
|
94
|
-
f'<B><FONT POINT-SIZE="{_NODELABELPOINTSIZE}">' + html.escape(taskDef.label) + "</FONT></B>",
|
|
95
|
-
html.escape(taskDef.taskName),
|
|
96
|
-
]
|
|
97
|
-
if idx is not None:
|
|
98
|
-
labels.append(f"<I>index:</I> {idx}")
|
|
99
|
-
if taskDef.connections:
|
|
100
|
-
# don't print collection of str directly to avoid visually noisy quotes
|
|
101
|
-
dimensions_str = ", ".join(sorted(taskDef.connections.dimensions))
|
|
102
|
-
labels.append(f"<I>dimensions:</I> {html.escape(dimensions_str)}")
|
|
103
|
-
_renderNode(file, nodeName, "task", labels)
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def _renderQuantumNode(
|
|
107
|
-
nodeName: str, taskDef: TaskDef, quantumNode: QuantumNode, file: io.TextIOBase
|
|
108
|
-
) -> None:
|
|
109
|
-
"""Render GV node for a quantum"""
|
|
110
|
-
labels = [f"{quantumNode.nodeId}", html.escape(taskDef.label)]
|
|
111
|
-
dataId = quantumNode.quantum.dataId
|
|
112
|
-
assert dataId is not None, "Quantum DataId cannot be None"
|
|
113
|
-
labels.extend(f"{key} = {dataId[key]}" for key in sorted(dataId.required.keys()))
|
|
114
|
-
_renderNode(file, nodeName, "quantum", labels)
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def _renderDSTypeNode(name: str, dimensions: list[str], file: io.TextIOBase) -> None:
|
|
118
|
-
"""Render GV node for a dataset type"""
|
|
119
|
-
labels = [f'<B><FONT POINT-SIZE="{_NODELABELPOINTSIZE}">' + html.escape(name) + "</FONT></B>"]
|
|
120
|
-
if dimensions:
|
|
121
|
-
labels.append("<I>dimensions:</I> " + html.escape(", ".join(sorted(dimensions))))
|
|
122
|
-
_renderNode(file, name, "dsType", labels)
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def _renderDSNode(nodeName: str, dsRef: DatasetRef, file: io.TextIOBase) -> None:
|
|
126
|
-
"""Render GV node for a dataset"""
|
|
127
|
-
labels = [html.escape(dsRef.datasetType.name), f"run: {dsRef.run!r}"]
|
|
128
|
-
labels.extend(f"{key} = {dsRef.dataId[key]}" for key in sorted(dsRef.dataId.required.keys()))
|
|
129
|
-
_renderNode(file, nodeName, "dataset", labels)
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
def _renderEdge(fromName: str, toName: str, file: io.TextIOBase, **kwargs: Any) -> None:
|
|
133
|
-
"""Render GV edge"""
|
|
134
|
-
if kwargs:
|
|
135
|
-
attrib = ", ".join([f'{key}="{val}"' for key, val in kwargs.items()])
|
|
136
|
-
print(f'"{fromName}" -> "{toName}" [{attrib}];', file=file)
|
|
137
|
-
else:
|
|
138
|
-
print(f'"{fromName}" -> "{toName}";', file=file)
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def _datasetRefId(dsRef: DatasetRef) -> str:
|
|
142
|
-
"""Make an identifying string for given ref"""
|
|
143
|
-
dsId = [dsRef.datasetType.name]
|
|
144
|
-
dsId.extend(f"{key} = {dsRef.dataId[key]}" for key in sorted(dsRef.dataId.required.keys()))
|
|
145
|
-
return ":".join(dsId)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def _makeDSNode(dsRef: DatasetRef, allDatasetRefs: dict[str, str], file: io.TextIOBase) -> str:
|
|
149
|
-
"""Make new node for dataset if it does not exist.
|
|
150
|
-
|
|
151
|
-
Returns node name.
|
|
152
|
-
"""
|
|
153
|
-
dsRefId = _datasetRefId(dsRef)
|
|
154
|
-
nodeName = allDatasetRefs.get(dsRefId)
|
|
155
|
-
if nodeName is None:
|
|
156
|
-
idx = len(allDatasetRefs)
|
|
157
|
-
nodeName = f"dsref_{idx}"
|
|
158
|
-
allDatasetRefs[dsRefId] = nodeName
|
|
159
|
-
_renderDSNode(nodeName, dsRef, file)
|
|
160
|
-
return nodeName
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
# ------------------------
|
|
164
|
-
# Exported definitions --
|
|
165
|
-
# ------------------------
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
def graph2dot(qgraph: QuantumGraph, file: Any) -> None:
|
|
47
|
+
def graph2dot(qgraph: QuantumGraph | PredictedQuantumGraph, file: Any) -> None:
|
|
169
48
|
"""Convert QuantumGraph into GraphViz digraph.
|
|
170
49
|
|
|
171
50
|
This method is mostly for documentation/presentation purposes.
|
|
172
51
|
|
|
173
52
|
Parameters
|
|
174
53
|
----------
|
|
175
|
-
qgraph : `lsst.pipe.base.QuantumGraph`
|
|
176
|
-
|
|
54
|
+
qgraph : `lsst.pipe.base.QuantumGraph` or \
|
|
55
|
+
`lsst.pipe.base.quantum_graph.PredictedQuantumGraph`
|
|
56
|
+
Quantum graph object.
|
|
177
57
|
file : `str` or file object
|
|
178
58
|
File where GraphViz graph (DOT language) is written, can be a file name
|
|
179
59
|
or file object.
|
|
@@ -185,38 +65,20 @@ def graph2dot(qgraph: QuantumGraph, file: Any) -> None:
|
|
|
185
65
|
ImportError
|
|
186
66
|
Raised if the task class cannot be imported.
|
|
187
67
|
"""
|
|
68
|
+
from .quantum_graph import PredictedQuantumGraph, visualization
|
|
69
|
+
|
|
70
|
+
if not isinstance(qgraph, PredictedQuantumGraph):
|
|
71
|
+
qgraph = PredictedQuantumGraph.from_old_quantum_graph(qgraph)
|
|
72
|
+
|
|
188
73
|
# open a file if needed
|
|
189
74
|
close = False
|
|
190
75
|
if not hasattr(file, "write"):
|
|
191
76
|
file = open(file, "w")
|
|
192
77
|
close = True
|
|
193
78
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
_renderDefault("node", _ATTRIBS["defaultNode"], file)
|
|
197
|
-
_renderDefault("edge", _ATTRIBS["defaultEdge"], file)
|
|
198
|
-
|
|
199
|
-
allDatasetRefs: dict[str, str] = {}
|
|
200
|
-
for taskId, taskDef in enumerate(qgraph.taskGraph):
|
|
201
|
-
quanta = qgraph.getNodesForTask(taskDef)
|
|
202
|
-
for qId, quantumNode in enumerate(quanta):
|
|
203
|
-
# node for a task
|
|
204
|
-
taskNodeName = f"task_{taskId}_{qId}"
|
|
205
|
-
_renderQuantumNode(taskNodeName, taskDef, quantumNode, file)
|
|
206
|
-
|
|
207
|
-
# quantum inputs
|
|
208
|
-
for dsRefs in quantumNode.quantum.inputs.values():
|
|
209
|
-
for dsRef in dsRefs:
|
|
210
|
-
nodeName = _makeDSNode(dsRef, allDatasetRefs, file)
|
|
211
|
-
_renderEdge(nodeName, taskNodeName, file)
|
|
212
|
-
|
|
213
|
-
# quantum outputs
|
|
214
|
-
for dsRefs in quantumNode.quantum.outputs.values():
|
|
215
|
-
for dsRef in dsRefs:
|
|
216
|
-
nodeName = _makeDSNode(dsRef, allDatasetRefs, file)
|
|
217
|
-
_renderEdge(taskNodeName, nodeName, file)
|
|
79
|
+
v = visualization.QuantumGraphDotVisualizer()
|
|
80
|
+
v.write_bipartite(qgraph, file)
|
|
218
81
|
|
|
219
|
-
print("}", file=file)
|
|
220
82
|
if close:
|
|
221
83
|
file.close()
|
|
222
84
|
|
|
@@ -27,16 +27,17 @@
|
|
|
27
27
|
|
|
28
28
|
__all__ = ["ExecutionGraphFixup"]
|
|
29
29
|
|
|
30
|
-
import contextlib
|
|
31
30
|
import itertools
|
|
31
|
+
import uuid
|
|
32
32
|
from collections import defaultdict
|
|
33
|
-
from collections.abc import Sequence
|
|
34
|
-
from typing import Any
|
|
33
|
+
from collections.abc import Mapping, Sequence
|
|
35
34
|
|
|
36
35
|
import networkx as nx
|
|
37
36
|
|
|
37
|
+
from lsst.daf.butler import DataCoordinate, DataIdValue
|
|
38
|
+
|
|
38
39
|
from .execution_graph_fixup import ExecutionGraphFixup
|
|
39
|
-
from .graph import QuantumGraph
|
|
40
|
+
from .graph import QuantumGraph
|
|
40
41
|
|
|
41
42
|
|
|
42
43
|
class ExecFixupDataId(ExecutionGraphFixup):
|
|
@@ -88,44 +89,16 @@ class ExecFixupDataId(ExecutionGraphFixup):
|
|
|
88
89
|
else:
|
|
89
90
|
self.dimensions = tuple(self.dimensions)
|
|
90
91
|
|
|
91
|
-
def _key(self, qnode: QuantumNode) -> tuple[Any, ...]:
|
|
92
|
-
"""Produce comparison key for quantum data.
|
|
93
|
-
|
|
94
|
-
Parameters
|
|
95
|
-
----------
|
|
96
|
-
qnode : `QuantumNode`
|
|
97
|
-
An individual node in a `~lsst.pipe.base.QuantumGraph`
|
|
98
|
-
|
|
99
|
-
Returns
|
|
100
|
-
-------
|
|
101
|
-
key : `tuple`
|
|
102
|
-
"""
|
|
103
|
-
dataId = qnode.quantum.dataId
|
|
104
|
-
assert dataId is not None, "Quantum DataId cannot be None"
|
|
105
|
-
key = tuple(dataId[dim] for dim in self.dimensions)
|
|
106
|
-
return key
|
|
107
|
-
|
|
108
92
|
def fixupQuanta(self, graph: QuantumGraph) -> QuantumGraph:
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
for prev_node in keyQuanta[prev_key]:
|
|
122
|
-
for node in keyQuanta[key]:
|
|
123
|
-
# remove any existing edges between the two nodes, but
|
|
124
|
-
# don't fail if there are not any. Both directions need
|
|
125
|
-
# tried because in a directed graph, order maters
|
|
126
|
-
for edge in ((node, prev_node), (prev_node, node)):
|
|
127
|
-
with contextlib.suppress(nx.NetworkXException):
|
|
128
|
-
networkGraph.remove_edge(*edge)
|
|
129
|
-
|
|
130
|
-
networkGraph.add_edge(prev_node, node)
|
|
131
|
-
return graph
|
|
93
|
+
raise NotImplementedError()
|
|
94
|
+
|
|
95
|
+
def fixup_graph(
|
|
96
|
+
self, xgraph: nx.DiGraph, quanta_by_task: Mapping[str, Mapping[DataCoordinate, uuid.UUID]]
|
|
97
|
+
) -> None:
|
|
98
|
+
quanta_by_sort_key: defaultdict[tuple[DataIdValue, ...], list[uuid.UUID]] = defaultdict(list)
|
|
99
|
+
for data_id, quantum_id in quanta_by_task[self.taskLabel].items():
|
|
100
|
+
key = tuple(data_id[dim] for dim in self.dimensions)
|
|
101
|
+
quanta_by_sort_key[key].append(quantum_id)
|
|
102
|
+
sorted_keys = sorted(quanta_by_sort_key.keys(), reverse=self.reverse)
|
|
103
|
+
for prev_key, key in itertools.pairwise(sorted_keys):
|
|
104
|
+
xgraph.add_edges_from(itertools.product(quanta_by_sort_key[prev_key], quanta_by_sort_key[key]))
|
|
@@ -27,7 +27,13 @@
|
|
|
27
27
|
|
|
28
28
|
__all__ = ["ExecutionGraphFixup"]
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
import uuid
|
|
31
|
+
from abc import ABC
|
|
32
|
+
from collections.abc import Mapping
|
|
33
|
+
|
|
34
|
+
import networkx
|
|
35
|
+
|
|
36
|
+
from lsst.daf.butler import DataCoordinate
|
|
31
37
|
|
|
32
38
|
from .graph import QuantumGraph
|
|
33
39
|
|
|
@@ -35,27 +41,26 @@ from .graph import QuantumGraph
|
|
|
35
41
|
class ExecutionGraphFixup(ABC):
|
|
36
42
|
"""Interface for classes which update quantum graphs before execution.
|
|
37
43
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
44
|
+
Notes
|
|
45
|
+
-----
|
|
46
|
+
The primary goal of this class is to modify quanta dependencies which may
|
|
47
|
+
not be possible to reflect in a quantum graph using standard tools. One
|
|
48
|
+
known use case for that is to guarantee particular execution order of
|
|
49
|
+
visits in CI jobs for cases when outcome depends on the processing order of
|
|
50
|
+
visits (e.g. AP association pipeline).
|
|
51
|
+
|
|
52
|
+
Instances of this class receive a preliminary graph and are allowed to
|
|
53
|
+
add edges, as long as those edges do not result in a cycle. Edges and
|
|
54
|
+
nodes may not be removed.
|
|
55
|
+
|
|
56
|
+
New subclasses should implement only `fixup_graph`, which will always be
|
|
57
|
+
called first. `fixupQuanta` is only called if `fixup_graph` raises
|
|
58
|
+
`NotImplementedError`.
|
|
49
59
|
"""
|
|
50
60
|
|
|
51
|
-
@abstractmethod
|
|
52
61
|
def fixupQuanta(self, graph: QuantumGraph) -> QuantumGraph:
|
|
53
62
|
"""Update quanta in a graph.
|
|
54
63
|
|
|
55
|
-
Potentially anything in the graph could be changed if it does not
|
|
56
|
-
break executor assumptions. If modifications result in a dependency
|
|
57
|
-
cycle the executor will raise an exception.
|
|
58
|
-
|
|
59
64
|
Parameters
|
|
60
65
|
----------
|
|
61
66
|
graph : `.QuantumGraph`
|
|
@@ -65,5 +70,31 @@ class ExecutionGraphFixup(ABC):
|
|
|
65
70
|
-------
|
|
66
71
|
graph : `.QuantumGraph`
|
|
67
72
|
Modified graph.
|
|
73
|
+
|
|
74
|
+
Notes
|
|
75
|
+
-----
|
|
76
|
+
This hook is provided for backwards compatibility only.
|
|
77
|
+
"""
|
|
78
|
+
raise NotImplementedError()
|
|
79
|
+
|
|
80
|
+
def fixup_graph(
|
|
81
|
+
self, xgraph: networkx.DiGraph, quanta_by_task: Mapping[str, Mapping[DataCoordinate, uuid.UUID]]
|
|
82
|
+
) -> None:
|
|
83
|
+
"""Update a networkx graph of quanta in place by adding edges to
|
|
84
|
+
further constrain the ordering.
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
xgraph : `networkx.DiGraph`
|
|
89
|
+
A directed acyclic graph of quanta to modify in place. Node keys
|
|
90
|
+
are quantum UUIDs, and attributes include ``task_label`` (`str`)
|
|
91
|
+
and ``data_id`` (a full `lsst.daf.butler.DataCoordinate`, without
|
|
92
|
+
dimension records attached). Edges may be added, but not removed.
|
|
93
|
+
Nodes may not be modified.
|
|
94
|
+
quanta_by_task : `~collections.abc.Mapping` [ `str`,\
|
|
95
|
+
`~collections.abc.Mapping` [ `lsst.daf.butler.DataCoordinate`,\
|
|
96
|
+
`uuid.UUID` ] ]
|
|
97
|
+
All quanta in the graph, grouped first by task label and then by
|
|
98
|
+
data ID.
|
|
68
99
|
"""
|
|
69
|
-
raise NotImplementedError
|
|
100
|
+
raise NotImplementedError()
|
lsst/pipe/base/graph/graph.py
CHANGED
|
@@ -806,11 +806,18 @@ class QuantumGraph:
|
|
|
806
806
|
uri : convertible to `~lsst.resources.ResourcePath`
|
|
807
807
|
URI to where the graph should be saved.
|
|
808
808
|
"""
|
|
809
|
-
buffer = self._buildSaveObject()
|
|
810
809
|
path = ResourcePath(uri)
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
810
|
+
match path.getExtension():
|
|
811
|
+
case ".qgraph":
|
|
812
|
+
buffer = self._buildSaveObject()
|
|
813
|
+
path.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes
|
|
814
|
+
case ".qg":
|
|
815
|
+
from ..quantum_graph import PredictedQuantumGraphComponents
|
|
816
|
+
|
|
817
|
+
pqg = PredictedQuantumGraphComponents.from_old_quantum_graph(self)
|
|
818
|
+
pqg.write(path)
|
|
819
|
+
case ext:
|
|
820
|
+
raise TypeError(f"Can currently only save a graph in .qgraph or .qg format, not {ext!r}.")
|
|
814
821
|
|
|
815
822
|
@property
|
|
816
823
|
def metadata(self) -> MappingProxyType[str, Any]:
|
|
@@ -962,11 +969,23 @@ class QuantumGraph:
|
|
|
962
969
|
the graph.
|
|
963
970
|
"""
|
|
964
971
|
uri = ResourcePath(uri)
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
972
|
+
match uri.getExtension():
|
|
973
|
+
case ".qgraph":
|
|
974
|
+
with LoadHelper(uri, minimumVersion, fullRead=(nodes is None)) as loader:
|
|
975
|
+
qgraph = loader.load(universe, nodes, graphID)
|
|
976
|
+
case ".qg":
|
|
977
|
+
from ..quantum_graph import PredictedQuantumGraphReader
|
|
978
|
+
|
|
979
|
+
with PredictedQuantumGraphReader.open(uri, page_size=100000) as qgr:
|
|
980
|
+
quantum_ids = (
|
|
981
|
+
[uuid.UUID(q) if not isinstance(q, uuid.UUID) else q for q in nodes]
|
|
982
|
+
if nodes is not None
|
|
983
|
+
else None
|
|
984
|
+
)
|
|
985
|
+
qgr.read_execution_quanta(quantum_ids)
|
|
986
|
+
qgraph = qgr.finish().to_old_quantum_graph()
|
|
987
|
+
case _:
|
|
988
|
+
raise ValueError(f"Only know how to handle files saved as `.qgraph`, not {uri}")
|
|
970
989
|
if not isinstance(qgraph, QuantumGraph):
|
|
971
990
|
raise TypeError(f"QuantumGraph file {uri} contains unexpected object type: {type(qgraph)}")
|
|
972
991
|
return qgraph
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# This file is part of pipe_base.
|
|
2
|
+
#
|
|
3
|
+
# Developed for the LSST Data Management System.
|
|
4
|
+
# This product includes software developed by the LSST Project
|
|
5
|
+
# (http://www.lsst.org).
|
|
6
|
+
# See the COPYRIGHT file at the top-level directory of this distribution
|
|
7
|
+
# for details of code ownership.
|
|
8
|
+
#
|
|
9
|
+
# This software is dual licensed under the GNU General Public License and also
|
|
10
|
+
# under a 3-clause BSD license. Recipients may choose which of these licenses
|
|
11
|
+
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
|
|
12
|
+
# respectively. If you choose the GPL option then the following text applies
|
|
13
|
+
# (but note that there is still no warranty even if you opt for BSD instead):
|
|
14
|
+
#
|
|
15
|
+
# This program is free software: you can redistribute it and/or modify
|
|
16
|
+
# it under the terms of the GNU General Public License as published by
|
|
17
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
18
|
+
# (at your option) any later version.
|
|
19
|
+
#
|
|
20
|
+
# This program is distributed in the hope that it will be useful,
|
|
21
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
22
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
23
|
+
# GNU General Public License for more details.
|
|
24
|
+
#
|
|
25
|
+
# You should have received a copy of the GNU General Public License
|
|
26
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
__all__ = ("GraphWalker",)
|
|
31
|
+
|
|
32
|
+
from typing import Generic, Self, TypeVar
|
|
33
|
+
|
|
34
|
+
import networkx
|
|
35
|
+
|
|
36
|
+
_T = TypeVar("_T")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class GraphWalker(Generic[_T]):
|
|
40
|
+
"""A helper for traversing directed acyclic graphs.
|
|
41
|
+
|
|
42
|
+
Parameters
|
|
43
|
+
----------
|
|
44
|
+
xgraph : `networkx.DiGraph` or `networkx.MultiDiGraph`
|
|
45
|
+
Networkx graph to process. Will be consumed during iteration, so this
|
|
46
|
+
should often be a copy.
|
|
47
|
+
|
|
48
|
+
Notes
|
|
49
|
+
-----
|
|
50
|
+
Each iteration yields a `frozenset` of nodes, which may be empty if there
|
|
51
|
+
are no nodes ready for processing. A node is only considered ready if all
|
|
52
|
+
of its predecessor nodes have been marked as complete with `finish`.
|
|
53
|
+
Iteration only completes when all nodes have been finished or failed.
|
|
54
|
+
|
|
55
|
+
`GraphWalker` is not thread-safe; calling one `GraphWalker` method while
|
|
56
|
+
another is in progress is undefined behavior. It is designed to be used
|
|
57
|
+
in the management thread or process in a parallel traversal.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
def __init__(self, xgraph: networkx.DiGraph | networkx.MultiDiGraph):
|
|
61
|
+
self._xgraph = xgraph
|
|
62
|
+
self._ready: set[_T] = set(next(iter(networkx.dag.topological_generations(self._xgraph)), []))
|
|
63
|
+
self._active: set[_T] = set()
|
|
64
|
+
self._incomplete: set[_T] = set(self._xgraph)
|
|
65
|
+
|
|
66
|
+
def __iter__(self) -> Self:
|
|
67
|
+
return self
|
|
68
|
+
|
|
69
|
+
def __next__(self) -> frozenset[_T]:
|
|
70
|
+
if not self._incomplete:
|
|
71
|
+
raise StopIteration()
|
|
72
|
+
new_active = frozenset(self._ready)
|
|
73
|
+
self._active.update(new_active)
|
|
74
|
+
self._ready.clear()
|
|
75
|
+
return new_active
|
|
76
|
+
|
|
77
|
+
def finish(self, key: _T) -> None:
|
|
78
|
+
"""Mark a node as successfully processed, unblocking (at least in part)
|
|
79
|
+
iteration over successor nodes.
|
|
80
|
+
|
|
81
|
+
Parameters
|
|
82
|
+
----------
|
|
83
|
+
key : unspecified
|
|
84
|
+
NetworkX key of the node to mark finished.
|
|
85
|
+
"""
|
|
86
|
+
self._active.remove(key)
|
|
87
|
+
self._incomplete.remove(key)
|
|
88
|
+
successors = list(self._xgraph.successors(key))
|
|
89
|
+
for successor in successors:
|
|
90
|
+
assert successor not in self._active, (
|
|
91
|
+
"A node downstream of an active one should not have been yielded yet."
|
|
92
|
+
)
|
|
93
|
+
if all(
|
|
94
|
+
predecessor not in self._incomplete for predecessor in self._xgraph.predecessors(successor)
|
|
95
|
+
):
|
|
96
|
+
self._ready.add(successor)
|
|
97
|
+
|
|
98
|
+
def fail(self, key: _T) -> list[_T]:
|
|
99
|
+
"""Mark a node as unsuccessfully processed, permanently blocking all
|
|
100
|
+
recursive descendants.
|
|
101
|
+
|
|
102
|
+
Parameters
|
|
103
|
+
----------
|
|
104
|
+
key : unspecified
|
|
105
|
+
NetworkX key of the node to mark as a failure.
|
|
106
|
+
|
|
107
|
+
Returns
|
|
108
|
+
-------
|
|
109
|
+
blocked : `list`
|
|
110
|
+
NetworkX keys of nodes that were recursive descendants of the
|
|
111
|
+
failed node, and will hence never be yielded by the iterator.
|
|
112
|
+
"""
|
|
113
|
+
self._active.remove(key)
|
|
114
|
+
self._incomplete.remove(key)
|
|
115
|
+
descendants = list(networkx.dag.descendants(self._xgraph, key))
|
|
116
|
+
self._xgraph.remove_node(key)
|
|
117
|
+
self._xgraph.remove_nodes_from(descendants)
|
|
118
|
+
self._incomplete.difference_update(descendants)
|
|
119
|
+
return descendants
|
lsst/pipe/base/log_capture.py
CHANGED
|
@@ -41,6 +41,7 @@ from lsst.daf.butler import Butler, FileDataset, LimitedButler, Quantum
|
|
|
41
41
|
from lsst.daf.butler.logging import ButlerLogRecordHandler, ButlerLogRecords, ButlerMDC, JsonLogFormatter
|
|
42
42
|
|
|
43
43
|
from ._status import InvalidQuantumError
|
|
44
|
+
from .automatic_connection_constants import METADATA_OUTPUT_TEMPLATE
|
|
44
45
|
from .pipeline_graph import TaskNode
|
|
45
46
|
|
|
46
47
|
_LOG = logging.getLogger(__name__)
|
|
@@ -116,8 +117,10 @@ class LogCapture:
|
|
|
116
117
|
mdc = {"LABEL": task_node.label, "RUN": ""}
|
|
117
118
|
if quantum.dataId:
|
|
118
119
|
mdc["LABEL"] += f":{quantum.dataId}"
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
|
|
121
|
+
metadata_ref = quantum.outputs[METADATA_OUTPUT_TEMPLATE.format(label=task_node.label)][0]
|
|
122
|
+
mdc["RUN"] = metadata_ref.run
|
|
123
|
+
|
|
121
124
|
ctx = _LogCaptureFlag()
|
|
122
125
|
log_dataset_name = (
|
|
123
126
|
task_node.log_output.dataset_type_name if task_node.log_output is not None else None
|
lsst/pipe/base/mermaid_tools.py
CHANGED
|
@@ -39,39 +39,12 @@ from typing import TYPE_CHECKING, Any, Literal
|
|
|
39
39
|
from .pipeline import Pipeline
|
|
40
40
|
|
|
41
41
|
if TYPE_CHECKING:
|
|
42
|
-
from
|
|
43
|
-
from
|
|
42
|
+
from .graph import QuantumGraph
|
|
43
|
+
from .pipeline import TaskDef
|
|
44
|
+
from .quantum_graph import PredictedQuantumGraph
|
|
44
45
|
|
|
45
46
|
|
|
46
|
-
def
|
|
47
|
-
"""Make a unique identifier string for a dataset ref based on its name and
|
|
48
|
-
dataId.
|
|
49
|
-
"""
|
|
50
|
-
dsIdParts = [dsRef.datasetType.name]
|
|
51
|
-
dsIdParts.extend(f"{key}_{dsRef.dataId[key]}" for key in sorted(dsRef.dataId.required.keys()))
|
|
52
|
-
return "_".join(dsIdParts)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def _makeDatasetNode(dsRef: DatasetRef, allDatasetRefs: dict[str, str], file: Any) -> str:
|
|
56
|
-
"""Create a Mermaid node for a dataset if it doesn't exist, and return its
|
|
57
|
-
node ID.
|
|
58
|
-
"""
|
|
59
|
-
dsId = _datasetRefId(dsRef)
|
|
60
|
-
nodeName = allDatasetRefs.get(dsId)
|
|
61
|
-
if nodeName is None:
|
|
62
|
-
nodeName = f"DATASET_{len(allDatasetRefs)}"
|
|
63
|
-
allDatasetRefs[dsId] = nodeName
|
|
64
|
-
# Simple label: datasetType name and run.
|
|
65
|
-
label_lines = [f"**{dsRef.datasetType.name}**", f"run: {dsRef.run}"]
|
|
66
|
-
# Add dataId info.
|
|
67
|
-
for k in sorted(dsRef.dataId.required.keys()):
|
|
68
|
-
label_lines.append(f"{k}={dsRef.dataId[k]}")
|
|
69
|
-
label = "<br>".join(label_lines)
|
|
70
|
-
print(f'{nodeName}["{label}"]', file=file)
|
|
71
|
-
return nodeName
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def graph2mermaid(qgraph: QuantumGraph, file: Any) -> None:
|
|
47
|
+
def graph2mermaid(qgraph: QuantumGraph | PredictedQuantumGraph, file: Any) -> None:
|
|
75
48
|
"""Convert QuantumGraph into a Mermaid flowchart (top-down).
|
|
76
49
|
|
|
77
50
|
This method is mostly for documentation/presentation purposes.
|
|
@@ -91,45 +64,19 @@ def graph2mermaid(qgraph: QuantumGraph, file: Any) -> None:
|
|
|
91
64
|
ImportError
|
|
92
65
|
Raised if the task class cannot be imported.
|
|
93
66
|
"""
|
|
67
|
+
from .quantum_graph import PredictedQuantumGraph, visualization
|
|
68
|
+
|
|
69
|
+
if not isinstance(qgraph, PredictedQuantumGraph):
|
|
70
|
+
qgraph = PredictedQuantumGraph.from_old_quantum_graph(qgraph)
|
|
71
|
+
|
|
94
72
|
# Open a file if needed.
|
|
95
73
|
close = False
|
|
96
74
|
if not hasattr(file, "write"):
|
|
97
75
|
file = open(file, "w")
|
|
98
76
|
close = True
|
|
99
77
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
# To avoid duplicating dataset nodes, we track them.
|
|
104
|
-
allDatasetRefs: dict[str, str] = {}
|
|
105
|
-
|
|
106
|
-
# Process each task/quantum.
|
|
107
|
-
for taskId, taskDef in enumerate(qgraph.taskGraph):
|
|
108
|
-
quanta = qgraph.getNodesForTask(taskDef)
|
|
109
|
-
for qId, quantumNode in enumerate(quanta):
|
|
110
|
-
# Create quantum node.
|
|
111
|
-
taskNodeName = f"TASK_{taskId}_{qId}"
|
|
112
|
-
taskLabelLines = [f"**{taskDef.label}**", f"Node ID: {quantumNode.nodeId}"]
|
|
113
|
-
dataId = quantumNode.quantum.dataId
|
|
114
|
-
if dataId is not None:
|
|
115
|
-
for k in sorted(dataId.required.keys()):
|
|
116
|
-
taskLabelLines.append(f"{k}={dataId[k]}")
|
|
117
|
-
else:
|
|
118
|
-
raise ValueError("Quantum DataId cannot be None")
|
|
119
|
-
taskLabel = "<br>".join(taskLabelLines)
|
|
120
|
-
print(f'{taskNodeName}["{taskLabel}"]', file=file)
|
|
121
|
-
|
|
122
|
-
# Quantum inputs: datasets --> tasks
|
|
123
|
-
for dsRefs in quantumNode.quantum.inputs.values():
|
|
124
|
-
for dsRef in dsRefs:
|
|
125
|
-
dsNode = _makeDatasetNode(dsRef, allDatasetRefs, file)
|
|
126
|
-
print(f"{dsNode} --> {taskNodeName}", file=file)
|
|
127
|
-
|
|
128
|
-
# Quantum outputs: tasks --> datasets
|
|
129
|
-
for dsRefs in quantumNode.quantum.outputs.values():
|
|
130
|
-
for dsRef in dsRefs:
|
|
131
|
-
dsNode = _makeDatasetNode(dsRef, allDatasetRefs, file)
|
|
132
|
-
print(f"{taskNodeName} --> {dsNode}", file=file)
|
|
78
|
+
v = visualization.QuantumGraphMermaidVisualizer()
|
|
79
|
+
v.write_bipartite(qgraph, file)
|
|
133
80
|
|
|
134
81
|
if close:
|
|
135
82
|
file.close()
|