modacor 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modacor/__init__.py +30 -0
- modacor/dataclasses/__init__.py +0 -0
- modacor/dataclasses/basedata.py +973 -0
- modacor/dataclasses/databundle.py +23 -0
- modacor/dataclasses/helpers.py +45 -0
- modacor/dataclasses/messagehandler.py +75 -0
- modacor/dataclasses/process_step.py +233 -0
- modacor/dataclasses/process_step_describer.py +146 -0
- modacor/dataclasses/processing_data.py +59 -0
- modacor/dataclasses/trace_event.py +118 -0
- modacor/dataclasses/uncertainty_tools.py +132 -0
- modacor/dataclasses/validators.py +84 -0
- modacor/debug/pipeline_tracer.py +548 -0
- modacor/io/__init__.py +33 -0
- modacor/io/csv/__init__.py +0 -0
- modacor/io/csv/csv_sink.py +114 -0
- modacor/io/csv/csv_source.py +210 -0
- modacor/io/hdf/__init__.py +27 -0
- modacor/io/hdf/hdf_source.py +120 -0
- modacor/io/io_sink.py +41 -0
- modacor/io/io_sinks.py +61 -0
- modacor/io/io_source.py +164 -0
- modacor/io/io_sources.py +208 -0
- modacor/io/processing_path.py +113 -0
- modacor/io/tiled/__init__.py +16 -0
- modacor/io/tiled/tiled_source.py +403 -0
- modacor/io/yaml/__init__.py +27 -0
- modacor/io/yaml/yaml_source.py +116 -0
- modacor/modules/__init__.py +53 -0
- modacor/modules/base_modules/__init__.py +0 -0
- modacor/modules/base_modules/append_processing_data.py +329 -0
- modacor/modules/base_modules/append_sink.py +141 -0
- modacor/modules/base_modules/append_source.py +181 -0
- modacor/modules/base_modules/bitwise_or_masks.py +113 -0
- modacor/modules/base_modules/combine_uncertainties.py +120 -0
- modacor/modules/base_modules/combine_uncertainties_max.py +105 -0
- modacor/modules/base_modules/divide.py +82 -0
- modacor/modules/base_modules/find_scale_factor1d.py +373 -0
- modacor/modules/base_modules/multiply.py +77 -0
- modacor/modules/base_modules/multiply_databundles.py +73 -0
- modacor/modules/base_modules/poisson_uncertainties.py +69 -0
- modacor/modules/base_modules/reduce_dimensionality.py +252 -0
- modacor/modules/base_modules/sink_processing_data.py +80 -0
- modacor/modules/base_modules/subtract.py +80 -0
- modacor/modules/base_modules/subtract_databundles.py +67 -0
- modacor/modules/base_modules/units_label_update.py +66 -0
- modacor/modules/instrument_modules/__init__.py +0 -0
- modacor/modules/instrument_modules/readme.md +9 -0
- modacor/modules/technique_modules/__init__.py +0 -0
- modacor/modules/technique_modules/scattering/__init__.py +0 -0
- modacor/modules/technique_modules/scattering/geometry_helpers.py +114 -0
- modacor/modules/technique_modules/scattering/index_pixels.py +492 -0
- modacor/modules/technique_modules/scattering/indexed_averager.py +628 -0
- modacor/modules/technique_modules/scattering/pixel_coordinates_3d.py +417 -0
- modacor/modules/technique_modules/scattering/solid_angle_correction.py +63 -0
- modacor/modules/technique_modules/scattering/xs_geometry.py +571 -0
- modacor/modules/technique_modules/scattering/xs_geometry_from_pixel_coordinates.py +293 -0
- modacor/runner/__init__.py +0 -0
- modacor/runner/pipeline.py +749 -0
- modacor/runner/process_step_registry.py +224 -0
- modacor/tests/__init__.py +27 -0
- modacor/tests/dataclasses/test_basedata.py +519 -0
- modacor/tests/dataclasses/test_basedata_operations.py +439 -0
- modacor/tests/dataclasses/test_basedata_to_base_units.py +57 -0
- modacor/tests/dataclasses/test_process_step_describer.py +73 -0
- modacor/tests/dataclasses/test_processstep.py +282 -0
- modacor/tests/debug/test_tracing_integration.py +188 -0
- modacor/tests/integration/__init__.py +0 -0
- modacor/tests/integration/test_pipeline_run.py +238 -0
- modacor/tests/io/__init__.py +27 -0
- modacor/tests/io/csv/__init__.py +0 -0
- modacor/tests/io/csv/test_csv_source.py +156 -0
- modacor/tests/io/hdf/__init__.py +27 -0
- modacor/tests/io/hdf/test_hdf_source.py +92 -0
- modacor/tests/io/test_io_sources.py +119 -0
- modacor/tests/io/tiled/__init__.py +12 -0
- modacor/tests/io/tiled/test_tiled_source.py +120 -0
- modacor/tests/io/yaml/__init__.py +27 -0
- modacor/tests/io/yaml/static_data_example.yaml +26 -0
- modacor/tests/io/yaml/test_yaml_source.py +47 -0
- modacor/tests/modules/__init__.py +27 -0
- modacor/tests/modules/base_modules/__init__.py +27 -0
- modacor/tests/modules/base_modules/test_append_processing_data.py +219 -0
- modacor/tests/modules/base_modules/test_append_sink.py +76 -0
- modacor/tests/modules/base_modules/test_append_source.py +180 -0
- modacor/tests/modules/base_modules/test_bitwise_or_masks.py +264 -0
- modacor/tests/modules/base_modules/test_combine_uncertainties.py +105 -0
- modacor/tests/modules/base_modules/test_combine_uncertainties_max.py +109 -0
- modacor/tests/modules/base_modules/test_divide.py +140 -0
- modacor/tests/modules/base_modules/test_find_scale_factor1d.py +220 -0
- modacor/tests/modules/base_modules/test_multiply.py +113 -0
- modacor/tests/modules/base_modules/test_multiply_databundles.py +136 -0
- modacor/tests/modules/base_modules/test_poisson_uncertainties.py +61 -0
- modacor/tests/modules/base_modules/test_reduce_dimensionality.py +358 -0
- modacor/tests/modules/base_modules/test_sink_processing_data.py +119 -0
- modacor/tests/modules/base_modules/test_subtract.py +111 -0
- modacor/tests/modules/base_modules/test_subtract_databundles.py +136 -0
- modacor/tests/modules/base_modules/test_units_label_update.py +91 -0
- modacor/tests/modules/technique_modules/__init__.py +0 -0
- modacor/tests/modules/technique_modules/scattering/__init__.py +0 -0
- modacor/tests/modules/technique_modules/scattering/test_geometry_helpers.py +198 -0
- modacor/tests/modules/technique_modules/scattering/test_index_pixels.py +426 -0
- modacor/tests/modules/technique_modules/scattering/test_indexed_averaging.py +559 -0
- modacor/tests/modules/technique_modules/scattering/test_pixel_coordinates_3d.py +282 -0
- modacor/tests/modules/technique_modules/scattering/test_xs_geometry_from_pixel_coordinates.py +224 -0
- modacor/tests/modules/technique_modules/scattering/test_xsgeometry.py +635 -0
- modacor/tests/requirements.txt +12 -0
- modacor/tests/runner/test_pipeline.py +438 -0
- modacor/tests/runner/test_process_step_registry.py +65 -0
- modacor/tests/test_import.py +43 -0
- modacor/tests/test_modacor.py +17 -0
- modacor/tests/test_units.py +79 -0
- modacor/units.py +97 -0
- modacor-1.0.0.dist-info/METADATA +482 -0
- modacor-1.0.0.dist-info/RECORD +120 -0
- modacor-1.0.0.dist-info/WHEEL +5 -0
- modacor-1.0.0.dist-info/licenses/AUTHORS.md +11 -0
- modacor-1.0.0.dist-info/licenses/LICENSE +11 -0
- modacor-1.0.0.dist-info/licenses/LICENSE.txt +11 -0
- modacor-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,749 @@
# SPDX-License-Identifier: BSD-3-Clause
# /usr/bin/env python3
# -*- coding: utf-8 -*-

from __future__ import annotations

__coding__ = "utf-8"
__authors__ = ["Anja Hörmann", "Brian R. Pauw"]  # add names to the list as appropriate
__copyright__ = "Copyright 2025, The MoDaCor team"
__date__ = "22/11/2025"
__status__ = "Development"  # "Development", "Production"
# end of header and standard imports

import json
from graphlib import TopologicalSorter

# quick hash at node-level (UI can show "config changed" without reading trace events)
from hashlib import sha256
from pathlib import Path
from typing import Any, Iterable, Mapping, Self

import yaml
from attrs import define, field

from modacor.debug.pipeline_tracer import tracer_event_to_datasets_payload

from ..dataclasses.process_step import ProcessStep
from ..dataclasses.trace_event import TraceEvent
from ..io.io_sources import IoSources  # noqa: F401  # reserved for future use
from .process_step_registry import DEFAULT_PROCESS_STEP_REGISTRY, ProcessStepRegistry

__all__ = ["Pipeline"]


@define
class Pipeline(TopologicalSorter):
    """
    Pipeline nodes are assumed to be of type ProcessStep.

    The underlying `graph` maps each node to the set of prerequisite nodes
    that must complete before it can run.
    """

    graph: dict[ProcessStep, set[ProcessStep]] = field(factory=dict)
    name: str = field(default="Unnamed Pipeline")
    # Optional trace events collected during a run (step_id -> list of events)
    trace_events: dict[str, list[TraceEvent]] = field(factory=dict, repr=False)

    def __attrs_post_init__(self) -> None:
        super().__init__(graph=self.graph)

    # Trace helpers: these make pipelines easier to debug by storing trace events per step:
    def add_trace_event(self, event: TraceEvent) -> None:
        self.trace_events.setdefault(str(event.step_id), []).append(event)

    def clear_trace_events(self) -> None:
        self.trace_events.clear()

    # --------------------------------------------------------------------- #
    # Pipeline construction helpers
    # --------------------------------------------------------------------- #

    @classmethod
    def from_yaml_file(
        cls,
        yaml_file: Path | str,
        registry: ProcessStepRegistry | None = None,
    ) -> "Pipeline":
        """
        Instantiate a Pipeline from a YAML configuration file.

        Parameters
        ----------
        yaml_file:
            Path to the YAML file.
        registry:
            Optional ProcessStepRegistry. If omitted, the global
            DEFAULT_PROCESS_STEP_REGISTRY is used.
        """
        yaml_path = Path(yaml_file)
        yaml_string = yaml_path.read_text(encoding="utf-8")
        return cls.from_yaml(yaml_string, registry=registry)

    @classmethod
    def from_yaml(
        cls,
        yaml_string: str,
        registry: ProcessStepRegistry | None = None,
    ) -> "Pipeline":
        """
        Instantiate a Pipeline from a YAML configuration string.

        Expected YAML schema (keyed by step_id):

        ```yaml
        name: my_pipeline
        steps:
          1:
            module: PoissonUncertainties
            requires_steps: []
            configuration: {...}

          "pu":
            module: PoissonUncertainties
            requires_steps: [1]  # may be int or string
            configuration: {...}
        ```

        Notes
        -----
        * The keys under `steps` (`1`, `"pu"`, etc.) are treated as the
          canonical `step_id`s and are normalized to `str`.
        * `requires_steps` entries can be ints or strings; they are also
          normalized to `str`.
        * If a `step_id` field is present inside a step, it must match
          the outer key (after string conversion), otherwise an error
          is raised to avoid silent mismatches.
        """
        yaml_obj = yaml.safe_load(yaml_string) or {}
        steps_cfg = yaml_obj.get("steps", {}) or {}

        registry = registry or DEFAULT_PROCESS_STEP_REGISTRY

        process_step_instances: dict[str, ProcessStep] = {}
        dependency_ids: dict[str, set[str]] = {}

        # First pass: instantiate steps and collect dependency ids
        for raw_step_key, module_data in steps_cfg.items():
            # Normalize outer key to string (allows numeric or string keys)
            step_id = str(raw_step_key)

            if not isinstance(module_data, dict):
                raise ValueError(
                    f"Step {step_id!r} must map to a mapping with 'module' and "
                    "'configuration' / 'requires_steps' fields."
                )

            # Optional inner step_id sanity check
            inner_step_id = module_data.get("step_id")
            if inner_step_id is not None and str(inner_step_id) != step_id:
                raise ValueError(
                    f"Step {step_id!r} has inner 'step_id' {inner_step_id!r} "
                    "which does not match the outer key. "
                    "Either omit the inner 'step_id' or make them identical."
                )

            try:
                module_ref = module_data["module"]
            except KeyError as exc:
                raise ValueError(f"Step {step_id!r} is missing required field 'module'.") from exc

            configuration = module_data.get("configuration") or {}
            requires_raw = module_data.get("requires_steps") or []
            short_title = module_data.get("short_title")

            # Normalize dependencies to strings as well
            requires_steps = {str(dep) for dep in requires_raw}

            # Resolve ProcessStep class via registry
            step_cls = registry.get(module_ref)

            # Pass the normalized string step_id into the ProcessStep
            step_instance: ProcessStep = step_cls(io_sources=None, io_sinks=None, step_id=step_id)
            step_instance.modify_config_by_dict(configuration)
            if short_title is not None:
                step_instance.short_title = str(short_title)

            process_step_instances[step_id] = step_instance
            dependency_ids[step_id] = requires_steps

        # Second pass: validate dependencies
        all_defined_ids = set(process_step_instances.keys())
        for step_id, deps in dependency_ids.items():
            missing = deps - all_defined_ids
            if missing:
                missing_str = ", ".join(sorted(missing))
                raise ValueError(
                    f"Step {step_id!r} requires unknown steps {missing_str}. "
                    "Check `steps` keys and `requires_steps` in the YAML."
                )

        # Translate step_id graph into ProcessStep graph
        graph: dict[ProcessStep, set[ProcessStep]] = {}
        for step_id, deps in dependency_ids.items():
            graph[process_step_instances[step_id]] = {process_step_instances[dep_id] for dep_id in deps}

        name = yaml_obj.get("name", "Unnamed Pipeline")
        return cls(name=name, graph=graph)
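
    # Usage sketch: building a two-step pipeline from the schema above.
    # Assumes "PoissonUncertainties" is registered in the default registry;
    # the step ids are illustrative:
    #
    #     pipeline = Pipeline.from_yaml(
    #         """
    #         name: demo
    #         steps:
    #           1:
    #             module: PoissonUncertainties
    #             requires_steps: []
    #           pu:
    #             module: PoissonUncertainties
    #             requires_steps: [1]
    #         """
    #     )
    #     # step "1" is scheduled before step "pu"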

    @classmethod
    def from_dict(
        cls,
        graph_dict: Mapping[ProcessStep, Iterable[ProcessStep]],
        name: str = "",
    ) -> "Pipeline":
        """
        Instantiate a Pipeline from a mapping.

        Parameters
        ----------
        graph_dict:
            Mapping of node -> iterable of prerequisite nodes.

        Notes
        -----
        This is a low-level constructor mainly intended for internal use or
        tests. Normal users should prefer `from_yaml_file` or `from_yaml`.
        """
        graph: dict[ProcessStep, set[ProcessStep]] = {node: set(deps) for node, deps in graph_dict.items()}
        return cls(name=name or "Unnamed Pipeline", graph=graph)

    # importer for future use of web tools for graphing pipelines:
    @classmethod
    def from_spec(
        cls,
        spec: dict,
        registry: ProcessStepRegistry | None = None,
    ) -> "Pipeline":
        """
        Build a Pipeline from a graph spec of the shape produced by `to_spec`.

        Expected shape:
            {
                "name": "...",
                "nodes": [
                    {"id": "...", "module": "...", "config": {...}},
                    ...
                ],
                "edges": [
                    {"from": "...", "to": "..."},
                    ...
                ],
            }
        """
        registry = registry or DEFAULT_PROCESS_STEP_REGISTRY

        # 1) Build ProcessStep instances
        process_step_instances: dict[str, ProcessStep] = {}
        for node in spec.get("nodes", []):
            step_id = str(node["id"])
            module_name = node["module"]
            config = node.get("config", {}) or {}

            step_cls = registry.get(module_name)
            step = step_cls(io_sources=None, io_sinks=None, step_id=step_id)
            step.modify_config_by_dict(config)

            process_step_instances[step_id] = step

        # 2) Build prerequisite sets from edges
        # edges are from -> to, but TopologicalSorter wants node -> prerequisites
        prereqs: dict[str, set[str]] = {sid: set() for sid in process_step_instances}
        for edge in spec.get("edges", []):
            src = str(edge["from"])
            dst = str(edge["to"])
            if src not in prereqs or dst not in prereqs:
                raise ValueError(f"Edge refers to unknown node: {src!r} -> {dst!r}")
            prereqs[dst].add(src)

        # 3) Convert to ProcessStep graph
        graph: dict[ProcessStep, set[ProcessStep]] = {}
        for sid, deps in prereqs.items():
            graph[process_step_instances[sid]] = {process_step_instances[dep_id] for dep_id in deps}

        name = spec.get("name", "Unnamed Pipeline")
        return cls(name=name, graph=graph)
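
    # Round-trip sketch: `from_spec` consumes the shape that `to_spec`
    # (below) produces, so a web editor can mutate the node/edge lists and
    # rebuild the pipeline; the ids here are illustrative:
    #
    #     spec = pipeline.to_spec()
    #     spec["edges"].append({"from": "1", "to": "pu"})
    #     pipeline2 = Pipeline.from_spec(spec)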

    # --------------------------------------------------------------------- #
    # Graph mutation helpers
    # --------------------------------------------------------------------- #
    def _reinitialize(self) -> None:
        """Recreate the underlying TopologicalSorter with the current graph."""
        super().__init__(graph=self.graph)

    def add_incoming_branch(self, branch: Self, branching_node: ProcessStep) -> Self:
        """
        Add a pipeline as a branch whose outcome shall be combined with
        the existing pipeline at `branching_node`.

        This assumes that the branch to be added has a single exit point.
        """
        ordered_branch = list(branch.static_order())
        if not ordered_branch:
            return self

        last_node = ordered_branch[-1]
        self.graph.setdefault(branching_node, set()).add(last_node)
        # Add the rest of the graph
        self.graph |= branch.graph
        self._reinitialize()
        return self

    def add_outgoing_branch(self, branch: Self, branching_node: ProcessStep) -> Self:
        """
        Add a pipeline as a branch whose input is based on the existing pipeline.

        This assumes that the branch to be added has a single entry point.
        """
        ordered_branch = list(branch.static_order())
        if not ordered_branch:
            return self

        first_node = ordered_branch[0]
        branch.graph.setdefault(first_node, set()).add(branching_node)
        self.graph |= branch.graph
        self._reinitialize()
        return self
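
    # Branching sketch: given a main pipeline whose `merge_step` should
    # also consume the result of a separate background pipeline (both
    # Pipeline instances; the names are illustrative), the branch's single
    # exit point becomes a prerequisite of `merge_step`:
    #
    #     main.add_incoming_branch(background, branching_node=merge_step)
    #
    # `add_outgoing_branch` is the mirror case: the branch's single entry
    # point gains `branching_node` as a prerequisite.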

    # --------------------------------------------------------------------- #
    # Execution
    # --------------------------------------------------------------------- #

    def run(self, **kwargs) -> None:
        """
        Run the pipeline with simple topological scheduling.

        Any keyword arguments are passed through to `ProcessStep.execute`.
        """
        self.prepare()
        while self.is_active():
            for node in self.get_ready():
                node.execute(**kwargs)
                self.done(node)
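
    # Run sketch: with ProcessStep instances `a` and `b` where `b` depends
    # on `a` (names and the keyword argument are illustrative), keyword
    # arguments are forwarded to each step's execute():
    #
    #     pipeline = Pipeline.from_dict({a: [], b: [a]}, name="demo")
    #     pipeline.run(processing_data=data)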

    # --------------------------------------------------------------------- #
    # Introspection / visualization helpers
    # --------------------------------------------------------------------- #

    def to_spec(self) -> dict[str, Any]:
        """
        Export the pipeline to a JSON-serializable graph spec.

        Returns
        -------
        dict with structure:
            {
                "name": "<pipeline_name>",
                "nodes": [
                    {
                        "id": "<step_id>",
                        "label": "<human readable label>",
                        "module": "<ProcessStep class name>",
                        "module_path": "<path to module>" or "",
                        "version": "<module version>" or "",
                        "config": {...}  # current configuration dict
                    },
                    ...
                ],
                "edges": [
                    {"from": "<source_step_id>", "to": "<target_step_id>"},
                    ...
                ],
            }

        e.g.:
            {
                "id": "FL",
                "label": "Divide by relative flux",
                "module": "Divide",
                "requires_steps": ["DC"],
                "config": {...},
                "trace_events": [
                    {
                        "step_id": "FL",
                        "config_hash": "...",
                        "datasets": {
                            "sample.signal": {"diff": ["units", "nan_signal"], "prev": {...}, "now": {...}}
                        }
                    }
                ]
            }

        Adds:
        - requires_steps per node (derived from graph prereqs)
        - optional trace events (if Pipeline.trace_events is populated)
        - config_hash per node (stable)
        """
        nodes: list[dict[str, Any]] = []
        edges: list[dict[str, str]] = []

        # Build a stable node set (keys + prereqs, just in case)
        all_nodes: set[ProcessStep] = set(self.graph.keys())
        for prereqs in self.graph.values():
            all_nodes |= set(prereqs)

        # map ProcessStep instance -> its step_id (as string)
        id_by_node: dict[ProcessStep, str] = {node: str(node.step_id) for node in all_nodes}

        # For stable output: sort by step_id
        def _node_sort_key(n: ProcessStep) -> str:
            return str(n.step_id)

        for node in sorted(all_nodes, key=_node_sort_key):
            sid = id_by_node[node]

            # Human label
            doc = getattr(node, "documentation", None)
            if doc is not None and getattr(doc, "calling_name", None):
                display_label = doc.calling_name
            else:
                display_label = type(node).__name__

            # prereqs list (sorted for spec stability)
            prereq_ids = sorted(id_by_node[p] for p in self.graph.get(node, set()))

            cfg = dict(getattr(node, "configuration", {}))

            node_spec: dict[str, Any] = {
                "id": sid,
                "label": display_label,
                "module": type(node).__name__,
                "config": cfg,
                "requires_steps": prereq_ids,
                "produced_outputs": sorted(getattr(node, "produced_outputs", {}).keys()),
            }
            if getattr(node, "short_title", None):
                node_spec["short_title"] = node.short_title

            cfg_json = json.dumps(node_spec["config"], sort_keys=True, default=str).encode("utf-8")
            node_spec["config_hash"] = sha256(cfg_json).hexdigest()

            if doc is not None:
                module_path = getattr(doc, "calling_module_path", None)
                node_spec["module_path"] = str(module_path) if module_path is not None else ""
                node_spec["version"] = getattr(doc, "calling_version", "") or ""
                calling_id = getattr(doc, "calling_id", None)
                if calling_id:
                    node_spec["module_id"] = calling_id

            # Attach trace events if present (kept lightweight)
            if sid in self.trace_events and self.trace_events[sid]:
                node_spec["trace_events"] = [ev.to_dict() for ev in self.trace_events[sid]]
            else:
                node_spec["trace_events"] = []

            nodes.append(node_spec)

        # Edges: self.graph maps node -> set(prerequisite nodes),
        # but visually we want edges prereq -> node.
        for node, prereqs in self.graph.items():
            target_id = id_by_node[node]
            for pre in prereqs:
                edges.append({"from": id_by_node[pre], "to": target_id})

        return {"name": self.name, "nodes": nodes, "edges": edges}

    def to_dot(self) -> str:
        """
        Export the pipeline as a Graphviz DOT string for visualization.

        Nodes are labeled with "<step_id>: <calling_name/module_name>".
        """
        spec = self.to_spec()
        lines: list[str] = [
            f'digraph "{spec["name"]}" {{',
            "  rankdir=LR;",  # left-to-right layout; change to TB for top-to-bottom
        ]

        # Nodes
        for node in spec["nodes"]:
            nid = node["id"]
            # Show both id and label so it's easy to match YAML <-> graph
            label = f'{node["id"]}: {node["module"]}'
            short_title = node.get("short_title")
            if short_title:
                label = f"{label}\\n{short_title}"
            esc_label = label.replace('"', '\\"')
            lines.append(f'  "{nid}" [label="{esc_label}"];')  # noqa: E702, E231

        # Edges
        for edge in spec["edges"]:
            lines.append(f'  "{edge["from"]}" -> "{edge["to"]}";')  # noqa: E702, E231

        lines.append("}")
        return "\n".join(lines)
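
    # The DOT string can be rendered with the standard Graphviz CLI
    # (file names illustrative):
    #
    #     Path("pipeline.dot").write_text(pipeline.to_dot(), encoding="utf-8")
    #     # shell: dot -Tsvg pipeline.dot -o pipeline.svg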

    def to_mermaid(self, direction: str = "LR") -> str:
        """
        Export the pipeline as a Mermaid flowchart definition.

        Parameters
        ----------
        direction:
            Mermaid direction: "LR" (left-right), "TB" (top-bottom), etc.
        """
        spec = self.to_spec()

        # Mermaid node IDs must be simple identifiers (no spaces, quotes, etc.).
        # We'll generate safe IDs but keep the original step_id visible in the label.
        def sanitize(node_id: str) -> str:
            return "".join(c if (c.isalnum() or c == "_") else "_" for c in node_id)

        id_map: dict[str, str] = {}
        for node in spec["nodes"]:
            raw = str(node["id"])
            id_map[node["id"]] = sanitize(raw)

        lines: list[str] = [f"flowchart {direction}"]

        # Nodes
        for node in spec["nodes"]:
            nid = id_map[node["id"]]
            label = f'{node["id"]}: {node["module"]}'
            short_title = node.get("short_title")
            if short_title:
                label = f"{label}<br/>{short_title}"
            esc_label = label.replace('"', '\\"')
            lines.append(f'  {nid}["{esc_label}"]')

        # Edges
        for edge in spec["edges"]:
            src = id_map[edge["from"]]
            dst = id_map[edge["to"]]
            lines.append(f"  {src} --> {dst}")

        return "\n".join(lines)
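
    # For a two-node pipeline (step ids "1" -> "pu", both using the
    # PoissonUncertainties module; ids illustrative), the output would be:
    #
    #     flowchart LR
    #       1["1: PoissonUncertainties"]
    #       pu["pu: PoissonUncertainties"]
    #       1 --> pu
    #
    # which any Mermaid renderer (e.g. a ```mermaid block in Markdown) can draw.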

    # In case we used to_spec / from_spec to modify the pipeline, we can
    # store the new pipeline back to YAML:
    def to_yaml(self) -> str:
        """
        Export the pipeline to a YAML string using the same schema
        that `from_yaml` expects (keyed by step_id).

        The result looks like:

        ```yaml
        name: my_pipeline
        steps:
          1:
            module: SomeStep
            requires_steps: []
            configuration: {...}
          "pu":
            module: OtherStep
            requires_steps: [1]
            configuration: {...}
        ```
        """
        spec = self.to_spec()

        # Build steps mapping keyed by step_id
        steps: dict[str, dict[str, Any]] = {}

        # Pre-compute requires_steps per node from edges
        requires_map: dict[str, list[str]] = {n["id"]: [] for n in spec["nodes"]}
        for edge in spec["edges"]:
            src = str(edge["from"])
            dst = str(edge["to"])
            # edge: src -> dst => dst.requires_steps includes src
            if dst in requires_map:
                requires_map[dst].append(src)
            else:
                requires_map[dst] = [src]

        for node in spec["nodes"]:
            sid = str(node["id"])
            module_name = node["module"]
            cfg = node.get("config", {}) or {}
            requires = requires_map.get(sid, [])
            short_title = node.get("short_title")

            step_dict: dict[str, Any] = {
                "module": module_name,
            }
            if requires:
                step_dict["requires_steps"] = requires
            if cfg:
                step_dict["configuration"] = cfg
            if short_title:
                step_dict["short_title"] = short_title

            steps[sid] = step_dict

        yaml_obj = {
            "name": spec.get("name", self.name or "Unnamed Pipeline"),
            "steps": steps,
        }

        # sort_keys=False keeps insertion order, which follows node order in spec
        return yaml.safe_dump(yaml_obj, sort_keys=False)
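
    # Persistence sketch: a pipeline loaded from disk and tweaked via
    # to_spec / from_spec can be written back out (file names illustrative):
    #
    #     pipeline = Pipeline.from_yaml_file("pipeline.yaml")
    #     Path("pipeline_edited.yaml").write_text(pipeline.to_yaml(), encoding="utf-8")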

    # --------------------------------------------------------------------- #
    # Trace events / run-time introspection
    # --------------------------------------------------------------------- #
    #
    # Summary
    # -------
    # Pipelines can optionally collect per-step TraceEvent records during execution.
    #
    # Design goals:
    # - Keep Pipeline execution fast and lightweight (no arrays stored).
    # - Keep trace events strictly step-local (each event describes only one executed node).
    # - Support UI rendering without requiring access to live ProcessingData.
    #
    # What is stored:
    # - Always: module metadata, prerequisite step_ids, and the step configuration used.
    # - Optionally: dataset "diff" payloads produced by PipelineTracer (units/dimensionality/NaNs/etc).
    # - Optionally: rendered, UI-ready snippets (HTML/Markdown/plain) for trace + config.
    #
    # What is NOT stored:
    # - No signal arrays, maps, or large objects (TraceEvent must stay JSON-friendly).
    # - No global or cross-step state (events can be attached/serialized independently).
    #
    # Integration pattern (typical runner / notebook):
    #     node(processing_data)
    #     tracer.after_step(node, processing_data)
    #     pipeline.attach_tracer_event(node, tracer,
    #                                  include_rendered_trace=True,
    #                                  include_rendered_config=True)
    #     pipeline.done(node)
    #
    # Export:
    # - Pipeline.to_spec() includes node-level config + optional trace_events per node,
    #   enabling graph viewers to show "what changed" as expandable panels.

    def attach_tracer_event(
        self,
        node: ProcessStep,
        tracer: Any | None,
        *,
        include_rendered_trace: bool = False,
        include_rendered_config: bool = False,
        rendered_format: str = "text/html",
    ) -> TraceEvent:
        """
        Create & attach a TraceEvent for `node`, using `tracer.events` if available.

        - Always attaches a TraceEvent so the graph UI can show config/module info.
        - Adds datasets diffs only if a matching tracer event for this step_id exists.
        """
        step_id = str(node.step_id)
        doc = getattr(node, "documentation", None)

        label = getattr(doc, "calling_name", "") if doc is not None else ""
        module_path = getattr(doc, "calling_module_path", "") if doc is not None else ""
        version = getattr(doc, "calling_version", "") if doc is not None else ""

        prereqs = tuple(sorted(str(p.step_id) for p in self.graph.get(node, set())))
        cfg = dict(getattr(node, "configuration", {}) or {})

        datasets: dict[str, Any] = {}

        def _html_escape(s: str) -> str:
            return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

        matched_ev: dict[str, Any] | None = None

        # Try to find the most recent tracer event for this step_id
        if tracer is not None:
            events = getattr(tracer, "events", None)
            if isinstance(events, list) and events:
                for ev in reversed(events):
                    if str(ev.get("step_id")) == step_id:
                        matched_ev = ev
                        datasets = tracer_event_to_datasets_payload(ev)
                        break

        duration_s: float | None = None
        if matched_ev is not None:
            d = matched_ev.get("duration_s", None)
            if isinstance(d, (int, float)):
                duration_s = float(d)

        messages: list[dict[str, Any]] = []

        # --- Rendered trace (STRICTLY step-local) ---
        if include_rendered_trace and matched_ev is not None:
            try:
                from modacor.debug.pipeline_tracer import (  # noqa: WPS433
                    MarkdownCssRenderer,
                    PlainUnicodeRenderer,
                    render_tracer_event,
                )

                if rendered_format in {"text/html", "text/markdown"}:
                    renderer = MarkdownCssRenderer(wrap_in_markdown_codeblock=False)
                    content = render_tracer_event(matched_ev, renderer=renderer)
                    fmt = "text/html"
                else:
                    renderer = PlainUnicodeRenderer(wrap_in_markdown_codeblock=False)
                    content = render_tracer_event(matched_ev, renderer=renderer)
                    fmt = "text/plain"

                messages.append(
                    {
                        "kind": "rendered_trace",
                        "title": "Trace",
                        "format": fmt,
                        "content": content,
                    }
                )
            except Exception as exc:
                messages.append(
                    {
                        "kind": "rendered_trace_error",
                        "title": "Trace",
                        "format": "text/plain",
                        "content": f"{exc!r}",
                    }
                )

        # --- Rendered config (STRICTLY step-local) ---
        if include_rendered_config:
            try:
                cfg_yaml = yaml.safe_dump(cfg, sort_keys=False)

                if rendered_format in {"text/html", "text/markdown"}:
                    # keep styling consistent with your CSS classes
                    # (don't rely on MarkdownCssRenderer.codewrap here; we want to escape YAML)
                    content = "<pre class='mdc-pre mdc-config'>\n" + _html_escape(cfg_yaml) + "\n</pre>"
                    fmt = "text/html"
                else:
                    content = "Configuration:\n" + cfg_yaml
                    fmt = "text/plain"

                messages.append(
                    {
                        "kind": "rendered_config",
                        "title": "Configuration",
                        "format": fmt,
                        "content": content,
                    }
                )
            except Exception as exc:
                messages.append(
                    {
                        "kind": "rendered_config_error",
                        "title": "Configuration",
                        "format": "text/plain",
                        "content": f"{exc!r}",
                    }
                )

        event = TraceEvent(
            step_id=step_id,
            module=type(node).__name__,
            label=str(label or ""),
            module_path=str(module_path or ""),
            version=str(version or ""),
            requires_steps=prereqs,
            config=cfg,
            datasets=datasets,
            duration_s=duration_s,
            messages=messages,
        )

        self.add_trace_event(event)
        return event
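
    # Traced-run sketch, expanding the integration pattern above. `tracer`
    # is assumed to expose `after_step(node, processing_data)` and an
    # `events` list of dicts carrying a "step_id" key, as the comment
    # block above describes:
    #
    #     pipeline.prepare()
    #     while pipeline.is_active():
    #         for node in pipeline.get_ready():
    #             node(processing_data)
    #             tracer.after_step(node, processing_data)
    #             pipeline.attach_tracer_event(node, tracer, include_rendered_trace=True)
    #             pipeline.done(node)
    #     spec = pipeline.to_spec()  # trace_events now appear per node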