modacor 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. modacor/__init__.py +30 -0
  2. modacor/dataclasses/__init__.py +0 -0
  3. modacor/dataclasses/basedata.py +973 -0
  4. modacor/dataclasses/databundle.py +23 -0
  5. modacor/dataclasses/helpers.py +45 -0
  6. modacor/dataclasses/messagehandler.py +75 -0
  7. modacor/dataclasses/process_step.py +233 -0
  8. modacor/dataclasses/process_step_describer.py +146 -0
  9. modacor/dataclasses/processing_data.py +59 -0
  10. modacor/dataclasses/trace_event.py +118 -0
  11. modacor/dataclasses/uncertainty_tools.py +132 -0
  12. modacor/dataclasses/validators.py +84 -0
  13. modacor/debug/pipeline_tracer.py +548 -0
  14. modacor/io/__init__.py +33 -0
  15. modacor/io/csv/__init__.py +0 -0
  16. modacor/io/csv/csv_sink.py +114 -0
  17. modacor/io/csv/csv_source.py +210 -0
  18. modacor/io/hdf/__init__.py +27 -0
  19. modacor/io/hdf/hdf_source.py +120 -0
  20. modacor/io/io_sink.py +41 -0
  21. modacor/io/io_sinks.py +61 -0
  22. modacor/io/io_source.py +164 -0
  23. modacor/io/io_sources.py +208 -0
  24. modacor/io/processing_path.py +113 -0
  25. modacor/io/tiled/__init__.py +16 -0
  26. modacor/io/tiled/tiled_source.py +403 -0
  27. modacor/io/yaml/__init__.py +27 -0
  28. modacor/io/yaml/yaml_source.py +116 -0
  29. modacor/modules/__init__.py +53 -0
  30. modacor/modules/base_modules/__init__.py +0 -0
  31. modacor/modules/base_modules/append_processing_data.py +329 -0
  32. modacor/modules/base_modules/append_sink.py +141 -0
  33. modacor/modules/base_modules/append_source.py +181 -0
  34. modacor/modules/base_modules/bitwise_or_masks.py +113 -0
  35. modacor/modules/base_modules/combine_uncertainties.py +120 -0
  36. modacor/modules/base_modules/combine_uncertainties_max.py +105 -0
  37. modacor/modules/base_modules/divide.py +82 -0
  38. modacor/modules/base_modules/find_scale_factor1d.py +373 -0
  39. modacor/modules/base_modules/multiply.py +77 -0
  40. modacor/modules/base_modules/multiply_databundles.py +73 -0
  41. modacor/modules/base_modules/poisson_uncertainties.py +69 -0
  42. modacor/modules/base_modules/reduce_dimensionality.py +252 -0
  43. modacor/modules/base_modules/sink_processing_data.py +80 -0
  44. modacor/modules/base_modules/subtract.py +80 -0
  45. modacor/modules/base_modules/subtract_databundles.py +67 -0
  46. modacor/modules/base_modules/units_label_update.py +66 -0
  47. modacor/modules/instrument_modules/__init__.py +0 -0
  48. modacor/modules/instrument_modules/readme.md +9 -0
  49. modacor/modules/technique_modules/__init__.py +0 -0
  50. modacor/modules/technique_modules/scattering/__init__.py +0 -0
  51. modacor/modules/technique_modules/scattering/geometry_helpers.py +114 -0
  52. modacor/modules/technique_modules/scattering/index_pixels.py +492 -0
  53. modacor/modules/technique_modules/scattering/indexed_averager.py +628 -0
  54. modacor/modules/technique_modules/scattering/pixel_coordinates_3d.py +417 -0
  55. modacor/modules/technique_modules/scattering/solid_angle_correction.py +63 -0
  56. modacor/modules/technique_modules/scattering/xs_geometry.py +571 -0
  57. modacor/modules/technique_modules/scattering/xs_geometry_from_pixel_coordinates.py +293 -0
  58. modacor/runner/__init__.py +0 -0
  59. modacor/runner/pipeline.py +749 -0
  60. modacor/runner/process_step_registry.py +224 -0
  61. modacor/tests/__init__.py +27 -0
  62. modacor/tests/dataclasses/test_basedata.py +519 -0
  63. modacor/tests/dataclasses/test_basedata_operations.py +439 -0
  64. modacor/tests/dataclasses/test_basedata_to_base_units.py +57 -0
  65. modacor/tests/dataclasses/test_process_step_describer.py +73 -0
  66. modacor/tests/dataclasses/test_processstep.py +282 -0
  67. modacor/tests/debug/test_tracing_integration.py +188 -0
  68. modacor/tests/integration/__init__.py +0 -0
  69. modacor/tests/integration/test_pipeline_run.py +238 -0
  70. modacor/tests/io/__init__.py +27 -0
  71. modacor/tests/io/csv/__init__.py +0 -0
  72. modacor/tests/io/csv/test_csv_source.py +156 -0
  73. modacor/tests/io/hdf/__init__.py +27 -0
  74. modacor/tests/io/hdf/test_hdf_source.py +92 -0
  75. modacor/tests/io/test_io_sources.py +119 -0
  76. modacor/tests/io/tiled/__init__.py +12 -0
  77. modacor/tests/io/tiled/test_tiled_source.py +120 -0
  78. modacor/tests/io/yaml/__init__.py +27 -0
  79. modacor/tests/io/yaml/static_data_example.yaml +26 -0
  80. modacor/tests/io/yaml/test_yaml_source.py +47 -0
  81. modacor/tests/modules/__init__.py +27 -0
  82. modacor/tests/modules/base_modules/__init__.py +27 -0
  83. modacor/tests/modules/base_modules/test_append_processing_data.py +219 -0
  84. modacor/tests/modules/base_modules/test_append_sink.py +76 -0
  85. modacor/tests/modules/base_modules/test_append_source.py +180 -0
  86. modacor/tests/modules/base_modules/test_bitwise_or_masks.py +264 -0
  87. modacor/tests/modules/base_modules/test_combine_uncertainties.py +105 -0
  88. modacor/tests/modules/base_modules/test_combine_uncertainties_max.py +109 -0
  89. modacor/tests/modules/base_modules/test_divide.py +140 -0
  90. modacor/tests/modules/base_modules/test_find_scale_factor1d.py +220 -0
  91. modacor/tests/modules/base_modules/test_multiply.py +113 -0
  92. modacor/tests/modules/base_modules/test_multiply_databundles.py +136 -0
  93. modacor/tests/modules/base_modules/test_poisson_uncertainties.py +61 -0
  94. modacor/tests/modules/base_modules/test_reduce_dimensionality.py +358 -0
  95. modacor/tests/modules/base_modules/test_sink_processing_data.py +119 -0
  96. modacor/tests/modules/base_modules/test_subtract.py +111 -0
  97. modacor/tests/modules/base_modules/test_subtract_databundles.py +136 -0
  98. modacor/tests/modules/base_modules/test_units_label_update.py +91 -0
  99. modacor/tests/modules/technique_modules/__init__.py +0 -0
  100. modacor/tests/modules/technique_modules/scattering/__init__.py +0 -0
  101. modacor/tests/modules/technique_modules/scattering/test_geometry_helpers.py +198 -0
  102. modacor/tests/modules/technique_modules/scattering/test_index_pixels.py +426 -0
  103. modacor/tests/modules/technique_modules/scattering/test_indexed_averaging.py +559 -0
  104. modacor/tests/modules/technique_modules/scattering/test_pixel_coordinates_3d.py +282 -0
  105. modacor/tests/modules/technique_modules/scattering/test_xs_geometry_from_pixel_coordinates.py +224 -0
  106. modacor/tests/modules/technique_modules/scattering/test_xsgeometry.py +635 -0
  107. modacor/tests/requirements.txt +12 -0
  108. modacor/tests/runner/test_pipeline.py +438 -0
  109. modacor/tests/runner/test_process_step_registry.py +65 -0
  110. modacor/tests/test_import.py +43 -0
  111. modacor/tests/test_modacor.py +17 -0
  112. modacor/tests/test_units.py +79 -0
  113. modacor/units.py +97 -0
  114. modacor-1.0.0.dist-info/METADATA +482 -0
  115. modacor-1.0.0.dist-info/RECORD +120 -0
  116. modacor-1.0.0.dist-info/WHEEL +5 -0
  117. modacor-1.0.0.dist-info/licenses/AUTHORS.md +11 -0
  118. modacor-1.0.0.dist-info/licenses/LICENSE +11 -0
  119. modacor-1.0.0.dist-info/licenses/LICENSE.txt +11 -0
  120. modacor-1.0.0.dist-info/top_level.txt +1 -0
modacor/runner/pipeline.py
@@ -0,0 +1,749 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ __coding__ = "utf-8"
+ __authors__ = ["Anja Hörmann", "Brian R. Pauw"]  # add names to the list as appropriate
+ __copyright__ = "Copyright 2025, The MoDaCor team"
+ __date__ = "22/11/2025"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ import json
+ from graphlib import TopologicalSorter
+
+ # quick hash at node-level (UI can show "config changed" without reading trace events)
+ from hashlib import sha256
+ from pathlib import Path
+ from typing import Any, Iterable, Mapping, Self
+
+ import yaml
+ from attrs import define, field
+
+ from modacor.debug.pipeline_tracer import tracer_event_to_datasets_payload
+
+ from ..dataclasses.process_step import ProcessStep
+ from ..dataclasses.trace_event import TraceEvent
+ from ..io.io_sources import IoSources  # noqa: F401 # reserved for future use
+ from .process_step_registry import DEFAULT_PROCESS_STEP_REGISTRY, ProcessStepRegistry
+
+ __all__ = ["Pipeline"]
+
+
+ @define
+ class Pipeline(TopologicalSorter):
+     """
+     Pipeline nodes are assumed to be of type ProcessStep.
+
+     The underlying `graph` maps each node to the set of prerequisite nodes
+     that must complete before it can run.
+     """
+
+     graph: dict[ProcessStep, set[ProcessStep]] = field(factory=dict)
+     name: str = field(default="Unnamed Pipeline")
+     # Optional trace events collected during a run (step_id -> list of events)
+     trace_events: dict[str, list[TraceEvent]] = field(factory=dict, repr=False)
+
+     def __attrs_post_init__(self) -> None:
+         super().__init__(graph=self.graph)
+
+     # Trace helpers: storing trace events per step makes pipelines easier to debug.
+     def add_trace_event(self, event: TraceEvent) -> None:
+         self.trace_events.setdefault(str(event.step_id), []).append(event)
+
+     def clear_trace_events(self) -> None:
+         self.trace_events.clear()
+
+     # --------------------------------------------------------------------- #
+     # Pipeline construction helpers
+     # --------------------------------------------------------------------- #
+
+     @classmethod
+     def from_yaml_file(
+         cls,
+         yaml_file: Path | str,
+         registry: ProcessStepRegistry | None = None,
+     ) -> "Pipeline":
+         """
+         Instantiate a Pipeline from a YAML configuration file.
+
+         Parameters
+         ----------
+         yaml_file:
+             Path to the YAML file.
+         registry:
+             Optional ProcessStepRegistry. If omitted, the global
+             DEFAULT_PROCESS_STEP_REGISTRY is used.
+         """
+         yaml_path = Path(yaml_file)
+         yaml_string = yaml_path.read_text(encoding="utf-8")
+         return cls.from_yaml(yaml_string, registry=registry)
+
+     @classmethod
+     def from_yaml(
+         cls,
+         yaml_string: str,
+         registry: ProcessStepRegistry | None = None,
+     ) -> "Pipeline":
+         """
+         Instantiate a Pipeline from a YAML configuration string.
+
+         Expected YAML schema (keyed by step_id):
+
+         ```yaml
+         name: my_pipeline
+         steps:
+           1:
+             module: PoissonUncertainties
+             requires_steps: []
+             configuration: {...}
+
+           "pu":
+             module: PoissonUncertainties
+             requires_steps: [1]  # may be int or string
+             configuration: {...}
+         ```
+
+         Notes
+         -----
+         * The keys under `steps` (`1`, `"pu"`, etc.) are treated as the
+           canonical `step_id`s and are normalized to `str`.
+         * `requires_steps` entries can be ints or strings; they are also
+           normalized to `str`.
+         * If a `step_id` field is present inside a step, it must match
+           the outer key (after string conversion), otherwise an error
+           is raised to avoid silent mismatches.
+         """
+         yaml_obj = yaml.safe_load(yaml_string) or {}
+         steps_cfg = yaml_obj.get("steps", {}) or {}
+
+         registry = registry or DEFAULT_PROCESS_STEP_REGISTRY
+
+         process_step_instances: dict[str, ProcessStep] = {}
+         dependency_ids: dict[str, set[str]] = {}
+
+         # First pass: instantiate steps and collect dependency ids
+         for raw_step_key, module_data in steps_cfg.items():
+             # Normalize outer key to string (allows numeric or string keys)
+             step_id = str(raw_step_key)
+
+             if not isinstance(module_data, dict):
+                 raise ValueError(
+                     f"Step {step_id!r} must map to a mapping with 'module' and "
+                     "'configuration' / 'requires_steps' fields."
+                 )
+
+             # Optional inner step_id sanity check
+             inner_step_id = module_data.get("step_id")
+             if inner_step_id is not None and str(inner_step_id) != step_id:
+                 raise ValueError(
+                     f"Step {step_id!r} has inner 'step_id' {inner_step_id!r} "
+                     "which does not match the outer key. "
+                     "Either omit the inner 'step_id' or make them identical."
+                 )
+
+             try:
+                 module_ref = module_data["module"]
+             except KeyError as exc:
+                 raise ValueError(f"Step {step_id!r} is missing required field 'module'.") from exc
+
+             configuration = module_data.get("configuration") or {}
+             requires_raw = module_data.get("requires_steps") or []
+             short_title = module_data.get("short_title")
+
+             # Normalize dependencies to strings as well
+             requires_steps = {str(dep) for dep in requires_raw}
+
+             # Resolve ProcessStep class via registry
+             step_cls = registry.get(module_ref)
+
+             # Pass the normalized string step_id into the ProcessStep
+             step_instance: ProcessStep = step_cls(io_sources=None, io_sinks=None, step_id=step_id)
+             step_instance.modify_config_by_dict(configuration)
+             if short_title is not None:
+                 step_instance.short_title = str(short_title)
+
+             process_step_instances[step_id] = step_instance
+             dependency_ids[step_id] = requires_steps
+
+         # Second pass: validate dependencies
+         all_defined_ids = set(process_step_instances.keys())
+         for step_id, deps in dependency_ids.items():
+             missing = deps - all_defined_ids
+             if missing:
+                 missing_str = ", ".join(sorted(missing))
+                 raise ValueError(
+                     f"Step {step_id!r} requires unknown steps {missing_str}. "
+                     "Check `steps` keys and `requires_steps` in the YAML."
+                 )
+
+         # Translate step_id graph into ProcessStep graph
+         graph: dict[ProcessStep, set[ProcessStep]] = {}
+         for step_id, deps in dependency_ids.items():
+             graph[process_step_instances[step_id]] = {process_step_instances[dep_id] for dep_id in deps}
+
+         name = yaml_obj.get("name", "Unnamed Pipeline")
+         return cls(name=name, graph=graph)
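
A minimal usage sketch for `from_yaml` (hedged: it assumes `PoissonUncertainties` is registered in `DEFAULT_PROCESS_STEP_REGISTRY`, as in the schema example above):

```python
from modacor.runner.pipeline import Pipeline

yaml_text = """
name: demo_pipeline
steps:
  1:
    module: PoissonUncertainties
    requires_steps: []
  pu:
    module: PoissonUncertainties
    requires_steps: [1]
"""

pipeline = Pipeline.from_yaml(yaml_text)
# keys are normalized to strings, so the graph holds steps "1" and "pu"
print(sorted(step.step_id for step in pipeline.graph))
```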
+
+     @classmethod
+     def from_dict(
+         cls,
+         graph_dict: Mapping[ProcessStep, Iterable[ProcessStep]],
+         name: str = "",
+     ) -> "Pipeline":
+         """
+         Instantiate a Pipeline from a mapping.
+
+         Parameters
+         ----------
+         graph_dict:
+             Mapping of node -> iterable of prerequisite nodes.
+
+         Notes
+         -----
+         This is a low-level constructor mainly intended for internal use or
+         tests. Normal users should prefer `from_yaml_file` or `from_yaml`.
+         """
+         graph: dict[ProcessStep, set[ProcessStep]] = {node: set(deps) for node, deps in graph_dict.items()}
+         return cls(name=name or "Unnamed Pipeline", graph=graph)
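
The same graph can be assembled programmatically. A sketch under the same assumption (the module name resolves via the registry, whose `get` is used exactly as in `from_yaml`):

```python
from modacor.runner.pipeline import Pipeline
from modacor.runner.process_step_registry import DEFAULT_PROCESS_STEP_REGISTRY

StepCls = DEFAULT_PROCESS_STEP_REGISTRY.get("PoissonUncertainties")  # assumed registered
a = StepCls(io_sources=None, io_sinks=None, step_id="a")
b = StepCls(io_sources=None, io_sinks=None, step_id="b")

# b depends on a; from_dict converts the iterables to sets internally
pipeline = Pipeline.from_dict({a: [], b: [a]}, name="programmatic demo")
```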
+
+     # Importer for future use by web-based pipeline graphing tools:
+     @classmethod
+     def from_spec(
+         cls,
+         spec: dict,
+         registry: ProcessStepRegistry | None = None,
+     ) -> "Pipeline":
+         """
+         Build a Pipeline from a graph spec of the shape produced by `to_spec`.
+
+         Expected shape:
+             {
+                 "name": "...",
+                 "nodes": [
+                     {"id": "...", "module": "...", "config": {...}},
+                     ...
+                 ],
+                 "edges": [
+                     {"from": "...", "to": "..."},
+                     ...
+                 ],
+             }
+         """
+         registry = registry or DEFAULT_PROCESS_STEP_REGISTRY
+
+         # 1) Build ProcessStep instances
+         process_step_instances: dict[str, ProcessStep] = {}
+         for node in spec.get("nodes", []):
+             step_id = str(node["id"])
+             module_name = node["module"]
+             config = node.get("config", {}) or {}
+
+             step_cls = registry.get(module_name)
+             step = step_cls(io_sources=None, io_sinks=None, step_id=step_id)
+             step.modify_config_by_dict(config)
+
+             process_step_instances[step_id] = step
+
+         # 2) Build prerequisite sets from edges
+         # edges are from -> to, but TopologicalSorter wants node -> prerequisites
+         prereqs: dict[str, set[str]] = {sid: set() for sid in process_step_instances}
+         for edge in spec.get("edges", []):
+             src = str(edge["from"])
+             dst = str(edge["to"])
+             if src not in prereqs or dst not in prereqs:
+                 raise ValueError(f"Edge refers to unknown node: {src!r} -> {dst!r}")
+             prereqs[dst].add(src)
+
+         # 3) Convert to ProcessStep graph
+         graph: dict[ProcessStep, set[ProcessStep]] = {}
+         for sid, deps in prereqs.items():
+             graph[process_step_instances[sid]] = {process_step_instances[dep_id] for dep_id in deps}
+
+         name = spec.get("name", "Unnamed Pipeline")
+         return cls(name=name, graph=graph)
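
A sketch of building from a spec dict (same caveat: the `module` names must resolve via the registry; `Divide` is borrowed from the example in `to_spec` below):

```python
spec = {
    "name": "spec demo",
    "nodes": [
        {"id": "src", "module": "PoissonUncertainties", "config": {}},
        {"id": "div", "module": "Divide", "config": {}},
    ],
    "edges": [{"from": "src", "to": "div"}],
}

pipeline = Pipeline.from_spec(spec)
# edges run from -> to, so "div" now lists "src" as a prerequisite
```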
+
+     # --------------------------------------------------------------------- #
+     # Graph mutation helpers
+     # --------------------------------------------------------------------- #
+     def _reinitialize(self) -> None:
+         """Recreate the underlying TopologicalSorter with the current graph."""
+         super().__init__(graph=self.graph)
+
+     def add_incoming_branch(self, branch: Self, branching_node: ProcessStep) -> Self:
+         """
+         Add a pipeline as a branch whose output is combined with the
+         existing pipeline at `branching_node`.
+
+         This assumes that the branch to be added has a single exit point.
+         """
+         ordered_branch = list(branch.static_order())
+         if not ordered_branch:
+             return self
+
+         last_node = ordered_branch[-1]
+         self.graph.setdefault(branching_node, set()).add(last_node)
+         # Merge in the rest of the branch's graph (keys present in both
+         # dicts are overwritten by the branch's entries)
+         self.graph |= branch.graph
+         self._reinitialize()
+         return self
+
+     def add_outgoing_branch(self, branch: Self, branching_node: ProcessStep) -> Self:
+         """
+         Add a pipeline as a branch whose input is based on the existing pipeline.
+
+         This assumes that the branch to be added has a single entry point.
+         """
+         ordered_branch = list(branch.static_order())
+         if not ordered_branch:
+             return self
+
+         first_node = ordered_branch[0]
+         branch.graph.setdefault(first_node, set()).add(branching_node)
+         self.graph |= branch.graph
+         self._reinitialize()
+         return self
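
A hedged sketch of the branching helpers (`main_yaml` and `branch_yaml` are placeholder YAML strings; the merge target is picked arbitrarily here):

```python
main = Pipeline.from_yaml(main_yaml)      # placeholder
branch = Pipeline.from_yaml(branch_yaml)  # single-exit branch, per the docstring

# feed the branch's single exit point into one of main's nodes
target = next(iter(main.graph))
main.add_incoming_branch(branch, branching_node=target)
```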
+
+     # --------------------------------------------------------------------- #
+     # Execution
+     # --------------------------------------------------------------------- #
+
+     def run(self, **kwargs) -> None:
+         """
+         Run the pipeline with simple topological scheduling.
+
+         Any keyword arguments are passed through to `ProcessStep.execute`.
+         """
+         self.prepare()
+         while self.is_active():
+             for node in self.get_ready():
+                 node.execute(**kwargs)
+                 self.done(node)
+
+     # --------------------------------------------------------------------- #
+     # Introspection / visualization helpers
+     # --------------------------------------------------------------------- #
+
+     def to_spec(self) -> dict[str, Any]:
+         """
+         Export the pipeline to a JSON-serializable graph spec.
+
+         Returns
+         -------
+         dict with structure:
+             {
+                 "name": "<pipeline_name>",
+                 "nodes": [
+                     {
+                         "id": "<step_id>",
+                         "label": "<human readable label>",
+                         "module": "<ProcessStep class name>",
+                         "module_path": "<path to module>" or "",
+                         "version": "<module version>" or "",
+                         "config": {...}  # current configuration dict
+                     },
+                     ...
+                 ],
+                 "edges": [
+                     {"from": "<source_step_id>", "to": "<target_step_id>"},
+                     ...
+                 ],
+             }
+
+         Example node entry:
+             {
+                 "id": "FL",
+                 "label": "Divide by relative flux",
+                 "module": "Divide",
+                 "requires_steps": ["DC"],
+                 "config": {...},
+                 "trace_events": [
+                     {
+                         "step_id": "FL",
+                         "config_hash": "...",
+                         "datasets": {
+                             "sample.signal": {"diff": ["units", "nan_signal"], "prev": {...}, "now": {...}}
+                         }
+                     }
+                 ]
+             }
+
+         Adds:
+         - requires_steps per node (derived from graph prereqs)
+         - optional trace events (if Pipeline.trace_events is populated)
+         - config_hash per node (stable)
+         """
+         nodes: list[dict[str, Any]] = []
+         edges: list[dict[str, str]] = []
+
+         # Build a stable node set (keys + prereqs, just in case)
+         all_nodes: set[ProcessStep] = set(self.graph.keys())
+         for prereqs in self.graph.values():
+             all_nodes |= set(prereqs)
+
+         # map ProcessStep instance -> its step_id (as string)
+         id_by_node: dict[ProcessStep, str] = {node: str(node.step_id) for node in all_nodes}
+
+         # For stable output: sort by step_id
+         def _node_sort_key(n: ProcessStep) -> str:
+             return str(n.step_id)
+
+         for node in sorted(all_nodes, key=_node_sort_key):
+             sid = id_by_node[node]
+
+             # Human label
+             doc = getattr(node, "documentation", None)
+             if doc is not None and getattr(doc, "calling_name", None):
+                 display_label = doc.calling_name
+             else:
+                 display_label = type(node).__name__
+
+             # prereqs list (sorted for spec stability)
+             prereq_ids = sorted(id_by_node[p] for p in self.graph.get(node, set()))
+
+             cfg = dict(getattr(node, "configuration", {}))
+
+             node_spec: dict[str, Any] = {
+                 "id": sid,
+                 "label": display_label,
+                 "module": type(node).__name__,
+                 "config": cfg,
+                 "requires_steps": prereq_ids,
+                 "produced_outputs": sorted(getattr(node, "produced_outputs", {}).keys()),
+             }
+             if getattr(node, "short_title", None):
+                 node_spec["short_title"] = node.short_title
+
+             cfg_json = json.dumps(node_spec["config"], sort_keys=True, default=str).encode("utf-8")
+             node_spec["config_hash"] = sha256(cfg_json).hexdigest()
+
+             if doc is not None:
+                 module_path = getattr(doc, "calling_module_path", None)
+                 node_spec["module_path"] = str(module_path) if module_path is not None else ""
+                 node_spec["version"] = getattr(doc, "calling_version", "") or ""
+                 calling_id = getattr(doc, "calling_id", None)
+                 if calling_id:
+                     node_spec["module_id"] = calling_id
+
+             # Attach trace events if present (kept lightweight)
+             if sid in self.trace_events and self.trace_events[sid]:
+                 node_spec["trace_events"] = [ev.to_dict() for ev in self.trace_events[sid]]
+             else:
+                 node_spec["trace_events"] = []
+
+             nodes.append(node_spec)
+
+         # Edges: self.graph maps node -> set(prerequisite nodes),
+         # but visually we want edges prereq -> node.
+         for node, prereqs in self.graph.items():
+             target_id = id_by_node[node]
+             for pre in prereqs:
+                 edges.append({"from": id_by_node[pre], "to": target_id})
+
+         return {"name": self.name, "nodes": nodes, "edges": edges}
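
The spec is JSON-serializable by construction (configs go through `json.dumps` with `default=str`, hashes are hex digests), so it can be dumped directly. Continuing from the sketches above:

```python
import json

spec = pipeline.to_spec()
print(json.dumps(spec, indent=2))

# per-node config hashes let a UI flag "config changed" cheaply
hashes = {n["id"]: n["config_hash"] for n in spec["nodes"]}
```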
+
+     def to_dot(self) -> str:
+         """
+         Export the pipeline as a Graphviz DOT string for visualization.
+
+         Nodes are labeled with "<step_id>: <module class name>", plus the
+         step's short_title on a second line when one is set.
+         """
+         spec = self.to_spec()
+         lines: list[str] = [
+             f'digraph "{spec["name"]}" {{',
+             " rankdir=LR;",  # left-to-right layout; change to TB for top-to-bottom
+         ]
+
+         # Nodes
+         for node in spec["nodes"]:
+             nid = node["id"]
+             # Show both id and label so it's easy to match YAML <-> graph
+             label = f'{node["id"]}: {node["module"]}'
+             short_title = node.get("short_title")
+             if short_title:
+                 label = f"{label}\\n{short_title}"
+             esc_label = label.replace('"', '\\"')
+             lines.append(f' "{nid}" [label="{esc_label}"];')  # noqa: E702, E231
+
+         # Edges
+         for edge in spec["edges"]:
+             lines.append(f' "{edge["from"]}" -> "{edge["to"]}";')  # noqa: E702, E231
+
+         lines.append("}")
+         return "\n".join(lines)
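
The DOT string can be written to a file and rendered with the standard Graphviz CLI; a sketch:

```python
from pathlib import Path

Path("pipeline.dot").write_text(pipeline.to_dot(), encoding="utf-8")
# render externally, e.g.: dot -Tsvg pipeline.dot -o pipeline.svg
```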
+
+     def to_mermaid(self, direction: str = "LR") -> str:
+         """
+         Export the pipeline as a Mermaid flowchart definition.
+
+         Parameters
+         ----------
+         direction:
+             Mermaid direction: "LR" (left-right), "TB" (top-bottom), etc.
+         """
+         spec = self.to_spec()
+
+         # Mermaid node IDs must be simple identifiers (no spaces, quotes, etc.).
+         # We'll generate safe IDs but keep the original step_id visible in the label.
+         def sanitize(node_id: str) -> str:
+             return "".join(c if (c.isalnum() or c == "_") else "_" for c in node_id)
+
+         id_map: dict[str, str] = {}
+         for node in spec["nodes"]:
+             raw = str(node["id"])
+             id_map[node["id"]] = sanitize(raw)
+
+         lines: list[str] = [f"flowchart {direction}"]
+
+         # Nodes
+         for node in spec["nodes"]:
+             nid = id_map[node["id"]]
+             label = f'{node["id"]}: {node["module"]}'
+             short_title = node.get("short_title")
+             if short_title:
+                 label = f"{label}<br/>{short_title}"
+             esc_label = label.replace('"', '\\"')
+             lines.append(f' {nid}["{esc_label}"]')
+
+         # Edges
+         for edge in spec["edges"]:
+             src = id_map[edge["from"]]
+             dst = id_map[edge["to"]]
+             lines.append(f" {src} --> {dst}")
+
+         return "\n".join(lines)
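
For the two-step pipeline sketched earlier, the Mermaid output would look roughly like this (exact labels depend on module names and short titles):

```python
print(pipeline.to_mermaid(direction="TB"))
# flowchart TB
#  1["1: PoissonUncertainties"]
#  pu["pu: PoissonUncertainties"]
#  1 --> pu
```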
+
+     # If the pipeline was modified via to_spec/from_spec, the result can be
+     # stored back to YAML:
+     def to_yaml(self) -> str:
+         """
+         Export the pipeline to a YAML string using the same schema
+         that `from_yaml` expects (keyed by step_id).
+
+         The result looks like:
+
+         ```yaml
+         name: my_pipeline
+         steps:
+           1:
+             module: SomeStep
+             requires_steps: []
+             configuration: {...}
+           "pu":
+             module: OtherStep
+             requires_steps: [1]
+             configuration: {...}
+         ```
+         """
+         spec = self.to_spec()
+
+         # Build steps mapping keyed by step_id
+         steps: dict[str, dict[str, Any]] = {}
+
+         # Pre-compute requires_steps per node from edges
+         requires_map: dict[str, list[str]] = {n["id"]: [] for n in spec["nodes"]}
+         for edge in spec["edges"]:
+             src = str(edge["from"])
+             dst = str(edge["to"])
+             # edge: src -> dst => dst.requires_steps includes src
+             if dst in requires_map:
+                 requires_map[dst].append(src)
+             else:
+                 requires_map[dst] = [src]
+
+         for node in spec["nodes"]:
+             sid = str(node["id"])
+             module_name = node["module"]
+             cfg = node.get("config", {}) or {}
+             requires = requires_map.get(sid, [])
+             short_title = node.get("short_title")
+
+             step_dict: dict[str, Any] = {
+                 "module": module_name,
+             }
+             if requires:
+                 step_dict["requires_steps"] = requires
+             if cfg:
+                 step_dict["configuration"] = cfg
+             if short_title:
+                 step_dict["short_title"] = short_title
+
+             steps[sid] = step_dict
+
+         yaml_obj = {
+             "name": spec.get("name", self.name or "Unnamed Pipeline"),
+             "steps": steps,
+         }
+
+         # sort_keys=False keeps insertion order, which follows node order in spec
+         return yaml.safe_dump(yaml_obj, sort_keys=False)
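
Because `to_yaml` emits the same schema that `from_yaml` consumes, an edited pipeline survives a round trip (hedged: step instances are re-created from the registry, not preserved):

```python
rebuilt = Pipeline.from_yaml(pipeline.to_yaml())
assert rebuilt.name == pipeline.name
```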
+
+     # --------------------------------------------------------------------- #
+     # Trace events / run-time introspection
+     # --------------------------------------------------------------------- #
+     #
+     # Summary
+     # -------
+     # Pipelines can optionally collect per-step TraceEvent records during execution.
+     #
+     # Design goals:
+     # - Keep Pipeline execution fast and lightweight (no arrays stored).
+     # - Keep trace events strictly step-local (each event describes only one executed node).
+     # - Support UI rendering without requiring access to live ProcessingData.
+     #
+     # What is stored:
+     # - Always: module metadata, prerequisite step_ids, and the step configuration used.
+     # - Optionally: dataset "diff" payloads produced by PipelineTracer (units/dimensionality/NaNs/etc).
+     # - Optionally: rendered, UI-ready snippets (HTML/Markdown/plain) for trace + config.
+     #
+     # What is NOT stored:
+     # - No signal arrays, maps, or large objects (TraceEvent must stay JSON-friendly).
+     # - No global or cross-step state (events can be attached/serialized independently).
+     #
+     # Integration pattern (typical runner / notebook):
+     #     node(processing_data)
+     #     tracer.after_step(node, processing_data)
+     #     pipeline.attach_tracer_event(node, tracer,
+     #                                  include_rendered_trace=True,
+     #                                  include_rendered_config=True)
+     #     pipeline.done(node)
+     #
+     # Export:
+     # - Pipeline.to_spec() includes node-level config + optional trace_events per node,
+     #   enabling graph viewers to show "what changed" as expandable panels.
+
+     def attach_tracer_event(
+         self,
+         node: ProcessStep,
+         tracer: Any | None,
+         *,
+         include_rendered_trace: bool = False,
+         include_rendered_config: bool = False,
+         rendered_format: str = "text/html",
+     ) -> TraceEvent:
+         """
+         Create & attach a TraceEvent for `node`, using `tracer.events` if available.
+
+         - Always attaches a TraceEvent so the graph UI can show config/module info.
+         - Adds datasets diffs only if a matching tracer event for this step_id exists.
+         """
+         step_id = str(node.step_id)
+         doc = getattr(node, "documentation", None)
+
+         label = getattr(doc, "calling_name", "") if doc is not None else ""
+         module_path = getattr(doc, "calling_module_path", "") if doc is not None else ""
+         version = getattr(doc, "calling_version", "") if doc is not None else ""
+
+         prereqs = tuple(sorted(str(p.step_id) for p in self.graph.get(node, set())))
+         cfg = dict(getattr(node, "configuration", {}) or {})
+
+         datasets: dict[str, Any] = {}
+
+         def _html_escape(s: str) -> str:
+             return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
+         matched_ev: dict[str, Any] | None = None
+
+         # Try to find the most recent tracer event for this step_id
+         if tracer is not None:
+             events = getattr(tracer, "events", None)
+             if isinstance(events, list) and events:
+                 for ev in reversed(events):
+                     if str(ev.get("step_id")) == step_id:
+                         matched_ev = ev
+                         datasets = tracer_event_to_datasets_payload(ev)
+                         break
+
+         duration_s: float | None = None
+         if matched_ev is not None:
+             d = matched_ev.get("duration_s", None)
+             if isinstance(d, (int, float)):
+                 duration_s = float(d)
+
+         messages: list[dict[str, Any]] = []
+
+         # --- Rendered trace (STRICTLY step-local) ---
+         if include_rendered_trace and matched_ev is not None:
+             try:
+                 from modacor.debug.pipeline_tracer import (  # noqa: WPS433
+                     MarkdownCssRenderer,
+                     PlainUnicodeRenderer,
+                     render_tracer_event,
+                 )
+
+                 if rendered_format in {"text/html", "text/markdown"}:
+                     # Markdown requests are rendered via the CSS renderer and reported as HTML.
+                     renderer = MarkdownCssRenderer(wrap_in_markdown_codeblock=False)
+                     content = render_tracer_event(matched_ev, renderer=renderer)
+                     fmt = "text/html"
+                 else:
+                     renderer = PlainUnicodeRenderer(wrap_in_markdown_codeblock=False)
+                     content = render_tracer_event(matched_ev, renderer=renderer)
+                     fmt = "text/plain"
+
+                 messages.append(
+                     {
+                         "kind": "rendered_trace",
+                         "title": "Trace",
+                         "format": fmt,
+                         "content": content,
+                     }
+                 )
+             except Exception as exc:
+                 messages.append(
+                     {
+                         "kind": "rendered_trace_error",
+                         "title": "Trace",
+                         "format": "text/plain",
+                         "content": f"{exc!r}",
+                     }
+                 )
+
+         # --- Rendered config (STRICTLY step-local) ---
+         if include_rendered_config:
+             try:
+                 cfg_yaml = yaml.safe_dump(cfg, sort_keys=False)
+
+                 if rendered_format in {"text/html", "text/markdown"}:
+                     # keep styling consistent with your CSS classes
+                     # (don't rely on MarkdownCssRenderer.codewrap here; we want to escape YAML)
+                     content = "<pre class='mdc-pre mdc-config'>\n" + _html_escape(cfg_yaml) + "\n</pre>"
+                     fmt = "text/html"
+                 else:
+                     content = "Configuration:\n" + cfg_yaml
+                     fmt = "text/plain"
+
+                 messages.append(
+                     {
+                         "kind": "rendered_config",
+                         "title": "Configuration",
+                         "format": fmt,
+                         "content": content,
+                     }
+                 )
+             except Exception as exc:
+                 messages.append(
+                     {
+                         "kind": "rendered_config_error",
+                         "title": "Configuration",
+                         "format": "text/plain",
+                         "content": f"{exc!r}",
+                     }
+                 )
+
+         event = TraceEvent(
+             step_id=step_id,
+             module=type(node).__name__,
+             label=str(label or ""),
+             module_path=str(module_path or ""),
+             version=str(version or ""),
+             requires_steps=prereqs,
+             config=cfg,
+             datasets=datasets,
+             duration_s=duration_s,
+             messages=messages,
+         )
+
+         self.add_trace_event(event)
+         return event
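
Putting the pieces together, a hedged end-to-end sketch of the integration pattern described in the summary comment above. It assumes `PipelineTracer` (from `modacor.debug.pipeline_tracer`) exposes an `after_step(node, processing_data)` hook and an `events` list, and that `processing_data` is supplied by the caller:

```python
from modacor.debug.pipeline_tracer import PipelineTracer  # name assumed

tracer = PipelineTracer()
pipeline.prepare()
while pipeline.is_active():
    for node in pipeline.get_ready():
        node.execute()                             # as in Pipeline.run()
        tracer.after_step(node, processing_data)   # hook per the summary comment
        pipeline.attach_tracer_event(
            node,
            tracer,
            include_rendered_trace=True,
            include_rendered_config=True,
        )
        pipeline.done(node)

spec = pipeline.to_spec()  # now carries per-node trace_events for graph viewers
```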