modacor 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. modacor/__init__.py +30 -0
  2. modacor/dataclasses/__init__.py +0 -0
  3. modacor/dataclasses/basedata.py +973 -0
  4. modacor/dataclasses/databundle.py +23 -0
  5. modacor/dataclasses/helpers.py +45 -0
  6. modacor/dataclasses/messagehandler.py +75 -0
  7. modacor/dataclasses/process_step.py +233 -0
  8. modacor/dataclasses/process_step_describer.py +146 -0
  9. modacor/dataclasses/processing_data.py +59 -0
  10. modacor/dataclasses/trace_event.py +118 -0
  11. modacor/dataclasses/uncertainty_tools.py +132 -0
  12. modacor/dataclasses/validators.py +84 -0
  13. modacor/debug/pipeline_tracer.py +548 -0
  14. modacor/io/__init__.py +33 -0
  15. modacor/io/csv/__init__.py +0 -0
  16. modacor/io/csv/csv_sink.py +114 -0
  17. modacor/io/csv/csv_source.py +210 -0
  18. modacor/io/hdf/__init__.py +27 -0
  19. modacor/io/hdf/hdf_source.py +120 -0
  20. modacor/io/io_sink.py +41 -0
  21. modacor/io/io_sinks.py +61 -0
  22. modacor/io/io_source.py +164 -0
  23. modacor/io/io_sources.py +208 -0
  24. modacor/io/processing_path.py +113 -0
  25. modacor/io/tiled/__init__.py +16 -0
  26. modacor/io/tiled/tiled_source.py +403 -0
  27. modacor/io/yaml/__init__.py +27 -0
  28. modacor/io/yaml/yaml_source.py +116 -0
  29. modacor/modules/__init__.py +53 -0
  30. modacor/modules/base_modules/__init__.py +0 -0
  31. modacor/modules/base_modules/append_processing_data.py +329 -0
  32. modacor/modules/base_modules/append_sink.py +141 -0
  33. modacor/modules/base_modules/append_source.py +181 -0
  34. modacor/modules/base_modules/bitwise_or_masks.py +113 -0
  35. modacor/modules/base_modules/combine_uncertainties.py +120 -0
  36. modacor/modules/base_modules/combine_uncertainties_max.py +105 -0
  37. modacor/modules/base_modules/divide.py +82 -0
  38. modacor/modules/base_modules/find_scale_factor1d.py +373 -0
  39. modacor/modules/base_modules/multiply.py +77 -0
  40. modacor/modules/base_modules/multiply_databundles.py +73 -0
  41. modacor/modules/base_modules/poisson_uncertainties.py +69 -0
  42. modacor/modules/base_modules/reduce_dimensionality.py +252 -0
  43. modacor/modules/base_modules/sink_processing_data.py +80 -0
  44. modacor/modules/base_modules/subtract.py +80 -0
  45. modacor/modules/base_modules/subtract_databundles.py +67 -0
  46. modacor/modules/base_modules/units_label_update.py +66 -0
  47. modacor/modules/instrument_modules/__init__.py +0 -0
  48. modacor/modules/instrument_modules/readme.md +9 -0
  49. modacor/modules/technique_modules/__init__.py +0 -0
  50. modacor/modules/technique_modules/scattering/__init__.py +0 -0
  51. modacor/modules/technique_modules/scattering/geometry_helpers.py +114 -0
  52. modacor/modules/technique_modules/scattering/index_pixels.py +492 -0
  53. modacor/modules/technique_modules/scattering/indexed_averager.py +628 -0
  54. modacor/modules/technique_modules/scattering/pixel_coordinates_3d.py +417 -0
  55. modacor/modules/technique_modules/scattering/solid_angle_correction.py +63 -0
  56. modacor/modules/technique_modules/scattering/xs_geometry.py +571 -0
  57. modacor/modules/technique_modules/scattering/xs_geometry_from_pixel_coordinates.py +293 -0
  58. modacor/runner/__init__.py +0 -0
  59. modacor/runner/pipeline.py +749 -0
  60. modacor/runner/process_step_registry.py +224 -0
  61. modacor/tests/__init__.py +27 -0
  62. modacor/tests/dataclasses/test_basedata.py +519 -0
  63. modacor/tests/dataclasses/test_basedata_operations.py +439 -0
  64. modacor/tests/dataclasses/test_basedata_to_base_units.py +57 -0
  65. modacor/tests/dataclasses/test_process_step_describer.py +73 -0
  66. modacor/tests/dataclasses/test_processstep.py +282 -0
  67. modacor/tests/debug/test_tracing_integration.py +188 -0
  68. modacor/tests/integration/__init__.py +0 -0
  69. modacor/tests/integration/test_pipeline_run.py +238 -0
  70. modacor/tests/io/__init__.py +27 -0
  71. modacor/tests/io/csv/__init__.py +0 -0
  72. modacor/tests/io/csv/test_csv_source.py +156 -0
  73. modacor/tests/io/hdf/__init__.py +27 -0
  74. modacor/tests/io/hdf/test_hdf_source.py +92 -0
  75. modacor/tests/io/test_io_sources.py +119 -0
  76. modacor/tests/io/tiled/__init__.py +12 -0
  77. modacor/tests/io/tiled/test_tiled_source.py +120 -0
  78. modacor/tests/io/yaml/__init__.py +27 -0
  79. modacor/tests/io/yaml/static_data_example.yaml +26 -0
  80. modacor/tests/io/yaml/test_yaml_source.py +47 -0
  81. modacor/tests/modules/__init__.py +27 -0
  82. modacor/tests/modules/base_modules/__init__.py +27 -0
  83. modacor/tests/modules/base_modules/test_append_processing_data.py +219 -0
  84. modacor/tests/modules/base_modules/test_append_sink.py +76 -0
  85. modacor/tests/modules/base_modules/test_append_source.py +180 -0
  86. modacor/tests/modules/base_modules/test_bitwise_or_masks.py +264 -0
  87. modacor/tests/modules/base_modules/test_combine_uncertainties.py +105 -0
  88. modacor/tests/modules/base_modules/test_combine_uncertainties_max.py +109 -0
  89. modacor/tests/modules/base_modules/test_divide.py +140 -0
  90. modacor/tests/modules/base_modules/test_find_scale_factor1d.py +220 -0
  91. modacor/tests/modules/base_modules/test_multiply.py +113 -0
  92. modacor/tests/modules/base_modules/test_multiply_databundles.py +136 -0
  93. modacor/tests/modules/base_modules/test_poisson_uncertainties.py +61 -0
  94. modacor/tests/modules/base_modules/test_reduce_dimensionality.py +358 -0
  95. modacor/tests/modules/base_modules/test_sink_processing_data.py +119 -0
  96. modacor/tests/modules/base_modules/test_subtract.py +111 -0
  97. modacor/tests/modules/base_modules/test_subtract_databundles.py +136 -0
  98. modacor/tests/modules/base_modules/test_units_label_update.py +91 -0
  99. modacor/tests/modules/technique_modules/__init__.py +0 -0
  100. modacor/tests/modules/technique_modules/scattering/__init__.py +0 -0
  101. modacor/tests/modules/technique_modules/scattering/test_geometry_helpers.py +198 -0
  102. modacor/tests/modules/technique_modules/scattering/test_index_pixels.py +426 -0
  103. modacor/tests/modules/technique_modules/scattering/test_indexed_averaging.py +559 -0
  104. modacor/tests/modules/technique_modules/scattering/test_pixel_coordinates_3d.py +282 -0
  105. modacor/tests/modules/technique_modules/scattering/test_xs_geometry_from_pixel_coordinates.py +224 -0
  106. modacor/tests/modules/technique_modules/scattering/test_xsgeometry.py +635 -0
  107. modacor/tests/requirements.txt +12 -0
  108. modacor/tests/runner/test_pipeline.py +438 -0
  109. modacor/tests/runner/test_process_step_registry.py +65 -0
  110. modacor/tests/test_import.py +43 -0
  111. modacor/tests/test_modacor.py +17 -0
  112. modacor/tests/test_units.py +79 -0
  113. modacor/units.py +97 -0
  114. modacor-1.0.0.dist-info/METADATA +482 -0
  115. modacor-1.0.0.dist-info/RECORD +120 -0
  116. modacor-1.0.0.dist-info/WHEEL +5 -0
  117. modacor-1.0.0.dist-info/licenses/AUTHORS.md +11 -0
  118. modacor-1.0.0.dist-info/licenses/LICENSE +11 -0
  119. modacor-1.0.0.dist-info/licenses/LICENSE.txt +11 -0
  120. modacor-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,548 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Brian R. Pauw"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "13/12/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ __version__ = "20251213.2"
13
+
14
+ from typing import Any, Protocol
15
+
16
+ import numpy as np
17
+ from attrs import define, field
18
+
19
+ from modacor.dataclasses.basedata import BaseData
20
+ from modacor.dataclasses.process_step import ProcessStep
21
+ from modacor.dataclasses.processing_data import ProcessingData
22
+
23
+ # --- Rendering ---------------------------------------------------------------
24
+
25
+
26
# Glyphs used to draw the tree-shaped trace report.
# "tee"/"elbow"/"pipe"/"space" are box-drawing connectors; "bullet" marks a
# dataset entry; "changed"/"same" are emoji status badges used by renderers
# that do not color text.
UNICODE = {
    "tee": "├─",
    "elbow": "└─",
    "pipe": "│ ",
    "space": " ",
    "bullet": "•",
    "changed": "🟪",  # purple square
    "same": "🟩",  # green square
}

# Minimal ANSI escape sequences for terminal output (see AnsiUnicodeRenderer).
ANSI = {
    "reset": "\x1b[0m",
    "green": "\x1b[32m",
    "purple": "\x1b[35m",
    "bold": "\x1b[1m",
    "dim": "\x1b[2m",
}
43
+
44
+
45
class ReportRenderer(Protocol):
    """Structural (duck-typed) interface for styling rendered trace reports.

    Implementations decide how to emphasize headers, dim secondary text,
    mark unchanged ("ok") vs changed values, produce status badges, and wrap
    the final report text (e.g. in a Markdown code block).
    """

    def header(self, text: str) -> str:
        ...

    def dim(self, text: str) -> str:
        ...

    def ok(self, text: str) -> str:
        ...

    def changed(self, text: str) -> str:
        ...

    def badge_ok(self) -> str:
        ...

    def badge_changed(self) -> str:
        ...

    def codewrap(self, text: str) -> str:
        ...
66
+
67
+
68
@define(frozen=True)
class PlainUnicodeRenderer:
    """Unicode tree + emoji badges; no colors.

    Text-styling hooks are pass-throughs; only the badges and the optional
    Markdown code fence carry visual meaning.
    """

    wrap_in_markdown_codeblock: bool = False

    def header(self, text: str) -> str:
        # No emphasis in plain mode.
        return text

    def dim(self, text: str) -> str:
        return text

    def ok(self, text: str) -> str:
        return text

    def changed(self, text: str) -> str:
        return text

    def badge_ok(self) -> str:
        return UNICODE["same"]

    def badge_changed(self) -> str:
        return UNICODE["changed"]

    def codewrap(self, text: str) -> str:
        # Fence the report so it renders verbatim when pasted into Markdown.
        if self.wrap_in_markdown_codeblock:
            return "```text\n" + text + "\n```"
        return text
96
+
97
+
98
@define(frozen=True)
class AnsiUnicodeRenderer:
    """Unicode tree + ANSI colors (green unchanged, purple changed)."""

    wrap_in_markdown_codeblock: bool = False
    enable: bool = True

    def _c(self, text: str, color: str) -> str:
        # Colorize *text* with the named ANSI code; no-op when disabled
        # (e.g. when output is redirected to a file).
        if not self.enable:
            return text
        return ANSI[color] + text + ANSI["reset"]

    def header(self, text: str) -> str:
        return self._c(text, "bold")

    def dim(self, text: str) -> str:
        return self._c(text, "dim")

    def ok(self, text: str) -> str:
        return self._c(text, "green")

    def changed(self, text: str) -> str:
        return self._c(text, "purple")

    def badge_ok(self) -> str:
        return self.ok(UNICODE["same"])

    def badge_changed(self) -> str:
        return self.changed(UNICODE["changed"])

    def codewrap(self, text: str) -> str:
        # Fence the report for Markdown contexts when requested.
        if self.wrap_in_markdown_codeblock:
            return "```text\n" + text + "\n```"
        return text
132
+
133
+
134
@define(frozen=True)
class MarkdownCssRenderer:
    """
    Markdown + HTML spans for styling via CSS classes.
    Works well in MkDocs / Sphinx / Jupyter HTML outputs.
    (GitHub won't apply your custom CSS, but text still reads fine.)
    """

    wrap_in_markdown_codeblock: bool = False

    def header(self, text: str) -> str:
        return f"**{text}**"

    def dim(self, text: str) -> str:
        return f"<span class='mdc-dim'>{text}</span>"

    def ok(self, text: str) -> str:
        return f"<span class='mdc-ok'>{text}</span>"

    def changed(self, text: str) -> str:
        return f"<span class='mdc-changed'>{text}</span>"

    def badge_ok(self) -> str:
        return self.ok(UNICODE["same"])

    def badge_changed(self) -> str:
        return self.changed(UNICODE["changed"])

    def codewrap(self, text: str) -> str:
        # Prefer <pre> so CSS can style spans inside; code fences often strip HTML
        if not self.wrap_in_markdown_codeblock:
            return "<pre class='mdc-pre'>\n" + text + "\n</pre>"
        return "```text\n" + text + "\n```"
167
+
168
+
169
+ def _nan_count(x: np.ndarray) -> int:
170
+ return int(np.isnan(x).sum()) if x.size else 0
171
+
172
+
173
+ def _finite_min_max(x: np.ndarray) -> tuple[float | None, float | None]:
174
+ if x.size == 0 or not np.isfinite(x).any():
175
+ return None, None
176
+ return float(np.nanmin(x)), float(np.nanmax(x))
177
+
178
+
179
@define(frozen=True)
class BaseDataProbe:
    """
    A tiny, array-free fingerprint of a BaseData at a point in the pipeline.

    Notes:
      - `ndim` is derived from the signal array.
      - `rank_of_data` is taken from BaseData metadata if present.
      - `dimensionality_str` is Pint dimensionality (not shape dimensions).
    """

    shape: tuple[int, ...]
    ndim: int
    rank_of_data: int | None

    units_str: str
    dimensionality_str: str

    nan_signal: int
    nan_unc: dict[str, int] = field(factory=dict)

    # Optional scalar diagnostics
    min_signal: float | None = None
    max_signal: float | None = None

    @classmethod
    def from_basedata(cls, bd: BaseData, *, compute_min_max: bool = False) -> "BaseDataProbe":
        """Fingerprint *bd*; optionally also record finite min/max of the signal."""
        signal = np.asarray(bd.signal, dtype=float)

        # One NaN tally per named uncertainty array.
        unc_nans = {key: _nan_count(np.asarray(arr, dtype=float)) for key, arr in bd.uncertainties.items()}

        # Pint dimensionality is often more robust than raw unit string equality
        dim = getattr(bd.units, "dimensionality", None)

        lo: float | None = None
        hi: float | None = None
        if compute_min_max:
            lo, hi = _finite_min_max(signal)

        return cls(
            shape=tuple(signal.shape),
            ndim=int(signal.ndim),
            rank_of_data=getattr(bd, "rank_of_data", None),
            units_str=str(bd.units),
            dimensionality_str=str(dim) if dim is not None else "<?>",
            nan_signal=_nan_count(signal),
            nan_unc=unc_nans,
            min_signal=lo,
            max_signal=hi,
        )
230
+
231
+
232
@define
class PipelineTracer:
    """
    Records only *small* per-step probes, optionally only when relevant deltas occur.

    Example watch:
        {"sample": ["signal"], "sample_background": ["signal"]}
    """

    # Mapping of databundle key -> list of basedata keys to probe after each step.
    watch: dict[str, list[str]] = field(factory=dict)

    # Keep history small by default: only record when deltas occur (as defined by change_kinds)
    record_only_on_change: bool = True
    record_empty_step_events: bool = False

    # Which changes trigger recording an event (min/max are *not* triggers by default)
    change_kinds: set[str] = field(
        factory=lambda: {
            "units",
            "dimensionality",
            "shape",
            "ndim",
            "rank",
            "nan_signal",
            "nan_unc",
        }
    )

    # Include scalar min/max in probes (does not affect change detection unless you add "minmax" to change_kinds)
    compute_min_max: bool = False

    # Guards (fail fast at the *first* step that introduces the issue)
    fail_on_expected_mismatch: bool = False
    fail_on_nan_increase: bool = False
    fail_on_units_change: bool = False
    fail_on_dimensionality_change: bool = False
    fail_on_shape_change: bool = False
    fail_on_rank_change: bool = False

    # Optional expectations: step_id -> expected value
    expected_units_by_step: dict[str, str] = field(factory=dict)
    expected_dimensionality_by_step: dict[str, str] = field(factory=dict)
    expected_ndim_by_step: dict[str, int] = field(factory=dict)
    expected_rank_by_step: dict[str, int] = field(factory=dict)

    # Last probe seen per (bundle, dataset); baseline for delta detection.
    _last: dict[tuple[str, str], BaseDataProbe] = field(factory=dict)
    # Recorded step events; see after_step for the entry schema.
    events: list[dict[str, Any]] = field(factory=list)

    def _diff_kinds(self, prev: BaseDataProbe, now: BaseDataProbe) -> set[str]:
        """Return the subset of `change_kinds` that actually differ between the probes."""
        kinds: set[str] = set()

        if "units" in self.change_kinds and now.units_str != prev.units_str:
            kinds.add("units")
        if "dimensionality" in self.change_kinds and now.dimensionality_str != prev.dimensionality_str:
            kinds.add("dimensionality")
        if "shape" in self.change_kinds and now.shape != prev.shape:
            kinds.add("shape")
        if "ndim" in self.change_kinds and now.ndim != prev.ndim:
            kinds.add("ndim")
        if "rank" in self.change_kinds and now.rank_of_data != prev.rank_of_data:
            kinds.add("rank")

        if "nan_signal" in self.change_kinds and now.nan_signal != prev.nan_signal:
            kinds.add("nan_signal")

        if "nan_unc" in self.change_kinds:
            # Compare over the union of keys so added/removed uncertainty arrays also count.
            keys = set(prev.nan_unc) | set(now.nan_unc)
            if any(now.nan_unc.get(k, 0) != prev.nan_unc.get(k, 0) for k in keys):
                kinds.add("nan_unc")

        # Optional: treat min/max as a trigger if explicitly requested
        if "minmax" in self.change_kinds and (now.min_signal, now.max_signal) != (prev.min_signal, prev.max_signal):
            kinds.add("minmax")

        return kinds

    def after_step(  # noqa: C901 # too complex, resolve later
        self,
        step: ProcessStep,
        data: ProcessingData,
        *,
        duration_s: float | None = None,
    ) -> None:
        """Probe all watched datasets after *step* ran over *data*.

        Checks per-step expectations first, then raises on any enabled guard
        (units / dimensionality / shape / rank / NaN-increase), and finally
        appends an event to `self.events` when recording is warranted.
        Raises RuntimeError at the first violated expectation or guard.
        """
        step_id = getattr(step, "step_id", "<??>")
        module = getattr(step.documentation, "calling_id", None) or step.__class__.__name__
        name = getattr(step.documentation, "calling_name", "")

        changed: dict[tuple[str, str], dict[str, Any]] = {}

        for bundle_key, ds_keys in self.watch.items():
            # Silently skip watched targets that are absent from this run's data.
            if bundle_key not in data:
                continue

            db = data[bundle_key]
            for ds_key in ds_keys:
                if ds_key not in db:
                    continue

                bd = db[ds_key]
                if not isinstance(bd, BaseData):
                    continue

                now = BaseDataProbe.from_basedata(bd, compute_min_max=self.compute_min_max)
                prev = self._last.get((bundle_key, ds_key))
                # Update the baseline before any guard may raise, so a caught
                # failure does not re-trigger on the next step.
                self._last[(bundle_key, ds_key)] = now

                # Expectations (exact string/int match)
                exp_units = self.expected_units_by_step.get(step_id)
                if exp_units is not None and now.units_str != exp_units and self.fail_on_expected_mismatch:
                    raise RuntimeError(
                        f"[{step_id} {module}] {bundle_key}.{ds_key} units mismatch: "
                        f"got '{now.units_str}', expected '{exp_units}'"
                    )

                exp_dim = self.expected_dimensionality_by_step.get(step_id)
                if exp_dim is not None and now.dimensionality_str != exp_dim and self.fail_on_expected_mismatch:
                    raise RuntimeError(
                        f"[{step_id} {module}] {bundle_key}.{ds_key} dimensionality mismatch: "
                        f"got '{now.dimensionality_str}', expected '{exp_dim}'. units='{now.units_str}'"
                    )

                exp_ndim = self.expected_ndim_by_step.get(step_id)
                if exp_ndim is not None and now.ndim != exp_ndim and self.fail_on_expected_mismatch:
                    raise RuntimeError(
                        f"[{step_id} {module}] {bundle_key}.{ds_key} ndim mismatch: got {now.ndim}, expected {exp_ndim}"
                    )

                exp_rank = self.expected_rank_by_step.get(step_id)
                if exp_rank is not None and now.rank_of_data != exp_rank and self.fail_on_expected_mismatch:
                    raise RuntimeError(
                        f"[{step_id} {module}] {bundle_key}.{ds_key} rank_of_data mismatch: "
                        f"got {now.rank_of_data}, expected {exp_rank}"
                    )

                # Delta-driven recording / guards
                if prev is None:
                    # Always record first probe for a watched target
                    changed[(bundle_key, ds_key)] = {"prev": None, "now": now, "diff": {"first_seen"}}
                    continue

                diff = self._diff_kinds(prev, now)

                # Guards compare raw probe fields directly (independent of change_kinds).
                if self.fail_on_units_change and now.units_str != prev.units_str:
                    raise RuntimeError(
                        f"[{step_id} {module}] {bundle_key}.{ds_key} units changed: "
                        f"'{prev.units_str}' -> '{now.units_str}'"
                    )
                if self.fail_on_dimensionality_change and now.dimensionality_str != prev.dimensionality_str:
                    raise RuntimeError(
                        f"[{step_id} {module}] {bundle_key}.{ds_key} dimensionality changed: "
                        f"'{prev.dimensionality_str}' -> '{now.dimensionality_str}' (units='{now.units_str}')"
                    )
                if self.fail_on_shape_change and now.shape != prev.shape:
                    raise RuntimeError(
                        f"[{step_id} {module}] {bundle_key}.{ds_key} shape changed: {prev.shape} -> {now.shape}"
                    )
                if self.fail_on_rank_change and now.rank_of_data != prev.rank_of_data:
                    raise RuntimeError(
                        f"[{step_id} {module}] {bundle_key}.{ds_key} rank_of_data changed: "
                        f"{prev.rank_of_data} -> {now.rank_of_data}"
                    )

                if self.fail_on_nan_increase:
                    if now.nan_signal > prev.nan_signal:
                        raise RuntimeError(
                            f"[{step_id} {module}] {bundle_key}.{ds_key} signal NaNs increased: "
                            f"{prev.nan_signal} -> {now.nan_signal}"
                        )
                    keys = set(prev.nan_unc) | set(now.nan_unc)
                    for k in keys:
                        n_prev = prev.nan_unc.get(k, 0)
                        n_now = now.nan_unc.get(k, 0)
                        if n_now > n_prev:
                            raise RuntimeError(
                                f"[{step_id} {module}] {bundle_key}.{ds_key} unc['{k}'] NaNs increased: "
                                f"{n_prev} -> {n_now}"
                            )

                if diff:
                    changed[(bundle_key, ds_key)] = {"prev": prev, "now": now, "diff": diff}

        # Record when unconditional, when something changed, or when empty events
        # are explicitly requested.
        if (not self.record_only_on_change) or changed or self.record_empty_step_events:
            self.events.append(
                {
                    "step_id": step_id,
                    "module": module,
                    "name": name,
                    "changed": changed,
                    "duration_s": duration_s,
                }
            )

    def last_report(self, n: int = 20, *, renderer: ReportRenderer | None = None) -> str:
        """Render the most recent *n* events as one report string."""
        r = renderer or PlainUnicodeRenderer(wrap_in_markdown_codeblock=False)
        events = self.events[-n:]
        blocks = [render_tracer_event(ev, renderer=r) for ev in events]
        # render_tracer_event already wraps, so join plainly:
        return "\n\n".join(blocks)
430
+
431
+
432
+ def _probe_to_dict(p: BaseDataProbe) -> dict[str, Any]:
433
+ return {
434
+ "shape": list(p.shape),
435
+ "ndim": p.ndim,
436
+ "rank_of_data": p.rank_of_data,
437
+ "units": p.units_str,
438
+ "dimensionality": p.dimensionality_str,
439
+ "nan_signal": p.nan_signal,
440
+ "nan_unc": dict(p.nan_unc),
441
+ # only include if computed
442
+ **(
443
+ {"min_signal": p.min_signal, "max_signal": p.max_signal}
444
+ if (p.min_signal is not None or p.max_signal is not None)
445
+ else {}
446
+ ),
447
+ }
448
+
449
+
450
def tracer_event_to_datasets_payload(tracer_step_event: dict[str, Any]) -> dict[str, Any]:
    """
    Convert a single tracer 'events' entry into TraceEvent.datasets payload.

    Input shape:
        {"changed": {(bundle, ds): {"prev": BaseDataProbe|None, "now": BaseDataProbe, "diff": set[str]}}}

    Output shape:
        {"bundle.ds": {"diff": [...], "prev": {...}|None, "now": {...}}}
    """
    changed = tracer_step_event.get("changed", {}) or {}
    payload_by_name: dict[str, Any] = {}

    # Stable order for UI diffs
    for key in sorted(changed.keys(), key=lambda bd: (bd[0], bd[1])):
        bundle_key, ds_key = key
        entry = changed[key]
        prev_probe = entry.get("prev")

        payload_by_name[f"{bundle_key}.{ds_key}"] = {
            "diff": sorted(entry.get("diff", set())),
            "prev": None if prev_probe is None else _probe_to_dict(prev_probe),
            "now": _probe_to_dict(entry.get("now")),
        }

    return payload_by_name
477
+
478
+
479
def render_tracer_event(tracer_event: dict[str, Any], *, renderer: ReportRenderer | None = None) -> str:
    """
    Render exactly ONE tracer event (one element from PipelineTracer.events).
    Strictly step-local: no reliance on global tracer state.

    Returns the rendered tree as a single string, already passed through the
    renderer's `codewrap` (callers should join events with blank lines, not
    re-wrap them).
    """
    r = renderer or PlainUnicodeRenderer(wrap_in_markdown_codeblock=False)
    lines: list[str] = []

    def fmt_kv(label: str, prev: object | None, now: object, is_changed: bool) -> str:
        # One "badge label value" line; shows "prev → now" only for changes
        # with a known previous value.
        badge = r.badge_changed() if is_changed else r.badge_ok()
        if prev is None:
            return f"{badge} {label:<18} {now}"  # noqa: E231
        if is_changed:
            return f"{badge} {label:<18} {r.changed(str(prev))} → {r.changed(str(now))}"  # noqa: E231
        return f"{badge} {label:<18} {r.ok(str(now))}"  # noqa: E231

    step_id = tracer_event.get("step_id", "<??>")
    module = tracer_event.get("module", "")
    name = tracer_event.get("name", "")
    dur = tracer_event.get("duration_s", None)
    dur_txt = ""
    # Only show a duration when one was actually recorded for this step.
    if isinstance(dur, (int, float)):
        dur_txt = f" {r.dim(f'⏱ {dur * 1e3:.2f} ms')}"  # noqa: E231
    lines.append(r.header(f"Step {step_id} — {module} — {name}") + dur_txt)

    changed_map: dict[tuple[str, str], dict[str, Any]] = tracer_event.get("changed", {}) or {}
    # Deterministic (bundle, dataset) ordering so repeated renders diff cleanly.
    items = sorted(changed_map.items(), key=lambda kv: (kv[0][0], kv[0][1]))

    for idx, ((b, d), payload) in enumerate(items):
        # Tree connectors: last entry gets an elbow and blank continuation,
        # earlier entries get a tee and a vertical pipe.
        is_last_ds = idx == (len(items) - 1)
        joint = UNICODE["elbow"] if is_last_ds else UNICODE["tee"]
        cont = UNICODE["space"] if is_last_ds else UNICODE["pipe"]

        prev: BaseDataProbe | None = payload.get("prev")
        now: BaseDataProbe = payload.get("now")
        diff: set[str] = set(payload.get("diff", set()) or set())
        diff_str = ", ".join(sorted(diff))
        diff_note = f" {r.dim('[' + diff_str + ']')}" if diff_str else ""

        lines.append(f"{joint} {UNICODE['bullet']} {b}.{d}{diff_note}")

        if prev is None:
            # First sighting: everything rendered as "new" (changed badge, no prev).
            lines.append(f"{cont}{UNICODE['space']}{fmt_kv('units', None, now.units_str, True)}")
            lines.append(f"{cont}{UNICODE['space']}{fmt_kv('dimensionality', None, now.dimensionality_str, True)}")
            lines.append(f"{cont}{UNICODE['space']}{fmt_kv('shape', None, now.shape, True)}")
            lines.append(f"{cont}{UNICODE['space']}{fmt_kv('NaN(signal)', None, now.nan_signal, True)}")
        else:
            lines.append(
                f"{cont}{UNICODE['space']}{fmt_kv('units', prev.units_str, now.units_str, now.units_str != prev.units_str)}"
            )
            lines.append(
                f"{cont}{UNICODE['space']}{fmt_kv('dimensionality', prev.dimensionality_str, now.dimensionality_str, now.dimensionality_str != prev.dimensionality_str)}"
            )
            lines.append(f"{cont}{UNICODE['space']}{fmt_kv('shape', prev.shape, now.shape, now.shape != prev.shape)}")
            lines.append(
                f"{cont}{UNICODE['space']}{fmt_kv('NaN(signal)', prev.nan_signal, now.nan_signal, now.nan_signal != prev.nan_signal)}"
            )

        # Union of uncertainty keys so removed/added arrays still show up.
        unc_keys = sorted(now.nan_unc.keys() if prev is None else (set(prev.nan_unc) | set(now.nan_unc)))
        if unc_keys:
            lines.append(f"{cont}{UNICODE['space']}{r.dim('uncertainties:')}")
            for uk in unc_keys:
                p = 0 if prev is None else prev.nan_unc.get(uk, 0)
                q = now.nan_unc.get(uk, 0)
                lines.append(
                    f"{cont}{UNICODE['space']}{UNICODE['space']}"
                    + fmt_kv(f"NaN(unc['{uk}'])", None if prev is None else p, q, True if prev is None else (p != q))
                )

    return r.codewrap("\n".join(lines).rstrip())
modacor/io/__init__.py ADDED
@@ -0,0 +1,33 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # Copyright 2025 MoDaCor Authors
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without modification,
5
+ # are permitted provided that the following conditions are met:
6
+ # 1. Redistributions of source code must retain the above copyright notice, this
7
+ # list of conditions and the following disclaimer.
8
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
9
+ # this list of conditions and the following disclaimer in the documentation
10
+ # and/or other materials provided with the distribution.
11
+ # 3. Neither the name of the copyright holder nor the names of its contributors
12
+ # may be used to endorse or promote products derived from this software without
13
+ # specific prior written permission.
14
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND
15
+ # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18
+ # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
+
25
+ __license__ = "BSD-3-Clause"
26
+ __copyright__ = "Copyright 2025 MoDaCor Authors"
27
+ __status__ = "Alpha"
28
+ __all__ = ["IoSource", "IoSources", "TiledSource"]
29
+
30
+
31
+ from .io_source import IoSource
32
+ from .io_sources import IoSources
33
+ from .tiled.tiled_source import TiledSource
File without changes
@@ -0,0 +1,114 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Brian R. Pauw"]
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "12/12/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+ __all__ = ["CSVSink"]
15
+
16
+ from pathlib import Path
17
+
18
+ import numpy as np
19
+ from attrs import define, field, validators
20
+
21
+ from modacor.dataclasses.messagehandler import MessageHandler
22
+ from modacor.dataclasses.processing_data import ProcessingData
23
+ from modacor.io.io_sink import IoSink
24
+ from modacor.io.processing_path import infer_units_for_path, parse_processing_path, resolve_processing_path
25
+
26
+
27
def _default_column_name(path: str) -> str:
    """Derive a CSV column header ('bundle/basedata/leaf...') from a processing path."""
    parsed = parse_processing_path(path)
    parts = (parsed.databundle_key, parsed.basedata_name, *parsed.subpath)
    return "/".join(parts)
30
+
31
+
32
+ def _ensure_1d_array(obj, path: str) -> np.ndarray:
33
+ arr = np.asarray(obj)
34
+ if arr.ndim != 1:
35
+ raise ValueError(f"CSVSink expects 1D arrays only (got shape {arr.shape}) for path: {path}")
36
+ return arr.astype(float, copy=False)
37
+
38
+
39
@define(kw_only=True)
class CSVSink(IoSink):
    """
    Write 1D ProcessingData leaves to a delimiter-separated file.

    Deterministic:
      - requires explicit leaf paths (no default signal)
      - no scalar broadcasting
      - does not support sink subpaths (must call as 'sink_ref::')
      - overwrite-only (no streaming/appending here)
    """

    # Target file; always coerced to a Path on construction.
    resource_location: Path = field(converter=Path, validator=validators.instance_of(Path))
    # Extra keyword arguments forwarded to np.savetxt (e.g. 'delimiter', 'fmt').
    iosink_method_kwargs: dict = field(factory=dict, validator=validators.instance_of(dict))
    logger: MessageHandler = field(init=False)

    def __attrs_post_init__(self) -> None:
        self.logger = MessageHandler(level=self.logging_level, name="CSVSink")

    def write(
        self,
        subpath: str,
        processing_data: ProcessingData,
        data_paths: list[str],
        override_resource_location: Path | None = None,  # not sure if this will be usable in normal operation.
    ) -> Path:
        """Resolve each path in *data_paths* to a 1D column and write name/units
        header rows followed by the data.

        Returns the Path actually written. Raises ValueError for subpaths,
        empty path lists, non-leaf paths, non-1D arrays, or length mismatches.
        """
        # CSV does not support internal sink locations
        if subpath not in ("", None) and str(subpath).strip() != "":
            raise ValueError(
                f"CSVSink does not support subpaths. Use '{self.sink_reference}::' (got '{subpath}')."  # noqa: E231
            )

        if not data_paths:
            raise ValueError("CSVSink.write requires at least one path in data_paths.")

        out_path = (override_resource_location or self.resource_location).expanduser()
        out_path.parent.mkdir(parents=True, exist_ok=True)

        # enforce explicit leaf: /bundle/basedata/<leaf...>
        for p in data_paths:
            pp = parse_processing_path(p)
            if len(pp.subpath) == 0:
                raise ValueError(
                    "CSV export requires an explicit leaf path (e.g."
                    f" '/{pp.databundle_key}/{pp.basedata_name}/signal'). Got: {p}"
                )

        cols = [_ensure_1d_array(resolve_processing_path(processing_data, p), p) for p in data_paths]

        # All columns must share one length; report the first offender.
        n = cols[0].shape[0]
        for p, c in zip(data_paths, cols):
            if c.shape[0] != n:
                raise ValueError(
                    f"All columns must have identical length; expected {n}, got {c.shape[0]} for {p}"  # noqa: E702
                )

        names = [_default_column_name(p) for p in data_paths]
        units = [infer_units_for_path(processing_data, p) for p in data_paths]

        # delimiter lives in iosink_method_kwargs to keep configuration minimal
        delimiter = self.iosink_method_kwargs.get("delimiter", ";")

        # BUGFIX: np.savetxt defaults to a *space* delimiter. Previously, when the
        # user did not set 'delimiter' in iosink_method_kwargs, the two header rows
        # used ';' while the data rows used ' '. Forward the effective delimiter so
        # header and data always agree.
        savetxt_kwargs = dict(self.iosink_method_kwargs)
        savetxt_kwargs.setdefault("delimiter", delimiter)

        self.logger.info(f"CSVSink writing {len(cols)} columns x {n} rows to {out_path}.")

        data = np.column_stack(cols)  # (n, ncols)

        # ensure deterministic newline + UTF-8
        with out_path.open("w", encoding="utf-8", newline="\n") as f:
            f.write(delimiter.join(names) + "\n")
            f.write(delimiter.join(units) + "\n")
            np.savetxt(f, data, **savetxt_kwargs)

        return out_path