modacor 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. modacor/__init__.py +30 -0
  2. modacor/dataclasses/__init__.py +0 -0
  3. modacor/dataclasses/basedata.py +973 -0
  4. modacor/dataclasses/databundle.py +23 -0
  5. modacor/dataclasses/helpers.py +45 -0
  6. modacor/dataclasses/messagehandler.py +75 -0
  7. modacor/dataclasses/process_step.py +233 -0
  8. modacor/dataclasses/process_step_describer.py +146 -0
  9. modacor/dataclasses/processing_data.py +59 -0
  10. modacor/dataclasses/trace_event.py +118 -0
  11. modacor/dataclasses/uncertainty_tools.py +132 -0
  12. modacor/dataclasses/validators.py +84 -0
  13. modacor/debug/pipeline_tracer.py +548 -0
  14. modacor/io/__init__.py +33 -0
  15. modacor/io/csv/__init__.py +0 -0
  16. modacor/io/csv/csv_sink.py +114 -0
  17. modacor/io/csv/csv_source.py +210 -0
  18. modacor/io/hdf/__init__.py +27 -0
  19. modacor/io/hdf/hdf_source.py +120 -0
  20. modacor/io/io_sink.py +41 -0
  21. modacor/io/io_sinks.py +61 -0
  22. modacor/io/io_source.py +164 -0
  23. modacor/io/io_sources.py +208 -0
  24. modacor/io/processing_path.py +113 -0
  25. modacor/io/tiled/__init__.py +16 -0
  26. modacor/io/tiled/tiled_source.py +403 -0
  27. modacor/io/yaml/__init__.py +27 -0
  28. modacor/io/yaml/yaml_source.py +116 -0
  29. modacor/modules/__init__.py +53 -0
  30. modacor/modules/base_modules/__init__.py +0 -0
  31. modacor/modules/base_modules/append_processing_data.py +329 -0
  32. modacor/modules/base_modules/append_sink.py +141 -0
  33. modacor/modules/base_modules/append_source.py +181 -0
  34. modacor/modules/base_modules/bitwise_or_masks.py +113 -0
  35. modacor/modules/base_modules/combine_uncertainties.py +120 -0
  36. modacor/modules/base_modules/combine_uncertainties_max.py +105 -0
  37. modacor/modules/base_modules/divide.py +82 -0
  38. modacor/modules/base_modules/find_scale_factor1d.py +373 -0
  39. modacor/modules/base_modules/multiply.py +77 -0
  40. modacor/modules/base_modules/multiply_databundles.py +73 -0
  41. modacor/modules/base_modules/poisson_uncertainties.py +69 -0
  42. modacor/modules/base_modules/reduce_dimensionality.py +252 -0
  43. modacor/modules/base_modules/sink_processing_data.py +80 -0
  44. modacor/modules/base_modules/subtract.py +80 -0
  45. modacor/modules/base_modules/subtract_databundles.py +67 -0
  46. modacor/modules/base_modules/units_label_update.py +66 -0
  47. modacor/modules/instrument_modules/__init__.py +0 -0
  48. modacor/modules/instrument_modules/readme.md +9 -0
  49. modacor/modules/technique_modules/__init__.py +0 -0
  50. modacor/modules/technique_modules/scattering/__init__.py +0 -0
  51. modacor/modules/technique_modules/scattering/geometry_helpers.py +114 -0
  52. modacor/modules/technique_modules/scattering/index_pixels.py +492 -0
  53. modacor/modules/technique_modules/scattering/indexed_averager.py +628 -0
  54. modacor/modules/technique_modules/scattering/pixel_coordinates_3d.py +417 -0
  55. modacor/modules/technique_modules/scattering/solid_angle_correction.py +63 -0
  56. modacor/modules/technique_modules/scattering/xs_geometry.py +571 -0
  57. modacor/modules/technique_modules/scattering/xs_geometry_from_pixel_coordinates.py +293 -0
  58. modacor/runner/__init__.py +0 -0
  59. modacor/runner/pipeline.py +749 -0
  60. modacor/runner/process_step_registry.py +224 -0
  61. modacor/tests/__init__.py +27 -0
  62. modacor/tests/dataclasses/test_basedata.py +519 -0
  63. modacor/tests/dataclasses/test_basedata_operations.py +439 -0
  64. modacor/tests/dataclasses/test_basedata_to_base_units.py +57 -0
  65. modacor/tests/dataclasses/test_process_step_describer.py +73 -0
  66. modacor/tests/dataclasses/test_processstep.py +282 -0
  67. modacor/tests/debug/test_tracing_integration.py +188 -0
  68. modacor/tests/integration/__init__.py +0 -0
  69. modacor/tests/integration/test_pipeline_run.py +238 -0
  70. modacor/tests/io/__init__.py +27 -0
  71. modacor/tests/io/csv/__init__.py +0 -0
  72. modacor/tests/io/csv/test_csv_source.py +156 -0
  73. modacor/tests/io/hdf/__init__.py +27 -0
  74. modacor/tests/io/hdf/test_hdf_source.py +92 -0
  75. modacor/tests/io/test_io_sources.py +119 -0
  76. modacor/tests/io/tiled/__init__.py +12 -0
  77. modacor/tests/io/tiled/test_tiled_source.py +120 -0
  78. modacor/tests/io/yaml/__init__.py +27 -0
  79. modacor/tests/io/yaml/static_data_example.yaml +26 -0
  80. modacor/tests/io/yaml/test_yaml_source.py +47 -0
  81. modacor/tests/modules/__init__.py +27 -0
  82. modacor/tests/modules/base_modules/__init__.py +27 -0
  83. modacor/tests/modules/base_modules/test_append_processing_data.py +219 -0
  84. modacor/tests/modules/base_modules/test_append_sink.py +76 -0
  85. modacor/tests/modules/base_modules/test_append_source.py +180 -0
  86. modacor/tests/modules/base_modules/test_bitwise_or_masks.py +264 -0
  87. modacor/tests/modules/base_modules/test_combine_uncertainties.py +105 -0
  88. modacor/tests/modules/base_modules/test_combine_uncertainties_max.py +109 -0
  89. modacor/tests/modules/base_modules/test_divide.py +140 -0
  90. modacor/tests/modules/base_modules/test_find_scale_factor1d.py +220 -0
  91. modacor/tests/modules/base_modules/test_multiply.py +113 -0
  92. modacor/tests/modules/base_modules/test_multiply_databundles.py +136 -0
  93. modacor/tests/modules/base_modules/test_poisson_uncertainties.py +61 -0
  94. modacor/tests/modules/base_modules/test_reduce_dimensionality.py +358 -0
  95. modacor/tests/modules/base_modules/test_sink_processing_data.py +119 -0
  96. modacor/tests/modules/base_modules/test_subtract.py +111 -0
  97. modacor/tests/modules/base_modules/test_subtract_databundles.py +136 -0
  98. modacor/tests/modules/base_modules/test_units_label_update.py +91 -0
  99. modacor/tests/modules/technique_modules/__init__.py +0 -0
  100. modacor/tests/modules/technique_modules/scattering/__init__.py +0 -0
  101. modacor/tests/modules/technique_modules/scattering/test_geometry_helpers.py +198 -0
  102. modacor/tests/modules/technique_modules/scattering/test_index_pixels.py +426 -0
  103. modacor/tests/modules/technique_modules/scattering/test_indexed_averaging.py +559 -0
  104. modacor/tests/modules/technique_modules/scattering/test_pixel_coordinates_3d.py +282 -0
  105. modacor/tests/modules/technique_modules/scattering/test_xs_geometry_from_pixel_coordinates.py +224 -0
  106. modacor/tests/modules/technique_modules/scattering/test_xsgeometry.py +635 -0
  107. modacor/tests/requirements.txt +12 -0
  108. modacor/tests/runner/test_pipeline.py +438 -0
  109. modacor/tests/runner/test_process_step_registry.py +65 -0
  110. modacor/tests/test_import.py +43 -0
  111. modacor/tests/test_modacor.py +17 -0
  112. modacor/tests/test_units.py +79 -0
  113. modacor/units.py +97 -0
  114. modacor-1.0.0.dist-info/METADATA +482 -0
  115. modacor-1.0.0.dist-info/RECORD +120 -0
  116. modacor-1.0.0.dist-info/WHEEL +5 -0
  117. modacor-1.0.0.dist-info/licenses/AUTHORS.md +11 -0
  118. modacor-1.0.0.dist-info/licenses/LICENSE +11 -0
  119. modacor-1.0.0.dist-info/licenses/LICENSE.txt +11 -0
  120. modacor-1.0.0.dist-info/top_level.txt +1 -0
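
Of the 120 files above, three base modules are diffed in full below: append_processing_data.py (+329), append_sink.py (+141), and append_source.py (+181). As a quick orientation, here is a minimal import check; the module paths come from the file list, the class names from each module's `__all__`, and it assumes the wheel has been installed.

```python
# Minimal import check for the three process steps diffed below.
# Module paths are taken from the file list; class names from each module's
# __all__. Assumes `pip install modacor` (or this wheel) has been run.
from modacor.modules.base_modules.append_processing_data import AppendProcessingData
from modacor.modules.base_modules.append_sink import AppendSink
from modacor.modules.base_modules.append_source import AppendSource

for step in (AppendProcessingData, AppendSink, AppendSource):
    print(step.__name__)  # AppendProcessingData, AppendSink, AppendSource
```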
--- /dev/null
+++ b/modacor/modules/base_modules/append_processing_data.py
@@ -0,0 +1,329 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ __coding__ = "utf-8"
+ __authors__ = ["Brian R. Pauw"]  # add names to the list as appropriate
+ __copyright__ = "Copyright 2025, The MoDaCor team"
+ __date__ = "30/10/2025"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ __all__ = ["AppendProcessingData"]
+ __version__ = "20251030.3"
+
+ from pathlib import Path
+ from typing import Any
+
+ from modacor import ureg
+ from modacor.dataclasses.basedata import BaseData
+ from modacor.dataclasses.databundle import DataBundle
+ from modacor.dataclasses.helpers import basedata_from_sources
+ from modacor.dataclasses.messagehandler import MessageHandler
+ from modacor.dataclasses.process_step import ProcessStep
+ from modacor.dataclasses.process_step_describer import ProcessStepDescriber
+ from modacor.io.io_sources import IoSources
+
+ # Module-level handler; facilities can swap MessageHandler implementation as needed
+ logger = MessageHandler(name=__name__)
+
+
+ class AppendProcessingData(ProcessStep):
+     """
+     Load signal data from ``self.io_sources`` into a processing :class:`DataBundle`
+     in ``self.processing_data``.
+
+     This step creates or updates a single :class:`DataBundle` from existing
+     :class:`IoSources` entries in ``self.io_sources``. It:
+
+     1. Loads the signal array from ``signal_location`` (string reference).
+     2. Loads units either from:
+        - ``units_location`` via :meth:`IoSources.get_static_metadata`, or
+        - ``units_override`` as a direct units string, or
+        - defaults to dimensionless if neither is provided.
+     3. Optionally loads uncertainty arrays from ``uncertainties_sources``.
+     4. Wraps everything in a :class:`BaseData` instance.
+     5. Sets ``BaseData.rank_of_data`` based on the configured ``rank_of_data``:
+        - If it is an ``int``, it is used directly.
+        - If it is a ``str``, it is interpreted as an IoSources metadata reference
+          (``'<io_source_id>::<dataset_path>'``) and read via
+          :meth:`IoSources.get_static_metadata`, then converted to ``int``.
+        - Validation and bounds checking are handled by :func:`validate_rank_of_data`
+          inside :class:`BaseData`.
+     6. Stores the resulting :class:`BaseData` under the configured
+        ``databundle_output_key`` (default: ``"signal"``) in a
+        :class:`DataBundle` at ``self.processing_data[processing_key]``. If that
+        DataBundle already exists, it is updated: existing entries are preserved
+        and the ``databundle_output_key`` entry is overwritten or added.
+
+     The resulting mapping ``{processing_key: DataBundle}`` is returned.
+     """
+
+     documentation = ProcessStepDescriber(
+         calling_name="Append Processing Data",
+         calling_id="AppendProcessingData",
+         calling_module_path=Path(__file__),
+         calling_version=__version__,
+         required_data_keys=[],  # this step creates/updates a DataBundle
+         modifies={},  # processing_key: databundle: databundle_output_key
+         arguments={
+             "processing_key": {
+                 "type": str,
+                 "required": True,
+                 "default": "",
+                 "doc": "ProcessingData key to create or update.",
+             },
+             "signal_location": {
+                 "type": str,
+                 "required": True,
+                 "default": "",
+                 "doc": "IoSources reference '<io_source_id>::<dataset_path>'.",
+             },
+             "rank_of_data": {
+                 "type": (int, str),
+                 "required": True,
+                 "default": 2,
+                 "doc": "BaseData rank as int or IoSources metadata reference.",
+             },
+             "databundle_output_key": {
+                 "type": str,
+                 "default": "signal",
+                 "doc": "BaseData key inside the DataBundle.",
+             },
+             "units_location": {
+                 "type": (str, type(None)),
+                 "default": None,
+                 "doc": "Optional IoSources reference for units metadata.",
+             },
+             "units_override": {
+                 "type": (str, type(None)),
+                 "default": None,
+                 "doc": "Optional unit string that overrides loaded units.",
+             },
+             "uncertainties_sources": {
+                 "type": dict,
+                 "default": {},
+                 "doc": "Mapping of uncertainty name to IoSources reference.",
+             },
+         },
+         step_keywords=["append", "processing", "data", "signal"],
+         step_doc="Append signal data from IoSources into a processing DataBundle.",
+         step_reference="",
+         step_note=(
+             "This step reads from existing IoSources and creates or updates a named DataBundle "
+             "with a BaseData entry (default 'signal') for use in the processing pipeline."
+         ),
+     )
+
+     # -------------------------------------------------------------------------
+     # Internal helpers
+     # -------------------------------------------------------------------------
+     def _resolve_rank_of_data(self, rank_cfg: Any, io_sources: IoSources) -> int:
+         """
+         Resolve the configured rank_of_data to an integer.
+
+         Parameters
+         ----------
+         rank_cfg :
+             Either an integer directly, or a string reference of the form
+             '<io_source_id>::<dataset_path>' pointing to metadata that contains
+             the rank as an integer-compatible value.
+
+         io_sources :
+             The IoSources object used to resolve metadata references.
+
+         Returns
+         -------
+         int
+             The resolved rank_of_data. Actual bounds checking is performed by
+             BaseData's internal validation.
+         """
+         # Direct int → use as-is (with int() for safety)
+         if isinstance(rank_cfg, int):
+             return int(rank_cfg)
+
+         # If it *looks* like an io_source reference, treat it as such
+         if isinstance(rank_cfg, str):
+             logger.debug(
+                 f"AppendProcessingData: resolving rank_of_data from IoSources metadata reference '{rank_cfg}'."
+             )
+             meta_value = io_sources.get_static_metadata(rank_cfg)
+             try:
+                 return int(meta_value)
+             except (TypeError, ValueError) as exc:
+                 raise ValueError(
+                     f"Could not convert rank_of_data metadata from '{rank_cfg}' to int (value={meta_value!r})."
+                 ) from exc
+
+         # Fallback: try to cast whatever it is to int
+         try:
+             return int(rank_cfg)
+         except (TypeError, ValueError) as exc:
+             raise TypeError(
+                 "rank_of_data must be an int or an IoSources metadata reference string, "
+                 f"got {rank_cfg!r} ({type(rank_cfg).__name__})."
+             ) from exc
+
+     def _load_and_validate_configuration(self) -> dict[str, Any]:
+         """
+         Load and validate configuration values from ``self.configuration``.
+
+         Returns
+         -------
+         dict[str, Any]
+             A dictionary containing the resolved configuration:
+             - processing_key (str)
+             - signal_location (str)
+             - rank_of_data (int)
+             - databundle_output_key (str)
+             - units_location (str | None)
+             - units_override (str | None)
+             - uncertainties_sources (dict[str, str])
+         """
+         cfg = self.configuration
+
+         processing_key = cfg.get("processing_key")
+         if not isinstance(processing_key, str) or not processing_key:
+             raise ValueError("AppendProcessingData requires 'processing_key' to be a non-empty string.")
+
+         signal_location = cfg.get("signal_location")
+         if not isinstance(signal_location, str) or not signal_location:
+             raise ValueError("AppendProcessingData requires 'signal_location' to be a non-empty string.")
+
+         if "rank_of_data" not in cfg:
+             raise ValueError("AppendProcessingData requires 'rank_of_data' in the configuration.")
+         rank_cfg = cfg["rank_of_data"]
+         resolved_rank = self._resolve_rank_of_data(rank_cfg, self.io_sources)
+
+         databundle_output_key = cfg.get("databundle_output_key", "signal")
+         if not isinstance(databundle_output_key, str) or not databundle_output_key:
+             raise ValueError("AppendProcessingData requires 'databundle_output_key' to be a non-empty string.")
+
+         units_location = cfg.get("units_location")
+         if units_location is not None and not isinstance(units_location, str):
+             raise TypeError("'units_location' must be a string '<source_ref>::<dataset_path>' or None.")
+
+         units_override = cfg.get("units_override")
+         if units_override is not None and not isinstance(units_override, str):
+             raise TypeError("'units_override' must be a units string if provided.")
+
+         uncertainties_sources: dict[str, str] = cfg.get("uncertainties_sources", {}) or {}
+         if not isinstance(uncertainties_sources, dict):
+             raise TypeError(
+                 f"'uncertainties_sources' must be a dict[str, str], got {type(uncertainties_sources).__name__}."
+             )
+
+         return {
+             "processing_key": processing_key,
+             "signal_location": signal_location,
+             "rank_of_data": resolved_rank,
+             "databundle_output_key": databundle_output_key,
+             "units_location": units_location,
+             "units_override": units_override,
+             "uncertainties_sources": uncertainties_sources,
+         }
+
+     # -------------------------------------------------------------------------
+     # Public API used by the pipeline
+     # -------------------------------------------------------------------------
+     def calculate(self) -> dict[str, DataBundle]:
+         """
+         Create or update a DataBundle from ``self.io_sources`` and return it.
+
+         Configuration fields:
+
+         - ``processing_key`` (str):
+           Name under which the DataBundle will be stored in
+           ``self.processing_data``.
+
+         - ``signal_location`` (str):
+           Data reference in the form ``'<source_ref>::<dataset_path>'``.
+
+         - ``rank_of_data`` (int or str):
+           Desired rank for the created :class:`BaseData` object.
+           If a string, it is treated as an IoSources metadata reference and
+           resolved via :meth:`IoSources.get_static_metadata`. Validation and
+           bounds checking are handled by :class:`BaseData`.
+
+         - ``databundle_output_key`` (str, default: ``"signal"``):
+           Key under which the new :class:`BaseData` will be stored inside the
+           :class:`DataBundle`. If the DataBundle already contains an entry
+           under this key, it will be overwritten.
+
+         - ``units_location`` (str | None):
+           Data reference pointing to a static metadata entry that defines the
+           units. If provided, the value from
+           :meth:`IoSources.get_static_metadata` is passed to :func:`ureg.Unit`.
+           If omitted and ``units_override`` is None, units default to
+           dimensionless.
+
+         - ``units_override`` (str | None):
+           Direct units string (e.g. ``"counts"`` or ``"1/m"``) that overrides
+           any value loaded via ``units_location``.
+
+         - ``uncertainties_sources`` (dict[str, str]):
+           Mapping from uncertainty name (e.g. ``"poisson"``) to data reference
+           (``'<source_ref>::<dataset_path>'``).
+         """
+         cfg = self._load_and_validate_configuration()
+
+         processing_key: str = cfg["processing_key"]
+         signal_location: str = cfg["signal_location"]
+         rank_of_data: int = cfg["rank_of_data"]
+         databundle_output_key: str = cfg["databundle_output_key"]
+         units_location = cfg["units_location"]
+         units_override = cfg["units_override"]
+         uncertainties_sources: dict[str, str] = cfg["uncertainties_sources"]
+
+         io_sources: IoSources = self.io_sources
+
+         logger.info(
+             (
+                 f"AppendProcessingData: creating/updating DataBundle '{processing_key}' "
+                 f"from signal_location='{signal_location}' into key '{databundle_output_key}'."
+             ),
+         )
+
+         # Load BaseData via helper: signal + units + uncertainties
+         bd: BaseData = basedata_from_sources(
+             io_sources=io_sources,
+             signal_source=signal_location,
+             units_source=units_location,
+             uncertainty_sources=uncertainties_sources,
+         )
+
+         # Override units if requested
+         if units_override is not None:
+             logger.debug(
+                 f"AppendProcessingData: overriding units for '{processing_key}' to '{units_override}'.",
+             )
+             bd.units = ureg.Unit(units_override)
+
+         # Set rank_of_data; BaseData's own validation handles bounds
+         bd.rank_of_data = rank_of_data
+
+         # Create or update the DataBundle in processing_data
+         existing_bundle = self.processing_data.get(processing_key)
+         if existing_bundle is None:
+             databundle = DataBundle()
+         else:
+             databundle = existing_bundle
+
+         # Update/insert the BaseData at the requested key
+         databundle[databundle_output_key] = bd
+
+         # If no default_plot is set yet, use this key as a sensible default
+         if getattr(databundle, "default_plot", None) is None:
+             databundle.default_plot = databundle_output_key
+
+         # Store back into processing_data and build output
+         self.processing_data[processing_key] = databundle
+         output: dict[str, DataBundle] = {processing_key: databundle}
+
+         logger.info(
+             f"AppendProcessingData: DataBundle '{processing_key}' now contains datasets {list(databundle.keys())}.",
+         )
+
+         return output
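
For reference, a sketch of what a configuration for AppendProcessingData could look like, based on the arguments documented above. The source id `raw_nexus` and all dataset paths are hypothetical; only the key names and the `'<io_source_id>::<dataset_path>'` reference format come from the module itself.

```python
# Illustrative AppendProcessingData configuration (a sketch, not taken from the
# package docs). 'raw_nexus' and the dataset paths below are hypothetical.
configuration = {
    "processing_key": "sample",                                # DataBundle to create/update
    "signal_location": "raw_nexus::/entry/data/intensity",     # signal array reference
    "rank_of_data": "raw_nexus::/entry/data/rank",             # int, or a metadata reference string
    "databundle_output_key": "signal",                         # key inside the DataBundle
    "units_location": None,                                    # optional metadata reference for units
    "units_override": "counts",                                # direct unit string wins over units_location
    "uncertainties_sources": {
        "poisson": "raw_nexus::/entry/data/intensity_errors",  # uncertainty name -> data reference
    },
}
```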
--- /dev/null
+++ b/modacor/modules/base_modules/append_sink.py
@@ -0,0 +1,141 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ __coding__ = "utf-8"
+ __authors__ = ["Brian R. Pauw"]
+ __copyright__ = "Copyright 2026, The MoDaCor team"
+ __date__ = "09/01/2026"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ __all__ = ["AppendSink"]
+ __version__ = "20260109.1"
+
+ from importlib import import_module
+ from pathlib import Path
+ from typing import Any, Callable
+
+ from modacor.dataclasses.databundle import DataBundle
+ from modacor.dataclasses.messagehandler import MessageHandler
+ from modacor.dataclasses.process_step import ProcessStep
+ from modacor.dataclasses.process_step_describer import ProcessStepDescriber
+ from modacor.io.io_sinks import IoSinks
+
+ # Module-level handler; facilities can swap MessageHandler implementation as needed
+ logger = MessageHandler(name=__name__)
+
+
+ class AppendSink(ProcessStep):
+     """
+     Appends an IoSink to self.io_sinks.
+
+     This mirrors AppendSource: it augments the set of available I/O sinks but does
+     not touch the actual data bundles.
+     """
+
+     documentation = ProcessStepDescriber(
+         calling_name="Append Sink",
+         calling_id="AppendSink",
+         calling_module_path=Path(__file__),
+         calling_version=__version__,
+         required_data_keys=[],
+         modifies={},  # sinks only; no data modified
+         arguments={
+             "sink_identifier": {
+                 "type": (str, list),
+                 "required": True,
+                 "default": "",
+                 "doc": "Identifier(s) to register the ioSink(s) under.",
+             },
+             "sink_location": {
+                 "type": (str, list),
+                 "required": True,
+                 "default": "",
+                 "doc": "Resource location(s) understood by the sink.",
+             },
+             "iosink_module": {
+                 "type": str,
+                 "required": True,
+                 "default": "",
+                 "doc": "Fully qualified module path to the ioSink class.",
+             },
+             "iosink_method_kwargs": {
+                 "type": dict,
+                 "default": {},
+                 "doc": "Keyword arguments forwarded to the ioSink constructor.",
+             },
+         },
+         step_keywords=["append", "sink"],
+         step_doc="Append an IoSink to the available data sinks",
+         step_reference="",
+         step_note="This adds an IoSink to the data sinks registry.",
+     )
+
+     # -------------------------------------------------------------------------
+     # Public API used by the pipeline
+     # -------------------------------------------------------------------------
+     def calculate(self) -> dict[str, DataBundle]:
+         output: dict[str, DataBundle] = {}
+
+         sink_ids: str | list[str] = self.configuration["sink_identifier"]
+         sink_locations: str | list[str] = self.configuration["sink_location"]
+         iosink_module: str = self.configuration["iosink_module"]
+
+         # Normalise to lists
+         if isinstance(sink_ids, str):
+             sink_ids = [sink_ids]
+         if isinstance(sink_locations, str):
+             sink_locations = [sink_locations]
+
+         if len(sink_ids) != len(sink_locations):
+             raise ValueError("'sink_identifier' and 'sink_location' must contain the same number of entries.")
+
+         for sink_id, sink_location in zip(sink_ids, sink_locations):
+             if sink_id not in self.io_sinks.defined_sinks:
+                 self._append_sink_by_name(
+                     sink_name=iosink_module,
+                     sink_location=sink_location,
+                     sink_identifier=sink_id,
+                     iosink_method_kwargs=self.configuration.get("iosink_method_kwargs", {}),
+                 )
+
+         return output
+
+     # -------------------------------------------------------------------------
+     # Internal helpers
+     # -------------------------------------------------------------------------
+     def _append_sink_by_name(
+         self,
+         sink_name: str,
+         sink_location: str,
+         sink_identifier: str,
+         iosink_method_kwargs: dict[str, Any] | None = None,
+     ) -> None:
+         # Avoid a shared mutable default argument; treat None as "no extra kwargs"
+         if iosink_method_kwargs is None:
+             iosink_method_kwargs = {}
+         sink_callable = self._resolve_iosink_callable(sink_name)
+
+         # Ensure io_sinks exists or initialize it
+         if not hasattr(self, "io_sinks") or self.io_sinks is None:
+             self.io_sinks = IoSinks()
+             logger.info("Initialized self.io_sinks in AppendSink step.")
+
+         self.io_sinks.register_sink(
+             sink_callable(
+                 sink_reference=sink_identifier,
+                 resource_location=sink_location,
+                 iosink_method_kwargs=iosink_method_kwargs,
+             )
+         )
+
+     def _resolve_iosink_callable(self, sink_name: str) -> Callable[..., Any]:
+         if "." not in sink_name:
+             raise ValueError(
+                 f"iosink_module must be a fully qualified 'package.module.Class' path, got '{sink_name}'."
+             )
+         module_path, attr_name = sink_name.rsplit(".", 1)
+         module = import_module(module_path)
+         try:
+             sink_obj = getattr(module, attr_name)
+         except AttributeError as exc:
+             raise ImportError(
+                 f"Could not find '{attr_name}' in module '{module_path}' for iosink_module='{sink_name}'."
+             ) from exc
+         return sink_obj
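
A sketch of an AppendSink configuration based on the documented arguments. The identifier and output path are hypothetical, and while modacor/io/csv/csv_sink.py exists in the file list above, the class name `CsvSink` is an unverified assumption.

```python
# Illustrative AppendSink configuration (a sketch). The class name CsvSink is
# assumed; only the module path modacor.io.csv.csv_sink appears in the wheel.
configuration = {
    "sink_identifier": ["results_csv"],                   # may also be a single string
    "sink_location": ["/tmp/reduced_data.csv"],           # one location per identifier
    "iosink_module": "modacor.io.csv.csv_sink.CsvSink",   # fully qualified class path
    "iosink_method_kwargs": {},                           # forwarded to the sink constructor
}
```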
--- /dev/null
+++ b/modacor/modules/base_modules/append_source.py
@@ -0,0 +1,181 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ __coding__ = "utf-8"
+ __authors__ = ["Brian R. Pauw"]  # add names to the list as appropriate
+ __copyright__ = "Copyright 2025, The MoDaCor team"
+ __date__ = "30/10/2025"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ __all__ = ["AppendSource"]
+ __version__ = "20251030.1"
+
+ from importlib import import_module
+ from pathlib import Path
+ from typing import Any, Callable
+
+ from modacor.dataclasses.databundle import DataBundle
+ from modacor.dataclasses.messagehandler import MessageHandler
+ from modacor.dataclasses.process_step import ProcessStep
+ from modacor.dataclasses.process_step_describer import ProcessStepDescriber
+ from modacor.io.io_sources import IoSources
+
+ # Module-level handler; facilities can swap MessageHandler implementation as needed
+ logger = MessageHandler(name=__name__)
+
+
+ class AppendSource(ProcessStep):
+     """
+     Appends an ioSource to self.io_sources.
+
+     This step is intended for pipeline-graph / provenance operations: it augments
+     the set of available I/O sources but does not touch the actual data bundles.
+     """
+
+     documentation = ProcessStepDescriber(
+         calling_name="Append Source",
+         calling_id="AppendSource",
+         calling_module_path=Path(__file__),
+         calling_version=__version__,
+         required_data_keys=[],
+         modifies={},  # sources only; no data modified
+         arguments={
+             "source_identifier": {
+                 "type": (str, list),
+                 "required": True,
+                 "default": "",
+                 "doc": "Identifier(s) to register the ioSource(s) under.",
+             },
+             "source_location": {
+                 "type": (str, list),
+                 "required": True,
+                 "default": "",
+                 "doc": "Resource location(s) to load for the ioSource(s).",
+             },
+             "iosource_module": {
+                 "type": str,
+                 "required": True,
+                 "default": "",
+                 "doc": "Fully qualified module path to the ioSource class.",
+             },
+             "iosource_method_kwargs": {
+                 "type": dict,
+                 "default": {},
+                 "doc": "Keyword arguments forwarded to the ioSource constructor.",
+             },
+         },
+         step_keywords=["append", "source"],
+         step_doc="Append an ioSource to the available data sources",
+         step_reference="",
+         step_note="This adds an ioSource to the data sources of the databundle.",
+     )
+
+     # -------------------------------------------------------------------------
+     # Public API used by the pipeline
+     # -------------------------------------------------------------------------
+     def calculate(self) -> dict[str, DataBundle]:
+         """
+         Append one or more sources to ``self.io_sources``.
+
+         Notes
+         -----
+         - No ``DataBundle`` objects are modified or created.
+         - The pipeline can treat an empty output dict as "no-op on data",
+           while the side-effect on ``self.io_sources`` persists.
+         """
+         output: dict[str, DataBundle] = {}
+
+         source_ids: str | list[str] = self.configuration["source_identifier"]
+         source_locations: str | list[str] = self.configuration["source_location"]
+         iosource_module: str = self.configuration["iosource_module"]
+
+         # Normalise to lists
+         if isinstance(source_ids, str):
+             source_ids = [source_ids]
+         if isinstance(source_locations, str):
+             source_locations = [source_locations]
+
+         if len(source_ids) != len(source_locations):
+             raise ValueError(
+                 "'source_identifier' and 'source_location' must contain the same number of entries."
+             )
+
+         for source_id, source_location in zip(source_ids, source_locations):
+             # Only append if not already present
+             if source_id not in self.io_sources.defined_sources:
+                 self._append_loader_by_name(
+                     loader_name=iosource_module,
+                     source_location=source_location,
+                     source_identifier=source_id,
+                     iosource_method_kwargs=self.configuration.get("iosource_method_kwargs", {}),
+                 )
+         # No data modified – only side-effect is on self.io_sources
+         return output
+
+     # -------------------------------------------------------------------------
+     # Internal helpers
+     # -------------------------------------------------------------------------
+     def _append_loader_by_name(
+         self,
+         loader_name: str,
+         source_location: str,
+         source_identifier: str,
+         iosource_method_kwargs: dict[str, Any] | None = None,
+     ) -> None:
+         """
+         Resolve the requested loader and append the resulting ioSource to
+         ``self.io_sources``.
+
+         Parameters
+         ----------
+         loader_name:
+             A fully qualified import path
+             (e.g. ``"modacor.io.hdf.hdf_source.HDFSource"``).
+         source_location:
+             Path / URI / identifier understood by the loader.
+         source_identifier:
+             Key under which the resulting ioSource will be stored in
+             ``self.io_sources``.
+         iosource_method_kwargs:
+             Additional keyword arguments to pass to the loader callable.
+         """
+         # Avoid a shared mutable default argument; treat None as "no extra kwargs"
+         if iosource_method_kwargs is None:
+             iosource_method_kwargs = {}
+         source_callable = self._resolve_iosource_callable(loader_name)
+
+         # Ensure io_sources exists or initialize it
+         if not hasattr(self, "io_sources") or self.io_sources is None:
+             # ProcessStep normally sets this up, but be defensive.
+             self.io_sources = IoSources()
+             logger.info("Initialized self.io_sources in AppendSource step.")
+
+         self.io_sources.register_source(
+             source_callable(
+                 source_reference=source_identifier,
+                 resource_location=source_location,
+                 iosource_method_kwargs=iosource_method_kwargs,
+             )
+         )
+
+     def _resolve_iosource_callable(self, loader_name: str) -> Callable[..., Any]:
+         """
+         Resolve the configured loader into a callable.
+
+         Strategy
+         --------
+         Treat ``loader_name`` as a fully qualified import path like
+         ``package.module.ClassOrFunc`` and import its final attribute.
+         """
+         if "." not in loader_name:
+             raise ValueError(
+                 f"iosource_module must be a fully qualified 'package.module.Class' path, got '{loader_name}'."
+             )
+         module_path, attr_name = loader_name.rsplit(".", 1)
+         module = import_module(module_path)
+         try:
+             loader_obj = getattr(module, attr_name)
+         except AttributeError as exc:
+             raise ImportError(
+                 f"Could not find '{attr_name}' in module '{module_path}' for iosource_module='{loader_name}'."
+             ) from exc
+
+         return loader_obj
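
A sketch of an AppendSource configuration based on the documented arguments. The loader path `modacor.io.hdf.hdf_source.HDFSource` is the example given in the docstring above; the identifier and file path are hypothetical.

```python
# Illustrative AppendSource configuration (a sketch). The loader path is the
# docstring's own example; 'raw_nexus' and the .nxs path are placeholders.
configuration = {
    "source_identifier": ["raw_nexus"],                       # may also be a single string
    "source_location": ["/data/experiment_0001.nxs"],         # one location per identifier
    "iosource_module": "modacor.io.hdf.hdf_source.HDFSource", # fully qualified class path
    "iosource_method_kwargs": {},                             # forwarded to the source constructor
}
```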