modacor 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. modacor/__init__.py +30 -0
  2. modacor/dataclasses/__init__.py +0 -0
  3. modacor/dataclasses/basedata.py +973 -0
  4. modacor/dataclasses/databundle.py +23 -0
  5. modacor/dataclasses/helpers.py +45 -0
  6. modacor/dataclasses/messagehandler.py +75 -0
  7. modacor/dataclasses/process_step.py +233 -0
  8. modacor/dataclasses/process_step_describer.py +146 -0
  9. modacor/dataclasses/processing_data.py +59 -0
  10. modacor/dataclasses/trace_event.py +118 -0
  11. modacor/dataclasses/uncertainty_tools.py +132 -0
  12. modacor/dataclasses/validators.py +84 -0
  13. modacor/debug/pipeline_tracer.py +548 -0
  14. modacor/io/__init__.py +33 -0
  15. modacor/io/csv/__init__.py +0 -0
  16. modacor/io/csv/csv_sink.py +114 -0
  17. modacor/io/csv/csv_source.py +210 -0
  18. modacor/io/hdf/__init__.py +27 -0
  19. modacor/io/hdf/hdf_source.py +120 -0
  20. modacor/io/io_sink.py +41 -0
  21. modacor/io/io_sinks.py +61 -0
  22. modacor/io/io_source.py +164 -0
  23. modacor/io/io_sources.py +208 -0
  24. modacor/io/processing_path.py +113 -0
  25. modacor/io/tiled/__init__.py +16 -0
  26. modacor/io/tiled/tiled_source.py +403 -0
  27. modacor/io/yaml/__init__.py +27 -0
  28. modacor/io/yaml/yaml_source.py +116 -0
  29. modacor/modules/__init__.py +53 -0
  30. modacor/modules/base_modules/__init__.py +0 -0
  31. modacor/modules/base_modules/append_processing_data.py +329 -0
  32. modacor/modules/base_modules/append_sink.py +141 -0
  33. modacor/modules/base_modules/append_source.py +181 -0
  34. modacor/modules/base_modules/bitwise_or_masks.py +113 -0
  35. modacor/modules/base_modules/combine_uncertainties.py +120 -0
  36. modacor/modules/base_modules/combine_uncertainties_max.py +105 -0
  37. modacor/modules/base_modules/divide.py +82 -0
  38. modacor/modules/base_modules/find_scale_factor1d.py +373 -0
  39. modacor/modules/base_modules/multiply.py +77 -0
  40. modacor/modules/base_modules/multiply_databundles.py +73 -0
  41. modacor/modules/base_modules/poisson_uncertainties.py +69 -0
  42. modacor/modules/base_modules/reduce_dimensionality.py +252 -0
  43. modacor/modules/base_modules/sink_processing_data.py +80 -0
  44. modacor/modules/base_modules/subtract.py +80 -0
  45. modacor/modules/base_modules/subtract_databundles.py +67 -0
  46. modacor/modules/base_modules/units_label_update.py +66 -0
  47. modacor/modules/instrument_modules/__init__.py +0 -0
  48. modacor/modules/instrument_modules/readme.md +9 -0
  49. modacor/modules/technique_modules/__init__.py +0 -0
  50. modacor/modules/technique_modules/scattering/__init__.py +0 -0
  51. modacor/modules/technique_modules/scattering/geometry_helpers.py +114 -0
  52. modacor/modules/technique_modules/scattering/index_pixels.py +492 -0
  53. modacor/modules/technique_modules/scattering/indexed_averager.py +628 -0
  54. modacor/modules/technique_modules/scattering/pixel_coordinates_3d.py +417 -0
  55. modacor/modules/technique_modules/scattering/solid_angle_correction.py +63 -0
  56. modacor/modules/technique_modules/scattering/xs_geometry.py +571 -0
  57. modacor/modules/technique_modules/scattering/xs_geometry_from_pixel_coordinates.py +293 -0
  58. modacor/runner/__init__.py +0 -0
  59. modacor/runner/pipeline.py +749 -0
  60. modacor/runner/process_step_registry.py +224 -0
  61. modacor/tests/__init__.py +27 -0
  62. modacor/tests/dataclasses/test_basedata.py +519 -0
  63. modacor/tests/dataclasses/test_basedata_operations.py +439 -0
  64. modacor/tests/dataclasses/test_basedata_to_base_units.py +57 -0
  65. modacor/tests/dataclasses/test_process_step_describer.py +73 -0
  66. modacor/tests/dataclasses/test_processstep.py +282 -0
  67. modacor/tests/debug/test_tracing_integration.py +188 -0
  68. modacor/tests/integration/__init__.py +0 -0
  69. modacor/tests/integration/test_pipeline_run.py +238 -0
  70. modacor/tests/io/__init__.py +27 -0
  71. modacor/tests/io/csv/__init__.py +0 -0
  72. modacor/tests/io/csv/test_csv_source.py +156 -0
  73. modacor/tests/io/hdf/__init__.py +27 -0
  74. modacor/tests/io/hdf/test_hdf_source.py +92 -0
  75. modacor/tests/io/test_io_sources.py +119 -0
  76. modacor/tests/io/tiled/__init__.py +12 -0
  77. modacor/tests/io/tiled/test_tiled_source.py +120 -0
  78. modacor/tests/io/yaml/__init__.py +27 -0
  79. modacor/tests/io/yaml/static_data_example.yaml +26 -0
  80. modacor/tests/io/yaml/test_yaml_source.py +47 -0
  81. modacor/tests/modules/__init__.py +27 -0
  82. modacor/tests/modules/base_modules/__init__.py +27 -0
  83. modacor/tests/modules/base_modules/test_append_processing_data.py +219 -0
  84. modacor/tests/modules/base_modules/test_append_sink.py +76 -0
  85. modacor/tests/modules/base_modules/test_append_source.py +180 -0
  86. modacor/tests/modules/base_modules/test_bitwise_or_masks.py +264 -0
  87. modacor/tests/modules/base_modules/test_combine_uncertainties.py +105 -0
  88. modacor/tests/modules/base_modules/test_combine_uncertainties_max.py +109 -0
  89. modacor/tests/modules/base_modules/test_divide.py +140 -0
  90. modacor/tests/modules/base_modules/test_find_scale_factor1d.py +220 -0
  91. modacor/tests/modules/base_modules/test_multiply.py +113 -0
  92. modacor/tests/modules/base_modules/test_multiply_databundles.py +136 -0
  93. modacor/tests/modules/base_modules/test_poisson_uncertainties.py +61 -0
  94. modacor/tests/modules/base_modules/test_reduce_dimensionality.py +358 -0
  95. modacor/tests/modules/base_modules/test_sink_processing_data.py +119 -0
  96. modacor/tests/modules/base_modules/test_subtract.py +111 -0
  97. modacor/tests/modules/base_modules/test_subtract_databundles.py +136 -0
  98. modacor/tests/modules/base_modules/test_units_label_update.py +91 -0
  99. modacor/tests/modules/technique_modules/__init__.py +0 -0
  100. modacor/tests/modules/technique_modules/scattering/__init__.py +0 -0
  101. modacor/tests/modules/technique_modules/scattering/test_geometry_helpers.py +198 -0
  102. modacor/tests/modules/technique_modules/scattering/test_index_pixels.py +426 -0
  103. modacor/tests/modules/technique_modules/scattering/test_indexed_averaging.py +559 -0
  104. modacor/tests/modules/technique_modules/scattering/test_pixel_coordinates_3d.py +282 -0
  105. modacor/tests/modules/technique_modules/scattering/test_xs_geometry_from_pixel_coordinates.py +224 -0
  106. modacor/tests/modules/technique_modules/scattering/test_xsgeometry.py +635 -0
  107. modacor/tests/requirements.txt +12 -0
  108. modacor/tests/runner/test_pipeline.py +438 -0
  109. modacor/tests/runner/test_process_step_registry.py +65 -0
  110. modacor/tests/test_import.py +43 -0
  111. modacor/tests/test_modacor.py +17 -0
  112. modacor/tests/test_units.py +79 -0
  113. modacor/units.py +97 -0
  114. modacor-1.0.0.dist-info/METADATA +482 -0
  115. modacor-1.0.0.dist-info/RECORD +120 -0
  116. modacor-1.0.0.dist-info/WHEEL +5 -0
  117. modacor-1.0.0.dist-info/licenses/AUTHORS.md +11 -0
  118. modacor-1.0.0.dist-info/licenses/LICENSE +11 -0
  119. modacor-1.0.0.dist-info/licenses/LICENSE.txt +11 -0
  120. modacor-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,23 @@
1
+ # src/modacor/dataclasses/databundle.py
2
+ # -*- coding: utf-8 -*-
3
+ __author__ = "Jerome Kieffer"
4
+ __copyright__ = "MoDaCor team"
5
+ __license__ = "BSD3"
6
+ __date__ = "21/05/2025"
7
+ __version__ = "20250521.1"
8
+ __status__ = "Production" # "Development", "Production"
9
+ # end of header and standard imports
10
+
11
+
12
+ class DataBundle(dict):
13
+ """
14
+ DataBundle is a specialized data class for storing related data.
15
+ It contains a dictionary of BaseData data elements, for example Signal,
16
+ a wavelength and flux spectrum, Qx, Qy, Qz, Psi, etc. Process steps can
17
+ add further BaseData objects to this bundle.
18
+
19
+ """
20
+
21
+ description: str | None = None
22
+ # as per NXcanSAS, tells which basedata to plot
23
+ default_plot: str | None = None
@@ -0,0 +1,45 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Brian R. Pauw", "Armin Moser"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+ from modacor import ureg
15
+ from modacor.dataclasses.basedata import BaseData
16
+ from modacor.io.io_sources import IoSources
17
+
18
+ __all__ = ["basedata_from_sources"]
19
+
20
+
21
def basedata_from_sources(
    io_sources: IoSources,
    signal_source: str,
    units_source: str | None = None,
    uncertainty_sources: dict[str, str] | None = None,
) -> BaseData:
    """Helper function to build a BaseData object from IoSources.

    Parameters
    ----------
    io_sources : IoSources
        The IoSources object to load data from.
    signal_source : str
        The source key for the signal data.
    units_source : str | None, optional
        The source key for the units data, by default None (dimensionless).
        For io sources that support attributes, the units can also be stored
        as an attribute. In that case, it can be specified by
        'key to the dataset@[units_attribute_name]'.
    uncertainty_sources : dict[str, str] | None, optional
        A dictionary mapping uncertainty names to their source keys,
        by default None (no uncertainties).

    Returns
    -------
    BaseData
        BaseData assembled from the loaded signal, units and uncertainties.
    """
    # ``None`` default instead of a shared mutable ``{}`` avoids the classic
    # mutable-default-argument pitfall.
    if uncertainty_sources is None:
        uncertainty_sources = {}
    signal = io_sources.get_data(signal_source)
    # fall back to dimensionless units when no units source is specified
    units = (
        ureg.Unit(io_sources.get_static_metadata(units_source))
        if units_source is not None
        else ureg.dimensionless
    )
    uncertainties = {name: io_sources.get_data(source) for name, source in uncertainty_sources.items()}
    return BaseData(signal=signal, units=units, uncertainties=uncertainties)
@@ -0,0 +1,75 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Malte Storm", "Tim Snow", "Brian R. Pauw"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+
15
+ import logging
16
+
17
+ _default_handler: MessageHandler | None = None
18
+
19
+
20
def get_default_handler(level: int = logging.INFO) -> MessageHandler:
    """
    MoDaCor-wide default message handler, created lazily on first call.

    Useful for overarching logging like in the pipeline runner. For specific
    modules or classes, it's better to create dedicated named MessageHandler
    instances.  Note: ``level`` only takes effect on the call that creates
    the singleton; later calls return the existing handler unchanged.
    """
    global _default_handler
    if _default_handler is not None:
        return _default_handler
    _default_handler = MessageHandler(level=level, name="MoDaCor")
    return _default_handler
29
+
30
+
31
class MessageHandler:
    """
    A simple class to handle logging messages at different levels.
    This class should be replaced to match the messaging system used at a given location.

    Args:
        level (int): The logging level to use. Defaults to logging.INFO.
        name (str): Logger name (typically __name__).
        **kwargs: Ignored; accepted for forward compatibility with
            alternative messaging backends.
    """

    def __init__(self, level: int = logging.INFO, name: str = "MoDaCor", **kwargs):
        self.level = level
        self.name = name

        self.logger = logging.getLogger(name)
        self.logger.setLevel(level)

        # Avoid adding multiple console handlers if this handler is created
        # multiple times for the same logger name.
        if not any(isinstance(h, logging.StreamHandler) for h in self.logger.handlers):
            console_handler = logging.StreamHandler()
            console_handler.setLevel(level)

            formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
            console_handler.setFormatter(formatter)
            self.logger.addHandler(console_handler)

    def log(self, message: str, level: int | None = None) -> None:
        """Log *message* at *level*; falls back to this handler's own level.

        Note: annotation fixed from ``int`` to ``int | None`` — ``None`` is
        a valid (and the default) argument value.
        """
        if level is None:
            level = self.level
        self.logger.log(msg=message, level=level)

    def info(self, message: str) -> None:
        """Log an INFO-level message."""
        self.log(message, level=logging.INFO)

    def warning(self, message: str) -> None:
        """Log a WARNING-level message."""
        self.log(message, level=logging.WARNING)

    def error(self, message: str) -> None:
        """Log an ERROR-level message."""
        self.log(message, level=logging.ERROR)

    def critical(self, message: str) -> None:
        """Log a CRITICAL-level message."""
        self.log(message, level=logging.CRITICAL)

    def debug(self, message: str) -> None:
        """Log a DEBUG-level message."""
        self.log(message, level=logging.DEBUG)
@@ -0,0 +1,233 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Malte Storm", "Tim Snow", "Brian R. Pauw", "Anja Hörmann"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+ __version__ = "20251121.1"
14
+
15
+ from abc import abstractmethod
16
+ from numbers import Integral
17
+ from pathlib import Path
18
+ from typing import Any, Iterable, Type
19
+
20
+ from attrs import define, field
21
+ from attrs import validators as v
22
+
23
+ from ..io.io_sinks import IoSinks
24
+ from ..io.io_sources import IoSources
25
+ from .databundle import DataBundle
26
+ from .messagehandler import MessageHandler
27
+ from .process_step_describer import ProcessStepDescriber
28
+ from .processing_data import ProcessingData
29
+
30
+ # from .validators import is_list_of_ints
31
+
32
+
33
@define(eq=False)
class ProcessStep:
    """A base class defining a processing step.

    Subclasses implement :meth:`calculate`; :meth:`execute` attaches the
    incoming :class:`ProcessingData`, runs one-time preparation and merges
    the produced DataBundles back into the processing data.
    """

    # Schema for the configuration keys common to all process steps:
    # expected value type, whether iterables / None are accepted, and the
    # default value used by default_config().
    CONFIG_KEYS = {
        "with_processing_keys": {
            "type": str,
            "allow_iterable": True,
            "allow_none": True,
            "default": None,
        },
        "output_processing_key": {
            "type": str,
            "allow_iterable": False,
            "allow_none": True,
            "default": None,
        },
    }

    # three input items for the process step. For backward compatibility, the first is io_sources
    # The configuration keys for the process step instantiation
    io_sources: IoSources | None = field(default=None, validator=v.optional(v.instance_of(IoSources)))
    # the processing data to work on
    processing_data: ProcessingData = field(default=None, validator=v.optional(v.instance_of(ProcessingData)))
    # optional IO sinks if needed
    io_sinks: IoSinks | None = field(default=None, validator=v.optional(v.instance_of(IoSinks)))

    # class attribute for a machine-readable description of the process step
    documentation = ProcessStepDescriber(
        calling_name="Generic Process step",
        calling_id="",  # to be filled in by the process
        calling_module_path=Path(__file__),
        calling_version=__version__,
    )

    # dynamic instance configuration
    configuration: dict = field(
        factory=dict,
        validator=lambda inst, attrs, val: ProcessStep.is_process_step_dict(inst, attrs.name if attrs else None, val),
    )

    # flags and attributes for running the pipeline
    requires_steps: list[str] = field(factory=list)
    step_id: int | str = field(default=-1, validator=v.instance_of((Integral, str)))
    executed: bool = field(default=False, validator=v.instance_of(bool))
    short_title: str | None = field(default=None, validator=v.optional(v.instance_of(str)))

    # if the process produces intermediate arrays, they are stored here, optionally cached
    produced_outputs: dict[str, Any] = field(factory=dict)
    # intermediate prepared data for the process step
    _prepared_data: dict[str, Any] = field(factory=dict)

    # a message handler, supporting logging, warnings, errors, etc. emitted by the process
    # during execution
    logger: MessageHandler = field(factory=MessageHandler, validator=v.instance_of(MessageHandler))

    # internal flag: True once prepare_execution() has run
    __prepared: bool = field(default=False, validator=v.instance_of(bool))

    def __attrs_post_init__(self):
        """
        Post-initialization: seed the configuration from the class defaults,
        then overlay the documented argument defaults.
        """
        self.configuration = self.default_config()
        self.configuration.update(self.documentation.initial_configuration())

    def __call__(self, processing_data: ProcessingData) -> None:
        """Allow the process step to be called like a function."""
        self.execute(processing_data)

    def __hash__(self) -> int:
        # identity-based hash: with eq=False attrs does not generate
        # __eq__/__hash__, so instances behave like plain objects
        return object.__hash__(self)

    def prepare_execution(self):
        """
        Prepare the execution of the ProcessStep.

        This method can be used to run any costly setup code that is needed
        once before the process step can be executed.
        """
        pass

    def _normalised_processing_keys(self, cfg_key: str = "with_processing_keys") -> list[str]:
        """
        Normalize a ProcessingData key selection into a non-empty list of strings.

        Behavior:
        - None: if processing_data has exactly one key, use it; otherwise error.
        - str: wrap into a one-item list.
        - iterable: materialize into a list (must be non-empty).

        Raises
        ------
        RuntimeError
            If processing_data has not been attached yet.
        ValueError
            If the selection cannot be resolved to a non-empty list of keys.
        """
        if self.processing_data is None:
            raise RuntimeError(f"{self.__class__.__name__}: processing_data is None in _normalised_processing_keys.")

        cfg_value = self.configuration.get(cfg_key, None)

        if cfg_value is None:
            if len(self.processing_data) == 0:
                raise ValueError(f"{self.__class__.__name__}: {cfg_key} is None and processing_data is empty.")
            if len(self.processing_data) == 1:
                only_key = next(iter(self.processing_data.keys()))
                self.logger.info(
                    f"{self.__class__.__name__}: {cfg_key} not set; using the only key {only_key!r}."  # noqa: E702
                )
                return [only_key]
            raise ValueError(f"{self.__class__.__name__}: {cfg_key} is None but multiple databundles are present.")

        if isinstance(cfg_value, str):
            return [cfg_value]

        try:
            keys = list(cfg_value)
        except TypeError as exc:  # not iterable
            raise ValueError(
                f"{self.__class__.__name__}: {cfg_key} must be a string, an iterable of strings, or None."
            ) from exc

        if not keys:
            raise ValueError(f"{self.__class__.__name__}: {cfg_key} must not be an empty list.")
        return keys

    @abstractmethod
    def calculate(self) -> dict[str, DataBundle]:
        """Calculate the process step on the given data."""
        raise NotImplementedError("Subclasses must implement this method")

    def execute(self, data: ProcessingData) -> None:
        """Execute the process step on the given data, merging produced
        DataBundles back into *data* (updating existing bundles in place)."""
        self.processing_data = data
        if not self.__prepared:
            self.prepare_execution()
            self.__prepared = True
        self.produced_outputs = self.calculate()
        for _key, value in self.produced_outputs.items():
            if _key in data:
                data[_key].update(value)
            else:
                data[_key] = value
        self.executed = True

    def reset(self):
        """Reset the process step to its initial state."""
        self.__prepared = False
        self.executed = False
        self.produced_outputs = {}
        self._prepared_data = {}

    def modify_config_by_dict(self, by_dict: dict | None = None) -> None:
        """Modify the configuration of the process step by a dictionary.

        Unknown keys (not in the configuration nor among the documented
        arguments) raise a KeyError.  Any change invalidates the prepared
        state so prepare_execution() runs again on the next execute().
        """
        # ``None`` default (instead of a shared mutable ``{}``) fixes the
        # mutable-default-argument pitfall; behaviour is otherwise unchanged.
        if by_dict is None:
            by_dict = {}
        for key, value in by_dict.items():
            if key in self.configuration:
                self.configuration[key] = value
            elif key in self.documentation.arguments:
                # Allow setting documented arguments even if they were not part of the
                # current configuration snapshot yet.
                self.configuration[key] = value
            else:
                known_keys = ", ".join(sorted(self.configuration.keys()))
                raise KeyError(f"Key {key} not found in configuration. Known keys: {known_keys}")  # noqa
        # restart preparation after configuration change:
        self.__prepared = False

    def modify_config_by_kwargs(self, **kwargs) -> None:
        """Modify the configuration of the process step by keyword arguments."""
        if kwargs:
            self.modify_config_by_dict(kwargs)

    @classmethod
    def is_process_step_dict(cls, instance: Type | None, attribute: str | None, item: Any) -> bool:
        """
        Check if the value is a dictionary with the correct keys and types
        according to CONFIG_KEYS (used as an attrs validator).
        """
        if not isinstance(item, dict):
            return False
        for _key, _value in item.items():
            if _key not in cls.CONFIG_KEYS:
                return False
            _config = cls.CONFIG_KEYS[_key]
            if _value is None:
                if _config["allow_none"]:
                    continue
                return False
            # strings are iterable but are treated as scalar values here
            if isinstance(_value, Iterable) and not isinstance(_value, str):
                if not (_config["allow_iterable"] and all([isinstance(_i, _config["type"]) for _i in _value])):
                    return False
                continue
            if not isinstance(_value, _config["type"]):
                return False
        return True

    @classmethod
    def default_config(cls) -> dict[str, Any]:
        """
        Create an initial dictionary for the process step configuration.
        """
        return {_k: _v["default"] for _k, _v in cls.CONFIG_KEYS.items()}
@@ -0,0 +1,146 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Malte Storm", "Jérôme Kieffer", "Brian R. Pauw"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+ from copy import deepcopy
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from attrs import define, evolve, field
19
+ from attrs import validators as v
20
+
21
+ __all__ = ["ProcessStepDescriber"]
22
+
23
+
24
+ NXCite = str
25
+ ArgumentSpec = dict[str, Any]
26
+
27
+
28
+ _MISSING = object()
29
+
30
+
31
+ def _normalize_str_list(value: Any, field_name: str) -> list[str]:
32
+ if value is None:
33
+ return []
34
+ if isinstance(value, (str, dict)):
35
+ raise TypeError(f"{field_name} must be a list of strings, got {type(value).__name__}.")
36
+ if isinstance(value, (list, tuple, set)):
37
+ return [item.strip() if isinstance(item, str) else item for item in value]
38
+ raise TypeError(f"{field_name} must be a list of strings, got {type(value).__name__}.")
39
+
40
+
41
+ def _normalize_arguments(value: Any, field_name: str) -> dict[str, ArgumentSpec]:
42
+ if value is None:
43
+ return {}
44
+ if not isinstance(value, dict):
45
+ raise TypeError(f"{field_name} must be a mapping, got {type(value).__name__}.")
46
+
47
+ normalized: dict[str, ArgumentSpec] = {}
48
+ for key, spec in value.items():
49
+ if not isinstance(spec, dict):
50
+ raise TypeError(f"{field_name} entries must be mappings, got {type(spec).__name__} for key {key!r}.")
51
+ normalized_key = str(key).strip()
52
+ if not normalized_key:
53
+ raise ValueError(f"{field_name} keys must be non-empty strings.")
54
+
55
+ normalized_spec = dict(spec)
56
+ default = normalized_spec.get("default", _MISSING)
57
+ normalized_spec["default"] = default
58
+
59
+ required = normalized_spec.get("required", False)
60
+ if not isinstance(required, bool):
61
+ raise TypeError(f"{field_name}[{normalized_key!r}]['required'] must be a boolean.")
62
+ normalized_spec["required"] = required
63
+
64
+ normalized[normalized_key] = normalized_spec
65
+ return normalized
66
+
67
+
68
+ def _deepcopy_default(value: Any) -> Any:
69
+ return deepcopy(value)
70
+
71
+
72
@define
class ProcessStepDescriber:
    """Machine-readable description of a ProcessStep.

    Holds identifying metadata (name, id, module path, version), the schema
    of configurable arguments, required data keys, and free-text/keyword
    documentation.  List- and mapping-valued fields are normalised on
    assignment via the module-level ``_normalize_str_list`` /
    ``_normalize_arguments`` converters.
    """

    calling_name: str = field()  # short name to identify the calling process for the UI
    calling_id: str = field()  # not sure what we were planning here. some UID perhaps? difference with calling_module
    calling_module_path: Path = field(
        validator=v.instance_of(Path)
    )  # partial path to the module from src/modacor/modules onwards
    calling_version: str = field()  # module version being executed
    required_data_keys: list[str] = field(
        factory=list,
        converter=lambda value: _normalize_str_list(value, "required_data_keys"),
        validator=v.deep_iterable(member_validator=v.instance_of(str), iterable_validator=v.instance_of(list)),
    )  # list of data keys required by the process
    arguments: dict[str, ArgumentSpec] = field(
        factory=dict,
        converter=lambda value: _normalize_arguments(value, "arguments"),
        validator=v.deep_mapping(key_validator=v.instance_of(str), value_validator=v.instance_of(dict)),
    )  # schema describing configurable arguments
    modifies: dict[str, list] = field(
        factory=dict, validator=v.instance_of(dict)
    )  # which aspects of BaseData are modified by this
    step_keywords: list[str] = field(
        factory=list,
        converter=lambda value: _normalize_str_list(value, "step_keywords"),
        validator=v.deep_iterable(member_validator=v.instance_of(str), iterable_validator=v.instance_of(list)),
    )  # list of keywords that can be used to identify the process (allowing for searches)
    step_doc: str = field(default="")  # documentation for the process
    step_reference: NXCite = field(default="")  # NXCite to the paper describing the process
    step_note: str | None = field(default=None)  # optional free-form note about the step
    # use_frames_cache: list[str] = field(factory=list)
    # # for produced_values dictionary key names in this list, the produced_values are cached
    # # on first run, and reused on subsequent runs. Maybe two chaches, one for per-file and
    # # one for per-execution.
    # use_overall_cache: list[str] = field(factory=list)
    # # for produced_values dictionary key names in this list, the produced_values are cached
    # # on first run, and reused on subsequent runs. Maybe two chaches, one for per-file and
    # # one for per-execution.

    def copy(self) -> "ProcessStepDescriber":
        """Return a copy of this describer via :func:`attrs.evolve`."""
        return evolve(self)

    def argument_names(self) -> tuple[str, ...]:
        """Return the names of all configurable arguments."""
        return tuple(self.arguments.keys())

    def required_argument_names(self) -> tuple[str, ...]:
        """Return the names of arguments whose spec marks them as required."""
        return tuple(name for name, spec in self.arguments.items() if spec.get("required", False))

    def initial_configuration(self) -> dict[str, Any]:
        """Build the initial configuration dict from the argument defaults.

        Arguments without a declared default map to ``None``; declared
        defaults are deep-copied so instances never share mutable state.
        """
        configuration: dict[str, Any] = {}
        for name, spec in self.arguments.items():
            default = spec.get("default", _MISSING)
            if default is _MISSING:
                configuration[name] = None
            else:
                configuration[name] = _deepcopy_default(default)
        return configuration

    @classmethod
    def from_module(
        cls,
        *,
        calling_name: str,
        calling_id: str,
        module_file: str | Path,
        version: str,
        **kwargs: Any,
    ) -> "ProcessStepDescriber":
        """Convenience constructor that normalises ``module_file`` to :class:`Path`."""
        return cls(
            calling_name=calling_name,
            calling_id=calling_id,
            calling_module_path=Path(module_file),
            calling_version=version,
            **kwargs,
        )
@@ -0,0 +1,59 @@
1
+ # SPDX-License-Identifier: BSD-3-Clause
2
+ # /usr/bin/env python3
3
+ # -*- coding: utf-8 -*-
4
+
5
+ from __future__ import annotations
6
+
7
+ __coding__ = "utf-8"
8
+ __authors__ = ["Malte Storm", "Brian R. Pauw"] # add names to the list as appropriate
9
+ __copyright__ = "Copyright 2025, The MoDaCor team"
10
+ __date__ = "16/11/2025"
11
+ __status__ = "Development" # "Development", "Production"
12
+ # end of header and standard imports
13
+
14
+ from typing import Any
15
+
16
+ from modacor.dataclasses.databundle import DataBundle
17
+
18
+
19
class ProcessingData(dict):
    """
    Dictionary-like container mapping string keys to DataBundle references.
    """

    def __setitem__(self, key: str, item: DataBundle | Any):
        """
        Assign a value to a dictionary key, enforcing types.

        Parameters
        ----------
        key : str
            The dictionary key.
        item : DataBundle | Any
            The value / object to be added to the dictionary.

        Raises
        ------
        TypeError
            If the item is not an instance of DataBundle, or the key is
            not a string.
        """
        # item is validated before key, matching the established behaviour
        if not isinstance(item, DataBundle):
            raise TypeError(f"Expected a DataBundle instance, got {type(item).__name__}.")
        if not isinstance(key, str):
            raise TypeError(f"Expected a string key, got {type(key).__name__}.")
        super().__setitem__(key, item)

    def __repr__(self):
        """
        Summarise every stored DataBundle, its datasets and uncertainties.
        """
        lines = []
        for bundle_key, bundle in self.items():
            lines.append(f"DataBundle '{bundle_key}': contains datasets {list(bundle.keys())}")
            for dkey, dataset in bundle.items():
                lines.append(f" Dataset '{dkey}': shape {dataset.signal.shape}, units {dataset.units}")
                lines.append(f" available uncertainties: {list(dataset.uncertainties.keys())}")
        return "\n".join(lines)