modacor-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. modacor/__init__.py +30 -0
  2. modacor/dataclasses/__init__.py +0 -0
  3. modacor/dataclasses/basedata.py +973 -0
  4. modacor/dataclasses/databundle.py +23 -0
  5. modacor/dataclasses/helpers.py +45 -0
  6. modacor/dataclasses/messagehandler.py +75 -0
  7. modacor/dataclasses/process_step.py +233 -0
  8. modacor/dataclasses/process_step_describer.py +146 -0
  9. modacor/dataclasses/processing_data.py +59 -0
  10. modacor/dataclasses/trace_event.py +118 -0
  11. modacor/dataclasses/uncertainty_tools.py +132 -0
  12. modacor/dataclasses/validators.py +84 -0
  13. modacor/debug/pipeline_tracer.py +548 -0
  14. modacor/io/__init__.py +33 -0
  15. modacor/io/csv/__init__.py +0 -0
  16. modacor/io/csv/csv_sink.py +114 -0
  17. modacor/io/csv/csv_source.py +210 -0
  18. modacor/io/hdf/__init__.py +27 -0
  19. modacor/io/hdf/hdf_source.py +120 -0
  20. modacor/io/io_sink.py +41 -0
  21. modacor/io/io_sinks.py +61 -0
  22. modacor/io/io_source.py +164 -0
  23. modacor/io/io_sources.py +208 -0
  24. modacor/io/processing_path.py +113 -0
  25. modacor/io/tiled/__init__.py +16 -0
  26. modacor/io/tiled/tiled_source.py +403 -0
  27. modacor/io/yaml/__init__.py +27 -0
  28. modacor/io/yaml/yaml_source.py +116 -0
  29. modacor/modules/__init__.py +53 -0
  30. modacor/modules/base_modules/__init__.py +0 -0
  31. modacor/modules/base_modules/append_processing_data.py +329 -0
  32. modacor/modules/base_modules/append_sink.py +141 -0
  33. modacor/modules/base_modules/append_source.py +181 -0
  34. modacor/modules/base_modules/bitwise_or_masks.py +113 -0
  35. modacor/modules/base_modules/combine_uncertainties.py +120 -0
  36. modacor/modules/base_modules/combine_uncertainties_max.py +105 -0
  37. modacor/modules/base_modules/divide.py +82 -0
  38. modacor/modules/base_modules/find_scale_factor1d.py +373 -0
  39. modacor/modules/base_modules/multiply.py +77 -0
  40. modacor/modules/base_modules/multiply_databundles.py +73 -0
  41. modacor/modules/base_modules/poisson_uncertainties.py +69 -0
  42. modacor/modules/base_modules/reduce_dimensionality.py +252 -0
  43. modacor/modules/base_modules/sink_processing_data.py +80 -0
  44. modacor/modules/base_modules/subtract.py +80 -0
  45. modacor/modules/base_modules/subtract_databundles.py +67 -0
  46. modacor/modules/base_modules/units_label_update.py +66 -0
  47. modacor/modules/instrument_modules/__init__.py +0 -0
  48. modacor/modules/instrument_modules/readme.md +9 -0
  49. modacor/modules/technique_modules/__init__.py +0 -0
  50. modacor/modules/technique_modules/scattering/__init__.py +0 -0
  51. modacor/modules/technique_modules/scattering/geometry_helpers.py +114 -0
  52. modacor/modules/technique_modules/scattering/index_pixels.py +492 -0
  53. modacor/modules/technique_modules/scattering/indexed_averager.py +628 -0
  54. modacor/modules/technique_modules/scattering/pixel_coordinates_3d.py +417 -0
  55. modacor/modules/technique_modules/scattering/solid_angle_correction.py +63 -0
  56. modacor/modules/technique_modules/scattering/xs_geometry.py +571 -0
  57. modacor/modules/technique_modules/scattering/xs_geometry_from_pixel_coordinates.py +293 -0
  58. modacor/runner/__init__.py +0 -0
  59. modacor/runner/pipeline.py +749 -0
  60. modacor/runner/process_step_registry.py +224 -0
  61. modacor/tests/__init__.py +27 -0
  62. modacor/tests/dataclasses/test_basedata.py +519 -0
  63. modacor/tests/dataclasses/test_basedata_operations.py +439 -0
  64. modacor/tests/dataclasses/test_basedata_to_base_units.py +57 -0
  65. modacor/tests/dataclasses/test_process_step_describer.py +73 -0
  66. modacor/tests/dataclasses/test_processstep.py +282 -0
  67. modacor/tests/debug/test_tracing_integration.py +188 -0
  68. modacor/tests/integration/__init__.py +0 -0
  69. modacor/tests/integration/test_pipeline_run.py +238 -0
  70. modacor/tests/io/__init__.py +27 -0
  71. modacor/tests/io/csv/__init__.py +0 -0
  72. modacor/tests/io/csv/test_csv_source.py +156 -0
  73. modacor/tests/io/hdf/__init__.py +27 -0
  74. modacor/tests/io/hdf/test_hdf_source.py +92 -0
  75. modacor/tests/io/test_io_sources.py +119 -0
  76. modacor/tests/io/tiled/__init__.py +12 -0
  77. modacor/tests/io/tiled/test_tiled_source.py +120 -0
  78. modacor/tests/io/yaml/__init__.py +27 -0
  79. modacor/tests/io/yaml/static_data_example.yaml +26 -0
  80. modacor/tests/io/yaml/test_yaml_source.py +47 -0
  81. modacor/tests/modules/__init__.py +27 -0
  82. modacor/tests/modules/base_modules/__init__.py +27 -0
  83. modacor/tests/modules/base_modules/test_append_processing_data.py +219 -0
  84. modacor/tests/modules/base_modules/test_append_sink.py +76 -0
  85. modacor/tests/modules/base_modules/test_append_source.py +180 -0
  86. modacor/tests/modules/base_modules/test_bitwise_or_masks.py +264 -0
  87. modacor/tests/modules/base_modules/test_combine_uncertainties.py +105 -0
  88. modacor/tests/modules/base_modules/test_combine_uncertainties_max.py +109 -0
  89. modacor/tests/modules/base_modules/test_divide.py +140 -0
  90. modacor/tests/modules/base_modules/test_find_scale_factor1d.py +220 -0
  91. modacor/tests/modules/base_modules/test_multiply.py +113 -0
  92. modacor/tests/modules/base_modules/test_multiply_databundles.py +136 -0
  93. modacor/tests/modules/base_modules/test_poisson_uncertainties.py +61 -0
  94. modacor/tests/modules/base_modules/test_reduce_dimensionality.py +358 -0
  95. modacor/tests/modules/base_modules/test_sink_processing_data.py +119 -0
  96. modacor/tests/modules/base_modules/test_subtract.py +111 -0
  97. modacor/tests/modules/base_modules/test_subtract_databundles.py +136 -0
  98. modacor/tests/modules/base_modules/test_units_label_update.py +91 -0
  99. modacor/tests/modules/technique_modules/__init__.py +0 -0
  100. modacor/tests/modules/technique_modules/scattering/__init__.py +0 -0
  101. modacor/tests/modules/technique_modules/scattering/test_geometry_helpers.py +198 -0
  102. modacor/tests/modules/technique_modules/scattering/test_index_pixels.py +426 -0
  103. modacor/tests/modules/technique_modules/scattering/test_indexed_averaging.py +559 -0
  104. modacor/tests/modules/technique_modules/scattering/test_pixel_coordinates_3d.py +282 -0
  105. modacor/tests/modules/technique_modules/scattering/test_xs_geometry_from_pixel_coordinates.py +224 -0
  106. modacor/tests/modules/technique_modules/scattering/test_xsgeometry.py +635 -0
  107. modacor/tests/requirements.txt +12 -0
  108. modacor/tests/runner/test_pipeline.py +438 -0
  109. modacor/tests/runner/test_process_step_registry.py +65 -0
  110. modacor/tests/test_import.py +43 -0
  111. modacor/tests/test_modacor.py +17 -0
  112. modacor/tests/test_units.py +79 -0
  113. modacor/units.py +97 -0
  114. modacor-1.0.0.dist-info/METADATA +482 -0
  115. modacor-1.0.0.dist-info/RECORD +120 -0
  116. modacor-1.0.0.dist-info/WHEEL +5 -0
  117. modacor-1.0.0.dist-info/licenses/AUTHORS.md +11 -0
  118. modacor-1.0.0.dist-info/licenses/LICENSE +11 -0
  119. modacor-1.0.0.dist-info/licenses/LICENSE.txt +11 -0
  120. modacor-1.0.0.dist-info/top_level.txt +1 -0
modacor/io/csv/csv_source.py ADDED
@@ -0,0 +1,210 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ __coding__ = "utf-8"
+ __authors__ = ["Brian R. Pauw"]
+ __copyright__ = "Copyright 2025, The MoDaCor team"
+ __date__ = "12/12/2025"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ __all__ = ["CSVSource"]
+
+ from collections.abc import Callable
+ from pathlib import Path
+ from typing import Any
+
+ import numpy as np
+ from attrs import define, field, validators
+
+ from modacor.dataclasses.messagehandler import MessageHandler
+ from modacor.io.io_source import ArraySlice
+
+ from ..io_source import IoSource
+
+
+ def _is_callable(_, __, value):
+     if not callable(value):
+         raise TypeError("method must be callable")
+
+
+ @define(kw_only=True)
+ class CSVSource(IoSource):
+     """
+     IoSource for loading columnar data from CSV-like text files using NumPy's
+     loadtxt or genfromtxt.
+
+     Expected usage
+     --------------
+     - Data is 1D per column (no multi-dimensional fields).
+     - Columns are returned as 1D arrays; each column corresponds to one data_key.
+     - For np.loadtxt, column names must be provided via a dtype with field names, e.g.:
+           dtype=[("q", float), ("I", float), ("I_sigma", float)]
+     - For np.genfromtxt, column names come from the first row or are specified explicitly via the `names` parameter. Typical patterns:
+           np.genfromtxt(..., names=True, delimiter=..., ...)    # use first row as names
+           np.genfromtxt(..., names=["q", "I", "I_sigma"], ...)  # specify names explicitly
+       Either way, the columns can be clearly identified later.
+
+     Configuration
+     -------------
+     `iosource_method_kwargs` is passed directly to the NumPy function `method`.
+     This allows you to use all standard NumPy options, e.g.:
+
+     For np.genfromtxt:
+         delimiter=","
+         skip_header=3
+         max_rows=1000
+         usecols=(0, 1, 2)
+         names=True or names=["q", "I", "sigma"]
+         dtype=None or dtype=float
+         encoding="utf-8"
+         comments="#"
+         ...
+
+     For np.loadtxt:
+         delimiter=","
+         skiprows=3
+         max_rows=1000
+         usecols=(0, 1, 2)
+         dtype=float
+         encoding="utf-8"
+         comments="#"
+         ...
+
+     Notes
+     -----
+     - 2D arrays (no field names) are not supported in this implementation.
+       If the resulting array does not have `dtype.names`, a ValueError is raised.
+     """
+
+     # external API:
+     resource_location: Path = field(converter=Path, validator=validators.instance_of(Path))
+     method: Callable[..., np.ndarray] = field(
+         default=np.genfromtxt, validator=_is_callable
+     )  # default to genfromtxt, better for names
+     # internal use (type hints; real values set per-instance)
+     _data_cache: np.ndarray | None = field(init=False, default=None)
+     _data_dict_cache: dict[str, np.ndarray] = field(factory=dict)
+     _file_datasets_dtypes: dict[str, np.dtype] = field(init=False)
+     _file_datasets_shapes: dict[str, tuple[int, ...]] = field(init=False)
+     logger: MessageHandler = field(init=False)
+
+     def __attrs_post_init__(self) -> None:
+         # super().__init__(source_reference=self.source_reference, iosource_method_kwargs=self.iosource_method_kwargs)
+         self.logger = MessageHandler(level=self.logging_level, name="CSVSource")
+         # Check that the file exists
+         if not self.resource_location.is_file():
+             self.logger.error(f"CSVSource: file {self.resource_location} does not exist.")
+
+         # Bookkeeping structures for the IoSource API
+         self._file_datasets_shapes: dict[str, tuple[int, ...]] = {}
+         self._file_datasets_dtypes: dict[str, np.dtype] = {}
+
+         # Load and preprocess data immediately
+         self._load_data()
+         self._preload()
+
+     # ------------------------------------------------------------------ #
+     # Internal loading / preprocessing                                    #
+     # ------------------------------------------------------------------ #
+
+     def _load_data(self) -> None:
+         """
+         Load the CSV data into a structured NumPy array using the configured
+         method (np.genfromtxt or np.loadtxt).
+
+         iosource_method_kwargs are passed directly to that method.
+         """
+         self.logger.info(
+             f"CSVSource loading data from {self.resource_location} "
+             f"using {self.method.__name__} with options: {self.iosource_method_kwargs}"
+         )
+
+         try:
+             self._data_cache = self.method(self.resource_location, **self.iosource_method_kwargs)
+         except Exception as exc:  # noqa: BLE001
+             self.logger.error(f"Error while loading CSV data from {self.resource_location}: {exc}")
+             raise
+
+         if self._data_cache is None:
+             raise ValueError(f"CSVSource: no data loaded from file {self.resource_location}.")
+         # Ensure we have a structured array with named fields
+         if self._data_cache.dtype.names is None:
+             raise ValueError(
+                 "CSVSource expected a structured array with named fields, "
+                 "but dtype.names is None.\n"
+                 "Hint: use np.genfromtxt with 'names=True' or 'names=[...]', "
+                 "or provide an appropriate 'dtype' with field names."
+             )
+
+     def _preload(self) -> None:
+         """
+         Populate dataset lists, shapes, and dtypes from the structured array.
+         """
+         assert self._data_cache is not None  # for type checkers
+
+         self._data_dict_cache = {}
+         self._file_datasets_shapes.clear()
+         self._file_datasets_dtypes.clear()
+
+         for name in self._data_cache.dtype.names:
+             column = self._data_cache[name]
+             self._data_dict_cache[name] = column
+             self._file_datasets_shapes[name] = column.shape
+             self._file_datasets_dtypes[name] = column.dtype
+
+         self.logger.info(f"CSVSource loaded datasets: {list(self._file_datasets_shapes.keys())}")
+
+     # ------------------------------------------------------------------ #
+     # IoSource API                                                        #
+     # ------------------------------------------------------------------ #
+
+     def get_static_metadata(self, data_key: str) -> None:
+         """
+         CSVSource does not support static metadata; always returns None.
+         """
+         self.logger.warning(
+             f"You asked for static metadata '{data_key}', but CSVSource does not support static metadata."
+         )
+         return None
+
+     def get_data(self, data_key: str, load_slice: ArraySlice = ...) -> np.ndarray:
+         """
+         Return the data column corresponding to `data_key`, cast to float, with `load_slice` applied.
+
+         - `data_key` must match one of the field names in the structured array.
+         - `load_slice` is applied to that 1D column (e.g. an ellipsis, a slice, or an array of indices).
+         """
+         if self._data_cache is None:
+             raise RuntimeError("CSVSource data cache is empty; loading may have failed.")
+
+         try:
+             column = self._data_dict_cache[data_key]
+         except KeyError:
+             raise KeyError(
+                 f"Data key '{data_key}' not found in CSV data. Available keys: {list(self._data_dict_cache.keys())}"
+             ) from None
+
+         return np.asarray(column[load_slice]).astype(float)
+
+     def get_data_shape(self, data_key: str) -> tuple[int, ...]:
+         if data_key in self._file_datasets_shapes:
+             return self._file_datasets_shapes[data_key]
+         return ()
+
+     def get_data_dtype(self, data_key: str) -> np.dtype | None:
+         if data_key in self._file_datasets_dtypes:
+             return self._file_datasets_dtypes[data_key]
+         return None
+
+     def get_data_attributes(self, data_key: str) -> dict[str, Any]:
+         """
+         CSV has no per-dataset attributes; returns a dict mapping data_key to None.
+         """
+         self.logger.warning(
+             f"You asked for attributes of '{data_key}', but CSVSource does not support data attributes."
+         )
+         return {data_key: None}
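
For orientation, a minimal usage sketch of the CSVSource defined above. The file name "data.csv", the column names, and the source_reference value are illustrative assumptions, not part of the package.

    # Hypothetical usage sketch (assumed file "data.csv" with a header row "q,I,I_sigma"):
    import numpy as np
    from modacor.io.csv.csv_source import CSVSource

    source = CSVSource(
        source_reference="csv0",
        resource_location="data.csv",
        iosource_method_kwargs={"names": True, "delimiter": ",", "comments": "#"},
    )
    q = source.get_data("q")                       # full column as a float array
    intensity = source.get_data("I", np.s_[:100])  # only the first 100 rows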
modacor/io/hdf/__init__.py ADDED
@@ -0,0 +1,27 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ # Copyright 2025 MoDaCor Authors
+ #
+ # Redistribution and use in source and binary forms, with or without modification,
+ # are permitted provided that the following conditions are met:
+ # 1. Redistributions of source code must retain the above copyright notice, this
+ #    list of conditions and the following disclaimer.
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
+ #    this list of conditions and the following disclaimer in the documentation
+ #    and/or other materials provided with the distribution.
+ # 3. Neither the name of the copyright holder nor the names of its contributors
+ #    may be used to endorse or promote products derived from this software without
+ #    specific prior written permission.
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ __license__ = "BSD-3-Clause"
+ __copyright__ = "Copyright 2025 MoDaCor Authors"
+ __status__ = "Alpha"
modacor/io/hdf/hdf_source.py ADDED
@@ -0,0 +1,120 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ from typing import Any
+
+ __coding__ = "utf-8"
+ __authors__ = ["Tim Snow", "Brian R. Pauw"]
+ __copyright__ = "Copyright 2025, The MoDaCor team"
+ __date__ = "22/10/2025"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ __all__ = ["HDFSource"]
+
+ from pathlib import Path
+
+ import h5py
+ import numpy as np
+ from attrs import define, field, validators
+
+ from modacor.dataclasses.messagehandler import MessageHandler
+
+ # from modacor.dataclasses.basedata import BaseData
+ from modacor.io.io_source import ArraySlice
+
+ from ..io_source import IoSource
+
+
+ @define(kw_only=True)
+ class HDFSource(IoSource):
+     resource_location: Path | str | None = field(
+         init=True, default=None, validator=validators.optional(validators.instance_of((Path, str)))
+     )
+     _data_cache: dict[str, np.ndarray] = field(init=False, factory=dict, validator=validators.instance_of(dict))
+     _file_path: Path | None = field(
+         init=False, default=None, validator=validators.optional(validators.instance_of(Path))
+     )
+     _file_datasets_shapes: dict[str, tuple[int, ...]] = field(
+         init=False, factory=dict, validator=validators.instance_of(dict)
+     )
+     _file_datasets_dtypes: dict[str, np.dtype] = field(init=False, factory=dict, validator=validators.instance_of(dict))
+     _static_metadata_cache: dict[str, Any] = field(init=False, factory=dict, validator=validators.instance_of(dict))
+     logger: MessageHandler = field(init=False)
+
+     # source_reference comes from IoSource
+     # iosource_method_kwargs comes from IoSource
+
+     def __attrs_post_init__(self):
+         # super().__init__(source_reference=source_reference)
+         self.logger = MessageHandler(level=self.logging_level, name="HDFSource")
+         self._file_path = Path(self.resource_location) if self.resource_location is not None else None
+         # self._file_datasets = []
+         self._file_datasets_shapes = {}
+         self._file_datasets_dtypes = {}
+         self._data_cache = {}
+         self._static_metadata_cache = {}
+         self._preload()  # load the HDF5 file structure immediately so we have some information, but not the data
+
+     def _preload(self):
+         assert self._file_path.is_file(), self.logger.error(f"HDF5 file {self._file_path} does not exist.")
+         try:
+             with h5py.File(self._file_path, "r") as f:
+                 f.visititems(self._find_datasets)
+         except OSError as error:
+             self.logger.error(error)
+             raise OSError(error)
+
+     def _find_datasets(self, path_name, path_object):
+         """
+         An internal function used to walk the tree of an HDF5 file and record the
+         datasets found within.
+         """
+         if isinstance(path_object, h5py._hl.dataset.Dataset):
+             # self._file_datasets.append(path_name)
+             self._file_datasets_shapes[path_name] = path_object.shape
+             self._file_datasets_dtypes[path_name] = path_object.dtype
+
+     def get_static_metadata(self, data_key):
+         if data_key not in self._static_metadata_cache:
+             # if there's an "@" in the key, it's an attribute, so we need to split it
+             if "@" in data_key:
+                 dkey, akey = data_key.rsplit("@", 1)
+                 self._static_metadata_cache[data_key] = self.get_data_attributes(dkey).get(akey, None)
+             else:
+                 with h5py.File(self._file_path, "r") as f:
+                     value = f[data_key][()]
+                     # decode bytes to string if necessary
+                     if isinstance(value, bytes):
+                         value = value.decode("utf-8")
+                     self._static_metadata_cache[data_key] = value
+         return self._static_metadata_cache[data_key]
+
+     def get_data(self, data_key: str, load_slice: ArraySlice = ...) -> np.ndarray:
+         if data_key not in self._data_cache:
+             with h5py.File(self._file_path, "r") as f:
+                 data_array = f[data_key][load_slice]  # if load_slice is not None else f[data_key][()]
+                 self._data_cache[data_key] = np.array(data_array)
+         return self._data_cache[data_key]
+
+     def get_data_shape(self, data_key: str) -> tuple[int, ...]:
+         if data_key in self._file_datasets_shapes:
+             return self._file_datasets_shapes[data_key]
+         return ()
+
+     def get_data_dtype(self, data_key: str) -> np.dtype | None:
+         if data_key in self._file_datasets_dtypes:
+             return self._file_datasets_dtypes[data_key]
+         return None
+
+     def get_data_attributes(self, data_key: str) -> dict[str, Any]:
+         attributes = {}
+         with h5py.File(self._file_path, "r") as f:
+             if data_key in f:
+                 dataset = f[data_key]
+                 for attr_key in dataset.attrs:
+                     attributes[attr_key] = dataset.attrs[attr_key]
+         return attributes
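
Likewise, a brief usage sketch for HDFSource. The file name "experiment.h5" is an assumption for illustration; the dataset path mirrors the example given in the IoSource docstring further down.

    # Hypothetical usage sketch (assumed file "experiment.h5" containing a detector data stack):
    import numpy as np
    from modacor.io.hdf.hdf_source import HDFSource

    source = HDFSource(source_reference="hdf0", resource_location="experiment.h5")
    frame = source.get_data("/entry1/instrument/detector00/data", np.s_[0, :, :])   # first frame only
    units = source.get_static_metadata("/entry1/instrument/detector00/data@units")  # attribute lookup via '@'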
modacor/io/io_sink.py ADDED
@@ -0,0 +1,41 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ __coding__ = "utf-8"
+ __authors__ = ["Brian R. Pauw"]  # add names to the list as appropriate
+ __copyright__ = "Copyright 2026, The MoDaCor team"
+ __date__ = "09/01/2026"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ from logging import WARNING
+ from typing import Any
+
+ import attrs
+ from attrs import define, field
+
+
+ def default_config() -> dict[str, Any]:
+     return {}
+
+
+ @define
+ class IoSink:
+     """
+     Base class for IO sinks. Mirrors IoSource.
+
+     Sinks are registered with a resource_location (file/socket/etc.).
+     The routed write call passes an optional 'subpath' after '::', which may be empty.
+     """
+
+     configuration: dict[str, Any] = field(factory=default_config)
+     sink_reference: str = field(default="", converter=str, validator=attrs.validators.instance_of(str))
+     type_reference: str = "IoSink"
+     iosink_method_kwargs: dict[str, Any] = field(factory=dict, validator=attrs.validators.instance_of(dict))
+     logging_level: int = field(default=WARNING, validator=attrs.validators.instance_of(int))
+
+     def write(self, subpath: str, *args, **kwargs):
+         raise NotImplementedError("This method should be implemented in subclasses.")
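
To illustrate the contract, a rough sketch of a concrete sink follows; the TextFileSink class and its behaviour are hypothetical and not part of the package.

    # Hypothetical sink that appends each written item as a line of text; illustration only.
    from attrs import define
    from modacor.io.io_sink import IoSink

    @define
    class TextFileSink(IoSink):
        def write(self, subpath: str, *args, **kwargs):
            # sink_reference is used here as the output file path; subpath becomes a label prefix
            with open(self.sink_reference, "a", encoding="utf-8") as fh:
                for item in args:
                    fh.write(f"{subpath}: {item}\n")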
modacor/io/io_sinks.py ADDED
@@ -0,0 +1,61 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ __coding__ = "utf-8"
+ __authors__ = ["Brian R. Pauw"]  # add names to the list as appropriate
+ __copyright__ = "Copyright 2026, The MoDaCor team"
+ __date__ = "09/01/2026"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ __all__ = ["IoSinks"]
+
+ from typing import Any
+
+ from attrs import define, field
+
+ from modacor.io.io_sink import IoSink
+
+
+ @define
+ class IoSinks:
+     """
+     Registry for IoSink instances. Mirrors IoSources.
+     """
+
+     defined_sinks: dict[str, IoSink] = field(factory=dict)
+
+     def register_sink(self, sink: IoSink, sink_reference: str | None = None) -> None:
+         if not isinstance(sink, IoSink):
+             raise TypeError("sink must be an instance of IoSink")
+         if sink_reference is None:
+             sink_reference = sink.sink_reference
+         if not isinstance(sink_reference, str):
+             raise TypeError("sink_reference must be a string")
+         if sink_reference in self.defined_sinks:
+             raise ValueError(f"Sink {sink_reference} already registered.")
+         self.defined_sinks[sink_reference] = sink
+
+     def get_sink(self, sink_reference: str) -> IoSink:
+         if sink_reference not in self.defined_sinks:
+             raise KeyError(f"Sink {sink_reference} not registered.")
+         return self.defined_sinks[sink_reference]
+
+     def split_target_reference(self, target_reference: str) -> tuple[str, str]:
+         """
+         Split 'sink_ref::subpath'. The subpath may be empty (e.g. 'export_csv::').
+         """
+         _split = target_reference.split("::", 1)
+         if len(_split) != 2:
+             raise ValueError(
+                 "target_reference must be in the format 'sink_ref::subpath' with a double colon separator."
+             )
+         return _split[0], _split[1]
+
+     def write_data(self, target_reference: str, *args, **kwargs) -> Any:
+         sink_ref, subpath = self.split_target_reference(target_reference)
+         sink = self.get_sink(sink_ref)
+         return sink.write(subpath, *args, **kwargs)
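
A short sketch of how the registry routes writes; it reuses the hypothetical TextFileSink sketched after io_sink.py above, and the 'export_csv' reference echoes the example in the split_target_reference docstring.

    # Hypothetical wiring: register a sink, then write through the 'sink_ref::subpath' convention.
    from modacor.io.io_sinks import IoSinks

    sinks = IoSinks()
    sinks.register_sink(TextFileSink(sink_reference="export_csv"), sink_reference="export_csv")
    sinks.write_data("export_csv::results", "q, I, I_sigma")  # routed to TextFileSink.write("results", ...)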
modacor/io/io_source.py ADDED
@@ -0,0 +1,164 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ from __future__ import annotations
+
+ __coding__ = "utf-8"
+ __authors__ = ["Brian R. Pauw"]  # add names to the list as appropriate
+ __copyright__ = "Copyright 2025, The MoDaCor team"
+ __date__ = "14/06/2025"
+ __status__ = "Development"  # "Development", "Production"
+ # end of header and standard imports
+
+ from logging import WARNING
+ from typing import Any, Optional, Tuple, Union
+
+ import attrs
+ import numpy as np
+ from attrs import define, field
+
+ # for type hinting of slicing:
+ Index = Union[int, slice, type(Ellipsis)]
+ ArraySlice = Union[Index, Tuple[Index, ...]]
+
+
+ def default_config() -> dict[str, Any]:
+     """
+     Default configuration for the IoSource class.
+
+     Returns
+     -------
+     dict[str, Any] :
+         A dictionary containing the default configuration.
+     """
+     return {
+         "data_rank": 1,
+         "data_key": None,
+         "data_rank_dims": (0,),
+         "metadata_key": None,
+         "non_data_slicing": "",
+     }
+
+
+ @define
+ class IoSource:
+     """
+     IoSource is the base class for all IO sources in the MoDaCor framework.
+
+     It provides access to a specific IO source and its associated methods.
+
+     Required configuration keys are:
+
+     data_rank : int
+         The rank of the data.
+     data_key : str
+         The key to access the data.
+         Special note for get_static_metadata: if the key contains an @ character, the part before the @ is
+         considered the group/dataset path, and the part after the @ is considered the attribute name.
+     data_rank_dims : tuple[int]
+         The dimensions of the data rank.
+     non_data_slicing : str
+         Slicing information for non-data dimensions. This must be a
+         string that can be evaluated to a slice object. Multiple data
+         slices can be separated by a double semicolon ';;'.
+     """
+
+     configuration: dict[str, Any] = field(factory=default_config)
+     source_reference: str = field(default="", converter=str, validator=attrs.validators.instance_of(str))
+     type_reference: str = "IoSource"
+     # for passing extra kwargs to the data loading method if needed (e.g. csv_source)
+     iosource_method_kwargs: dict[str, Any] = field(factory=dict, validator=attrs.validators.instance_of(dict))
+     logging_level: int = field(default=WARNING, validator=attrs.validators.instance_of(int))
+
+     def get_data(self, data_key: str, load_slice: Optional[ArraySlice] = None) -> np.ndarray:
+         """
+         Get data from the IO source using the provided data key.
+
+         Parameters
+         ----------
+         data_key : str
+             The key to access the data, e.g. '/entry1/instrument/detector00/data'.
+         load_slice : Optional[ArraySlice]
+             A slice or tuple of slices to apply to the data. If None, the entire data is returned.
+             Slicing is not yet implemented in the base class, so this will raise NotImplementedError if used.
+             Consider using numpy.s_ or numpy.index_exp to simplify the slicing syntax.
+
+         Returns
+         -------
+         np.ndarray :
+             The data array associated with the provided key. For scalars, this is a 0-d array.
+         """
+         if load_slice is not None:
+             raise NotImplementedError("Slicing is not yet implemented.")
+         raise NotImplementedError("This method should be implemented in subclasses.")
+
+     def get_data_shape(self, data_key: str) -> Tuple[int, ...]:
+         """
+         Get the shape of the data from the IO source if the format supports it, else an empty tuple.
+
+         Parameters
+         ----------
+         data_key : str
+             The key to the data for which the shape is requested.
+
+         Returns
+         -------
+         Tuple[int, ...] :
+             The shape of the data associated with the provided key.
+             Returns an empty tuple if nothing is available or shapes are unsupported.
+         """
+         raise NotImplementedError("This method should be implemented in subclasses.")
+
+     def get_data_dtype(self, data_key: str) -> Optional[np.dtype]:
+         """
+         Get the data type of the data from the IO source if the format supports it, else None.
+
+         Parameters
+         ----------
+         data_key : str
+             The key to the data for which the dtype is requested.
+
+         Returns
+         -------
+         Optional[np.dtype] :
+             The data type of the data associated with the provided key.
+             Returns None if nothing is available or dtypes are unsupported.
+         """
+         raise NotImplementedError("This method should be implemented in subclasses.")
+
+     def get_data_attributes(self, data_key: str) -> dict[str, Any]:
+         """
+         Get data attributes from the IO source if the format supports it, else an empty dict.
+
+         Parameters
+         ----------
+         data_key : str
+             The key to the data for which attributes are requested.
+
+         Returns
+         -------
+         dict[str, Any] :
+             The attributes associated with the data.
+             Returns an empty dictionary if nothing is available or attributes are unsupported.
+         """
+         raise NotImplementedError("This method should be implemented in subclasses.")
+
+     def get_static_metadata(self, data_key: str) -> Any:
+         """
+         Get static metadata from the IO source using the provided data key.
+         If the key contains an @ character, the part before the @ is
+         considered the group/dataset path, and the part after the @ is considered the attribute name.
+         Useful, for example, to get units from HDF5 attributes.
+
+         Parameters
+         ----------
+         data_key : str
+             The key to access the metadata.
+
+         Returns
+         -------
+         Any :
+             The static metadata associated with the provided key.
+         """
+         raise NotImplementedError("This method should be implemented in subclasses.")
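
Finally, a minimal sketch of what a concrete IoSource subclass has to provide; the in-memory dictionary backing is an assumption for illustration and does not correspond to any source shipped in the package.

    # Hypothetical minimal subclass backed by an in-memory dict; illustration only.
    from __future__ import annotations

    from typing import Any

    import numpy as np
    from attrs import define, field

    from modacor.io.io_source import ArraySlice, IoSource

    @define
    class InMemorySource(IoSource):
        arrays: dict[str, np.ndarray] = field(factory=dict)

        def get_data(self, data_key: str, load_slice: ArraySlice = ...) -> np.ndarray:
            return np.asarray(self.arrays[data_key])[load_slice]

        def get_data_shape(self, data_key: str) -> tuple[int, ...]:
            return self.arrays[data_key].shape if data_key in self.arrays else ()

        def get_data_dtype(self, data_key: str) -> np.dtype | None:
            return self.arrays[data_key].dtype if data_key in self.arrays else None

        def get_data_attributes(self, data_key: str) -> dict[str, Any]:
            return {}

        def get_static_metadata(self, data_key: str) -> Any:
            return None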