modacor 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modacor/__init__.py +30 -0
- modacor/dataclasses/__init__.py +0 -0
- modacor/dataclasses/basedata.py +973 -0
- modacor/dataclasses/databundle.py +23 -0
- modacor/dataclasses/helpers.py +45 -0
- modacor/dataclasses/messagehandler.py +75 -0
- modacor/dataclasses/process_step.py +233 -0
- modacor/dataclasses/process_step_describer.py +146 -0
- modacor/dataclasses/processing_data.py +59 -0
- modacor/dataclasses/trace_event.py +118 -0
- modacor/dataclasses/uncertainty_tools.py +132 -0
- modacor/dataclasses/validators.py +84 -0
- modacor/debug/pipeline_tracer.py +548 -0
- modacor/io/__init__.py +33 -0
- modacor/io/csv/__init__.py +0 -0
- modacor/io/csv/csv_sink.py +114 -0
- modacor/io/csv/csv_source.py +210 -0
- modacor/io/hdf/__init__.py +27 -0
- modacor/io/hdf/hdf_source.py +120 -0
- modacor/io/io_sink.py +41 -0
- modacor/io/io_sinks.py +61 -0
- modacor/io/io_source.py +164 -0
- modacor/io/io_sources.py +208 -0
- modacor/io/processing_path.py +113 -0
- modacor/io/tiled/__init__.py +16 -0
- modacor/io/tiled/tiled_source.py +403 -0
- modacor/io/yaml/__init__.py +27 -0
- modacor/io/yaml/yaml_source.py +116 -0
- modacor/modules/__init__.py +53 -0
- modacor/modules/base_modules/__init__.py +0 -0
- modacor/modules/base_modules/append_processing_data.py +329 -0
- modacor/modules/base_modules/append_sink.py +141 -0
- modacor/modules/base_modules/append_source.py +181 -0
- modacor/modules/base_modules/bitwise_or_masks.py +113 -0
- modacor/modules/base_modules/combine_uncertainties.py +120 -0
- modacor/modules/base_modules/combine_uncertainties_max.py +105 -0
- modacor/modules/base_modules/divide.py +82 -0
- modacor/modules/base_modules/find_scale_factor1d.py +373 -0
- modacor/modules/base_modules/multiply.py +77 -0
- modacor/modules/base_modules/multiply_databundles.py +73 -0
- modacor/modules/base_modules/poisson_uncertainties.py +69 -0
- modacor/modules/base_modules/reduce_dimensionality.py +252 -0
- modacor/modules/base_modules/sink_processing_data.py +80 -0
- modacor/modules/base_modules/subtract.py +80 -0
- modacor/modules/base_modules/subtract_databundles.py +67 -0
- modacor/modules/base_modules/units_label_update.py +66 -0
- modacor/modules/instrument_modules/__init__.py +0 -0
- modacor/modules/instrument_modules/readme.md +9 -0
- modacor/modules/technique_modules/__init__.py +0 -0
- modacor/modules/technique_modules/scattering/__init__.py +0 -0
- modacor/modules/technique_modules/scattering/geometry_helpers.py +114 -0
- modacor/modules/technique_modules/scattering/index_pixels.py +492 -0
- modacor/modules/technique_modules/scattering/indexed_averager.py +628 -0
- modacor/modules/technique_modules/scattering/pixel_coordinates_3d.py +417 -0
- modacor/modules/technique_modules/scattering/solid_angle_correction.py +63 -0
- modacor/modules/technique_modules/scattering/xs_geometry.py +571 -0
- modacor/modules/technique_modules/scattering/xs_geometry_from_pixel_coordinates.py +293 -0
- modacor/runner/__init__.py +0 -0
- modacor/runner/pipeline.py +749 -0
- modacor/runner/process_step_registry.py +224 -0
- modacor/tests/__init__.py +27 -0
- modacor/tests/dataclasses/test_basedata.py +519 -0
- modacor/tests/dataclasses/test_basedata_operations.py +439 -0
- modacor/tests/dataclasses/test_basedata_to_base_units.py +57 -0
- modacor/tests/dataclasses/test_process_step_describer.py +73 -0
- modacor/tests/dataclasses/test_processstep.py +282 -0
- modacor/tests/debug/test_tracing_integration.py +188 -0
- modacor/tests/integration/__init__.py +0 -0
- modacor/tests/integration/test_pipeline_run.py +238 -0
- modacor/tests/io/__init__.py +27 -0
- modacor/tests/io/csv/__init__.py +0 -0
- modacor/tests/io/csv/test_csv_source.py +156 -0
- modacor/tests/io/hdf/__init__.py +27 -0
- modacor/tests/io/hdf/test_hdf_source.py +92 -0
- modacor/tests/io/test_io_sources.py +119 -0
- modacor/tests/io/tiled/__init__.py +12 -0
- modacor/tests/io/tiled/test_tiled_source.py +120 -0
- modacor/tests/io/yaml/__init__.py +27 -0
- modacor/tests/io/yaml/static_data_example.yaml +26 -0
- modacor/tests/io/yaml/test_yaml_source.py +47 -0
- modacor/tests/modules/__init__.py +27 -0
- modacor/tests/modules/base_modules/__init__.py +27 -0
- modacor/tests/modules/base_modules/test_append_processing_data.py +219 -0
- modacor/tests/modules/base_modules/test_append_sink.py +76 -0
- modacor/tests/modules/base_modules/test_append_source.py +180 -0
- modacor/tests/modules/base_modules/test_bitwise_or_masks.py +264 -0
- modacor/tests/modules/base_modules/test_combine_uncertainties.py +105 -0
- modacor/tests/modules/base_modules/test_combine_uncertainties_max.py +109 -0
- modacor/tests/modules/base_modules/test_divide.py +140 -0
- modacor/tests/modules/base_modules/test_find_scale_factor1d.py +220 -0
- modacor/tests/modules/base_modules/test_multiply.py +113 -0
- modacor/tests/modules/base_modules/test_multiply_databundles.py +136 -0
- modacor/tests/modules/base_modules/test_poisson_uncertainties.py +61 -0
- modacor/tests/modules/base_modules/test_reduce_dimensionality.py +358 -0
- modacor/tests/modules/base_modules/test_sink_processing_data.py +119 -0
- modacor/tests/modules/base_modules/test_subtract.py +111 -0
- modacor/tests/modules/base_modules/test_subtract_databundles.py +136 -0
- modacor/tests/modules/base_modules/test_units_label_update.py +91 -0
- modacor/tests/modules/technique_modules/__init__.py +0 -0
- modacor/tests/modules/technique_modules/scattering/__init__.py +0 -0
- modacor/tests/modules/technique_modules/scattering/test_geometry_helpers.py +198 -0
- modacor/tests/modules/technique_modules/scattering/test_index_pixels.py +426 -0
- modacor/tests/modules/technique_modules/scattering/test_indexed_averaging.py +559 -0
- modacor/tests/modules/technique_modules/scattering/test_pixel_coordinates_3d.py +282 -0
- modacor/tests/modules/technique_modules/scattering/test_xs_geometry_from_pixel_coordinates.py +224 -0
- modacor/tests/modules/technique_modules/scattering/test_xsgeometry.py +635 -0
- modacor/tests/requirements.txt +12 -0
- modacor/tests/runner/test_pipeline.py +438 -0
- modacor/tests/runner/test_process_step_registry.py +65 -0
- modacor/tests/test_import.py +43 -0
- modacor/tests/test_modacor.py +17 -0
- modacor/tests/test_units.py +79 -0
- modacor/units.py +97 -0
- modacor-1.0.0.dist-info/METADATA +482 -0
- modacor-1.0.0.dist-info/RECORD +120 -0
- modacor-1.0.0.dist-info/WHEEL +5 -0
- modacor-1.0.0.dist-info/licenses/AUTHORS.md +11 -0
- modacor-1.0.0.dist-info/licenses/LICENSE +11 -0
- modacor-1.0.0.dist-info/licenses/LICENSE.txt +11 -0
- modacor-1.0.0.dist-info/top_level.txt +1 -0
modacor/io/csv/csv_source.py ADDED
@@ -0,0 +1,210 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from __future__ import annotations
+
+__coding__ = "utf-8"
+__authors__ = ["Brian R. Pauw"]
+__copyright__ = "Copyright 2025, The MoDaCor team"
+__date__ = "12/12/2025"
+__status__ = "Development"  # "Development", "Production"
+# end of header and standard imports
+
+__all__ = ["CSVSource"]
+
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from attrs import define, field, validators
+
+from modacor.dataclasses.messagehandler import MessageHandler
+from modacor.io.io_source import ArraySlice
+
+from ..io_source import IoSource
+
+
+def _is_callable(_, __, value):
+    if not callable(value):
+        raise TypeError("method must be callable")
+
+
+@define(kw_only=True)
+class CSVSource(IoSource):
+    """
+    IoSource for loading columnar data from CSV-like text files using NumPy's
+    loadtxt or genfromtxt.
+
+    Expected usage
+    --------------
+    - Data is 1D per column (no multi-dimensional fields).
+    - Columns are returned as 1D arrays; each column corresponds to one data_key.
+    - For np.loadtxt, column names must be provided via a dtype with field names, e.g.:
+          dtype=[("q", float), ("I", float), ("I_sigma", float)]
+    - For np.genfromtxt, column names come from the first row or are specified
+      explicitly via the `names` parameter, so that they can be clearly
+      identified later. Typical patterns:
+          * np.genfromtxt(..., names=True, delimiter=..., ...)    # use first row as names
+          * np.genfromtxt(..., names=["q", "I", "I_sigma"], ...)  # specify names explicitly
+
+    Configuration
+    -------------
+    `iosource_method_kwargs` is passed directly to the NumPy function `method`,
+    so all standard NumPy options are available, e.g.:
+
+    For np.genfromtxt:
+        delimiter=","
+        skip_header=3
+        max_rows=1000
+        usecols=(0, 1, 2)
+        names=True or names=["q", "I", "sigma"]
+        dtype=None or dtype=float
+        encoding="utf-8"
+        comments="#"
+        ...
+
+    For np.loadtxt:
+        delimiter=","
+        skiprows=3
+        max_rows=1000
+        usecols=(0, 1, 2)
+        dtype=float
+        encoding="utf-8"
+        comments="#"
+        ...
+
+    Notes
+    -----
+    - 2D arrays (no field names) are not supported in this implementation.
+      If the resulting array does not have `dtype.names`, a ValueError is raised.
+    """
+
+    # external API:
+    resource_location: Path = field(converter=Path, validator=validators.instance_of(Path))
+    method: Callable[..., np.ndarray] = field(
+        default=np.genfromtxt, validator=_is_callable
+    )  # default to genfromtxt, better for names
+    # internal use (type hints; real values set per-instance)
+    _data_cache: np.ndarray | None = field(init=False, default=None)
+    _data_dict_cache: dict[str, np.ndarray] = field(factory=dict)
+    _file_datasets_dtypes: dict[str, np.dtype] = field(init=False)
+    _file_datasets_shapes: dict[str, tuple[int, ...]] = field(init=False)
+    logger: MessageHandler = field(init=False)
+
+    def __attrs_post_init__(self) -> None:
+        # super().__init__(source_reference=self.source_reference, iosource_method_kwargs=self.iosource_method_kwargs)
+        self.logger = MessageHandler(level=self.logging_level, name="CSVSource")
+        # Check the file path
+        if not self.resource_location.is_file():
+            self.logger.error(f"CSVSource: file {self.resource_location} does not exist.")
+
+        # Bookkeeping structures for IoSource API
+        self._file_datasets_shapes: dict[str, tuple[int, ...]] = {}
+        self._file_datasets_dtypes: dict[str, np.dtype] = {}
+
+        # Load and preprocess data immediately
+        self._load_data()
+        self._preload()
+
+    # ------------------------------------------------------------------ #
+    # Internal loading / preprocessing                                   #
+    # ------------------------------------------------------------------ #
+
+    def _load_data(self) -> None:
+        """
+        Load the CSV data into a structured NumPy array using the configured
+        method (np.genfromtxt or np.loadtxt).
+
+        iosource_method_kwargs are passed directly to that method.
+        """
+        self.logger.info(
+            f"CSVSource loading data from {self.resource_location} "
+            f"using {self.method.__name__} with options: {self.iosource_method_kwargs}"
+        )
+
+        try:
+            self._data_cache = self.method(self.resource_location, **self.iosource_method_kwargs)
+        except Exception as exc:  # noqa: BLE001
+            self.logger.error(f"Error while loading CSV data from {self.resource_location}: {exc}")
+            raise
+
+        if self._data_cache is None:
+            raise ValueError(f"CSVSource: no data loaded from file {self.resource_location}.")
+        # Ensure we have a structured array with named fields
+        if self._data_cache.dtype.names is None:
+            raise ValueError(
+                "CSVSource expected a structured array with named fields, "
+                "but dtype.names is None.\n"
+                "Hint: use np.genfromtxt with 'names=True' or 'names=[...]', "
+                "or provide an appropriate 'dtype' with field names."
+            )
+
+    def _preload(self) -> None:
+        """
+        Populate dataset lists, shapes, and dtypes from the structured array.
+        """
+        assert self._data_cache is not None  # for type checkers
+
+        self._data_dict_cache = {}
+        self._file_datasets_shapes.clear()
+        self._file_datasets_dtypes.clear()
+
+        for name in self._data_cache.dtype.names:
+            column = self._data_cache[name]
+            self._data_dict_cache[name] = column
+            self._file_datasets_shapes[name] = column.shape
+            self._file_datasets_dtypes[name] = column.dtype
+
+        self.logger.info(f"CSVSource loaded datasets: {list(self._file_datasets_shapes.keys())}")
+
+    # ------------------------------------------------------------------ #
+    # IoSource API                                                       #
+    # ------------------------------------------------------------------ #
+
+    def get_static_metadata(self, data_key: str) -> None:
+        """
+        CSVSource does not support static metadata; always returns None.
+        """
+        self.logger.warning(
+            f"You asked for static metadata '{data_key}', but CSVSource does not support static metadata."
+        )
+        return None
+
+    def get_data(self, data_key: str, load_slice: ArraySlice = ...) -> np.ndarray:
+        """
+        Return the data column corresponding to `data_key`, with `load_slice`
+        applied and the result cast to float.
+
+        - data_key must match one of the field names in the structured array.
+        - `load_slice` is applied to that 1D column (e.g. ellipsis, slice, array of indices).
+        """
+        if self._data_cache is None:
+            raise RuntimeError("CSVSource data cache is empty; loading may have failed.")
+
+        try:
+            column = self._data_dict_cache[data_key]
+        except KeyError:
+            raise KeyError(
+                f"Data key '{data_key}' not found in CSV data. Available keys: {list(self._data_dict_cache.keys())}"  # noqa: E713
+            ) from None
+
+        return np.asarray(column[load_slice]).astype(float)
+
+    def get_data_shape(self, data_key: str) -> tuple[int, ...]:
+        if data_key in self._file_datasets_shapes:
+            return self._file_datasets_shapes[data_key]
+        return ()
+
+    def get_data_dtype(self, data_key: str) -> np.dtype | None:
+        if data_key in self._file_datasets_dtypes:
+            return self._file_datasets_dtypes[data_key]
+        return None
+
+    def get_data_attributes(self, data_key: str) -> dict[str, Any]:
+        """
+        CSV has no per-dataset attributes; returns {data_key: None}.
+        """
+        self.logger.warning(
+            f"You asked for attributes of '{data_key}', but CSVSource does not support data attributes."
+        )
+        return {data_key: None}
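A minimal usage sketch of the class above, assuming a comma-separated file whose header row names the columns "q", "I" and "I_sigma"; the file name, source reference, and column names are illustrative, not shipped with the package:

```python
# Hypothetical CSVSource usage; relies on genfromtxt's names=True to build a
# structured array whose field names become the available data_keys.
from modacor.io.csv.csv_source import CSVSource

source = CSVSource(
    source_reference="csv0",                    # assumed registry name
    resource_location="saxs_curve.csv",         # assumed example file
    iosource_method_kwargs={"delimiter": ",", "names": True},
)
q = source.get_data("q")                        # 1D float array for column "q"
print(source.get_data_shape("q"))               # e.g. (250,)
```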
modacor/io/hdf/__init__.py ADDED
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2025 MoDaCor Authors
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software without
+# specific prior written permission.
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+__license__ = "BSD-3-Clause"
+__copyright__ = "Copyright 2025 MoDaCor Authors"
+__status__ = "Alpha"
modacor/io/hdf/hdf_source.py ADDED
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from __future__ import annotations
+
+from typing import Any
+
+__coding__ = "utf-8"
+__authors__ = ["Tim Snow", "Brian R. Pauw"]
+__copyright__ = "Copyright 2025, The MoDaCor team"
+__date__ = "22/10/2025"
+__status__ = "Development"  # "Development", "Production"
+# end of header and standard imports
+
+__all__ = ["HDFSource"]
+
+from pathlib import Path
+
+import h5py
+import numpy as np
+from attrs import define, field, validators
+
+from modacor.dataclasses.messagehandler import MessageHandler
+
+# from modacor.dataclasses.basedata import BaseData
+from modacor.io.io_source import ArraySlice
+
+from ..io_source import IoSource
+
+
+@define(kw_only=True)
+class HDFSource(IoSource):
+    resource_location: Path | str | None = field(
+        init=True, default=None, validator=validators.optional(validators.instance_of((Path, str)))
+    )
+    _data_cache: dict[str, np.ndarray] = field(init=False, factory=dict, validator=validators.instance_of(dict))
+    _file_path: Path | None = field(
+        init=False, default=None, validator=validators.optional(validators.instance_of(Path))
+    )
+    _file_datasets_shapes: dict[str, tuple[int, ...]] = field(
+        init=False, factory=dict, validator=validators.instance_of(dict)
+    )
+    _file_datasets_dtypes: dict[str, np.dtype] = field(init=False, factory=dict, validator=validators.instance_of(dict))
+    _static_metadata_cache: dict[str, Any] = field(init=False, factory=dict, validator=validators.instance_of(dict))
+    logger: MessageHandler = field(init=False)
+
+    # source_reference comes from IoSource
+    # iosource_method_kwargs comes from IoSource
+
+    def __attrs_post_init__(self):
+        # super().__init__(source_reference=source_reference)
+        self.logger = MessageHandler(level=self.logging_level, name="HDFSource")
+        self._file_path = Path(self.resource_location) if self.resource_location is not None else None
+        # self._file_datasets = []
+        self._file_datasets_shapes = {}
+        self._file_datasets_dtypes = {}
+        self._data_cache = {}
+        self._static_metadata_cache = {}
+        self._preload()  # load the HDF5 file structure immediately so we have some information, but not the data
+
+    def _preload(self):
+        assert self._file_path is not None and self._file_path.is_file(), f"HDF5 file {self._file_path} does not exist."
+        try:
+            with h5py.File(self._file_path, "r") as f:
+                f.visititems(self._find_datasets)
+        except OSError as error:
+            self.logger.error(error)
+            raise
+
+    def _find_datasets(self, path_name, path_object):
+        """
+        Internal visitor used while walking the HDF5 tree; records the shape
+        and dtype of every dataset found.
+        """
+        if isinstance(path_object, h5py.Dataset):
+            # self._file_datasets.append(path_name)
+            self._file_datasets_shapes[path_name] = path_object.shape
+            self._file_datasets_dtypes[path_name] = path_object.dtype
+
+    def get_static_metadata(self, data_key):
+        if data_key not in self._static_metadata_cache:
+            # if there's an "@" in the key, it's an attribute; we need to split it
+            if "@" in data_key:
+                dkey, akey = data_key.rsplit("@", 1)
+                self._static_metadata_cache[data_key] = self.get_data_attributes(dkey).get(akey, None)
+            else:
+                with h5py.File(self._file_path, "r") as f:
+                    value = f[data_key][()]
+                    # decode bytes to string if necessary
+                    if isinstance(value, bytes):
+                        value = value.decode("utf-8")
+                    self._static_metadata_cache[data_key] = value
+        return self._static_metadata_cache[data_key]
+
+    def get_data(self, data_key: str, load_slice: ArraySlice = ...) -> np.ndarray:
+        if data_key not in self._data_cache:
+            with h5py.File(self._file_path, "r") as f:
+                data_array = f[data_key][load_slice]  # if load_slice is not None else f[data_key][()]
+                self._data_cache[data_key] = np.array(data_array)
+        return self._data_cache[data_key]
+
+    def get_data_shape(self, data_key: str) -> tuple[int, ...]:
+        if data_key in self._file_datasets_shapes:
+            return self._file_datasets_shapes[data_key]
+        return ()
+
+    def get_data_dtype(self, data_key: str) -> np.dtype | None:
+        if data_key in self._file_datasets_dtypes:
+            return self._file_datasets_dtypes[data_key]
+        return None
+
+    def get_data_attributes(self, data_key: str) -> dict[str, Any]:
+        attributes = {}
+        with h5py.File(self._file_path, "r") as f:
+            if data_key in f:
+                dataset = f[data_key]
+                for attr_key in dataset.attrs:
+                    attributes[attr_key] = dataset.attrs[attr_key]
+        return attributes
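A brief usage sketch for HDFSource; the file name and dataset paths are illustrative assumptions (the detector path echoes the example in io_source.py's docstring below):

```python
# Hypothetical HDFSource usage; file and dataset paths are illustrative only.
import numpy as np

from modacor.io.hdf.hdf_source import HDFSource

source = HDFSource(source_reference="hdf0", resource_location="measurement.h5")
# np.s_ builds the slice tuple; here, the first frame of a stacked dataset.
frame = source.get_data("/entry1/instrument/detector00/data", np.s_[0, :, :])
# '@' addresses an HDF5 attribute: dataset path before it, attribute name after.
units = source.get_static_metadata("/entry1/instrument/detector00/data@units")
```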
modacor/io/io_sink.py ADDED
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from __future__ import annotations
+
+__coding__ = "utf-8"
+__authors__ = ["Brian R. Pauw"]  # add names to the list as appropriate
+__copyright__ = "Copyright 2026, The MoDaCor team"
+__date__ = "09/01/2026"
+__status__ = "Development"  # "Development", "Production"
+# end of header and standard imports
+
+from logging import WARNING
+from typing import Any
+
+import attrs
+from attrs import define, field
+
+
+def default_config() -> dict[str, Any]:
+    return {}
+
+
+@define
+class IoSink:
+    """
+    Base class for IO sinks. Mirrors IoSource.
+
+    Sinks are registered with a resource_location (file/socket/etc.).
+    The routed write call passes an optional 'subpath' after '::', which may be empty.
+    """
+
+    configuration: dict[str, Any] = field(factory=default_config)
+    sink_reference: str = field(default="", converter=str, validator=attrs.validators.instance_of(str))
+    type_reference: str = "IoSink"
+    iosink_method_kwargs: dict[str, Any] = field(factory=dict, validator=attrs.validators.instance_of(dict))
+    logging_level: int = field(default=WARNING, validator=attrs.validators.instance_of(int))
+
+    def write(self, subpath: str, *args, **kwargs):
+        raise NotImplementedError("This method should be implemented in subclasses.")
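A minimal subclass sketch of the write() contract; PrintSink is hypothetical and only illustrates how the routed subpath arrives:

```python
# Hypothetical IoSink subclass: prints instead of persisting, to show the contract.
from attrs import define

from modacor.io.io_sink import IoSink


@define
class PrintSink(IoSink):
    def write(self, subpath: str, *args, **kwargs):
        # 'subpath' is whatever followed '::' in the routed target reference;
        # it may be an empty string.
        print(f"[{self.sink_reference}::{subpath}]", args, kwargs)
```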
modacor/io/io_sinks.py ADDED
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from __future__ import annotations
+
+__coding__ = "utf-8"
+__authors__ = ["Brian R. Pauw"]  # add names to the list as appropriate
+__copyright__ = "Copyright 2026, The MoDaCor team"
+__date__ = "09/01/2026"
+__status__ = "Development"  # "Development", "Production"
+# end of header and standard imports
+
+__all__ = ["IoSinks"]
+
+from typing import Any
+
+from attrs import define, field
+
+from modacor.io.io_sink import IoSink
+
+
+@define
+class IoSinks:
+    """
+    Registry for IoSink instances. Mirrors IoSources.
+    """
+
+    defined_sinks: dict[str, IoSink] = field(factory=dict)
+
+    def register_sink(self, sink: IoSink, sink_reference: str | None = None) -> None:
+        if not isinstance(sink, IoSink):
+            raise TypeError("sink must be an instance of IoSink")
+        if sink_reference is None:
+            sink_reference = sink.sink_reference
+        if not isinstance(sink_reference, str):
+            raise TypeError("sink_reference must be a string")
+        if sink_reference in self.defined_sinks:
+            raise ValueError(f"Sink {sink_reference} already registered.")
+        self.defined_sinks[sink_reference] = sink
+
+    def get_sink(self, sink_reference: str) -> IoSink:
+        if sink_reference not in self.defined_sinks:
+            raise KeyError(f"Sink {sink_reference} not registered.")
+        return self.defined_sinks[sink_reference]
+
+    def split_target_reference(self, target_reference: str) -> tuple[str, str]:
+        """
+        Split 'sink_ref::subpath'. Subpath may be empty (e.g. 'export_csv::').
+        """
+        _split = target_reference.split("::", 1)
+        if len(_split) != 2:
+            raise ValueError(
+                "target_reference must be in the format 'sink_ref::subpath' with a double colon separator."
+            )
+        return _split[0], _split[1]
+
+    def write_data(self, target_reference: str, *args, **kwargs) -> Any:
+        sink_ref, subpath = self.split_target_reference(target_reference)
+        sink = self.get_sink(sink_ref)
+        return sink.write(subpath, *args, **kwargs)
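Continuing with the hypothetical PrintSink sketched after io_sink.py above, routing through the registry would look like this:

```python
# Hypothetical routing sketch; assumes the PrintSink example above is in scope.
from modacor.io.io_sinks import IoSinks

sinks = IoSinks()
sinks.register_sink(PrintSink(sink_reference="export_csv"))
# The target reference splits at the first '::': the left part selects the
# registered sink, the right part becomes the subpath (and may be empty).
sinks.write_data("export_csv::results/curve1", {"q": [0.1, 0.2, 0.3]})
```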
modacor/io/io_source.py ADDED
@@ -0,0 +1,164 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from __future__ import annotations
+
+__coding__ = "utf-8"
+__authors__ = ["Brian R. Pauw"]  # add names to the list as appropriate
+__copyright__ = "Copyright 2025, The MoDaCor team"
+__date__ = "14/06/2025"
+__status__ = "Development"  # "Development", "Production"
+# end of header and standard imports
+
+from logging import WARNING
+from typing import Any, Optional, Tuple, Union
+
+import attrs
+import numpy as np
+from attrs import define, field
+
+# for type hinting of slicing:
+Index = Union[int, slice, type(Ellipsis)]
+ArraySlice = Union[Index, Tuple[Index, ...]]
+
+
+def default_config() -> dict[str, Any]:
+    """
+    Default configuration for the IoSource class.
+
+    Returns
+    -------
+    dict[str, Any] :
+        A dictionary containing the default configuration.
+    """
+    return {
+        "data_rank": 1,
+        "data_key": None,
+        "data_rank_dims": (0,),
+        "metadata_key": None,
+        "non_data_slicing": "",
+    }
+
+
+@define
+class IoSource:
+    """
+    IoSource is a base class for all IO sources in the MoDaCor framework.
+
+    It provides access to a specific IO source and its associated methods.
+
+    Required configuration keys are:
+
+    data_rank : int
+        The rank of the data.
+    data_key : str
+        The key to access the data.
+        Special note for get_static_metadata: if the key contains an @ character,
+        the part before the @ is considered the group/dataset path, and the part
+        after the @ is considered the attribute name.
+    data_rank_dims : tuple[int]
+        The dimensions of the data rank.
+    non_data_slicing : str
+        Slicing information for non-data dimensions. This must be a
+        string that can be evaluated to a slice object. Multiple data
+        slices can be separated by a double semicolon ';;'.
+    """
+
+    configuration: dict[str, Any] = field(factory=default_config)
+    source_reference: str = field(default="", converter=str, validator=attrs.validators.instance_of(str))
+    type_reference: str = "IoSource"
+    # for passing extra kwargs to the data loading method if needed (e.g. csv_source)
+    iosource_method_kwargs: dict[str, Any] = field(factory=dict, validator=attrs.validators.instance_of(dict))
+    logging_level: int = field(default=WARNING, validator=attrs.validators.instance_of(int))
+
+    def get_data(self, data_key: str, load_slice: Optional[ArraySlice] = None) -> np.ndarray:
+        """
+        Get data from the IO source using the provided data key.
+
+        Parameters
+        ----------
+        data_key : str
+            The key to access the data, e.g. '/entry1/instrument/detector00/data'.
+        load_slice : Optional[ArraySlice]
+            A slice or tuple of slices to apply to the data. If None, the entire data is returned.
+            Slicing is not yet implemented in this base class, so this will raise
+            NotImplementedError if used. Consider using numpy.s_ or numpy.index_exp
+            to simplify the slicing syntax.
+
+        Returns
+        -------
+        np.ndarray :
+            The data array associated with the provided key. For scalars, this is a 0-d array.
+        """
+        if load_slice is not None:
+            raise NotImplementedError("Slicing is not yet implemented.")
+        raise NotImplementedError("This method should be implemented in subclasses.")
+
+    def get_data_shape(self, data_key: str) -> Tuple[int, ...]:
+        """
+        Get the shape of the data from the IO source if the format supports it, else an empty tuple.
+
+        Parameters
+        ----------
+        data_key : str
+            The key to the data for which the shape is requested.
+
+        Returns
+        -------
+        Tuple[int, ...] :
+            The shape of the data associated with the provided key.
+            Returns an empty tuple if nothing is available or shapes are unsupported.
+        """
+        raise NotImplementedError("This method should be implemented in subclasses.")
+
+    def get_data_dtype(self, data_key: str) -> Optional[np.dtype]:
+        """
+        Get the data type of the data from the IO source if the format supports it, else None.
+
+        Parameters
+        ----------
+        data_key : str
+            The key to the data for which the dtype is requested.
+
+        Returns
+        -------
+        Optional[np.dtype] :
+            The data type of the data associated with the provided key.
+            Returns None if nothing is available or dtypes are unsupported.
+        """
+        raise NotImplementedError("This method should be implemented in subclasses.")
+
+    def get_data_attributes(self, data_key: str) -> dict[str, Any]:
+        """
+        Get data attributes from the IO source if the format supports them, else an empty dict.
+
+        Parameters
+        ----------
+        data_key : str
+            The key to the data for which attributes are requested.
+
+        Returns
+        -------
+        dict[str, Any] :
+            The attributes associated with the data.
+            Returns an empty dictionary if nothing is available or attributes are unsupported.
+        """
+        raise NotImplementedError("This method should be implemented in subclasses.")
+
+    def get_static_metadata(self, data_key: str) -> Any:
+        """
+        Get static metadata from the IO source using the provided data key.
+        If the key contains an @ character, the part before the @ is
+        considered the group/dataset path, and the part after the @ is
+        considered the attribute name.
+        Useful, for example, to get units from HDF5 attributes.
+
+        Parameters
+        ----------
+        data_key : str
+            The key to access the metadata.
+
+        Returns
+        -------
+        Any :
+            The static metadata associated with the provided key.
+        """
+        raise NotImplementedError("This method should be implemented in subclasses.")
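The get_data docstring above suggests numpy.s_ for building slices; a short sketch of values that satisfy the ArraySlice alias:

```python
# Values matching the ArraySlice alias; np.s_ spells the same objects with
# subscript syntax.
import numpy as np

single_index = 3                 # Index: a plain integer
single_slice = np.s_[0:100]      # Index: slice(0, 100, None)
multi_axis = np.s_[..., 0:50]    # Tuple[Index, ...]: (Ellipsis, slice(0, 50, None))
```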