guts-base 2.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
guts_base/sim/units.py ADDED
@@ -0,0 +1,313 @@
1
+ import re
2
+ import xarray as xr
3
+ import pandas as pd
4
+ from typing import Mapping, Tuple, Optional, Any, Callable
5
+ from pint import UnitRegistry
6
+ from guts_base.sim.utils import GutsBaseError
7
+ from pymob.sim.config import Config
8
+ from typing import Dict
9
+ from pymob.sim.config import Modelparameters
10
+
11
+
12
# Module-wide pint registry; every unit expression in this module is parsed
# and converted through this single instance.
ureg = UnitRegistry()
# Register the custom unit "ai" as the plain number 1 with "AI" as a second
# name for it.
# NOTE(review): presumably stands for "active ingredient" -- confirm.
ureg.define("ai = 1 = AI")


# Position used to read a value out of a unit array when a placeholder has
# no "_i" suffix (see parse_units).
ASSUMPTIONS = 0
# Matches "{...}" template placeholders; the named group "content" captures
# the text between the braces.
pattern = re.compile(r'\{(?P<content>[^\}]+)\}')
21
+
22
def parse_units(
    model_parameters: Modelparameters,
    units: Dict[str,xr.DataArray|str],
):
    """Expand the ``{placeholder}`` templates in all parameter units.

    For every parameter in ``model_parameters.all`` the unit template is
    filled with values taken from ``units``. Each inserted value is wrapped
    in parentheses. When the parameter shares a dimension with one of the
    unit arrays, one unit is produced per coordinate of that dimension: a
    placeholder ending in ``_i`` is looked up under its base name (suffix
    removed) at the position of the current coordinate, any other
    placeholder at position ``ASSUMPTIONS``. Every resolved unit is printed.

    Parameters:
        model_parameters -- object whose ``all`` mapping yields
            ``(name, parameter)`` pairs; each parameter exposes ``unit``
            (``None``, a template string or a sequence of template strings)
            and ``dims``
        units -- mapping from placeholder name to a unit string or to an
            ``xr.DataArray`` holding unit strings

    Returns:
        dict -- ``{name: unit}`` for parameters without a matching unit
        dimension and ``{name: {coordinate: unit}}`` otherwise
    """
    units_explicit = {}
    for key, param in model_parameters.all.items():

        problem_dims = _get_dimensionality_of_the_problem(units, param)

        # if the dimension is not existing, having a list makes no sense
        if len(problem_dims) > 0:
            # The entry for `key` is replaced on every pass, so if several
            # dimensions match, the last one determines the result.
            for dim, coords in problem_dims.items():
                units_explicit_param = {}
                if param.unit is None:
                    # Fixed: this case used to set `_unit` only, leaving
                    # `_units` undefined (or stale from an earlier parameter)
                    # for the coordinate loop below.
                    _units = [""] * len(coords)
                elif isinstance(param.unit, str):
                    _units = [param.unit] * len(coords)
                else:
                    _units = param.unit

                for i, coord in enumerate(coords):
                    _unit = _units[i]

                    inject_into_template = {}
                    for _placeholder in pattern.findall(_unit):
                        # Fixed: `str.strip("_i")` removes any leading or
                        # trailing "_" / "i" characters (e.g. "id_i" -> "d").
                        # Only a real "_i" suffix marks an indexed placeholder.
                        if _placeholder.endswith("_i"):
                            placeholder = _placeholder[:-len("_i")]
                            idx = i
                        else:
                            placeholder = _placeholder
                            idx = ASSUMPTIONS

                        value = _get_placeholder_from_units_dict(
                            placeholder=placeholder, units=units, index=idx
                        )
                        # The template is keyed by the placeholder exactly as
                        # written, i.e. including a possible "_i" suffix.
                        inject_into_template[_placeholder] = f"({value})"

                    explicit_unit = _unit.format(**inject_into_template)
                    print(f"{key}[{coord}]: {explicit_unit}")

                    units_explicit_param[coord] = explicit_unit
                units_explicit[key] = units_explicit_param

        else:
            if param.unit is None:
                _unit = ""
            elif isinstance(param.unit, str):
                _unit = param.unit
            else:
                # Without a matching dimension only the first unit is used.
                _unit = param.unit[0]

            inject_into_template = {}
            for placeholder in pattern.findall(_unit):
                value = _get_placeholder_from_units_dict(
                    placeholder=placeholder, units=units, index=ASSUMPTIONS
                )
                inject_into_template[placeholder] = f"({value})"

            explicit_unit = _unit.format(**inject_into_template)

            print(f"{key}: {explicit_unit}")

            units_explicit[key] = explicit_unit

    return units_explicit
99
+
100
+
101
+
102
def _get_dimensionality_of_the_problem(units, param):
    """Collect the dimensions a parameter shares with the units it references.

    The unit template(s) of ``param`` are scanned for ``{placeholder}``
    entries. For every placeholder that maps to an ``xr.DataArray`` in
    ``units``, each dimension of that array which also occurs in
    ``param.dims`` is recorded together with its coordinate values.

    Parameters:
        units -- mapping from placeholder name to a unit string or an
            ``xr.DataArray``
        param -- object exposing ``unit`` (``None``, a template string or a
            sequence of template strings) and ``dims``

    Returns:
        dict -- ``{dimension: coordinate values}``; empty when no referenced
        unit array shares a dimension with the parameter

    Raises:
        NotImplementedError -- if a referenced entry of ``units`` is neither
            a string, an ``xr.DataArray`` nor ``None``
    """
    # Fixed: list-valued units are accepted by parse_units but used to raise
    # a TypeError here, because a list was handed to `pattern.findall`.
    if param.unit is None:
        templates = []
    elif isinstance(param.unit, str):
        templates = [param.unit]
    else:
        templates = list(param.unit)

    dimensionality = {}
    for template in templates:
        for _placeholder in pattern.findall(template):
            # Fixed: `str.strip("_i")` strips a character set from both ends
            # (e.g. "id_i" -> "d"); only remove a genuine "_i" suffix.
            if _placeholder.endswith("_i"):
                placeholder = _placeholder[:-len("_i")]
            else:
                placeholder = _placeholder

            # Fixed: unknown placeholders used to raise a KeyError although
            # _get_placeholder_from_units_dict deliberately leaves them
            # unresolved. They simply contribute no dimension.
            _u = units.get(placeholder, None)

            if _u is None or isinstance(_u, str):
                continue

            if not isinstance(_u, xr.DataArray):
                raise NotImplementedError

            dimensionality.update(
                {d: _u.coords[d].values for d in _u.dims if d in param.dims}
            )
    return dimensionality
125
+
126
+
127
+
128
def _get_placeholder_from_units_dict(placeholder, units, index):
    """Look up the replacement text for one template placeholder.

    Parameters:
        placeholder -- name to look up in ``units``
        units -- mapping from placeholder name to a unit string or an
            ``xr.DataArray``
        index -- position passed on to ``_get_placeholder_value`` when the
            entry is an ``xr.DataArray``

    Returns:
        The entry itself if it is a string, the value extracted by
        ``_get_placeholder_value`` if it is an ``xr.DataArray``, the
        placeholder re-wrapped in braces if there is no entry (or the entry
        is ``None``), and ``None`` for any other entry type.
    """
    entry = units.get(placeholder)

    # Nothing registered: hand the placeholder back unresolved.
    if entry is None:
        return f"{{{placeholder}}}"

    if isinstance(entry, xr.DataArray):
        return _get_placeholder_value(entry, placeholder, index)

    if isinstance(entry, str):
        return entry

    # Entries of any other type are not handled.
    return None
139
+
140
+
141
+
142
+ def _get_placeholder_value(_u, placeholder, index=0):
143
+ if len(_u.dims) == 1:
144
+ dim = _u.dims[0]
145
+ else:
146
+ raise GutsBaseError(
147
+ "Currently only 1 dimensional units are supported"
148
+ )
149
+ replaced = False
150
+ if not replaced:
151
+ try:
152
+ # if the replacement is a string, assume that it is a coordinate and
153
+ # should be used. This is of course more explicit, but not good
154
+ # when using default settings.
155
+ v = str(_u.sel({dim:placeholder}).values)
156
+ replaced = True
157
+ except KeyError:
158
+ pass
159
+
160
+ if not replaced:
161
+ try:
162
+ # The placeholder is replaced with the
163
+ # i-th value in the unit dataarray
164
+ v = str(_u[index].values)
165
+ replaced = True
166
+ except IndexError:
167
+ pass
168
+
169
+ if not replaced:
170
+ v = f"{{{placeholder}}}"
171
+
172
+ return v
173
+
174
+
175
+
176
+ def _walk_nested_dict(dct, func):
177
+ """Expects a function that takes key and value as arguments to process the """
178
+ result = {}
179
+ for key, value in dct.items():
180
+ if not isinstance(value, dict):
181
+ _dct = func(key, value)
182
+ result.update(_dct)
183
+ else:
184
+ dct_l1 = {}
185
+ for coord, val in value.items():
186
+ _subdct = func(coord, val)
187
+ dct_l1.update(_subdct)
188
+ result.update({key: dct_l1})
189
+ return result
190
+
191
+
192
def derive_explicit_units(config: Config, unit: xr.DataArray, parsing_func: Optional[Callable] = None) -> xr.Dataset:
    """Resolve the unit templates of all model parameters into a dataset.

    The templates are filled by ``parse_units`` using the exposure units
    (placeholder ``X``), the time unit from the configuration (placeholder
    ``T``) and one placeholder per entry of ``unit``. Each resulting string
    is then passed through ``parsing_func``.

    Parameters:
        config -- simulation configuration; ``guts_base.unit_time``,
            ``guts_base.unit_format_pint``, ``model_parameters``,
            ``data_structure.exposure.dimensions`` and the batch / x
            dimension names from ``simulation`` are read
        unit -- array of exposure units
        parsing_func -- optional ``func(key, value) -> dict`` applied to each
            resolved unit string. By default the string is parsed with pint
            and formatted with ``config.guts_base.unit_format_pint``.

    Returns:
        xr.Dataset -- one variable per parameter along a ``metric``
        dimension with the single entry ``"unit"``; parameters with
        per-coordinate units additionally span the extra exposure dimension

    Raises:
        GutsBaseError -- (default parser only) if a unit expression parses to
            a magnitude other than 1
        NotImplementedError -- if the exposure does not have exactly one
            dimension besides the batch and the x dimension
    """
    # here we have the base units
    T = config.guts_base.unit_time
    fmt_unit = config.guts_base.unit_format_pint
    units_dict = unit.to_pandas().to_dict()

    parsed_units = parse_units(
        model_parameters=config.model_parameters,
        units={"X": unit, "T": T, **units_dict},  # type: ignore
    )

    if parsing_func is None:
        def parse_func(key, value) -> Dict[str, Any]:
            upint = ureg.parse_expression(value)
            # Fixed: this check used to be an `assert`, which is removed
            # when Python runs with -O, so a stray numeric factor in a unit
            # string would then have been dropped silently.
            if upint.magnitude != 1.0:
                raise GutsBaseError(
                    f"The unit expression '{value}' of '{key}' parses to a "
                    f"magnitude of {upint.magnitude}, but unit expressions "
                    "must not contain a numeric factor."
                )
            return {key: format(upint.u, fmt_unit)}
    else:
        parse_func = parsing_func

    pint_parsed_units = _walk_nested_dict(parsed_units, parse_func)

    # Exposure dimensions other than the batch and the x dimension.
    extra_dims = [
        k for k in config.data_structure.exposure.dimensions
        if k not in [config.simulation.batch_dimension, config.simulation.x_dimension]
    ]

    if len(extra_dims) > 1:
        raise NotImplementedError(
            "More than three exposure dimensions are currently not implemented."
        )
    elif len(extra_dims) == 0:
        raise NotImplementedError(
            "Exposure should have one extra dimension, even if only one coordinate is present"
        )
    else:
        exposure_dim = extra_dims[0]

    units = xr.Dataset({
        "metric": ["unit"],
        exposure_dim: list(units_dict.keys()),
        ** {
            # Plain strings become a length-1 variable along "metric";
            # per-coordinate dicts become an array along the exposure
            # dimension with "metric" added as a leading dimension.
            key: ("metric", [val]) if isinstance(val, str)
            else xr.Dataset(val).to_array(exposure_dim, name=key).expand_dims({"metric": ["unit"]})
            for key, val in pint_parsed_units.items()
        }
    })
    return units
242
+
243
+
244
+ def _get_unit_from_dataframe_index(df: pd.DataFrame) -> str:
245
+ """Searches the index name in a DataFrame for square parentheses `[...].`
246
+ Extracts the content inside the parentheses
247
+ """
248
+ pattern = re.compile(r'\[(?P<content>[^\]]+)\]')
249
+ match = pattern.search(str(df.index.name))
250
+ if match:
251
+ time_unit = match.group('content')
252
+ else:
253
+ time_unit = ""
254
+
255
+ return time_unit
256
+
257
+
258
def _convert_units(
    units: xr.DataArray,
    target_units: Mapping[str,str] = {"default": "{x}->{x}"}
) -> Tuple[xr.DataArray, xr.DataArray]:
    """Converts units of values associated with the exposure dimension

    Every coordinate of ``units`` is converted according to a rule of the
    form ``"<transform> -> <target>"``. In both parts ``{x}`` is replaced
    with the unit found in ``units``; the transform expression is parsed by
    pint and converted to the target unit.

    Parameters:
        units -- one-dimensional array of unit strings
        target_units -- conversion rule per coordinate. Coordinates without
            their own rule use the ``"default"`` entry; if that is missing
            as well, the unit is left unchanged (``"{x}->{x}"``). The
            mapping is only read, never modified.

    Returns:
        Tuple of two arrays along the dimension of ``units``: the target
        unit strings and the numeric conversion factors.

    Raises:
        GutsBaseError -- if ``units`` is not one-dimensional or a converted
            unit does not compare equal to its parsed target unit

    TODO: Converting before inference could be problem for the calibration, because
    it is usually good if the values are both not too small and not too large
    """

    if len(units.dims) != 1:
        raise GutsBaseError(
            "GutsBase_convert_exposure_units only supports 1 dimensional exposure units"
        )

    _dim = units.dims[0]
    _coordinates = units.coords[_dim]

    # Fixed: the fallback used to be evaluated as `target_units["default"]`
    # on every lookup, so a mapping without a "default" entry raised a
    # KeyError even if every coordinate had its own rule.
    default_rule = target_units.get("default", "{x}->{x}")

    converted_units = {}
    _target_units = {}

    for coord in _coordinates.values:
        unit = str(units.sel({_dim: coord}).values)

        # get item from config
        # split transformation expression from target expression
        transform, target = target_units.get(coord, default_rule).split("->")
        # insert unit from observations coordinates
        transform = transform.strip(" ").format(x=unit)
        target = target.strip(" ").format(x=unit)

        # parse and convert units
        new_unit = ureg.parse_expression(transform).to(target)
        converted_units[coord] = new_unit
        _target_units[coord] = target

    # assert whether the converted units are the same as the target units
    # so the target units can be used, because the converted units may reduce
    # to dimensionless quantities.
    if not all(
        cu.units == ureg.parse_expression(tu)
        for cu, tu in zip(converted_units.values(), _target_units.values())
    ):
        raise GutsBaseError(
            f"Mismatch between target units {_target_units} and converted units " +
            f"{converted_units}."
        )

    _conversion_factors = {k: cu.magnitude for k, cu in converted_units.items()}
    new_unit_coords = xr.Dataset(_target_units).to_array(dim=_dim)
    conversion_factor_coords = xr.Dataset(_conversion_factors).to_array(dim=_dim)

    return new_unit_coords, conversion_factor_coords
guts_base/sim/utils.py ADDED
@@ -0,0 +1,10 @@
1
class GutsBaseError(Exception):
    """Custom exception that carries a human-readable message.

    Attributes:
        message -- explanation of the error
    """

    def __init__(self, message):
        # Hand the message to Exception so str(error) and error.args work,
        # and keep it available as an attribute as well.
        super().__init__(message)
        self.message = message