guts-base 2.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- guts_base/__init__.py +15 -0
- guts_base/data/__init__.py +35 -0
- guts_base/data/expydb.py +248 -0
- guts_base/data/generator.py +191 -0
- guts_base/data/openguts.py +296 -0
- guts_base/data/preprocessing.py +55 -0
- guts_base/data/survival.py +148 -0
- guts_base/data/time_of_death.py +595 -0
- guts_base/data/utils.py +8 -0
- guts_base/mod.py +332 -0
- guts_base/plot.py +201 -0
- guts_base/prob/__init__.py +13 -0
- guts_base/prob/binom.py +18 -0
- guts_base/prob/conditional_binom.py +118 -0
- guts_base/prob/conditional_binom_mv.py +233 -0
- guts_base/prob/predictions.py +164 -0
- guts_base/sim/__init__.py +28 -0
- guts_base/sim/base.py +1286 -0
- guts_base/sim/config.py +170 -0
- guts_base/sim/constructors.py +31 -0
- guts_base/sim/ecx.py +585 -0
- guts_base/sim/mempy.py +290 -0
- guts_base/sim/report.py +405 -0
- guts_base/sim/transformer.py +548 -0
- guts_base/sim/units.py +313 -0
- guts_base/sim/utils.py +10 -0
- guts_base-2.0.0b0.dist-info/METADATA +853 -0
- guts_base-2.0.0b0.dist-info/RECORD +32 -0
- guts_base-2.0.0b0.dist-info/WHEEL +5 -0
- guts_base-2.0.0b0.dist-info/entry_points.txt +3 -0
- guts_base-2.0.0b0.dist-info/licenses/LICENSE +674 -0
- guts_base-2.0.0b0.dist-info/top_level.txt +1 -0
guts_base/sim/units.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import xarray as xr
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from typing import Mapping, Tuple, Optional, Any, Callable
|
|
5
|
+
from pint import UnitRegistry
|
|
6
|
+
from guts_base.sim.utils import GutsBaseError
|
|
7
|
+
from pymob.sim.config import Config
|
|
8
|
+
from typing import Dict
|
|
9
|
+
from pymob.sim.config import Modelparameters
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Shared pint registry used by this module to parse and convert unit
# expressions.
ureg = UnitRegistry()
# Register a unit named "ai" that is defined as 1, with the symbol "AI".
# NOTE(review): presumably short for "active ingredient" - confirm.
ureg.define("ai = 1 = AI")


# Index into a unit array that is used for placeholders which do not carry
# an "_i" suffix (see parse_units).
ASSUMPTIONS = 0
# Matches "{name}" placeholders in unit templates; findall returns the text
# between the braces.
pattern = re.compile(r'\{(?P<content>[^\}]+)\}')
|
|
21
|
+
|
|
22
|
+
def parse_units(
    model_parameters: Modelparameters,
    units: Dict[str,xr.DataArray|str],
) -> Dict[str, Any]:
    """Expand the ``{placeholder}`` templates of all parameter units.

    Every parameter in ``model_parameters.all`` carries a unit template such
    as ``"{X_i}/{T}"``. Placeholders are replaced with the matching entries
    of ``units``; each injected value is wrapped in parentheses.

    Parameters
    ----------
    model_parameters : Modelparameters
        Container whose ``all`` mapping yields ``name -> parameter``. Each
        parameter provides ``unit`` (``None``, a template string or a
        sequence of template strings) and ``dims``.
    units : dict
        Maps placeholder names to a unit string or to a 1-dimensional
        DataArray of unit strings.

    Returns
    -------
    dict
        ``{name: unit}`` for parameters that share no dimension with the
        unit arrays and ``{name: {coordinate: unit}}`` for parameters that
        do.

    Notes
    -----
    For dimensional parameters, a placeholder ending in ``_i`` is resolved
    with the position of the current coordinate; any other placeholder is
    resolved with the ``ASSUMPTIONS`` index. The resolved units are printed
    to stdout.
    """

    def _expand(template: str, position: Optional[int]) -> str:
        # ``position`` is None for parameters without a matching dimension;
        # their placeholders are looked up verbatim at the default index.
        replacements = {}
        for raw in pattern.findall(template):
            if position is not None and raw.endswith("_i"):
                # Only remove the "_i" suffix. str.strip("_i") would treat
                # the argument as a character set and also eat leading or
                # trailing "i" / "_" characters of the placeholder name.
                name, index = raw.removesuffix("_i"), position
            else:
                name, index = raw, ASSUMPTIONS

            value = _get_placeholder_from_units_dict(
                placeholder=name, units=units, index=index
            )
            replacements[raw] = f"({value})"

        return template.format(**replacements)

    units_explicit = {}
    for key, param in model_parameters.all.items():
        problem_dims = _get_dimensionality_of_the_problem(units, param)

        if not problem_dims:
            if param.unit is None:
                template = ""
            elif isinstance(param.unit, str):
                template = param.unit
            else:
                # if the dimension is not existing, having a list makes no
                # sense: forcefully use only the first element, even if the
                # list has more than one element
                template = param.unit[0]

            explicit_unit = _expand(template, None)
            print(f"{key}: {explicit_unit}")
            units_explicit[key] = explicit_unit
            continue

        for coords in problem_dims.values():
            if param.unit is None:
                # keep one (empty) template per coordinate so the lookup
                # below never hits an undefined name
                templates = [""] * len(coords)
            elif isinstance(param.unit, str):
                templates = [param.unit] * len(coords)
            else:
                templates = param.unit

            units_explicit_param = {}
            for i, coord in enumerate(coords):
                explicit_unit = _expand(templates[i], i)
                print(f"{key}[{coord}]: {explicit_unit}")
                units_explicit_param[coord] = explicit_unit

            units_explicit[key] = units_explicit_param

    return units_explicit
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _get_dimensionality_of_the_problem(units, param):
    """Collect the coordinates a parameter shares with its unit arrays.

    Parameters
    ----------
    units : dict
        Maps placeholder names to a unit string or a DataArray of units.
    param
        Parameter providing ``unit`` (``None``, a template string or a
        sequence of template strings) and ``dims``.

    Returns
    -------
    dict
        ``{dimension: coordinate values}`` for every dimension of a
        referenced unit DataArray that is also listed in ``param.dims``.
        Empty when the unit is ``None`` or only references plain strings.

    Raises
    ------
    KeyError
        If a placeholder is not contained in ``units``.
    NotImplementedError
        If a referenced unit is neither a string nor a DataArray.
    """
    if param.unit is None:
        templates = []
    elif isinstance(param.unit, str):
        templates = [param.unit]
    else:
        # parse_units accepts a sequence of templates, so scan each of them
        # instead of handing the sequence to the regular expression
        templates = param.unit

    dimensionality = {}
    for template in templates:
        for raw in pattern.findall(template):
            # Only remove the "_i" suffix. str.strip("_i") would treat the
            # argument as a character set and mangle names like "init_i".
            name = raw.removesuffix("_i")
            _u = units[name]

            if isinstance(_u, str):
                # scalar units do not contribute any dimension
                continue

            if not isinstance(_u, xr.DataArray):
                raise NotImplementedError

            dimensionality.update(
                {d: _u.coords[d].values for d in _u.dims if d in param.dims}
            )

    return dimensionality
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _get_placeholder_from_units_dict(placeholder, units, index):
    """Look up the replacement text for one placeholder.

    Parameters
    ----------
    placeholder : str
        Name of the placeholder (without braces).
    units : dict
        Maps placeholder names to a unit string or a DataArray of units.
    index : int
        Position used when the unit is a DataArray and the placeholder is
        not one of its coordinates.

    Returns
    -------
    str
        The unit string, the value selected from the DataArray, or the
        placeholder wrapped in braces again when it is not in ``units``.

    Raises
    ------
    NotImplementedError
        If the stored unit is neither a string nor a DataArray. Previously
        this case silently returned ``None``, which ended up as the text
        ``"(None)"`` in the resulting unit.
    """
    _u = units.get(placeholder, None)

    if _u is None:
        # unknown placeholder: hand it back so it stays visible in the unit
        return f"{{{placeholder}}}"

    if isinstance(_u, str):
        return _u

    if isinstance(_u, xr.DataArray):
        return _get_placeholder_value(_u, placeholder, index)

    raise NotImplementedError(
        f"Unit of placeholder '{placeholder}' has unsupported type "
        f"{type(_u).__name__}; expected str or xarray.DataArray."
    )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _get_placeholder_value(_u, placeholder, index=0):
    """Pick the replacement for ``placeholder`` from a 1-d unit array.

    The lookup is attempted in this order:

    1. ``placeholder`` is used as a coordinate label of the single
       dimension of ``_u``.
    2. The ``index``-th element of ``_u`` is used.
    3. The placeholder is returned wrapped in braces.

    Raises
    ------
    GutsBaseError
        If ``_u`` does not have exactly one dimension.
    """
    if len(_u.dims) != 1:
        raise GutsBaseError(
            "Currently only 1 dimensional units are supported"
        )
    dim = _u.dims[0]

    # A placeholder that names a coordinate is the most explicit form and
    # therefore wins over the positional lookup.
    try:
        return str(_u.sel({dim:placeholder}).values)
    except KeyError:
        pass

    # Otherwise fall back to the i-th value of the unit array.
    try:
        return str(_u[index].values)
    except IndexError:
        pass

    # Nothing matched: keep the placeholder in the template.
    return f"{{{placeholder}}}"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _walk_nested_dict(dct, func):
    """Apply ``func`` to every leaf of a (possibly nested) dictionary.

    Parameters
    ----------
    dct : dict
        Dictionary whose values are either leaves or further dictionaries.
    func : callable
        Called as ``func(key, value)`` for each non-dict value. It must
        return a dictionary, which is merged into the result at the level
        of the processed key.

    Returns
    -------
    dict
        New dictionary with the same nesting as ``dct``, where every leaf
        entry has been replaced by the entries returned from ``func``.
    """
    result = {}
    for key, value in dct.items():
        if isinstance(value, dict):
            # recurse so dictionaries of any depth are handled, not only
            # a single level of nesting
            result[key] = _walk_nested_dict(value, func)
        else:
            result.update(func(key, value))
    return result
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def derive_explicit_units(config: Config, unit: xr.DataArray, parsing_func: Optional[Callable] = None) -> xr.Dataset:
    """Resolve the unit templates of all model parameters into a Dataset.

    Parameters
    ----------
    config : Config
        Provides the model parameters, ``guts_base.unit_time`` (injected
        for the ``T`` placeholder), ``guts_base.unit_format_pint`` (format
        spec applied to the parsed pint units) and the exposure dimensions.
    unit : xr.DataArray
        Units of the exposure coordinates; injected for the ``X``
        placeholder and, per coordinate, under the coordinate name.
    parsing_func : callable, optional
        Called as ``parsing_func(key, value)`` for each resolved unit and
        expected to return ``{key: parsed}``. By default the unit is parsed
        with pint and formatted with ``unit_format_pint``.

    Returns
    -------
    xr.Dataset
        One variable per model parameter along a ``metric`` dimension with
        the single coordinate ``"unit"``. Parameters with per-coordinate
        units additionally span the exposure dimension.

    Raises
    ------
    GutsBaseError
        If the default parser meets a unit whose magnitude is not 1.
    NotImplementedError
        If the exposure data does not have exactly one dimension besides
        the batch dimension and the x dimension.
    """
    # here we have the base units
    T = config.guts_base.unit_time
    fmt_unit = config.guts_base.unit_format_pint
    units_dict = unit.to_pandas().to_dict()

    parsed_units = parse_units(
        model_parameters=config.model_parameters,
        units={"X": unit, "T": T, **units_dict}, # type: ignore
    )

    if parsing_func is None:
        def parse_func(key, value) -> Dict[str, Any]:
            upint = ureg.parse_expression(value)
            # explicit check instead of ``assert`` so the validation is not
            # removed when Python runs with -O and the error names the unit
            if not upint.magnitude == 1.0:
                raise GutsBaseError(
                    f"Unit '{value}' of '{key}' must have a magnitude of 1, "
                    f"but has a magnitude of {upint.magnitude}."
                )
            upint = format(upint.u, fmt_unit)
            return {key: upint}
    else:
        parse_func = parsing_func

    pint_parsed_units = _walk_nested_dict(parsed_units, parse_func)

    # the exposure dimension is whatever remains after removing the batch
    # dimension and the x dimension
    exposure_dim = [
        k for k in config.data_structure.exposure.dimensions
        if k not in [config.simulation.batch_dimension, config.simulation.x_dimension]
    ]

    if len(exposure_dim) > 1:
        raise NotImplementedError(
            "More than three exposure dimensions are currently not implemented."
        )
    elif len(exposure_dim) == 0:
        raise NotImplementedError(
            "Exposure should have one extra dimension, even if only one coordinate is present"
        )
    else:
        exposure_dim = exposure_dim[0]

    units = xr.Dataset({
        "metric": ["unit"],
        exposure_dim: list(units_dict.keys()),
        ** {
            # plain strings only span "metric"; per-coordinate dictionaries
            # are expanded along the exposure dimension first
            key: ("metric", [val]) if isinstance(val, str)
            else xr.Dataset(val).to_array(exposure_dim, name=key).expand_dims({"metric": ["unit"]})
            for key, val in pint_parsed_units.items()
        }
    })
    return units
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _get_unit_from_dataframe_index(df: pd.DataFrame) -> str:
    """Return the text inside the first ``[...]`` of the index name.

    The index name of ``df`` is converted to a string and searched for
    square brackets with non-empty content. An empty string is returned
    when no such brackets are found.
    """
    found = re.search(r'\[(?P<content>[^\]]+)\]', str(df.index.name))
    return found.group('content') if found else ""
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _convert_units(
    units: xr.DataArray,
    target_units: Mapping[str,str] = {"default": "{x}->{x}"}
) -> Tuple[xr.DataArray, xr.DataArray]:
    """Converts units of values associated with the exposure dimension

    Parameters
    ----------
    units : xr.DataArray
        1-dimensional array holding one unit string per coordinate.
    target_units : Mapping[str, str]
        Conversion rules of the form ``"<transform>-><target>"``, keyed by
        coordinate. ``{x}`` in either part is replaced with the current
        unit of the coordinate. The entry ``"default"`` is used for
        coordinates without a rule of their own and is only required when
        such coordinates exist. The default mapping is never modified.

    Returns
    -------
    Tuple[xr.DataArray, xr.DataArray]
        The target unit and the conversion factor (magnitude of the
        converted transform expression) per coordinate, both along the
        dimension of ``units``.

    Raises
    ------
    GutsBaseError
        If ``units`` is not 1-dimensional or if the converted units do not
        equal the parsed target units.
    KeyError
        If a coordinate has no rule and ``target_units`` has no
        ``"default"`` entry.

    TODO: Converting before inference could be problem for the calibration, because
    it is usually good if the values are both not too small and not too large
    """

    if len(units.dims) != 1:
        raise GutsBaseError(
            "GutsBase_convert_exposure_units only supports 1 dimensional exposure units"
        )

    _dim = units.dims[0]
    _coordinates = units.coords[_dim]

    converted_units = {}
    _target_units = {}

    for coord in _coordinates.values:
        unit = str(units.sel({_dim: coord}).values)

        # get item from config; "default" is only looked up when the
        # coordinate has no rule of its own, so a mapping that lists every
        # coordinate does not need a "default" entry
        if coord in target_units:
            rule = target_units[coord]
        else:
            rule = target_units["default"]

        # split transformation expression from target expression
        transform, target = rule.split("->")
        # insert unit from observations coordinates
        transform = transform.strip(" ").format(x=unit)
        target = target.strip(" ").format(x=unit)

        # parse and convert units
        new_unit = ureg.parse_expression(transform).to(target)
        converted_units.update({coord: new_unit})
        _target_units.update({coord: target})

    # assert whether the converted units are the same as the target units
    # so the target units can be used, because the converted units may reduce
    # to dimensionless quantities.
    if not all(
        cu.units == ureg.parse_expression(tu)
        for cu, tu in zip(converted_units.values(), _target_units.values())
    ):
        raise GutsBaseError(
            f"Mismatch between target units {_target_units} and converted units " +
            f"{converted_units}."
        )

    _conversion_factors = {k: cu.magnitude for k, cu in converted_units.items()}
    new_unit_coords = xr.Dataset(_target_units).to_array(dim=_dim)
    conversion_factor_coords = xr.Dataset(_conversion_factors).to_array(dim=_dim)

    return new_unit_coords, conversion_factor_coords
|
guts_base/sim/utils.py
ADDED