hestia-earth-models 0.73.1__py3-none-any.whl → 0.73.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/models/akagiEtAl2011/utils.py +3 -1
- hestia_earth/models/config/Cycle.json +35 -37
- hestia_earth/models/config/Site.json +26 -24
- hestia_earth/models/geospatialDatabase/utils.py +7 -6
- hestia_earth/models/hestia/aboveGroundCropResidue.py +6 -5
- hestia_earth/models/hestia/cropResidueManagement.py +3 -2
- hestia_earth/models/ipcc2019/aboveGroundBiomass.py +2 -4
- hestia_earth/models/ipcc2019/belowGroundBiomass.py +2 -4
- hestia_earth/models/ipcc2019/biomass_utils.py +1 -1
- hestia_earth/models/ipcc2019/ch4ToAirOrganicSoilCultivation.py +3 -4
- hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +2 -4
- hestia_earth/models/ipcc2019/co2ToAirOrganicSoilCultivation.py +2 -3
- hestia_earth/models/ipcc2019/nonCo2EmissionsToAirNaturalVegetationBurning.py +2 -3
- hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_1.py +2 -3
- hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_2.py +3 -4
- hestia_earth/models/ipcc2019/organicCarbonPerHa_utils.py +2 -3
- hestia_earth/models/mocking/search-results.json +660 -660
- hestia_earth/models/version.py +1 -1
- {hestia_earth_models-0.73.1.dist-info → hestia_earth_models-0.73.2.dist-info}/METADATA +3 -2
- {hestia_earth_models-0.73.1.dist-info → hestia_earth_models-0.73.2.dist-info}/RECORD +26 -31
- tests/models/geospatialDatabase/test_utils.py +12 -1
- tests/models/ipcc2019/test_organicCarbonPerHa_tier_2.py +1 -1
- tests/models/utils/test_array_builders.py +1 -1
- hestia_earth/models/utils/array_builders.py +0 -590
- hestia_earth/models/utils/descriptive_stats.py +0 -49
- hestia_earth/models/utils/stats.py +0 -429
- tests/models/utils/test_descriptive_stats.py +0 -50
- tests/models/utils/test_stats.py +0 -186
- {hestia_earth_models-0.73.1.dist-info → hestia_earth_models-0.73.2.dist-info}/LICENSE +0 -0
- {hestia_earth_models-0.73.1.dist-info → hestia_earth_models-0.73.2.dist-info}/WHEEL +0 -0
- {hestia_earth_models-0.73.1.dist-info → hestia_earth_models-0.73.2.dist-info}/top_level.txt +0 -0
@@ -1,429 +0,0 @@
|
|
1
|
-
from functools import reduce
|
2
|
-
from numpy import abs, array, concatenate, exp, float64, inf, pi, prod, random, sign, sqrt
|
3
|
-
from numpy.typing import NDArray
|
4
|
-
from typing import Union
|
5
|
-
|
6
|
-
|
7
|
-
def calc_z_critical(
    confidence_interval: float,
    n_sided: int = 2
) -> float64:
    """
    Convert a confidence interval into the matching z-critical value of the standard normal distribution.

    Parameters
    ----------
    confidence_interval : float
        The confidence interval, expressed as a percentage between 0 and 100%.
    n_sided : int, optional
        The number of tails the significance level is shared between (default value = `2`).

    Returns
    -------
    float64
        The z-critical value, i.e. the standard normal quantile evaluated at `1 - alpha / n_sided`, where `alpha` is
        the significance level `1 - confidence_interval / 100`.
    """
    significance_level = 1 - confidence_interval / 100
    tail_probability = significance_level / n_sided
    return _normal_ppf(1 - tail_probability)
|
28
|
-
|
29
|
-
|
30
|
-
def _normal_ppf(q: float64, tol: float64 = 1e-10) -> float64:
    """
    Calculates the percent point function (PPF), also known as the inverse cumulative distribution function (CDF), of a
    standard normal distribution using the Newton-Raphson method.

    The iteration starts from `0` and stops as soon as two successive estimates differ by less than `tol`, or after
    100 iterations, whichever comes first.

    Parameters
    ----------
    q : float64
        The quantile at which to evaluate the PPF.
    tol : float64, optional
        The tolerance for the Newton-Raphson method. Defaults to 1e-10.

    Returns
    -------
    float64
        The PPF value at the given quantile. `q == 1` returns `inf` and `q == 0` returns `-inf` without iterating.
    """
    INITIAL_GUESS = 0
    MAX_ITER = 100

    # The boundary quantiles have no finite root, so they are answered directly.
    if q == 1:
        return inf
    if q == 0:
        return -inf

    x = INITIAL_GUESS
    for _ in range(MAX_ITER):
        x_next = x - (_normal_cdf(x) - q) / _normal_pdf(x)
        # Written as `not (... >= tol)` so that a NaN step also stops the iteration and keeps the previous estimate.
        # Once this triggers every further step would reproduce the same `x`, so leaving the loop early does not
        # change the result; it only avoids re-evaluating the CDF and PDF.
        if not abs(x_next - x) >= tol:
            break
        x = x_next

    return x
|
60
|
-
|
61
|
-
|
62
|
-
def _normal_cdf(x: float64) -> float64:
    """
    Evaluate the cumulative distribution function (CDF) of the standard normal distribution at a single point.

    The value is derived from the module's own error function approximation (`_erf`) via
    `0.5 * (1 + erf(x / sqrt(2)))`.

    Parameters
    ----------
    x : float64
        The point at which to evaluate the CDF.

    Returns
    -------
    float64
        The CDF value at the given point.
    """
    scaled_x = x / sqrt(2)
    return 0.5 * (1 + _erf(scaled_x))
|
78
|
-
|
79
|
-
|
80
|
-
def _erf(x: float64) -> float64:
|
81
|
-
"""
|
82
|
-
Approximates the error function of a standard normal distribution using a numerical approximation based on
|
83
|
-
Abramowitz and Stegun formula 7.1.26.
|
84
|
-
|
85
|
-
Parameters
|
86
|
-
----------
|
87
|
-
x : float64
|
88
|
-
The input value.
|
89
|
-
|
90
|
-
Returns
|
91
|
-
-------
|
92
|
-
float64
|
93
|
-
The approximated value of the error function.
|
94
|
-
"""
|
95
|
-
# constants
|
96
|
-
A_1 = 0.254829592
|
97
|
-
A_2 = -0.284496736
|
98
|
-
A_3 = 1.421413741
|
99
|
-
A_4 = -1.453152027
|
100
|
-
A_5 = 1.061405429
|
101
|
-
P = 0.3275911
|
102
|
-
|
103
|
-
# Save the sign of x
|
104
|
-
sign_ = sign(x)
|
105
|
-
x_ = abs(x)
|
106
|
-
|
107
|
-
# A&S formula 7.1.26
|
108
|
-
t = 1.0 / (1.0 + P * x_)
|
109
|
-
y = 1.0 - (((((A_5 * t + A_4) * t) + A_3) * t + A_2) * t + A_1) * t * exp(-x_ * x_)
|
110
|
-
|
111
|
-
return sign_ * y
|
112
|
-
|
113
|
-
|
114
|
-
def _normal_pdf(x: float64) -> float64:
|
115
|
-
"""
|
116
|
-
Calculates the probability density function (PDF) of a standard normal distribution for a single value.
|
117
|
-
|
118
|
-
Parameters
|
119
|
-
----------
|
120
|
-
x : float64
|
121
|
-
The point at which to evaluate the PDF.
|
122
|
-
|
123
|
-
Returns
|
124
|
-
-------
|
125
|
-
float64
|
126
|
-
The PDF value at the given point.
|
127
|
-
"""
|
128
|
-
return 1 / sqrt(2 * pi) * exp(-0.5 * x**2)
|
129
|
-
|
130
|
-
|
131
|
-
def _calc_confidence_level(
    z_critical: float64,
    n_sided: int = 2
) -> float64:
    """
    Convert a z-critical value back into the confidence interval it corresponds to.

    Parameters
    ----------
    z_critical : float64
        The z-critical value of the standard normal distribution.
    n_sided : int, optional
        The number of tails (default value = `2`).

    Returns
    -------
    float64
        The confidence interval as a percentage, computed as `(1 - alpha) * 100` where `alpha` is the upper tail
        probability beyond `z_critical` multiplied by `n_sided`.
    """
    upper_tail_probability = 1 - _normal_cdf(z_critical)
    alpha = upper_tail_probability * n_sided
    return (1 - alpha) * 100
|
152
|
-
|
153
|
-
|
154
|
-
def calc_required_iterations_monte_carlo(
    confidence_level: float,
    precision: float,
    sd: float
) -> int:
    """
    Estimate how many iterations a Monte Carlo simulation needs for its sample mean to reach a desired precision at a
    given confidence level.

    Parameters
    ----------
    confidence_level : float
        The confidence level, as a percentage out of 100, that the precision should be subject to (i.e., we are x%
        sure that the sample mean deviates from the true population mean by less than the desired precision).
    precision : float
        The desired precision as a floating point value (i.e., if the Monte Carlo simulation will be used to estimate
        `organicCarbonPerHa` to a precision of 100 kg C ha-1 this value should be 100).
    sd : float
        The standard deviation of the sample. This can be estimated by running the model 500 times (a number that does
        not take too much time to run but is large enough for the sample standard deviation to converge reasonably
        well).

    Returns
    -------
    int
        The required number of iterations, `((sd * z_critical) / precision) ** 2`, rounded to the nearest integer.
    """
    z_critical = calc_z_critical(confidence_level)
    root_iterations = (sd * z_critical) / precision
    return round(root_iterations ** 2)
|
183
|
-
|
184
|
-
|
185
|
-
def calc_confidence_level_monte_carlo(
    n_iterations: int,
    precision: float,
    sd: float
) -> float:
    """
    Work out how confident we can be that the sample mean of a Monte Carlo simulation lies within the desired
    precision of the true population mean.

    Parameters
    ----------
    n_iterations : int
        The number of iterations that the Monte Carlo simulation was run for.
    precision : float
        The desired precision as a floating point value (i.e., if the Monte Carlo simulation will be used to estimate
        `organicCarbonPerHa` to a precision of 100 kg C ha-1 this value should be 100).
    sd : float
        The standard deviation of the sample.

    Returns
    -------
    float
        The confidence level, as a percentage out of 100, that the precision is subject to (i.e., we are x% sure that
        the sample mean deviates from the true population mean by less than the desired precision).
    """
    # The precision expressed in standard errors of the mean is the z-critical value to convert.
    z_critical = precision * sqrt(n_iterations) / sd
    return _calc_confidence_level(z_critical)
|
211
|
-
|
212
|
-
|
213
|
-
def calc_precision_monte_carlo(
    confidence_level: float,
    n_iterations: int,
    sd: float
) -> float:
    """
    Work out the +/- precision achieved by a Monte Carlo simulation at a desired confidence level.

    Parameters
    ----------
    confidence_level : float
        The confidence level, as a percentage out of 100, that the precision should be subject to (i.e., we are x%
        sure that the sample mean deviates from the true population mean by less than the desired precision).
    n_iterations : int
        The number of iterations that the Monte Carlo simulation was run for.
    sd : float
        The standard deviation of the sample.

    Returns
    -------
    float
        The precision of the sample mean estimated by the Monte Carlo model as a floating point value with the same
        units as the estimated mean.
    """
    scaled_sd = sd * calc_z_critical(confidence_level)
    return scaled_sd / sqrt(n_iterations)
|
239
|
-
|
240
|
-
|
241
|
-
def truncnorm_rvs(
    a: float,
    b: float,
    loc: float,
    scale: float,
    shape: Union[int, tuple[int, ...]],
    seed: Union[int, random.Generator, None] = None
) -> NDArray:
    """
    Generate random samples from a truncated normal distribution. Unlike the `scipy` equivalent, the `a` and `b` values
    are the abscissae at which we wish to truncate the distribution (as opposed to the number of standard deviations
    from `loc`).

    Samples are produced by rejection sampling: batches are drawn from the untruncated normal distribution and only
    the values inside `[a, b]` are kept, until enough samples have been collected.

    Parameters
    ----------
    a : float
        The lower bound of the distribution.
    b : float
        The upper bound of the distribution.
    loc : float
        Mean ("centre") of the distribution.
    scale : float
        Standard deviation (spread or "width") of the distribution. Must be non-negative.
    shape : int | tuple[int, ...]
        Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn.
    seed : int | Generator | None, optional
        A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
        fresh, unpredictable entropy will be pulled from the OS.

    Returns
    -------
    NDArray
        Array of samples.

    Raises
    ------
    ValueError
        If at least one sample is requested but no draw could ever be accepted: the bounds are not ordered (`a > b`
        or a bound is NaN), the interval is a single point while `scale > 0`, or `scale == 0` while `loc` lies outside
        `[a, b]`. Without this check the rejection loop would never terminate.
    """
    # `prod(())` is the float 1.0, which is not accepted as a sample count; int() keeps the count integral.
    size = int(prod(shape))

    # Only validate when samples are actually requested, so empty requests keep returning an empty array.
    if size > 0:
        if not a <= b:
            raise ValueError(f"Lower bound `a` ({a}) must not exceed upper bound `b` ({b}).")
        if a == b and scale > 0:
            raise ValueError(f"Bounds `a` and `b` are both {a}; a continuous distribution cannot be sampled there.")
        if scale == 0 and not a <= loc <= b:
            raise ValueError(f"`scale` is 0 and `loc` ({loc}) lies outside of the bounds [{a}, {b}].")

    samples = array([])
    rng = random.default_rng(seed)

    while samples.size < size:
        # Draw twice as many candidates as are still missing, to reduce the number of passes needed.
        samples_temp = rng.normal(loc, scale, (size - samples.size) * 2)
        valid_samples = samples_temp[(a <= samples_temp) & (samples_temp <= b)]
        samples = concatenate([samples, valid_samples])

    return samples[:size].reshape(shape)
|
285
|
-
|
286
|
-
|
287
|
-
def add_normal_distributions(
    mu_1: float, sigma_1: float, mu_2: float, sigma_2: float, rho: float = 0
) -> tuple[float, float]:
    """
    Add together two normal distributions, with optional correlation.

    Given two normal distributions **X<sub>1</sub> ~ N(mu<sub>1</sub>, sigma<sub>1</sub><sup>2</sup>)** and
    **X<sub>2</sub> ~ N(mu<sub>2</sub>, sigma<sub>2</sub><sup>2</sup>)**, this function calculates the mean and
    standard deviation of the sum **Z = X<sub>1</sub> + X<sub>2</sub>**, taking into account the correlation between
    them.

    n.b. Positive correlations (`rho` > `0`) increase the standard deviation of **Z** because positively correlated
    variables tend to move together, increasing combined uncertainty. Negative correlations (`rho` < `0`) reduce the
    standard deviation since the variables move in opposite directions, cancelling out some of the variability.
    Independent variables (`rho` = `0`) result in an intermediate level of uncertainty.

    Parameters
    ----------
    mu_1 : float
        Mean of the first normal distribution (X<sub>1</sub>).
    sigma_1 : float
        Standard deviation of the first normal distribution (X<sub>1</sub>).
    mu_2 : float
        Mean of the second normal distribution (X<sub>2</sub>).
    sigma_2 : float
        Standard deviation of the second normal distribution (X<sub>2</sub>).
    rho : float, optional
        Correlation coefficient between **X<sub>1</sub>** and **X<sub>2</sub>**. `rho` must be a value between -1
        (perfectly negative correlation) and 1 (perfectly positive correlation). Default is 0 (independent variables).

    Returns
    -------
    tuple[float, float]
        A tuple in the shape `(mu_sum, sigma_sum)` containing the mean and standard deviation of the distribution
        **Z = X<sub>1</sub> + X<sub>2</sub>**.
    """
    independent_variance = sigma_1 ** 2 + sigma_2 ** 2
    covariance_term = 2 * rho * sigma_1 * sigma_2
    return mu_1 + mu_2, sqrt(independent_variance + covariance_term)
|
330
|
-
|
331
|
-
|
332
|
-
def subtract_normal_distributions(
    mu_1: float, sigma_1: float, mu_2: float, sigma_2: float, rho: float = 0
) -> tuple[float, float]:
    """
    Subtract a normal distribution from another, with optional correlation.

    Given two normal distributions **X<sub>1</sub> ~ N(mu<sub>1</sub>, sigma<sub>1</sub><sup>2</sup>)** and
    **X<sub>2</sub> ~ N(mu<sub>2</sub>, sigma<sub>2</sub><sup>2</sup>)**, this function calculates the mean and
    standard deviation of the difference **Z = X<sub>1</sub> - X<sub>2</sub>**, taking into account the correlation
    between them.

    n.b. Positive correlations (`rho` > `0`) reduce the standard deviation of **Z** because positively correlated
    variables tend to move together, cancelling out some of the variability when subtracted. Negative correlations
    (`rho` < `0`) increase the standard deviation since the variables move in opposite directions, amplifying the
    variability when subtracted. Independent variables (`rho` = `0`) result in an intermediate level of uncertainty.

    Parameters
    ----------
    mu_1 : float
        Mean of the first normal distribution (X<sub>1</sub>).
    sigma_1 : float
        Standard deviation of the first normal distribution (X<sub>1</sub>).
    mu_2 : float
        Mean of the second normal distribution (X<sub>2</sub>).
    sigma_2 : float
        Standard deviation of the second normal distribution (X<sub>2</sub>).
    rho : float, optional
        Correlation coefficient between **X<sub>1</sub>** and **X<sub>2</sub>**. `rho` must be a value between -1
        (perfectly negative correlation) and 1 (perfectly positive correlation). Default is 0 (independent variables).

    Returns
    -------
    tuple[float, float]
        A tuple in the shape `(mu_diff, sigma_diff)` containing the mean and standard deviation of the distribution
        **Z = X<sub>1</sub> - X<sub>2</sub>**.
    """
    independent_variance = sigma_1 ** 2 + sigma_2 ** 2
    covariance_term = 2 * rho * sigma_1 * sigma_2
    mu_diff = mu_1 - mu_2
    sigma_diff = sqrt(independent_variance - covariance_term)
    return mu_diff, sigma_diff
|
375
|
-
|
376
|
-
|
377
|
-
def lerp_normal_distributions(
    mu_1: float,
    sigma_1: float,
    mu_2: float,
    sigma_2: float,
    alpha: float,
    rho: float = 0
) -> tuple[float, float]:
    """
    Linearly interpolate between two normal distributions, with optional correlation.

    Given two normal distributions **X<sub>1</sub> ~ N(mu<sub>1</sub>, sigma<sub>1</sub><sup>2</sup>)** and
    **X<sub>2</sub> ~ N(mu<sub>2</sub>, sigma<sub>2</sub><sup>2</sup>)**, this function calculates the mean and
    standard deviation of the interpolated distribution **Z = (1 - alpha) * X<sub>1</sub> + alpha * X<sub>2</sub>**,
    taking into account the correlation between them.

    n.b. Positive correlations (`rho` > `0`) increase the standard deviation of **Z** because positively correlated
    variables tend to move together, increasing combined uncertainty. Negative correlations (`rho` < `0`) reduce the
    standard deviation since the variables move in opposite directions, cancelling out some of the variability.
    Independent variables (`rho` = `0`) result in an intermediate level of uncertainty.

    Parameters
    ----------
    mu_1 : float
        Mean of the first normal distribution (X<sub>1</sub>).
    sigma_1 : float
        Standard deviation of the first normal distribution (X<sub>1</sub>).
    mu_2 : float
        Mean of the second normal distribution (X<sub>2</sub>).
    sigma_2 : float
        Standard deviation of the second normal distribution (X<sub>2</sub>).
    alpha : float
        Interpolation factor (0 <= alpha <= 1). A value of 0 results in X1, a value of 1 results in X2, and values
        between 0 and 1 interpolate between the two. Values below 0 and above 1 will extrapolate beyond
        X<sub>1</sub> and X<sub>2</sub> respectively.
    rho : float, optional
        Correlation coefficient between **X<sub>1</sub>** and **X<sub>2</sub>**. `rho` must be a value between -1
        (perfectly negative correlation) and 1 (perfectly positive correlation). Default is 0 (independent variables).

    Returns
    -------
    tuple[float, float]
        A tuple in the shape `(mu_Z, sigma_Z)` containing the mean and standard deviation of the distribution
        **Z = (1 - alpha) * X<sub>1</sub> + alpha * X<sub>2</sub>**.
    """
    weight_1 = 1 - alpha

    weighted_variance_1 = weight_1 ** 2 * sigma_1 ** 2
    weighted_variance_2 = alpha ** 2 * sigma_2 ** 2
    covariance_term = 2 * alpha * weight_1 * rho * sigma_1 * sigma_2

    interpolated_mean = weight_1 * mu_1 + alpha * mu_2
    interpolated_sigma = sqrt(weighted_variance_1 + weighted_variance_2 + covariance_term)
    return interpolated_mean, interpolated_sigma
|
@@ -1,50 +0,0 @@
|
|
1
|
-
from numpy import array
|
2
|
-
from pytest import mark
|
3
|
-
|
4
|
-
from hestia_earth.schema import MeasurementStatsDefinition
|
5
|
-
|
6
|
-
from hestia_earth.models.utils.descriptive_stats import calc_descriptive_stats
|
7
|
-
|
8
|
-
# Expected statistics when the 3x3 test array is treated as one flat sample (axis=None).
EXPECTED_FLATTENED = dict(
    value=[5],
    sd=[2.581989],
    min=[1],
    max=[9],
    statsDefinition="simulated",
    observations=[9],
)

# Expected statistics per column of the 3x3 test array (axis=0).
EXPECTED_COLUMNWISE = dict(
    value=[4, 5, 6],
    sd=[2.44949, 2.44949, 2.44949],
    min=[1, 2, 3],
    max=[7, 8, 9],
    statsDefinition="simulated",
    observations=[3, 3, 3],
)

# Expected statistics per row of the 3x3 test array (axis=1).
EXPECTED_ROWWISE = dict(
    value=[2, 5, 8],
    sd=[0.816497, 0.816497, 0.816497],
    min=[1, 4, 7],
    max=[3, 6, 9],
    statsDefinition="simulated",
    observations=[3, 3, 3],
)
|
34
|
-
|
35
|
-
|
36
|
-
@mark.parametrize(
    "axis, expected",
    [(None, EXPECTED_FLATTENED), (0, EXPECTED_COLUMNWISE), (1, EXPECTED_ROWWISE)],
    ids=["flattened", "columnwise", "rowwise"]
)
@mark.parametrize("stats_definition", [MeasurementStatsDefinition.SIMULATED, "simulated"], ids=["Enum", "str"])
def test_calc_descriptive_stats(stats_definition, axis, expected):
    """
    Check `calc_descriptive_stats` on a 3x3 array for each axis setting, passing the stats definition both as an
    enum member and as a plain string.
    """
    grid = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    assert calc_descriptive_stats(grid, stats_definition, axis=axis) == expected
|
tests/models/utils/test_stats.py
DELETED
@@ -1,186 +0,0 @@
|
|
1
|
-
from numpy import inf, sqrt
|
2
|
-
from numpy.testing import assert_almost_equal
|
3
|
-
from pytest import mark
|
4
|
-
|
5
|
-
from hestia_earth.models.utils.stats import (
|
6
|
-
_calc_confidence_level, add_normal_distributions, calc_confidence_level_monte_carlo, calc_precision_monte_carlo,
|
7
|
-
calc_required_iterations_monte_carlo, calc_z_critical, lerp_normal_distributions, subtract_normal_distributions
|
8
|
-
)
|
9
|
-
|
10
|
-
|
11
|
-
# (confidence_level, z_critical) reference pairs for 1 sided intervals
_ONE_SIDED_Z_CRITICAL = [(0, -inf), (50, 0), (80, 0.8416), (90, 1.2816), (95, 1.6449), (99, 2.3263), (100, inf)]

# (confidence_level, z_critical) reference pairs for 2 sided intervals
_TWO_SIDED_Z_CRITICAL = [(0, 0), (50, 0.6745), (80, 1.2816), (90, 1.6449), (95, 1.9600), (99, 2.5758), (100, inf)]

# confidence_level, n_sided, z_critical
CONFIDENCE_INTERVAL_PARAMS = [
    (confidence_level, n_sided, z_critical)
    for n_sided, reference_pairs in ((1, _ONE_SIDED_Z_CRITICAL), (2, _TWO_SIDED_Z_CRITICAL))
    for confidence_level, z_critical in reference_pairs
]
|
30
|
-
|
31
|
-
|
32
|
-
@mark.parametrize(
    "confidence_level, n_sided, z_critical",
    CONFIDENCE_INTERVAL_PARAMS,
    ids=[f"z={z}, n={n}" for _, n, z in CONFIDENCE_INTERVAL_PARAMS]
)
def test_calc_confidence_level(confidence_level, n_sided, z_critical):
    """Each reference z-critical value must map back to its confidence level, to 2 decimal places."""
    actual = _calc_confidence_level(z_critical, n_sided=n_sided)
    assert_almost_equal(actual, confidence_level, decimal=2)
|
40
|
-
|
41
|
-
|
42
|
-
@mark.parametrize(
    "confidence_level, n_sided, z_critical",
    CONFIDENCE_INTERVAL_PARAMS,
    ids=[f"conf={conf}, n={n}" for conf, n, _ in CONFIDENCE_INTERVAL_PARAMS]
)
def test_calc_z_critical(confidence_level, n_sided, z_critical):
    """Each reference confidence level must map to its z-critical value, to 4 decimal places."""
    actual = calc_z_critical(confidence_level, n_sided=n_sided)
    assert_almost_equal(actual, z_critical, decimal=4)
|
50
|
-
|
51
|
-
|
52
|
-
# Shared reference cases for the three Monte Carlo helpers. Each tuple is used by every test below, which derives
# one of the values from the remaining ones.
# confidence_level, n_iterations, precision, sd
MONTE_CARLO_PARAMS = [
    (95, 80767, 0.01, 1.45),
    (95, 1110, 0.01, 0.17),
    (99, 1917, 0.01, 0.17),
    (50, 102, 100.18, 1500)
]
|
59
|
-
|
60
|
-
|
61
|
-
@mark.parametrize(
    "confidence_level, n_iterations, precision, sd",
    MONTE_CARLO_PARAMS,
    ids=[f"n={n}, prec={prec}, sd={sd}" for _, n, prec, sd in MONTE_CARLO_PARAMS]
)
def test_calc_confidence_level_monte_carlo(confidence_level, n_iterations, precision, sd):
    """The confidence level derived from iterations, precision and sd must match to 2 decimal places."""
    actual = calc_confidence_level_monte_carlo(n_iterations, precision, sd)
    assert_almost_equal(actual, confidence_level, decimal=2)
|
69
|
-
|
70
|
-
|
71
|
-
@mark.parametrize(
    "confidence_level, n_iterations, precision, sd",
    MONTE_CARLO_PARAMS,
    ids=[f"conf={conf}, prec={prec}, sd={sd}" for conf, _, prec, sd in MONTE_CARLO_PARAMS]
)
def test_calc_required_iterations_monte_carlo(confidence_level, n_iterations, precision, sd):
    """The iteration count derived from confidence level, precision and sd must match exactly."""
    actual = calc_required_iterations_monte_carlo(confidence_level, precision, sd)
    assert actual == n_iterations
|
79
|
-
|
80
|
-
|
81
|
-
@mark.parametrize(
    "confidence_level, n_iterations, precision, sd",
    MONTE_CARLO_PARAMS,
    ids=[f"conf={conf}, n={n}, sd={sd}" for conf, n, _, sd in MONTE_CARLO_PARAMS]
)
def test_calc_precision_monte_carlo(confidence_level, n_iterations, precision, sd):
    """The precision derived from confidence level, iterations and sd must match to 2 decimal places."""
    actual = calc_precision_monte_carlo(confidence_level, n_iterations, sd)
    assert_almost_equal(actual, precision, decimal=2)
|
89
|
-
|
90
|
-
|
91
|
-
# Shared reference cases for adding and subtracting two normal distributions. The first five values are the inputs;
# the add test reads the `sum_*` columns and the subtract test reads the `diff_*` columns.
# mu_1, sigma_1, mu_2, sigma_2, rho, sum_mean, sum_sigma, diff_mean, diff_sigma
PARAMS_NORMAL_DIST = [
    # 2 standard normal distributions, perfectly negative correlation
    (0, 1, 0, 1, -1, 0, 0, 0, 2),
    # 2 standard normal distributions, negative correlation
    (0, 1, 0, 1, -0.5, 0, 1, 0, sqrt(3)),
    # 2 standard normal distributions, no correlation
    (0, 1, 0, 1, 0, 0, sqrt(2), 0, sqrt(2)),
    # 2 standard normal distributions, positive correlation
    (0, 1, 0, 1, 0.5, 0, sqrt(3), 0, 1),
    # 2 standard normal distributions, perfectly positive correlation
    (0, 1, 0, 1, 1, 0, 2, 0, 0),
    # different normal distributions, perfectly negative correlation
    (50000, 3000, 45000, 9000, -1, 95000, 6000, 5000, 12000),
    # different normal distributions, no correlation
    (50000, 3000, 45000, 9000, 0, 95000, sqrt(90000000), 5000, sqrt(90000000)),
    # different normal distributions, perfectly positive correlation
    (50000, 3000, 45000, 9000, 1, 95000, 12000, 5000, 6000)
]
# Human-readable test ids built from the input columns only (the expected values are discarded via `*_`).
IDS_ADD_NORMAL_DIST = [
    f"N({mu_1}, {sigma_1}^2) + N({mu_2}, {sigma_2}^2), rho: {rho}"
    for mu_1, sigma_1, mu_2, sigma_2, rho, *_ in PARAMS_NORMAL_DIST
]
IDS_SUBTRACT_DIST = [
    f"N({mu_1}, {sigma_1}^2) - N({mu_2}, {sigma_2}^2), rho: {rho}"
    for mu_1, sigma_1, mu_2, sigma_2, rho, *_ in PARAMS_NORMAL_DIST
]
|
118
|
-
|
119
|
-
|
120
|
-
@mark.parametrize(
    "mu_1, sigma_1, mu_2, sigma_2, rho, sum_mean, sum_sigma, _diff_mean, _diff_sigma",
    PARAMS_NORMAL_DIST,
    ids=IDS_ADD_NORMAL_DIST
)
def test_add_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, rho, sum_mean, sum_sigma, _diff_mean, _diff_sigma):
    """
    The sum of two normal distributions must have the expected mean and standard deviation.

    The `_diff_*` parameters belong to the shared parameter table and are unused here.
    """
    result = add_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, rho)
    # Compare with a tolerance: several expected sigmas are floating point values (e.g. `sqrt(2)`), so bit-exact
    # equality would tie the test to the exact order of operations inside the implementation.
    assert_almost_equal(result, (sum_mean, sum_sigma))
|
128
|
-
|
129
|
-
|
130
|
-
@mark.parametrize(
    "mu_1, sigma_1, mu_2, sigma_2, rho, _sum_mean, _sum_sigma, diff_mean, diff_sigma",
    PARAMS_NORMAL_DIST,
    ids=IDS_SUBTRACT_DIST
)
def test_subtract_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, rho, _sum_mean, _sum_sigma, diff_mean, diff_sigma):
    """
    The difference of two normal distributions must have the expected mean and standard deviation.

    The `_sum_*` parameters belong to the shared parameter table and are unused here.
    """
    result = subtract_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, rho)
    # Compare with a tolerance: several expected sigmas are floating point values (e.g. `sqrt(3)`), so bit-exact
    # equality would tie the test to the exact order of operations inside the implementation.
    assert_almost_equal(result, (diff_mean, diff_sigma))
|
138
|
-
|
139
|
-
|
140
|
-
# Reference cases for interpolating between two normal distributions. `alpha` values of -0.5 and 1.5 cover
# extrapolation beyond the first and second distribution respectively.
# mu_1, sigma_1, mu_2, sigma_2, alpha, rho, Z_mean, Z_sigma
PARAMS_LERP_NORMAL_DIST = [
    # 2 standard normal distributions, perfectly negative correlation
    (0, 1, 0, 1, 0, -1, 0, 1),
    (0, 1, 0, 1, 0.5, -1, 0, 0),
    (0, 1, 0, 1, 1, -1, 0, 1),
    # 2 standard normal distributions, no correlation
    (0, 1, 0, 1, 0, 0, 0, 1),
    (0, 1, 0, 1, 0.5, 0, 0, sqrt(0.5)),
    (0, 1, 0, 1, 1, 0, 0, 1),
    # 2 standard normal distributions, perfectly positive correlation
    (0, 1, 0, 1, 0, 1, 0, 1),
    (0, 1, 0, 1, 0.5, 1, 0, 1),
    (0, 1, 0, 1, 1, 1, 0, 1),
    # different normal distributions, perfectly negative correlation
    (10000, 3000, 5000, 2500, -0.5, -1, 12500, 5750),
    (10000, 3000, 5000, 2500, 0, -1, 10000, 3000),
    (10000, 3000, 5000, 2500, 0.5, -1, 7500, 250),
    (10000, 3000, 5000, 2500, 1, -1, 5000, 2500),
    (10000, 3000, 5000, 2500, 1.5, -1, 2500, 5250),
    # different normal distributions, no correlation
    (10000, 3000, 5000, 2500, -0.5, 0, 12500, sqrt(21812500)),
    (10000, 3000, 5000, 2500, 0, 0, 10000, 3000),
    (10000, 3000, 5000, 2500, 0.5, 0, 7500, sqrt(3812500)),
    (10000, 3000, 5000, 2500, 1, 0, 5000, 2500),
    (10000, 3000, 5000, 2500, 1.5, 0, 2500, sqrt(16312500)),
    # different normal distributions, perfectly positive correlation
    (10000, 3000, 5000, 2500, -0.5, 1, 12500, 3250),
    (10000, 3000, 5000, 2500, 0, 1, 10000, 3000),
    (10000, 3000, 5000, 2500, 0.5, 1, 7500, 2750.0),
    (10000, 3000, 5000, 2500, 1, 1, 5000, 2500),
    (10000, 3000, 5000, 2500, 1.5, 1, 2500, 2250)
]
# Human-readable test ids built from the input columns only (the expected values are discarded via `*_`).
# NOTE(review): the id text joins the two distributions with "-", as the subtraction ids do, although these cases
# interpolate between them - confirm whether that label is intended.
IDS_LERP_NORMAL_DIST = [
    f"N({mu_1}, {sigma_1}^2) - N({mu_2}, {sigma_2}^2), alpha: {alpha}, rho: {rho}"
    for mu_1, sigma_1, mu_2, sigma_2, alpha, rho, *_ in PARAMS_LERP_NORMAL_DIST
]
|
177
|
-
|
178
|
-
|
179
|
-
@mark.parametrize(
    "mu_1, sigma_1, mu_2, sigma_2, alpha, rho, Z_mean, Z_sigma",
    PARAMS_LERP_NORMAL_DIST,
    ids=IDS_LERP_NORMAL_DIST
)
def test_lerp_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, alpha, rho, Z_mean, Z_sigma):
    """The interpolated distribution must have the expected mean and standard deviation."""
    result = lerp_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, alpha, rho)
    # Compare with a tolerance: several expected sigmas are floating point values (e.g. `sqrt(0.5)`), so bit-exact
    # equality would tie the test to the exact order of operations inside the implementation.
    assert_almost_equal(result, (Z_mean, Z_sigma))
|
File without changes
|
File without changes
|
File without changes
|