hestia-earth-models 0.73.0__py3-none-any.whl → 0.73.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. hestia_earth/models/akagiEtAl2011/utils.py +4 -2
  2. hestia_earth/models/aware/scarcityWeightedWaterUse.py +8 -7
  3. hestia_earth/models/cache_sites.py +8 -4
  4. hestia_earth/models/chaudharyBrooks2018/damageToTerrestrialEcosystemsLandTransformation.py +14 -11
  5. hestia_earth/models/chaudharyBrooks2018/utils.py +4 -2
  6. hestia_earth/models/config/Cycle.json +35 -37
  7. hestia_earth/models/config/Site.json +26 -24
  8. hestia_earth/models/cycle/completeness/electricityFuel.py +1 -1
  9. hestia_earth/models/cycle/completeness/freshForage.py +1 -1
  10. hestia_earth/models/cycle/input/hestiaAggregatedData.py +1 -2
  11. hestia_earth/models/cycle/product/economicValueShare.py +1 -1
  12. hestia_earth/models/environmentalFootprintV3_1/environmentalFootprintSingleOverallScore.py +1 -1
  13. hestia_earth/models/environmentalFootprintV3_1/soilQualityIndexLandOccupation.py +11 -11
  14. hestia_earth/models/environmentalFootprintV3_1/soilQualityIndexLandTransformation.py +14 -11
  15. hestia_earth/models/faostat2018/utils.py +26 -13
  16. hestia_earth/models/geospatialDatabase/utils.py +13 -10
  17. hestia_earth/models/hestia/aboveGroundCropResidue.py +6 -5
  18. hestia_earth/models/hestia/cropResidueManagement.py +3 -2
  19. hestia_earth/models/hestia/default_emissions.py +1 -1
  20. hestia_earth/models/hestia/default_resourceUse.py +1 -1
  21. hestia_earth/models/hestia/excretaKgMass.py +7 -9
  22. hestia_earth/models/hestia/seed_emissions.py +1 -1
  23. hestia_earth/models/hestia/stockingDensityAnimalHousingAverage.py +1 -1
  24. hestia_earth/models/hestia/waterSalinity.py +1 -1
  25. hestia_earth/models/ipcc2019/aboveGroundBiomass.py +2 -4
  26. hestia_earth/models/ipcc2019/belowGroundBiomass.py +2 -4
  27. hestia_earth/models/ipcc2019/biomass_utils.py +1 -1
  28. hestia_earth/models/ipcc2019/ch4ToAirAquacultureSystems.py +1 -1
  29. hestia_earth/models/ipcc2019/ch4ToAirOrganicSoilCultivation.py +3 -4
  30. hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +2 -4
  31. hestia_earth/models/ipcc2019/co2ToAirOrganicSoilCultivation.py +2 -3
  32. hestia_earth/models/ipcc2019/n2OToAirCropResidueBurningDirect.py +1 -1
  33. hestia_earth/models/ipcc2019/nonCo2EmissionsToAirNaturalVegetationBurning.py +2 -3
  34. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_1.py +2 -3
  35. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_2.py +3 -4
  36. hestia_earth/models/ipcc2019/organicCarbonPerHa_utils.py +2 -3
  37. hestia_earth/models/mocking/search-results.json +1582 -1582
  38. hestia_earth/models/pooreNemecek2018/freshwaterWithdrawalsDuringCycle.py +43 -5
  39. hestia_earth/models/site/grouped_measurement.py +1 -1
  40. hestia_earth/models/utils/background_emissions.py +1 -1
  41. hestia_earth/models/utils/impact_assessment.py +29 -14
  42. hestia_earth/models/utils/lookup.py +5 -1
  43. hestia_earth/models/utils/measurement.py +0 -1
  44. hestia_earth/models/version.py +1 -1
  45. {hestia_earth_models-0.73.0.dist-info → hestia_earth_models-0.73.2.dist-info}/METADATA +3 -2
  46. {hestia_earth_models-0.73.0.dist-info → hestia_earth_models-0.73.2.dist-info}/RECORD +54 -59
  47. tests/models/geospatialDatabase/test_utils.py +12 -1
  48. tests/models/ipcc2019/test_organicCarbonPerHa_tier_2.py +1 -1
  49. tests/models/pooreNemecek2018/test_freshwaterWithdrawalsDuringCycle.py +12 -0
  50. tests/models/utils/test_array_builders.py +1 -1
  51. tests/models/utils/test_impact_assessment.py +29 -13
  52. hestia_earth/models/utils/array_builders.py +0 -590
  53. hestia_earth/models/utils/descriptive_stats.py +0 -49
  54. hestia_earth/models/utils/stats.py +0 -429
  55. tests/models/utils/test_descriptive_stats.py +0 -50
  56. tests/models/utils/test_stats.py +0 -186
  57. {hestia_earth_models-0.73.0.dist-info → hestia_earth_models-0.73.2.dist-info}/LICENSE +0 -0
  58. {hestia_earth_models-0.73.0.dist-info → hestia_earth_models-0.73.2.dist-info}/WHEEL +0 -0
  59. {hestia_earth_models-0.73.0.dist-info → hestia_earth_models-0.73.2.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,13 @@
1
+ import json
1
2
  from unittest.mock import patch
2
3
  from hestia_earth.schema import TermTermType
4
+ from tests.utils import fixtures_path
3
5
 
4
- from hestia_earth.models.geospatialDatabase.utils import get_region_factor, get_area_size
6
+ from hestia_earth.models.geospatialDatabase import MODEL
7
+ from hestia_earth.models.geospatialDatabase.utils import get_region_factor, get_area_size, _get_boundary_area_size
5
8
 
6
9
  class_path = 'hestia_earth.models.geospatialDatabase.utils'
10
+ fixtures_folder = f"{fixtures_path}/{MODEL}/utils"
7
11
 
8
12
  AREA = 1000
9
13
  COUNTRY = {
@@ -26,3 +30,10 @@ def test_get_area_size(*args):
26
30
  site['boundary'] = {'type': 'Polygon'}
27
31
  site['boundaryArea'] = AREA
28
32
  assert get_area_size(site) == AREA
33
+
34
+
35
+ def test_get_boundary_area_size():
36
+ with open(f"{fixtures_folder}/site.jsonld", encoding='utf-8') as f:
37
+ site = json.load(f)
38
+
39
+ assert _get_boundary_area_size(boundary=site.get('boundary')) == 4896.1559583013795
@@ -3,7 +3,7 @@ from numpy.testing import assert_array_almost_equal
3
3
  from numpy.typing import NDArray
4
4
  from pytest import mark
5
5
 
6
- from hestia_earth.models.utils.array_builders import discrete_uniform_2d, repeat_single
6
+ from hestia_earth.utils.stats import discrete_uniform_2d, repeat_single
7
7
 
8
8
  from hestia_earth.models.ipcc2019.organicCarbonPerHa import MODEL, TERM_ID
9
9
  from hestia_earth.models.ipcc2019.organicCarbonPerHa_tier_2 import (
@@ -46,3 +46,15 @@ def test_run_data_complete(*args):
46
46
 
47
47
  value = run(impact)
48
48
  assert value == expected
49
+
50
+
51
+ @patch(f"{class_path}._new_indicator", side_effect=fake_new_indicator)
52
+ def test_run_with_waterRegime(*args):
53
+ with open(f"{fixtures_folder}/with-waterRegime/impact-assessment.jsonld", encoding='utf-8') as f:
54
+ impact = json.load(f)
55
+
56
+ with open(f"{fixtures_folder}/with-waterRegime/result.jsonld", encoding='utf-8') as f:
57
+ expected = json.load(f)
58
+
59
+ value = run(impact)
60
+ assert value == expected
@@ -3,7 +3,7 @@ from numpy.testing import assert_array_equal, assert_allclose
3
3
  from numpy.typing import NDArray
4
4
  from pytest import mark
5
5
 
6
- from hestia_earth.models.utils.array_builders import (
6
+ from hestia_earth.utils.stats import (
7
7
  avg_run_in_columnwise, avg_run_in_rowwise, correlated_normal_2d, discrete_uniform_1d, discrete_uniform_2d,
8
8
  gen_seed, grouped_avg, normal_1d, normal_2d, plus_minus_uncertainty_to_normal_1d,
9
9
  plus_minus_uncertainty_to_normal_2d, repeat_1d_array_as_columns, repeat_array_as_columns, repeat_array_as_rows,
@@ -1,4 +1,6 @@
1
- from hestia_earth.models.utils.impact_assessment import impact_emission_lookup_value, get_region_id
1
+ import pytest
2
+
3
+ from hestia_earth.models.utils.impact_assessment import impact_emission_lookup_value, get_region_id, get_country_id
2
4
 
3
5
 
4
6
  def test_impact_emission_lookup_value():
@@ -17,16 +19,30 @@ def test_impact_emission_lookup_value():
17
19
  assert impact_emission_lookup_value('', '', impact, 'co2EqGwp100ExcludingClimate-CarbonFeedbacksIpcc2013') == 2800
18
20
 
19
21
 
20
- def test_get_region_id():
21
- impact = {'country': {'@id': ''}}
22
+ @pytest.mark.parametrize(
23
+ 'impact,expected',
24
+ [
25
+ ({}, None),
26
+ ({'country': {'@id': ''}}, None),
27
+ ({'country': {'@id': 'region-world'}}, 'region-world'),
28
+ ({'country': {'@id': 'GADM-AUS'}}, 'GADM-AUS'),
29
+ ({'site': {'country': {'@id': 'GADM-AUS'}, 'region': {'@id': 'GADM-AUS.101_1'}}}, 'GADM-AUS.101_1'),
30
+ ({'site': {'region': {'@id': 'GADM-ZAF.5.1.2_1'}}}, 'GADM-ZAF.5_1'),
31
+ ]
32
+ )
33
+ def test_get_region_id(impact: dict, expected: str):
34
+ assert get_region_id(impact) == expected, expected
35
+
22
36
 
23
- impact['country']['@id'] = 'region-world'
24
- assert get_region_id(impact) == 'region-world'
25
- impact['country']['@id'] = 'GADM-AUS'
26
- assert get_region_id(impact) == 'GADM-AUS'
27
- impact['country']['@id'] = 'GADM-AUS.101_1'
28
- assert get_region_id(impact) == 'GADM-AUS.101_1'
29
- impact['country']['@id'] = 'GADM-AUS.1.2_1'
30
- assert get_region_id(impact) == 'GADM-AUS.1_1'
31
- impact['country']['@id'] = 'GADM-ZAF.5.1.2_1'
32
- assert get_region_id(impact) == 'GADM-ZAF.5_1'
37
+ @pytest.mark.parametrize(
38
+ 'impact,expected',
39
+ [
40
+ ({}, None),
41
+ ({'country': {'@id': ''}}, None),
42
+ ({'country': {'@id': 'region-world'}}, 'region-world'),
43
+ ({'country': {'@id': 'GADM-AUS'}}, 'GADM-AUS'),
44
+ ({'site': {'country': {'@id': 'GADM-AUS'}}}, 'GADM-AUS'),
45
+ ]
46
+ )
47
+ def test_get_country_id(impact: dict, expected: str):
48
+ assert get_country_id(impact) == expected, expected
@@ -1,590 +0,0 @@
1
- """
2
- Based on code by Cool Farm Tool:
3
- https://gitlab.com/MethodsCFT/coolfarm-soc/-/blob/main/src/cfasoc/builders.py
4
- """
5
- import hashlib
6
- from numpy import cumsum, dot, full, linalg, hstack, random, mean, vstack
7
- from numpy.typing import NDArray, DTypeLike
8
- from typing import Union
9
-
10
- from .stats import calc_z_critical, truncnorm_rvs
11
-
12
-
13
- def repeat_single(shape: tuple, value: float, dtype: DTypeLike = None) -> NDArray:
14
- """
15
- Repeat a single value to form an array of a defined shape.
16
-
17
- Parameters
18
- ----------
19
- shape : tuple
20
- Shape (rows, columns).
21
- value : float
22
- Value to be repeated.
23
- dtype : DTypeLike, optional
24
- The desired data-type for the array.
25
-
26
- Returns
27
- -------
28
- NDArray
29
- Array with repeated value.
30
- """
31
- return full(shape=shape, fill_value=value, dtype=dtype)
32
-
33
-
34
- def repeat_array_as_columns(n_iterations: int, arr: NDArray) -> NDArray:
35
- """
36
- Repeat a numpy array horizontally as columns.
37
-
38
- Parameters
39
- ----------
40
- n_iterations : int
41
- Number of times the columns should be repeated.
42
- arr : NDArray
43
- Array to repeat.
44
-
45
- Returns
46
- -------
47
- NDArray
48
- Repeated array.
49
- """
50
- return hstack([arr for _ in range(n_iterations)])
51
-
52
-
53
- def repeat_array_as_rows(n_iterations: int, arr: NDArray) -> NDArray:
54
- """
55
- Repeat a numpy array vertically as rows.
56
-
57
- Parameters
58
- ----------
59
- n_iterations : int
60
- Number of times the rows should be repeated.
61
- arr : NDArray
62
- Array to repeat.
63
-
64
- Returns
65
- -------
66
- NDArray
67
- Repeated array.
68
- """
69
- return vstack([arr for _ in range(n_iterations)])
70
-
71
-
72
- def repeat_1d_array_as_columns(n_columns: int, column: NDArray) -> NDArray:
73
- """
74
- Repeat a column (NDArray) to form an array of a defined shape
75
-
76
- Parameters
77
- ----------
78
- n_columns : int
79
- How many times the column (NDArray) should be repeated.
80
- column : NDArray
81
- The column (NDArray) to be repeated.
82
-
83
- Returns
84
- -------
85
- NDArray
86
- Repeated array.
87
- """
88
- return vstack([column for _ in range(n_columns)]).transpose()
89
-
90
-
91
- def discrete_uniform_1d(
92
- shape: tuple, low: float, high: float, seed: Union[int, random.Generator, None] = None
93
- ) -> NDArray:
94
- """
95
- Sample from a discrete uniform distribution and produce an array of a specified shape.
96
- All rows in a specified column will have the same sample value, but each column will be different (1 dimensional
97
- variability).
98
-
99
- Parameters
100
- ----------
101
- shape : tuple
102
- Shape (rows, columns).
103
- low : float
104
- Lower bound of the discrete uniform distribution to be sampled.
105
- high : float
106
- Upper bound of the discrete uniform distribution to be sampled.
107
- seed : int | Generator | None, optional
108
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
109
- fresh, unpredictable entropy will be pulled from the OS.
110
-
111
- Returns
112
- -------
113
- NDArray
114
- Array of samples with 1 dimensional variability.
115
- """
116
- n_rows, n_columns = shape
117
- rng = random.default_rng(seed)
118
- return repeat_array_as_rows(
119
- n_rows,
120
- rng.uniform(low=low, high=high, size=n_columns)
121
- )
122
-
123
-
124
- def discrete_uniform_2d(
125
- shape: tuple, low: float, high: float, seed: Union[int, random.Generator, None] = None
126
- ) -> NDArray:
127
- """
128
- Sample from a discrete uniform distribution and produce an array of a specified shape.
129
- All rows and columns contain different sample values (2 dimensional variability).
130
-
131
- Parameters
132
- ----------
133
- shape : tuple
134
- Shape (rows, columns).
135
- low : float
136
- Lower bound of the discrete uniform distribution to be sampled.
137
- high : float
138
- Upper bound of the discrete uniform distribution to be sampled.
139
- seed : int | Generator | None, optional
140
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
141
- fresh, unpredictable entropy will be pulled from the OS.
142
-
143
- Returns
144
- -------
145
- NDArray
146
- Array of samples with 2 dimensional variability.
147
- """
148
- rng = random.default_rng(seed)
149
- return rng.uniform(low=low, high=high, size=shape)
150
-
151
-
152
- def triangular_1d(
153
- shape: tuple, low: float, high: float, mode: float, seed: Union[int, random.Generator, None] = None
154
- ) -> NDArray:
155
- """
156
- Sample from a triangular distribution and produce an array of a specified shape.
157
- All rows in a specified column will have the same sample value, but each column will be different (1 dimensional
158
- variability).
159
-
160
- Parameters
161
- ----------
162
- shape : tuple
163
- Shape (rows, columns).
164
- low : float
165
- Lower bound of the triangular distribution to be sampled.
166
- high : float
167
- Upper bound of the triangular distribution to be sampled.
168
- mode : float
169
- Mode of the triangular distribution to be sampled.
170
- seed : int | Generator | None, optional
171
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
172
- fresh, unpredictable entropy will be pulled from the OS.
173
-
174
- Returns
175
- -------
176
- NDArray
177
- Array of samples with 1 dimensional variability.
178
- """
179
- n_rows, n_columns = shape
180
- rng = random.default_rng(seed)
181
- return repeat_array_as_rows(
182
- n_rows,
183
- rng.triangular(left=low, mode=mode, right=high, size=n_columns)
184
- )
185
-
186
-
187
- def triangular_2d(
188
- shape: tuple, low: float, high: float, mode: float, seed: Union[int, random.Generator, None] = None
189
- ) -> NDArray:
190
- """
191
- Sample from a triangular distribution and produce an array of a specified shape.
192
- All rows and columns contain different sample values (2 dimensional variability).
193
-
194
- Parameters
195
- ----------
196
- shape : tuple
197
- Shape (rows, columns).
198
- low : float
199
- Lower bound of the triangular distribution to be sampled.
200
- high : float
201
- Upper bound of the triangular distribution to be sampled.
202
- mode : float
203
- Mode of the triangular distribution to be sampled.
204
- seed : int | Generator | None, optional
205
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
206
- fresh, unpredictable entropy will be pulled from the OS.
207
-
208
- Returns
209
- -------
210
- NDArray
211
- Array of samples with 2 dimensional variability.
212
- """
213
- rng = random.default_rng(seed)
214
- return rng.triangular(left=low, mode=mode, right=high, size=shape)
215
-
216
-
217
- def normal_1d(
218
- shape: tuple, mu: float, sigma: float, seed: Union[int, random.Generator, None] = None
219
- ) -> NDArray:
220
- """
221
- Sample from a normal distribution and produce an array of a specified shape.
222
- All rows in a specified column will have the same sample value, but each column will be different (1 dimensional
223
- variability).
224
-
225
- Parameters
226
- ----------
227
- shape : tuple
228
- Shape (rows, columns).
229
- mu : float
230
- Mean of the normal distribution to be sampled.
231
- sigma : float
232
- Standard deviation of the normal distribution to be sampled.
233
- seed : int | Generator | None, optional
234
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
235
- fresh, unpredictable entropy will be pulled from the OS.
236
-
237
- Returns
238
- -------
239
- NDArray
240
- Array of samples with 1 dimensional variability.
241
- """
242
- n_rows, n_columns = shape
243
- rng = random.default_rng(seed)
244
- return repeat_array_as_rows(
245
- n_rows,
246
- rng.normal(loc=mu, scale=sigma, size=n_columns)
247
- )
248
-
249
-
250
- def normal_2d(
251
- shape: tuple, mu: float, sigma: float, seed: Union[int, random.Generator, None] = None
252
- ) -> NDArray:
253
- """
254
- Sample from a normal distribution and produce an array of a specified shape.
255
- All rows and columns contain different sample values (2 dimensional variability).
256
-
257
- Parameters
258
- ----------
259
- shape : tuple
260
- Shape (rows, columns).
261
- mu : float
262
- Mean of the normal distribution to be sampled.
263
- sigma : float
264
- Standard deviation of the normal distribution to be sampled.
265
- seed : int | Generator | None, optional
266
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
267
- fresh, unpredictable entropy will be pulled from the OS.
268
-
269
- Returns
270
- -------
271
- NDArray
272
- Array of samples with 2 dimensional variability.
273
- """
274
- rng = random.default_rng(seed)
275
- return rng.normal(loc=mu, scale=sigma, size=shape)
276
-
277
-
278
- def truncated_normal_1d(
279
- shape: tuple, mu: float, sigma: float, low: float, high: float, seed: Union[int, random.Generator, None] = None
280
- ) -> NDArray:
281
- """
282
- Sample from a truncated normal distribution and produce an array of a specified shape.
283
- All rows in a specified column will have the same sample value, but each column will be different (1 dimensional
284
- variability).
285
-
286
- Parameters
287
- ----------
288
- shape : tuple
289
- Shape (rows, columns).
290
- mu : float
291
- Mean of the normal distribution to be sampled.
292
- sigma : float
293
- Standard deviation of the normal distribution to be sampled.
294
- low : float
295
- Lower bound of the normal distribution to be sampled.
296
- high : float
297
- Upper bound of the normal distribution to be sampled.
298
- seed : int | Generator | None, optional
299
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
300
- fresh, unpredictable entropy will be pulled from the OS.
301
-
302
- Returns
303
- -------
304
- NDArray
305
- Array of samples with 1 dimensional variability.
306
- """
307
- n_rows, n_columns = shape
308
- return repeat_array_as_rows(
309
- n_rows,
310
- truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=n_columns, seed=seed)
311
- )
312
-
313
-
314
- def truncated_normal_2d(
315
- shape: tuple, mu: float, sigma: float, low: float, high: float, seed: Union[int, random.Generator, None] = None
316
- ) -> NDArray:
317
- """
318
- Sample from a truncated normal distribution and produce an array of a specified shape.
319
- All rows and columns contain different sample values (2 dimensional variability).
320
-
321
- Parameters
322
- ----------
323
- shape : tuple
324
- Shape (rows, columns).
325
- mu : float
326
- Mean of the normal distribution to be sampled.
327
- sigma : float
328
- Standard deviation of the normal distribution to be sampled.
329
- low : float
330
- Lower bound of the normal distribution to be sampled.
331
- high : float
332
- Upper bound of the normal distribution to be sampled.
333
- seed : int | Generator | None, optional
334
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
335
- fresh, unpredictable entropy will be pulled from the OS.
336
-
337
- Returns
338
- -------
339
- NDArray
340
- Array of samples with 2 dimensional variability.
341
- """
342
- return truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=shape, seed=seed)
343
-
344
-
345
- def plus_minus_uncertainty_to_normal_1d(
346
- shape: tuple,
347
- value: float,
348
- uncertainty: float,
349
- confidence_interval: float = 95,
350
- seed: Union[int, random.Generator, None] = None
351
- ) -> NDArray:
352
- """
353
- Return a normally distributed sample given a value and uncertainty expressed as +/- a percentage.
354
-
355
- All rows in a specified column will have the same sample value, but each column will be different (1 dimensional
356
- variability).
357
-
358
- This function has been written to serve Table 5.5b on Page 5.32, Tier 2 Steady State Method for Mineral Soils,
359
- Chapter 5 Cropland, 2019 Refinement to the 2006 IPCC Guidelines for National Greenhouse Gas Inventories. Table 5.5b
360
- notes:
361
-
362
- "Uncertainty is assumed to be ±75% for the N content estimates and ±50% for the lignin content estimates,
363
- expressed as a 95% confidence intervals."
364
-
365
- This function also serves Table 11.2 on Page 11.19, Tier 2 Steady State Method for Mineral Soils, Chapter 11 N2O
366
- Emissions from Managed Soils, and CO2 Emissions from Lime and Urea Application, 2019 Refinement to the 2006 IPCC
367
- Guidelines for National Greenhouse Gas Inventories.
368
-
369
- Parameters
370
- ----------
371
- shape : tuple
372
- Shape (rows, columns).
373
- value : float
374
- Reported value.
375
- uncertainty : float
376
- Uncertainty expressed as +/- a percentage.
377
- confidence_interval : float
378
- Confidence interval the uncertainty represents.
379
- seed : int | Generator | None, optional
380
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
381
- fresh, unpredictable entropy will be pulled from the OS.
382
-
383
- Returns
384
- -------
385
- NDArray
386
- Array of samples with 1 dimensional variability.
387
- """
388
- n_rows, n_columns = shape
389
- n_sds = calc_z_critical(confidence_interval)
390
- sigma = (value * (uncertainty / 100)) / n_sds
391
- return repeat_array_as_rows(
392
- n_rows,
393
- normal_1d(shape=(1, n_columns), mu=value, sigma=sigma, seed=seed)
394
- )
395
-
396
-
397
- def plus_minus_uncertainty_to_normal_2d(
398
- shape: tuple,
399
- value: float,
400
- uncertainty: float,
401
- confidence_interval: float = 95,
402
- seed: Union[int, random.Generator, None] = None
403
- ) -> NDArray:
404
- """
405
- Return a normally distributed sample given a value and uncertainty expressed as +/- a percentage.
406
-
407
- All rows and columns contain different sample values (2 dimensional variability).
408
-
409
- This function has been written to serve Table 5.5b on Page 5.32, Tier 2 Steady State Method for Mineral Soils,
410
- Chapter 5 Cropland, 2019 Refinement to the 2006 IPCC Guidelines for National Greenhouse Gas Inventories. Table 5.5b
411
- notes:
412
-
413
- "Uncertainty is assumed to be ±75% for the N content estimates and ±50% for the lignin content estimates,
414
- expressed as a 95% confidence intervals."
415
-
416
- This function also serves Table 11.2 on Page 11.19, Tier 2 Steady State Method for Mineral Soils, Chapter 11 N2O
417
- Emissions from Managed Soils, and CO2 Emissions from Lime and Urea Application, 2019 Refinement to the 2006 IPCC
418
- Guidelines for National Greenhouse Gas Inventories.
419
-
420
- Parameters
421
- ----------
422
- shape : tuple
423
- Shape (rows, columns).
424
- value : float
425
- Reported value.
426
- uncertainty : float
427
- Uncertainty expressed as +/- a percentage.
428
- confidence_interval : float
429
- Confidence interval the uncertainty represents.
430
- seed : int | Generator | None, optional
431
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
432
- fresh, unpredictable entropy will be pulled from the OS.
433
-
434
- Returns
435
- -------
436
- NDArray
437
- Array of samples with 2 dimensional variability.
438
- """
439
- n_sds = calc_z_critical(confidence_interval)
440
- sigma = (value * (uncertainty / 100)) / n_sds
441
- return normal_2d(shape=shape, mu=value, sigma=sigma, seed=seed)
442
-
443
-
444
- def grouped_avg(arr: NDArray, n: int = 12) -> NDArray:
445
- """ Row-wise averaging of numpy arrays. For example:
446
- 1 2 3
447
- 4 5 6
448
- 7 8 9
449
- 10 11 12
450
- 13 14 15
451
- 16 17 18
452
-
453
- if n = 6, becomes:
454
- 8.5 9.5 10.5
455
-
456
- because:
457
- (1 + 4 + 7 + 10 + 13 + 16) / 6 = 8.5
458
- (2 + 5 + 8 + 11 + 14 + 17) / 6 = 9.5
459
- etc.
460
-
461
- if n = 2, becomes:
462
- 2.5 3.5 4.5
463
- 8.5 9.5 10.5
464
- 14.5 15.5 16.5
465
-
466
- because:
467
- (in column 0) (1 + 4) / 2 = 2.5, (7 + 10) / 2 = 8.5, (13 + 16) / 2 = 14.5
468
- (in column 1) (2 + 5) / 2 = 3.5, (8 + 11) / 2 = 9.5, (14 + 17) / 2 = 15.5
469
-
470
- Source: https://stackoverflow.com/questions/30379311/fast-way-to-take-average-of-every-n-rows-in-a-npy-array
471
-
472
- Parameters
473
- ----------
474
- arr : NDArray
475
- Input array.
476
- n : int, optional
477
- Number of rows to average. Defaults to 12.
478
-
479
- Returns
480
- -------
481
- NDArray
482
- Output array
483
- """
484
- result = cumsum(arr, 0)[n-1::n] / float(n)
485
- result[1:] = result[1:] - result[:-1]
486
- return result
487
-
488
-
489
- def avg_run_in_columnwise(arr: NDArray, n: int):
490
- """
491
- Reduce the first `n` elements of each column in an array by averaging them, while leaving the rest of the array
492
- unmodified.
493
-
494
- Parameters
495
- ----------
496
- arr : NDArray
497
- Input array.
498
- n : int
499
- The number of run-in elements to average.
500
-
501
- Returns
502
- -------
503
- NDArray
504
- The new array where the first element in each column is an average of the run in elements.
505
- """
506
- run_in: NDArray = mean(arr[:n], 0)
507
- return vstack([run_in, arr[n:]])
508
-
509
-
510
- def avg_run_in_rowwise(arr: NDArray, n: int):
511
- """
512
- Reduce the first `n` elements of each row in an array by averaging them, while leaving the rest of the array
513
- unmodified.
514
-
515
- Parameters
516
- ----------
517
- arr : NDArray
518
- Input array.
519
- n : int
520
- The number of run-in elements to average.
521
-
522
- Returns
523
- -------
524
- NDArray
525
- The new array where the first element in each row is an average of the run in elements.
526
- """
527
- return avg_run_in_columnwise(arr.transpose(), n).transpose()
528
-
529
-
530
- def gen_seed(node: dict, *args: tuple[str]) -> int:
531
- """
532
- Generate a seed based on a node's `@id` and optional args so that rng is the same each time the model is re-run.
533
- """
534
- node_id = node.get("@id", "")
535
- seed_str = "".join([node_id] + [str(arg) for arg in args])
536
- hashed = hashlib.shake_128(seed_str.encode(), usedforsecurity=False).hexdigest(4)
537
- return abs(int(hashed, 16))
538
-
539
-
540
- def correlated_normal_2d(
541
- n_iterations: int,
542
- means: NDArray,
543
- sds: NDArray,
544
- correlation_matrix: NDArray,
545
- seed: Union[int, random.Generator, None] = None,
546
- ) -> NDArray:
547
- """
548
- Generate correlated random samples from a multivariate normal distribution with specified means, standard
549
- deviations, and a correlation matrix. Each row represents a different variable (e.g., different years), and each
550
- column represents a different iteration (sample).
551
-
552
- Parameters
553
- ----------
554
- n_iterations : int
555
- The number of samples (iterations) to generate for each variable.
556
- means : NDArray
557
- An array of mean values for each variable (row).
558
- sds : NDArray
559
- An array of standard deviations for each variable (row).
560
- correlation_matrix : NDArray
561
- A positive-definite matrix representing the correlations between the variables (rows).
562
- seed : int | Generator | None, optional
563
- A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
564
- fresh, unpredictable entropy will be pulled from the OS.
565
-
566
- Returns
567
- -------
568
- NDArray
569
- A 2D array of shape (len(means), n_iterations), where each row corresponds to a different variable and each
570
- column corresponds to a sample iteration. The values in each row are correlated according to the provided
571
- correlation matrix.
572
- """
573
- # Generate independent random samples for each year
574
- shape = (len(means), n_iterations)
575
- independent_samples = normal_2d(shape, 0, 1, seed=seed)
576
-
577
- # Apply Cholesky decomposition to the correlation matrix
578
- cholesky_decomp = linalg.cholesky(correlation_matrix)
579
-
580
- # Apply Cholesky transformation to introduce correlation across years (rows) for each sample
581
- correlated_samples = dot(cholesky_decomp, independent_samples)
582
-
583
- # Scale by standard deviations and shift by means
584
- scaled_samples = (
585
- correlated_samples
586
- * repeat_1d_array_as_columns(n_iterations, sds)
587
- + repeat_1d_array_as_columns(n_iterations, means)
588
- )
589
-
590
- return scaled_samples