hestia-earth-models 0.64.4__py3-none-any.whl → 0.64.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hestia-earth-models might be problematic.
- hestia_earth/models/blonkConsultants2016/ch4ToAirNaturalVegetationBurning.py +5 -9
- hestia_earth/models/blonkConsultants2016/co2ToAirAboveGroundBiomassStockChangeLandUseChange.py +5 -9
- hestia_earth/models/blonkConsultants2016/n2OToAirNaturalVegetationBurningDirect.py +6 -13
- hestia_earth/models/cycle/animal/input/properties.py +6 -0
- hestia_earth/models/cycle/completeness/soilAmendment.py +3 -2
- hestia_earth/models/cycle/concentrateFeed.py +10 -4
- hestia_earth/models/cycle/input/properties.py +6 -0
- hestia_earth/models/cycle/liveAnimal.py +2 -2
- hestia_earth/models/cycle/milkYield.py +3 -3
- hestia_earth/models/cycle/otherSitesArea.py +59 -0
- hestia_earth/models/cycle/otherSitesUnusedDuration.py +9 -8
- hestia_earth/models/cycle/pastureSystem.py +3 -2
- hestia_earth/models/cycle/product/properties.py +6 -0
- hestia_earth/models/cycle/siteArea.py +83 -0
- hestia_earth/models/cycle/stockingDensityAnimalHousingAverage.py +28 -16
- hestia_earth/models/cycle/utils.py +1 -1
- hestia_earth/models/environmentalFootprintV3/soilQualityIndexLandOccupation.py +128 -0
- hestia_earth/models/environmentalFootprintV3/utils.py +17 -0
- hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +17 -6
- hestia_earth/models/ipcc2006/n2OToAirOrganicSoilCultivationDirect.py +17 -6
- hestia_earth/models/ipcc2019/ch4ToAirEntericFermentation.py +3 -1
- hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +904 -0
- hestia_earth/models/ipcc2019/co2ToAirSoilOrganicCarbonStockChangeManagementChange.py +70 -618
- hestia_earth/models/mocking/search-results.json +392 -320
- hestia_earth/models/pooreNemecek2018/saplings.py +10 -7
- hestia_earth/models/site/management.py +18 -14
- hestia_earth/models/utils/__init__.py +38 -0
- hestia_earth/models/utils/array_builders.py +63 -52
- hestia_earth/models/utils/blank_node.py +137 -82
- hestia_earth/models/utils/descriptive_stats.py +3 -239
- hestia_earth/models/utils/feedipedia.py +15 -2
- hestia_earth/models/utils/landCover.py +9 -0
- hestia_earth/models/utils/lookup.py +13 -2
- hestia_earth/models/utils/measurement.py +3 -28
- hestia_earth/models/utils/stats.py +429 -0
- hestia_earth/models/utils/term.py +15 -3
- hestia_earth/models/utils/time_series.py +90 -0
- hestia_earth/models/version.py +1 -1
- {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.6.dist-info}/METADATA +1 -1
- {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.6.dist-info}/RECORD +63 -49
- tests/models/blonkConsultants2016/test_ch4ToAirNaturalVegetationBurning.py +2 -2
- tests/models/blonkConsultants2016/test_co2ToAirAboveGroundBiomassStockChangeLandUseChange.py +2 -2
- tests/models/blonkConsultants2016/test_n2OToAirNaturalVegetationBurningDirect.py +2 -2
- tests/models/cycle/completeness/test_soilAmendment.py +1 -1
- tests/models/cycle/test_liveAnimal.py +1 -1
- tests/models/cycle/test_milkYield.py +1 -1
- tests/models/cycle/test_otherSitesArea.py +68 -0
- tests/models/cycle/test_siteArea.py +51 -0
- tests/models/cycle/test_stockingDensityAnimalHousingAverage.py +2 -2
- tests/models/environmentalFootprintV3/test_soilQualityIndexLandOccupation.py +136 -0
- tests/models/ipcc2019/test_co2ToAirCarbonStockChange_utils.py +50 -0
- tests/models/ipcc2019/test_co2ToAirSoilOrganicCarbonStockChangeManagementChange.py +1 -39
- tests/models/pooreNemecek2018/test_saplings.py +1 -1
- tests/models/site/test_management.py +3 -153
- tests/models/utils/test_array_builders.py +67 -6
- tests/models/utils/test_blank_node.py +191 -7
- tests/models/utils/test_descriptive_stats.py +2 -86
- tests/models/utils/test_measurement.py +1 -22
- tests/models/utils/test_stats.py +186 -0
- tests/models/utils/test_time_series.py +88 -0
- {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.6.dist-info}/LICENSE +0 -0
- {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.6.dist-info}/WHEEL +0 -0
- {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.6.dist-info}/top_level.txt +0 -0
hestia_earth/models/pooreNemecek2018/saplings.py

@@ -11,6 +11,7 @@ from .plantationLifespan import TERM_ID as PRACTICE_TERM_ID
 REQUIREMENTS = {
     "Cycle": {
         "completeness.other": "False",
+        "cycleDuration": "> 0",
         "products": [{"@type": "Product", "value": "", "term.termType": "crop"}],
         "practices": [{"@type": "Practice", "value": "", "term.@id": "plantationLifespan"}]
     }
@@ -37,15 +38,16 @@ def _get_value(product: dict):
     return safe_parse_float(get_crop_lookup_value(MODEL, TERM_ID, term_id, LOOKUPS['crop']), None)
 
 
-def _run(product: dict, plantation_duration: float):
+def _run(product: dict, plantation_duration: float, cycleDuration: float):
     value = _get_value(product)
-    return [_input(value / plantation_duration)]
+    return [_input(value / plantation_duration * cycleDuration)]
 
 
 def _should_run_product(product: dict): return _get_value(product) is not None
 
 
 def _should_run(cycle: dict):
+    cycleDuration = cycle.get('cycleDuration')
     term_type_incomplete = _is_term_type_incomplete(cycle, TERM_ID)
     product = next((p for p in cycle.get('products', []) if _should_run_product(p)), None)
     plantation_duration = list_sum(find_term_match(cycle.get('practices', []), PRACTICE_TERM_ID).get('value'), None)
@@ -53,13 +55,14 @@ def _should_run(cycle: dict):
     logRequirements(cycle, model=MODEL, term=TERM_ID,
                     term_type_seed_incomplete=term_type_incomplete,
                     product_id=(product or {}).get('term', {}).get('@id'),
-                    plantation_duration=plantation_duration
+                    plantation_duration=plantation_duration,
+                    cycleDuration=cycleDuration)
 
-    should_run = all([term_type_incomplete, product, plantation_duration])
+    should_run = all([term_type_incomplete, product, plantation_duration, (cycleDuration or 0) > 0])
     logShouldRun(cycle, MODEL, TERM_ID, should_run)
-    return should_run, product, plantation_duration
+    return should_run, product, plantation_duration, cycleDuration
 
 
 def run(cycle: dict):
-    should_run, product, plantation_duration = _should_run(cycle)
-    return _run(product, plantation_duration) if should_run else []
+    should_run, product, plantation_duration, cycleDuration = _should_run(cycle)
+    return _run(product, plantation_duration, cycleDuration) if should_run else []
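
In effect, the model now prorates the sapling requirement to the cycle length instead of charging the full per-lifespan amount. A rough numeric sketch (values are illustrative, not from any HESTIA lookup; plantation_duration and cycleDuration are assumed to share a unit):

value = 1600               # saplings over the full plantation lifespan (crop lookup)
plantation_duration = 20   # plantationLifespan practice value
cycleDuration = 1          # cycle length, same unit as plantation_duration
value / plantation_duration * cycleDuration   # => 80.0 saplings for this cycle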
hestia_earth/models/site/management.py

@@ -160,16 +160,18 @@ def _get_landCover_term_id(product: dict) -> str:
 
 
 def _get_relevant_items(
-
-)
+    cycles: list[dict], item_name: str, relevant_terms: list, date_fill: callable = _default_dates
+):
     """
     Get items from the list of cycles with any of the relevant terms.
     Also adds dates if missing.
     """
     return [
-
+        [
+            item
+            for item in date_fill(cycle=cycle, values=filter_list_term_type(cycle.get(item_name, []), relevant_terms))
+        ]
         for cycle in cycles
-        for item in date_fill(cycle=cycle, values=filter_list_term_type(cycle.get(item_name, []), relevant_terms))
     ]
 
 
@@ -230,18 +232,19 @@ def _has_gap_fill_to_management_set(practices: list) -> list:
 
 
 def _should_run_all_products(cycles: list, site_type: str):
+    products_land_cover = flatten(_get_relevant_items(
+        cycles=cycles,
+        item_name="products",
+        relevant_terms=[TermTermType.LANDCOVER]
+    )) if site_type else []
     products_land_cover = [
         _extract_node_value(
             _include(
                 value=product,
                 keys=["term", "value", "startDate", "endDate", "properties"]
             )
-        ) for product in
-
-            item_name="products",
-            relevant_terms=[TermTermType.LANDCOVER]
-        )
-    ] if site_type else []
+        ) for product in products_land_cover
+    ]
 
     products_crop_forage = _get_relevant_items(
         cycles=cycles,
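
The signature change above also changes the return shape: `_get_relevant_items` now yields one inner list per cycle instead of a single flat list, so call sites that want the old behaviour wrap it in `flatten(...)`, as this hunk does for land-cover products and later hunks do for practices. A toy illustration of the shape difference (item names are made up; `flatten` is the one-level flattener these models already use):

from hestia_earth.utils.tools import flatten

items = [['a1', 'a2'], ['b1']]   # new return: cycle A's items, then cycle B's items
flatten(items)                   # => ['a1', 'a2', 'b1'], the previous flat shape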
@@ -255,10 +258,11 @@ def _should_run_all_products(cycles: list, site_type: str):
                 keys=["startDate", "endDate", "properties"],
                 dest={
                     "term": linked_node(download_hestia(_get_landCover_term_id(product))),
-                    "value": 100
+                    "value": round(100 / len(_products), 2)
                 }
             )
-        for
+        for _products in products_crop_forage
+        for product in list(filter(_get_landCover_term_id, _products))
     ] if site_type else []
     dates = sorted(list(set(
         non_empty_list(flatten([[cycle.get('startDate'), cycle.get('endDate')] for cycle in cycles]))
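
The `value` change means each cycle's 100% land cover is now split evenly across that cycle's crop/forage products, rather than every product claiming 100%. For instance, with three matching products in one cycle:

round(100 / 3, 2)   # => 33.33, assigned to each of the three landCover nodes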
@@ -293,7 +297,7 @@ def _should_run(site: dict):
             value=practice,
             keys=["term", "value", "startDate", "endDate"]
         )
-    ) for practice in _get_relevant_items(
+    ) for practice in flatten(_get_relevant_items(
         cycles=cycles,
         item_name="practices",
         relevant_terms=[
@@ -303,7 +307,7 @@ def _should_run(site: dict):
             TermTermType.LANDUSEMANAGEMENT,
             TermTermType.SYSTEM
         ]
-    )
+    ))
     ]
     practices = _has_gap_fill_to_management_set(practices)
     practices = condense_nodes(practices)
hestia_earth/models/utils/__init__.py

@@ -1,6 +1,8 @@
 from os.path import dirname, abspath
 from collections.abc import Generator, Iterable
 from itertools import tee
+from decimal import Decimal
+from statistics import mean
 import sys
 import datetime
 from functools import reduce
@@ -10,6 +12,7 @@ from hestia_earth.schema import SchemaType
 from hestia_earth.utils.api import download_hestia
 from hestia_earth.utils.model import linked_node
 from hestia_earth.utils.tools import flatten, non_empty_list
+from hestia_earth.utils.date import is_in_days, is_in_months
 
 from .constant import Units
 
@@ -94,6 +97,23 @@ def multiply_values(values: list):
     return reduce(operator.mul, filtered_values, 1) if len(filtered_values) > 1 else None
 
 
+def _numeric_weighted_average(values: list):
+    total_weight = sum(Decimal(str(weight)) for _v, weight in values) if values else Decimal(0)
+    weighted_values = [Decimal(str(value)) * Decimal(str(weight)) for value, weight in values]
+    average = sum(weighted_values) / (total_weight if total_weight else 1) if weighted_values else None
+    return None if average is None else float(average)
+
+
+def _bool_weighted_average(values: list):
+    return mean(map(int, values)) >= 0.5
+
+
+def weighted_average(weighted_values: list):
+    values = [v for v, _w in weighted_values]
+    all_boolean = all([isinstance(v, bool) for v in values])
+    return _bool_weighted_average(values) if all_boolean else _numeric_weighted_average(weighted_values)
+
+
 def term_id_prefix(term_id: str): return term_id.split('Kg')[0]
 
 
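
For orientation, how the new `weighted_average` helper behaves, taking `(value, weight)` pairs (sample values are illustrative). Numeric inputs go through `Decimal(str(...))` to avoid float artifacts; all-boolean inputs become a majority vote that ignores the weights:

from hestia_earth.models.utils import weighted_average

# numeric: Decimal-based weighted mean, returned as float
weighted_average([(10, 1), (20, 3)])                   # => 17.5 = (10*1 + 20*3) / 4

# all-boolean: majority vote over the values, weights ignored
weighted_average([(True, 1), (False, 9), (True, 1)])   # => True (2 of 3 are True)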
@@ -156,3 +176,21 @@ def pairwise(iterable):
     a, b = tee(iterable)
     next(b, None)
     return zip(a, b)
+
+
+def full_date_str(date_str: str, is_end: bool = False):
+    """
+    Return the date in format YYYY-MM-dd, by setting the month and day if they are not provided.
+    """
+    return date_str if is_in_days(date_str) else (
+        f"{date_str}-{14 if is_end else 15}" if is_in_months(date_str)
+        else f"{date_str}-{'12-31' if is_end else '01-01'}"
+    )
+
+
+def days_to_years(days):
+    return days / 365
+
+
+def hectar_to_square_meter(value):
+    return value * 10000
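
`full_date_str` pads partial dates: year-only strings snap to the year boundaries, while month-only strings use a mid-month convention (day 15 as a start, day 14 as an end), so a period ending "YYYY-MM" and one starting in the same month remain one day apart. Behaviour read directly from the code above:

from hestia_earth.models.utils import full_date_str

full_date_str('2010')                  # => '2010-01-01'
full_date_str('2010', is_end=True)     # => '2010-12-31'
full_date_str('2010-06')               # => '2010-06-15'
full_date_str('2010-06', is_end=True)  # => '2010-06-14'
full_date_str('2010-06-03')            # => '2010-06-03' (already a full date)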
hestia_earth/models/utils/array_builders.py

@@ -3,11 +3,11 @@ Based on code by Cool Farm Tool:
 https://gitlab.com/MethodsCFT/coolfarm-soc/-/blob/main/src/cfasoc/builders.py
 """
 import hashlib
-from numpy import
+from numpy import cumsum, dot, full, linalg, hstack, random, mean, vstack
 from numpy.typing import NDArray, DTypeLike
 from typing import Union
 
-from .
+from .stats import calc_z_critical, truncnorm_rvs
 
 
 def repeat_single(shape: tuple, value: float, dtype: DTypeLike = None) -> NDArray:
@@ -31,7 +31,7 @@ def repeat_single(shape: tuple, value: float, dtype: DTypeLike = None) -> NDArray:
     return full(shape=shape, fill_value=value, dtype=dtype)
 
 
-def repeat_array_as_columns(n_iterations: int,
+def repeat_array_as_columns(n_iterations: int, arr: NDArray) -> NDArray:
     """
     Repeat a numpy array horizontally as columns.
 
@@ -39,7 +39,7 @@ def repeat_array_as_columns(n_iterations: int, array: NDArray) -> NDArray:
     ----------
     n_iterations : int
         Number of times the columns should be repeated.
-
+    arr : NDArray
         Array to repeat.
 
     Returns
@@ -47,10 +47,10 @@ def repeat_array_as_columns(n_iterations: int, array: NDArray) -> NDArray:
     NDArray
         Repeated array.
     """
-    return hstack([
+    return hstack([arr for _ in range(n_iterations)])
 
 
-def repeat_array_as_rows(n_iterations: int,
+def repeat_array_as_rows(n_iterations: int, arr: NDArray) -> NDArray:
     """
     Repeat a numpy array vertically as rows.
 
@@ -58,7 +58,7 @@ def repeat_array_as_rows(n_iterations: int, array: NDArray) -> NDArray:
     ----------
     n_iterations : int
         Number of times the rows should be repeated.
-
+    arr : NDArray
        Array to repeat.
 
     Returns
@@ -66,7 +66,7 @@ def repeat_array_as_rows(n_iterations: int, array: NDArray) -> NDArray:
     NDArray
         Repeated array.
     """
-    return vstack([
+    return vstack([arr for _ in range(n_iterations)])
 
 
 def repeat_1d_array_as_columns(n_columns: int, column: NDArray) -> NDArray:
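
The two repeat helpers simply tile an array horizontally or vertically; a minimal sketch of the equivalent numpy calls (array values are illustrative):

from numpy import array, hstack, vstack

base = array([[1, 2]])
hstack([base for _ in range(2)])   # repeat_array_as_columns(2, base) => [[1, 2, 1, 2]]
vstack([base for _ in range(2)])   # repeat_array_as_rows(2, base)    => [[1, 2], [1, 2]]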
@@ -307,7 +307,7 @@ def truncated_normal_1d(
     n_rows, n_columns = shape
     return repeat_array_as_rows(
         n_rows,
-
+        truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=n_columns, seed=seed)
     )
 
 
@@ -339,49 +339,7 @@ def truncated_normal_2d(
     NDArray
         Array of samples with 2 dimensional variability.
     """
-    return
-
-
-def _truncnorm_rvs(
-    a: float,
-    b: float,
-    loc: float,
-    scale: float,
-    shape: Union[int, tuple[int, ...]],
-    seed: Union[int, random.Generator, None] = None
-) -> NDArray:
-    """
-    Generate random samples from a truncated normal distribution. Unlike the `scipy` equivalent, the `a` and `b` values
-    are the abscissae at which we wish to truncate the distribution (as opposed to the number of standard deviations
-    from `loc`).
-
-    Parameters
-    ----------
-    loc : float
-        Mean ("centre") of the distribution.
-    scale : float
-        Standard deviation (spread or "width") of the distribution. Must be non-negative.
-    size : int | tuple[int, ...]
-        Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn.
-    seed : int | Generator | None, optional
-        A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
-        fresh, unpredictable entropy will be pulled from the OS.
-
-    Returns
-    -------
-    NDArray
-        Array of samples.
-    """
-    size = prod(shape)
-    samples = array([])
-    rng = random.default_rng(seed)
-
-    while samples.size < size:
-        samples_temp = rng.normal(loc, scale, (size - samples.size) * 2)
-        valid_samples = samples_temp[(a <= samples_temp) & (samples_temp <= b)]
-        samples = concatenate([samples, valid_samples])
-
-    return samples[:size].reshape(shape)
+    return truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=shape, seed=seed)
 
 
 def plus_minus_uncertainty_to_normal_1d(
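
The rejection-sampling helper deleted here moves into the new hestia_earth/models/utils/stats.py (listed above with +429 lines) as the public `truncnorm_rvs`. Note its bounds are absolute abscissae, not scipy-style standard-deviation offsets from `loc`. A minimal sketch, assuming the moved function keeps the signature and behaviour shown in this hunk:

from hestia_earth.models.utils.stats import truncnorm_rvs

samples = truncnorm_rvs(a=0, b=10, loc=5, scale=2, shape=(3, 4), seed=42)
# every sample lies in [0, 10]; the flat draws are reshaped to the requested shape
assert ((samples >= 0) & (samples <= 10)).all() and samples.shape == (3, 4)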
@@ -576,3 +534,56 @@ def gen_seed(node: dict) -> int:
     node_id = node.get("@id", "")
     hashed = hashlib.shake_128(node_id.encode(), usedforsecurity=False).hexdigest(4)
     return abs(int(hashed, 16))
+
+
+def correlated_normal_2d(
+    n_iterations: int,
+    means: NDArray,
+    sds: NDArray,
+    correlation_matrix: NDArray,
+    seed: Union[int, random.Generator, None] = None,
+) -> NDArray:
+    """
+    Generate correlated random samples from a multivariate normal distribution with specified means, standard
+    deviations, and a correlation matrix. Each row represents a different variable (e.g., different years), and each
+    column represents a different iteration (sample).
+
+    Parameters
+    ----------
+    n_iterations : int
+        The number of samples (iterations) to generate for each variable.
+    means : NDArray
+        An array of mean values for each variable (row).
+    sds : NDArray
+        An array of standard deviations for each variable (row).
+    correlation_matrix : NDArray
+        A positive-definite matrix representing the correlations between the variables (rows).
+    seed : int | Generator | None, optional
+        A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
+        fresh, unpredictable entropy will be pulled from the OS.
+
+    Returns
+    -------
+    NDArray
+        A 2D array of shape (len(means), n_iterations), where each row corresponds to a different variable and each
+        column corresponds to a sample iteration. The values in each row are correlated according to the provided
+        correlation matrix.
+    """
+    # Generate independent random samples for each year
+    shape = (len(means), n_iterations)
+    independent_samples = normal_2d(shape, 0, 1, seed=seed)
+
+    # Apply Cholesky decomposition to the correlation matrix
+    cholesky_decomp = linalg.cholesky(correlation_matrix)
+
+    # Apply Cholesky transformation to introduce correlation across years (rows) for each sample
+    correlated_samples = dot(cholesky_decomp, independent_samples)
+
+    # Scale by standard deviations and shift by means
+    scaled_samples = (
+        correlated_samples
+        * repeat_1d_array_as_columns(n_iterations, sds)
+        + repeat_1d_array_as_columns(n_iterations, means)
+    )
+
+    return scaled_samples
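
A quick sanity check of the Cholesky approach, assuming `correlated_normal_2d` keeps the signature above: standard-normal draws `Z` become `L·Z` (with `C = L·Lᵀ`), so the empirical correlation between rows should approach the requested value as n_iterations grows, while each row keeps its own mean and standard deviation.

from numpy import array, corrcoef
from hestia_earth.models.utils.array_builders import correlated_normal_2d

samples = correlated_normal_2d(
    n_iterations=100000,
    means=array([10.0, 20.0]),
    sds=array([1.0, 2.0]),
    correlation_matrix=array([[1.0, 0.8], [0.8, 1.0]]),
    seed=42,
)
# corrcoef(samples)[0, 1] should be close to 0.8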
hestia_earth/models/utils/blank_node.py

@@ -1,11 +1,7 @@
-import calendar
 from calendar import monthrange
 from collections import defaultdict
 from collections.abc import Iterable
 from datetime import datetime, timedelta
-from uuid import uuid4
-
-from dateutil.relativedelta import relativedelta
 from enum import Enum
 from functools import reduce
 from typing import (
@@ -16,8 +12,12 @@ from typing import (
     Optional,
     Union
 )
+
+from dateutil import parser
+from dateutil.relativedelta import relativedelta
 from hestia_earth.schema import TermTermType
 from hestia_earth.utils.api import download_hestia
+from hestia_earth.utils.blank_node import ArrayTreatment, get_node_value
 from hestia_earth.utils.model import filter_list_term_type
 from hestia_earth.utils.tools import (
     flatten,
@@ -26,19 +26,22 @@ from hestia_earth.utils.tools import (
     safe_parse_float,
     non_empty_list
 )
-from hestia_earth.utils.blank_node import ArrayTreatment, get_node_value
 
-from
-from . import is_from_model, _filter_list_term_unit, is_iterable, _omit
+from . import is_from_model, _filter_list_term_unit, is_iterable, full_date_str
 from .constant import Units
-from .property import get_node_property, get_node_property_value
 from .lookup import (
     is_model_siteType_allowed,
     is_siteType_allowed,
     is_product_id_allowed, is_product_termType_allowed,
     is_input_id_allowed, is_input_termType_allowed
 )
+from .property import get_node_property, get_node_property_value
 from .term import get_lookup_value
+from ..log import debugValues, log_as_table
+
+# TODO: verify those values
+MAX_DEPTH = 1000
+OLDEST_DATE = '1800'
 
 
 def merge_blank_nodes(source: list, new_values: list):
@@ -1228,7 +1231,11 @@ def get_inputs_from_properties(input: dict, term_types: Union[TermTermType, List
     A dictionary of nodes grouped by latest date, in the format `{date: list[node]}`.
     """
     input_value = list_sum(input.get('value', []))
-    properties =
+    properties = (
+        input.get('properties') or
+        input.get('term', {}).get('defaultProperties') or
+        download_hestia(input.get('term', {}).get('@id')).get('defaultProperties')
+    )
     inputs = non_empty_list([
         {
             'term': p.get('key'),
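
The new fallback chain reads left to right: use the input's own properties; failing that, the defaultProperties embedded on its term; failing that, download the term from the HESTIA API. A minimal illustration (the input dict is made up):

input_node = {
    'term': {'@id': 'wheatGrain',
             'defaultProperties': [{'term': {'@id': 'energyContentHigherHeatingValue'}, 'value': 18.2}]},
    'value': [100],
    # no 'properties' key: the term's defaultProperties are used, and no API call is made
}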
@@ -1238,97 +1245,145 @@ def get_inputs_from_properties(input: dict, term_types: Union[TermTermType, List
     return filter_list_term_type(inputs, term_types)
 
 
-def
-
-
-
-
-
-    if len(nodes_by_start_date) != len(nodes):
-        return nodes, False
+def _should_group_node(node: dict): return node.get('startDate') and node.get('endDate')
+
+
+def _parse_date(node: dict, key: str):
+    return safe_parse_date(full_date_str(node.get(key), is_end=key == 'endDate'))
+
 
-
-
-
-
-
-
-
-
-
-
-
+def _group_nodes_by_consecutive_dates(nodes: list):
+    """Groups dictionaries in a list based on consecutive start and end dates within a 1-day tolerance.
+
+    Args:
+        dicts: A list of dictionaries containing 'startDate' and 'endDate' keys.
+
+    Returns:
+        A list of lists, where each inner list contains dictionaries with consecutive start and end dates.
+    """
+    groups = []
+    group = []
+
+    # make sure the nodes are sorted by dates to group by consecutive dates
+    for n in sorted(nodes, key=lambda d: (_parse_date(d, 'startDate'), _parse_date(d, 'endDate'))):
+        if not group or (
+            _should_group_node(n) and
+            _parse_date(n, 'startDate') - _parse_date(group[-1], 'endDate') <= timedelta(days=1)
+        ):
+            group.append(n)
+        else:
+            groups.append(group)
+            group = [n]
 
-
+    if group:
+        groups.append(group)
+
+    return groups
+
+
+def _node_from_group(nodes: list):
+    # `nodes` contain list with consecutive dates
+    return nodes[0] if len(nodes) == 1 else nodes[0] | {
+        'startDate': min(n.get('startDate') for n in nodes),
+        'endDate': max(n.get('endDate') for n in nodes)
+    }
+
+
+def _condense_nodes(nodes: list):
+    # `nodes` contain list with same `term.@id` and `value`
+    grouped_nodes = _group_nodes_by_consecutive_dates(nodes)
+    return flatten(map(_node_from_group, grouped_nodes))
+
+
+def _group_nodes_to_condense(nodes: list) -> dict:
+    def _group_node(group: dict, node: dict):
+        value = node.get('value', [])
+        value = '-'.join(map(str, value if isinstance(value, list) else [value]))
+        properties = '_'.join(non_empty_list([
+            ';'.join(non_empty_list([
+                p.get('term', {}).get('@id'),
+                f"{p.get('value')}"
+            ])) for p in node.get('properties', [])
+        ]))
+        # group by term, value, and properties
+        group_key = '-'.join(non_empty_list([
+            node.get('term', {}).get('@id', ''),
+            value,
+            properties
+        ]))
+        group[group_key] = group.get(group_key, []) + [node]
+        return group
+
+    return reduce(_group_node, nodes, {})
 
 
 def condense_nodes(nodes: list) -> list:
-    grouped_nodes =
-
-    any_changes_made = False
-
-    for key, node_group in grouped_nodes.items():
-        condensed_nodes[key] = node_group
-        while len(condensed_nodes[key]) > 1:
-            condensed_nodes[key], changes_made = _get_condensed_nodes(condensed_nodes[key])
-            if not changes_made:
-                break
-            any_changes_made = True
-
-    if not any_changes_made:
-        return [_omit(values=n, keys=["uuid"]) for n in nodes]
-
-    return sorted(
-        flatten([_omit(values=n, keys=["uuid"]) for nodes in condensed_nodes.values() for n in nodes]),
-        key=lambda x: x["startDate"]
-    )
+    grouped_nodes = _group_nodes_to_condense(nodes)
+    return flatten(map(_condense_nodes, grouped_nodes.values()))
 
 
-
+def _node_date(node: dict): return parser.isoparse(node.get('endDate', OLDEST_DATE))
 
 
-def
-    """Converts to date, adding start or end of year to YYYY strings as indicated by is_end."""
-    return datetime.strptime(_full_date_str(date_str, is_end=is_end), DATE_FORMAT)
+def _distance(node: dict, date): return abs((_node_date(node) - date).days)
 
 
-def
-
-
-
-    suffix = "-12-31" if is_end else "-01-01"
-    elif len(date_str) == 7:
-        # Format YYYY-MM
-        suffix = f"-{calendar.monthrange(int(date_str[:4]), int(date_str[5:7]))[1]}" if is_end else "-01"
+def _most_recent_nodes(nodes: list, date: str) -> list:
+    closest_date = parser.isoparse(date)
+    min_distance = min([_distance(m, closest_date) for m in nodes])
+    return list(filter(lambda m: _distance(m, closest_date) == min_distance, nodes))
 
-    return date_str + suffix
 
+def _shallowest_node(nodes: list) -> dict:
+    min_depth = min([m.get('depthUpper', MAX_DEPTH) for m in nodes])
+    return next((m for m in nodes if m.get('depthUpper', MAX_DEPTH) == min_depth), {})
 
-def _with_full_dates(node: dict) -> dict:
-    output_node = node.copy()
-    if "startDate" in output_node:
-        output_node["startDate"] = _full_date_str(output_node["startDate"], is_end=False)
-    if "endDate" in output_node:
-        output_node["endDate"] = _full_date_str(output_node["endDate"], is_end=True)
 
-
+def most_relevant_blank_node_by_type(nodes: List[dict],
+                                     term_type: Union[TermTermType, str, List[TermTermType], List[str]], date: str):
+    """
+    Given a list of cycle specific dated entries like
+    a list of measurements terms or a list of management terms,
+    find the entry closest to a given date
+    Parameters
+    ----------
+    nodes: List[dict]
+        should contain a 'endDate' field otherwise defaults to OLDEST_DATE
+    term_type : TermTermType or List[TermTermType]
+        The `termType` of the `Term`, or a list of `termType`. Example: `TermTermType.CROP`
+    date: str
+        An ISO-8601 datetime compatible string
 
+    Returns
+    -------
 
-
-
-        _variable_length_str_to_date(date_str=date_str, is_end=is_end) + timedelta(days=days)
-    ).strftime(DATE_FORMAT)
+    """
+    filtered_nodes = filter_list_term_type(nodes, term_type)
 
+    return {} if len(filtered_nodes) == 0 \
+        else _shallowest_node(_most_recent_nodes(filtered_nodes, date)) \
+        if date and len(filtered_nodes) > 1 else filtered_nodes[0]
 
-def _group_nodes_by_term_and_value(nodes: list) -> dict:
-    grouped_nodes = defaultdict(list)
 
-
-
-
-
-
-
+def most_relevant_blank_node_by_id(nodes: list, term_id: str, date: str):
+    """
+    Given a list of nodes with term_id like
+    a list of measurements terms or a list of management terms,
+    find the entry closest to a given date
+    Parameters
+    ----------
+    nodes: List[dict]
+        should contain a 'endDate' field otherwise defaults to OLDEST_DATE
+    term_id : str
+        the term "@id" of the node we want to match to
+    date: str
+        An ISO-8601 datetime compatible string
+
+    Returns
+    -------
 
-
+    """
+    filtered_nodes = [m for m in nodes if m.get('term', {}).get('@id') == term_id]
+    return {} if len(filtered_nodes) == 0 \
+        else _shallowest_node(_most_recent_nodes(filtered_nodes, date)) \
+        if date and len(filtered_nodes) > 1 else filtered_nodes[0]