hestia-earth-models 0.74.4__py3-none-any.whl → 0.74.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hestia-earth-models might be problematic. Click here for more details.
- hestia_earth/models/cml2001Baseline/abioticResourceDepletionMineralsAndMetals.py +0 -1
- hestia_earth/models/config/Cycle.json +15 -0
- hestia_earth/models/config/ImpactAssessment.json +9 -1
- hestia_earth/models/cycle/animal/input/hestiaAggregatedData.py +3 -3
- hestia_earth/models/cycle/completeness/seed.py +1 -1
- hestia_earth/models/cycle/input/hestiaAggregatedData.py +25 -16
- hestia_earth/models/data/hestiaAggregatedData/__init__.py +73 -0
- hestia_earth/models/environmentalFootprintV3_1/scarcityWeightedWaterUse.py +1 -1
- hestia_earth/models/environmentalFootprintV3_1/soilQualityIndexLandOccupation.py +5 -6
- hestia_earth/models/environmentalFootprintV3_1/soilQualityIndexLandTransformation.py +10 -13
- hestia_earth/models/fantkeEtAl2016/damageToHumanHealthParticulateMatterFormation.py +1 -1
- hestia_earth/models/hestia/landCover.py +24 -0
- hestia_earth/models/hestia/landOccupationDuringCycle.py +80 -51
- hestia_earth/models/hestia/landTransformation100YearAverageDuringCycle.py +7 -1
- hestia_earth/models/hestia/landTransformation20YearAverageDuringCycle.py +7 -1
- hestia_earth/models/hestia/resourceUse_utils.py +58 -119
- hestia_earth/models/hestia/waterSalinity.py +57 -12
- hestia_earth/models/impact_assessment/post_checks/__init__.py +3 -2
- hestia_earth/models/impact_assessment/post_checks/remove_cache_fields.py +9 -0
- hestia_earth/models/impact_assessment/pre_checks/cache_emissionsResourceUse.py +21 -0
- hestia_earth/models/impact_assessment/pre_checks/cycle.py +5 -0
- hestia_earth/models/ipcc2019/co2ToAirAboveGroundBiomassStockChange.py +6 -64
- hestia_earth/models/ipcc2019/co2ToAirBelowGroundBiomassStockChange.py +9 -87
- hestia_earth/models/ipcc2019/co2ToAirBiocharStockChange.py +140 -0
- hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +329 -217
- hestia_earth/models/ipcc2019/co2ToAirSoilOrganicCarbonStockChange.py +10 -87
- hestia_earth/models/mocking/__init__.py +2 -2
- hestia_earth/models/mocking/mock_search.py +20 -10
- hestia_earth/models/mocking/search-results.json +1 -7679
- hestia_earth/models/pooreNemecek2018/landOccupationDuringCycle.py +8 -7
- hestia_earth/models/poschEtAl2008/terrestrialAcidificationPotentialAccumulatedExceedance.py +1 -1
- hestia_earth/models/poschEtAl2008/terrestrialEutrophicationPotentialAccumulatedExceedance.py +1 -1
- hestia_earth/models/preload_requests.py +18 -4
- hestia_earth/models/schmidt2007/utils.py +3 -3
- hestia_earth/models/utils/__init__.py +4 -1
- hestia_earth/models/utils/aggregated.py +21 -68
- hestia_earth/models/utils/cycle.py +3 -3
- hestia_earth/models/utils/impact_assessment.py +45 -41
- hestia_earth/models/utils/lookup.py +92 -67
- hestia_earth/models/version.py +1 -1
- hestia_earth/orchestrator/models/__init__.py +47 -10
- hestia_earth/orchestrator/models/transformations.py +3 -1
- hestia_earth/orchestrator/strategies/merge/__init__.py +1 -2
- hestia_earth/orchestrator/strategies/merge/merge_list.py +31 -8
- hestia_earth/orchestrator/utils.py +29 -0
- {hestia_earth_models-0.74.4.dist-info → hestia_earth_models-0.74.5.dist-info}/METADATA +2 -3
- {hestia_earth_models-0.74.4.dist-info → hestia_earth_models-0.74.5.dist-info}/RECORD +62 -55
- tests/models/cycle/animal/input/test_hestiaAggregatedData.py +3 -3
- tests/models/cycle/input/test_hestiaAggregatedData.py +9 -18
- tests/models/data/__init__.py +0 -0
- tests/models/data/test_hestiaAggregatedData.py +32 -0
- tests/models/hestia/test_landCover.py +32 -1
- tests/models/hestia/test_waterSalinity.py +16 -4
- tests/models/ipcc2019/test_co2ToAirAboveGroundBiomassStockChange.py +1 -6
- tests/models/ipcc2019/test_co2ToAirBelowGroundBiomassStockChange.py +1 -6
- tests/models/ipcc2019/test_co2ToAirBiocharStockChange.py +90 -0
- tests/models/ipcc2019/test_co2ToAirSoilOrganicCarbonStockChange.py +1 -6
- tests/models/pooreNemecek2018/test_landOccupationDuringCycle.py +1 -0
- tests/orchestrator/strategies/merge/test_merge_list.py +5 -0
- {hestia_earth_models-0.74.4.dist-info → hestia_earth_models-0.74.5.dist-info}/LICENSE +0 -0
- {hestia_earth_models-0.74.4.dist-info → hestia_earth_models-0.74.5.dist-info}/WHEEL +0 -0
- {hestia_earth_models-0.74.4.dist-info → hestia_earth_models-0.74.5.dist-info}/top_level.txt +0 -0
|
@@ -5,8 +5,7 @@ from hestia_earth.utils.lookup import (
|
|
|
5
5
|
get_table_value,
|
|
6
6
|
column_name,
|
|
7
7
|
extract_grouped_data,
|
|
8
|
-
_get_single_table_value
|
|
9
|
-
lookup_term_ids
|
|
8
|
+
_get_single_table_value
|
|
10
9
|
)
|
|
11
10
|
from hestia_earth.utils.tools import list_sum, safe_parse_float, non_empty_list
|
|
12
11
|
|
|
@@ -18,59 +17,99 @@ def _node_value(node):
|
|
|
18
17
|
return list_sum(value, default=None) if isinstance(value, list) else value
|
|
19
18
|
|
|
20
19
|
|
|
21
|
-
def
|
|
20
|
+
def _log_value_coeff(log_node: dict, value: float, coefficient: float, **log_args):
|
|
21
|
+
if value is not None and coefficient:
|
|
22
|
+
debugValues(log_node, value=value, coefficient=coefficient, **log_args)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _factor_value(
|
|
26
|
+
log_node: dict,
|
|
27
|
+
model: str,
|
|
28
|
+
term_id: str,
|
|
29
|
+
lookup_name: str,
|
|
30
|
+
lookup_col: str,
|
|
31
|
+
group_key: Optional[str] = None,
|
|
32
|
+
default_world_value: Optional[bool] = False
|
|
33
|
+
):
|
|
22
34
|
@lru_cache()
|
|
23
35
|
def get_coefficient(node_term_id: str, grouped_data_key: str):
|
|
24
|
-
coefficient = get_region_lookup_value(
|
|
36
|
+
coefficient = get_region_lookup_value(
|
|
37
|
+
lookup_name=lookup_name,
|
|
38
|
+
term_id=node_term_id,
|
|
39
|
+
column=lookup_col,
|
|
40
|
+
fallback_world=default_world_value,
|
|
41
|
+
model=model, term=term_id
|
|
42
|
+
)
|
|
25
43
|
# value is either a number or matching between a model and a value (restrict value to specific model only)
|
|
26
44
|
return safe_parse_float(
|
|
27
45
|
extract_grouped_data(coefficient, grouped_data_key),
|
|
28
46
|
default=None
|
|
29
47
|
) if ':' in str(coefficient) else safe_parse_float(coefficient, default=None)
|
|
30
48
|
|
|
31
|
-
def get_value(
|
|
32
|
-
node_term_id =
|
|
33
|
-
grouped_data_key =
|
|
34
|
-
value = _node_value(
|
|
49
|
+
def get_value(blank_node: dict):
|
|
50
|
+
node_term_id = blank_node.get('term', {}).get('@id')
|
|
51
|
+
grouped_data_key = group_key or blank_node.get('methodModel', {}).get('@id')
|
|
52
|
+
value = _node_value(blank_node)
|
|
35
53
|
coefficient = get_coefficient(node_term_id, grouped_data_key)
|
|
36
|
-
if
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
coefficient=coefficient)
|
|
54
|
+
if model:
|
|
55
|
+
_log_value_coeff(log_node=log_node, value=value, coefficient=coefficient,
|
|
56
|
+
model=model,
|
|
57
|
+
term=term_id,
|
|
58
|
+
node=node_term_id,
|
|
59
|
+
operation=blank_node.get('operation', {}).get('@id'))
|
|
43
60
|
return {'id': node_term_id, 'value': value, 'coefficient': coefficient}
|
|
44
61
|
return get_value
|
|
45
62
|
|
|
46
63
|
|
|
47
|
-
def region_factor_value(
|
|
64
|
+
def region_factor_value(
|
|
65
|
+
log_node: dict,
|
|
66
|
+
model: str,
|
|
67
|
+
term_id: str,
|
|
68
|
+
lookup_name: str,
|
|
69
|
+
lookup_term_id: str,
|
|
70
|
+
group_key: Optional[str] = None,
|
|
71
|
+
default_world_value: Optional[bool] = False
|
|
72
|
+
):
|
|
48
73
|
@lru_cache()
|
|
49
74
|
def get_coefficient(node_term_id: str, region_term_id: str):
|
|
50
|
-
coefficient = get_region_lookup_value(
|
|
75
|
+
coefficient = get_region_lookup_value(
|
|
76
|
+
lookup_name=lookup_name,
|
|
77
|
+
term_id=region_term_id,
|
|
78
|
+
column=node_term_id,
|
|
79
|
+
fallback_world=default_world_value,
|
|
80
|
+
model=model, term=term_id
|
|
81
|
+
)
|
|
51
82
|
return safe_parse_float(
|
|
52
83
|
extract_grouped_data(coefficient, group_key) if group_key else coefficient,
|
|
53
84
|
default=None
|
|
54
85
|
)
|
|
55
86
|
|
|
56
|
-
def get_value(
|
|
57
|
-
node_term_id =
|
|
58
|
-
value = _node_value(
|
|
87
|
+
def get_value(blank_node: dict):
|
|
88
|
+
node_term_id = blank_node.get('term', {}).get('@id')
|
|
89
|
+
value = _node_value(blank_node)
|
|
59
90
|
# when getting data for a `region`, we can try to get the `region` on the node first, in case it is set
|
|
60
91
|
region_term_id = (
|
|
61
|
-
(
|
|
92
|
+
(blank_node.get('region') or blank_node.get('country') or {'@id': lookup_term_id}).get('@id')
|
|
62
93
|
) if lookup_term_id.startswith('GADM-') else lookup_term_id
|
|
63
94
|
coefficient = get_coefficient(node_term_id, region_term_id)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
95
|
+
_log_value_coeff(log_node=log_node, value=value, coefficient=coefficient,
|
|
96
|
+
model=model,
|
|
97
|
+
term=term_id,
|
|
98
|
+
node=node_term_id,
|
|
99
|
+
operation=blank_node.get('operation', {}).get('@id'))
|
|
69
100
|
return {'id': node_term_id, 'region-id': region_term_id, 'value': value, 'coefficient': coefficient}
|
|
70
101
|
return get_value
|
|
71
102
|
|
|
72
103
|
|
|
73
|
-
def aware_factor_value(
|
|
104
|
+
def aware_factor_value(
|
|
105
|
+
log_node: dict,
|
|
106
|
+
model: str,
|
|
107
|
+
term_id: str,
|
|
108
|
+
lookup_name: str,
|
|
109
|
+
aware_id: str,
|
|
110
|
+
group_key: Optional[str] = None,
|
|
111
|
+
default_world_value: Optional[bool] = False
|
|
112
|
+
):
|
|
74
113
|
lookup = download_lookup(lookup_name, False) # avoid saving in memory as there could be many different files used
|
|
75
114
|
lookup_col = column_name('awareWaterBasinId')
|
|
76
115
|
|
|
@@ -82,17 +121,16 @@ def aware_factor_value(model: str, term_id: str, lookup_name: str, aware_id: str
|
|
|
82
121
|
default=None
|
|
83
122
|
) if group_key else coefficient
|
|
84
123
|
|
|
85
|
-
def get_value(
|
|
86
|
-
node_term_id =
|
|
87
|
-
value = _node_value(
|
|
124
|
+
def get_value(blank_node: dict):
|
|
125
|
+
node_term_id = blank_node.get('term', {}).get('@id')
|
|
126
|
+
value = _node_value(blank_node)
|
|
88
127
|
|
|
89
128
|
try:
|
|
90
129
|
coefficient = get_coefficient(node_term_id)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
coefficient=coefficient)
|
|
130
|
+
_log_value_coeff(log_node=log_node, value=value, coefficient=coefficient,
|
|
131
|
+
model=model,
|
|
132
|
+
term=term_id,
|
|
133
|
+
node=node_term_id)
|
|
96
134
|
except Exception: # factor does not exist
|
|
97
135
|
coefficient = None
|
|
98
136
|
|
|
@@ -101,17 +139,21 @@ def aware_factor_value(model: str, term_id: str, lookup_name: str, aware_id: str
|
|
|
101
139
|
|
|
102
140
|
|
|
103
141
|
def all_factor_value(
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
142
|
+
log_model: str,
|
|
143
|
+
log_term_id: str,
|
|
144
|
+
log_node: dict,
|
|
107
145
|
lookup_name: str,
|
|
108
146
|
lookup_col: str,
|
|
109
147
|
blank_nodes: List[dict],
|
|
110
|
-
|
|
148
|
+
group_key: Optional[str] = None,
|
|
111
149
|
default_no_values=0,
|
|
112
|
-
factor_value_func=_factor_value
|
|
150
|
+
factor_value_func=_factor_value,
|
|
151
|
+
default_world_value: bool = False
|
|
113
152
|
):
|
|
114
|
-
values = list(map(
|
|
153
|
+
values = list(map(
|
|
154
|
+
factor_value_func(log_node, log_model, log_term_id, lookup_name, lookup_col, group_key, default_world_value),
|
|
155
|
+
blank_nodes
|
|
156
|
+
))
|
|
115
157
|
|
|
116
158
|
has_values = len(values) > 0
|
|
117
159
|
missing_values = set([
|
|
@@ -129,11 +171,11 @@ def all_factor_value(
|
|
|
129
171
|
row_value=debug_values[1] if len(debug_values) == 2 else debug_values[0],
|
|
130
172
|
col=debug_values[0] if len(debug_values) == 2 else lookup_col,
|
|
131
173
|
value=None,
|
|
132
|
-
model=
|
|
133
|
-
term=
|
|
174
|
+
model=log_model,
|
|
175
|
+
term=log_term_id
|
|
134
176
|
)
|
|
135
177
|
|
|
136
|
-
debugValues(
|
|
178
|
+
debugValues(log_node, model=log_model, term=log_term_id,
|
|
137
179
|
all_with_factors=all_with_factors,
|
|
138
180
|
missing_lookup_factor=log_as_table([
|
|
139
181
|
{
|
|
@@ -144,7 +186,7 @@ def all_factor_value(
|
|
|
144
186
|
for v in missing_values
|
|
145
187
|
]),
|
|
146
188
|
has_values=has_values,
|
|
147
|
-
values_used=log_as_table(values))
|
|
189
|
+
values_used=log_as_table([v for v in values if v.get('coefficient')]))
|
|
148
190
|
|
|
149
191
|
values = [float((v.get('value') or 0) * (v.get('coefficient') or 0)) for v in values]
|
|
150
192
|
|
|
@@ -152,37 +194,20 @@ def all_factor_value(
|
|
|
152
194
|
return None if not all_with_factors else (list_sum(values) if has_values else default_no_values)
|
|
153
195
|
|
|
154
196
|
|
|
155
|
-
def _country_in_lookup(country_id: str):
|
|
156
|
-
def in_lookup(lookup_name: str):
|
|
157
|
-
return (
|
|
158
|
-
download_lookup(lookup_name.replace('region', country_id)) is not None or
|
|
159
|
-
country_id in lookup_term_ids(download_lookup(lookup_name))
|
|
160
|
-
)
|
|
161
|
-
return in_lookup
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
def fallback_country(country_id: str, lookups: List[str]) -> str:
|
|
165
|
-
"""
|
|
166
|
-
Given a country `@id`, and lookup tables, checks if a location can be used in lookup file
|
|
167
|
-
else fallback to the default "region-world".
|
|
168
|
-
"""
|
|
169
|
-
is_in_lookup = lambda v: all(map(_country_in_lookup(v), lookups)) # noqa: E731
|
|
170
|
-
fallback_id = 'region-world'
|
|
171
|
-
return country_id if country_id and is_in_lookup(country_id) else fallback_id if is_in_lookup(fallback_id) else None
|
|
172
|
-
|
|
173
|
-
|
|
174
197
|
def get_region_lookup(lookup_name: str, term_id: str):
|
|
175
198
|
# for performance, try to load the region specific lookup if exists
|
|
176
199
|
return (
|
|
177
|
-
download_lookup(lookup_name.replace('region-', f"{term_id}-"))
|
|
200
|
+
download_lookup(lookup_name.replace('region-', f"{term_id}-"), build_index=True)
|
|
178
201
|
if lookup_name and lookup_name.startswith('region-') else None
|
|
179
|
-
) or download_lookup(lookup_name)
|
|
202
|
+
) or download_lookup(lookup_name, build_index=True)
|
|
180
203
|
|
|
181
204
|
|
|
182
205
|
@lru_cache()
|
|
183
|
-
def get_region_lookup_value(lookup_name: str, term_id: str, column: str, **log_args):
|
|
206
|
+
def get_region_lookup_value(lookup_name: str, term_id: str, column: str, fallback_world: bool = False, **log_args):
|
|
184
207
|
# for performance, try to load the region specific lookup if exists
|
|
185
208
|
lookup = get_region_lookup(lookup_name, term_id)
|
|
186
209
|
value = get_table_value(lookup, 'termid', term_id, column_name(column))
|
|
210
|
+
if value is None and fallback_world:
|
|
211
|
+
return get_region_lookup_value(lookup_name, 'region-world', column, **log_args)
|
|
187
212
|
debugMissingLookup(lookup_name, 'termid', term_id, column, value, **log_args)
|
|
188
213
|
return value
|
hestia_earth/models/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = '0.74.4'
|
|
1
|
+
VERSION = '0.74.5'
|
|
@@ -9,10 +9,10 @@ from copy import deepcopy
|
|
|
9
9
|
from hestia_earth.utils.tools import non_empty_list, current_time_ms
|
|
10
10
|
|
|
11
11
|
from hestia_earth.models.version import VERSION
|
|
12
|
-
from
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
from
|
|
12
|
+
from hestia_earth.orchestrator.log import logger
|
|
13
|
+
from hestia_earth.orchestrator.utils import get_required_model_param, _snakecase, reset_index
|
|
14
|
+
from hestia_earth.orchestrator.strategies.run import should_run
|
|
15
|
+
from hestia_earth.orchestrator.strategies.merge import merge
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def _memory_usage():
|
|
@@ -29,6 +29,23 @@ def _max_workers(type: str):
|
|
|
29
29
|
return None
|
|
30
30
|
|
|
31
31
|
|
|
32
|
+
# do not deep copy to improve performance, only set on low risk keys
|
|
33
|
+
_SKIP_DEEPCOPY_KEYS = [
|
|
34
|
+
'emissions',
|
|
35
|
+
'emissionsResourceUse'
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _node_copy(node: dict):
|
|
40
|
+
skip_keys = [key for key in _SKIP_DEEPCOPY_KEYS if key in node and isinstance(node[key], list)]
|
|
41
|
+
new_node = deepcopy(node | {
|
|
42
|
+
key: [] for key in skip_keys
|
|
43
|
+
}) | {
|
|
44
|
+
key: node[key] for key in skip_keys
|
|
45
|
+
}
|
|
46
|
+
return new_node
|
|
47
|
+
|
|
48
|
+
|
|
32
49
|
def _list_except_item(list, item):
|
|
33
50
|
idx = list.index(item)
|
|
34
51
|
return list[:idx] + list[idx+1:]
|
|
@@ -66,22 +83,40 @@ def _import_model(name: str):
|
|
|
66
83
|
|
|
67
84
|
def _run_pre_checks(data: dict):
|
|
68
85
|
node_type = _snakecase(data.get('@type', data.get('type')))
|
|
86
|
+
|
|
87
|
+
now = current_time_ms()
|
|
88
|
+
memory_usage = _memory_usage()
|
|
89
|
+
|
|
69
90
|
try:
|
|
70
91
|
pre_checks = _import_model('.'.join([node_type, 'pre_checks'])).get('run')
|
|
71
92
|
logger.info('running pre checks for %s', node_type)
|
|
72
|
-
|
|
93
|
+
data = pre_checks(data)
|
|
73
94
|
except Exception:
|
|
74
|
-
|
|
95
|
+
pass
|
|
96
|
+
|
|
97
|
+
logger.info('model_model=%s, model_value=%s, time=%s, memory_used=%s',
|
|
98
|
+
node_type, 'pre_checks', current_time_ms() - now, _memory_usage() - memory_usage)
|
|
99
|
+
|
|
100
|
+
return data
|
|
75
101
|
|
|
76
102
|
|
|
77
103
|
def _run_post_checks(data: dict):
|
|
78
104
|
node_type = _snakecase(data.get('@type', data.get('type')))
|
|
105
|
+
|
|
106
|
+
now = current_time_ms()
|
|
107
|
+
memory_usage = _memory_usage()
|
|
108
|
+
|
|
79
109
|
try:
|
|
80
110
|
post_checks = _import_model('.'.join([node_type, 'post_checks'])).get('run')
|
|
81
111
|
logger.info('running post checks for %s', node_type)
|
|
82
|
-
|
|
112
|
+
data = post_checks(data)
|
|
83
113
|
except Exception:
|
|
84
|
-
|
|
114
|
+
pass
|
|
115
|
+
|
|
116
|
+
logger.info('model_model=%s, model_value=%s, time=%s, memory_used=%s',
|
|
117
|
+
node_type, 'post_checks', current_time_ms() - now, _memory_usage() - memory_usage)
|
|
118
|
+
|
|
119
|
+
return data
|
|
85
120
|
|
|
86
121
|
|
|
87
122
|
def _run_model(data: dict, model: dict, all_models: list):
|
|
@@ -108,7 +143,7 @@ def _run(data: dict, model: dict, all_models: list):
|
|
|
108
143
|
def _run_serie(data: dict, models: list, stage: Union[int, List[int]] = None):
|
|
109
144
|
return reduce(
|
|
110
145
|
lambda prev, m: merge(
|
|
111
|
-
prev, _run_parallel(prev, m, models) if isinstance(m, list) else [_run(
|
|
146
|
+
prev, _run_parallel(prev, m, models) if isinstance(m, list) else [_run(_node_copy(prev), m, models)]
|
|
112
147
|
),
|
|
113
148
|
_filter_models_stage(models, stage=stage),
|
|
114
149
|
data
|
|
@@ -120,7 +155,7 @@ def _run_parallel(data: dict, model: list, all_models: list):
|
|
|
120
155
|
|
|
121
156
|
max_workers = _max_workers(data.get('@type', data.get('type')))
|
|
122
157
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
123
|
-
futures = [executor.submit(_run,
|
|
158
|
+
futures = [executor.submit(_run, _node_copy(data), m, all_models) for m in model]
|
|
124
159
|
|
|
125
160
|
for future in concurrent.futures.as_completed(futures):
|
|
126
161
|
results.append(future.result())
|
|
@@ -129,6 +164,8 @@ def _run_parallel(data: dict, model: list, all_models: list):
|
|
|
129
164
|
|
|
130
165
|
|
|
131
166
|
def run(data: dict, models: list, stage: Union[int, List[int]] = None):
|
|
167
|
+
# make sure we reset before recalculating the node
|
|
168
|
+
reset_index()
|
|
132
169
|
# run pre-checks if exist
|
|
133
170
|
data = _run_pre_checks(data)
|
|
134
171
|
data = _run_serie(data, models, stage=stage)
|
|
@@ -7,7 +7,7 @@ from hestia_earth.models.utils.transformation import previous_transformation
|
|
|
7
7
|
from hestia_earth.models.utils.product import find_by_product
|
|
8
8
|
|
|
9
9
|
from . import run as run_node, _import_model
|
|
10
|
-
from hestia_earth.orchestrator.utils import new_practice, _filter_by_keys
|
|
10
|
+
from hestia_earth.orchestrator.utils import new_practice, _filter_by_keys, reset_index
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def _full_completeness():
|
|
@@ -94,6 +94,8 @@ def _run_transformation(cycle: dict, models: list):
|
|
|
94
94
|
transformation = _add_excreta_inputs(previous, transformation)
|
|
95
95
|
transformation = _apply_transformation_share(previous, transformation)
|
|
96
96
|
transformation = _run_models(cycle, transformation, models)
|
|
97
|
+
# reset the index between 2 transformations, as they dont share the same values
|
|
98
|
+
reset_index()
|
|
97
99
|
return transformations + [transformation]
|
|
98
100
|
return run
|
|
99
101
|
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from functools import reduce
|
|
2
|
-
import pydash
|
|
3
2
|
|
|
4
3
|
from hestia_earth.orchestrator.utils import _non_empty, _non_empty_list, update_node_version
|
|
5
4
|
from . import merge_append
|
|
@@ -34,7 +33,7 @@ def _merge_result(data: dict, result: dict):
|
|
|
34
33
|
node_type = data.get('type', data.get('@type'))
|
|
35
34
|
values = [values] if not isinstance(values, list) and merge_type == 'list' else values
|
|
36
35
|
new_value = _STRATEGIES[merge_type](current, values, version, model, merge_args, node_type)
|
|
37
|
-
new_data =
|
|
36
|
+
new_data = data | {key: new_value}
|
|
38
37
|
return update_node_version(version, new_data, data)
|
|
39
38
|
|
|
40
39
|
|
|
@@ -3,7 +3,12 @@ from datetime import datetime
|
|
|
3
3
|
from hestia_earth.schema import UNIQUENESS_FIELDS
|
|
4
4
|
from hestia_earth.utils.tools import safe_parse_date, flatten
|
|
5
5
|
|
|
6
|
-
from hestia_earth.orchestrator.utils import
|
|
6
|
+
from hestia_earth.orchestrator.utils import (
|
|
7
|
+
_non_empty_list,
|
|
8
|
+
update_node_version,
|
|
9
|
+
wrap_index,
|
|
10
|
+
update_index
|
|
11
|
+
)
|
|
7
12
|
from .merge_node import merge as merge_node
|
|
8
13
|
|
|
9
14
|
_METHOD_MODEL_KEY = 'methodModel.@id'
|
|
@@ -74,7 +79,14 @@ def _build_matching_properties(values: list, model: dict = {}, merge_args: dict
|
|
|
74
79
|
return _handle_local_property(values, properties, 'impactAssessment.id')
|
|
75
80
|
|
|
76
81
|
|
|
77
|
-
def merge(
|
|
82
|
+
def merge(
|
|
83
|
+
source: list,
|
|
84
|
+
new_values: list,
|
|
85
|
+
version: str,
|
|
86
|
+
model: dict = {},
|
|
87
|
+
merge_args: dict = {},
|
|
88
|
+
node_type: str = ''
|
|
89
|
+
):
|
|
78
90
|
source = [] if source is None else source
|
|
79
91
|
|
|
80
92
|
# only merge if the
|
|
@@ -83,19 +95,30 @@ def merge(source: list, new_values: list, version: str, model: dict = {}, merge_
|
|
|
83
95
|
# build list of properties used to do the matching
|
|
84
96
|
properties = _build_matching_properties(source, model, merge_args, node_type)
|
|
85
97
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
98
|
+
# store previous identical index to speed merging
|
|
99
|
+
index_key = '-'.join([node_type, model.get('key', '')])
|
|
100
|
+
# when the subkey changes, we need to completely rebuild the index
|
|
101
|
+
index_sub_key = '-'.join(properties + [str(merge_args)])
|
|
102
|
+
|
|
103
|
+
def build_index():
|
|
104
|
+
return {
|
|
105
|
+
_value_index_key(value, properties, merge_args): index
|
|
106
|
+
for index, value in enumerate(source)
|
|
107
|
+
} if properties else None
|
|
108
|
+
|
|
109
|
+
source_index_keys = wrap_index(index_key, index_sub_key, build_index)
|
|
90
110
|
|
|
91
111
|
for el in _non_empty_list(new_values):
|
|
92
112
|
new_value_index_key = _value_index_key(el, properties, merge_args)
|
|
93
113
|
source_index = source_index_keys.get(new_value_index_key) if source_index_keys else None
|
|
94
114
|
if source_index is None:
|
|
95
115
|
# add to index keys for next elements
|
|
96
|
-
|
|
97
|
-
|
|
116
|
+
source_index_keys = source_index_keys or {}
|
|
117
|
+
source_index_keys[new_value_index_key] = len(source)
|
|
98
118
|
source.append(update_node_version(version, el))
|
|
99
119
|
elif not skip_same_term:
|
|
100
120
|
source[source_index] = merge_node(source[source_index], el, version, model, merge_args)
|
|
121
|
+
|
|
122
|
+
update_index(index_key, index_sub_key, source_index_keys)
|
|
123
|
+
|
|
101
124
|
return source
|
|
@@ -6,6 +6,35 @@ from functools import reduce
|
|
|
6
6
|
EXCLUDED_VERSION_KEYS = [
|
|
7
7
|
'@type'
|
|
8
8
|
]
|
|
9
|
+
_memory = {}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def wrap_index(key: str, sub_key: str, func):
|
|
13
|
+
global _memory # noqa: F824
|
|
14
|
+
memory_data = _memory.get(key, {})
|
|
15
|
+
data = (memory_data.get('data') if memory_data.get('sub_key') == sub_key else None) or func()
|
|
16
|
+
_memory[key] = {
|
|
17
|
+
'sub_key': sub_key,
|
|
18
|
+
'data': data
|
|
19
|
+
}
|
|
20
|
+
return data
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def update_index(key: str, sub_key: str, data):
|
|
24
|
+
global _memory # noqa: F824
|
|
25
|
+
_memory[key] = {
|
|
26
|
+
'sub_key': sub_key,
|
|
27
|
+
'data': data
|
|
28
|
+
}
|
|
29
|
+
return data
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def reset_index():
|
|
33
|
+
"""
|
|
34
|
+
Reset the merging index between different nodes.
|
|
35
|
+
"""
|
|
36
|
+
global _memory
|
|
37
|
+
_memory = {}
|
|
9
38
|
|
|
10
39
|
|
|
11
40
|
def get_required_model_param(model, key: str):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hestia-earth-models
|
|
3
|
-
Version: 0.74.4
|
|
3
|
+
Version: 0.74.5
|
|
4
4
|
Summary: HESTIA's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
|
|
5
5
|
Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
|
|
6
6
|
Author: HESTIA Team
|
|
@@ -12,8 +12,7 @@ Classifier: Programming Language :: Python :: 3.6
|
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
Requires-Dist: hestia-earth-schema<34.0.0,>=33.5.0
|
|
15
|
-
Requires-Dist: hestia-earth-utils>=0.15.
|
|
16
|
-
Requires-Dist: python-dateutil>=2.8.1
|
|
15
|
+
Requires-Dist: hestia-earth-utils>=0.15.3
|
|
17
16
|
Requires-Dist: CurrencyConverter==0.16.8
|
|
18
17
|
Requires-Dist: haversine>=2.7.0
|
|
19
18
|
Requires-Dist: pydash
|