hestia-earth-models 0.70.6__py3-none-any.whl → 0.71.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/models/cache_nodes.py +157 -0
- hestia_earth/models/cache_sites.py +1 -1
- hestia_earth/models/config/Cycle.json +0 -30
- hestia_earth/models/data/ecoinventV3/__init__.py +7 -5
- hestia_earth/models/ecoinventV3/__init__.py +8 -1
- hestia_earth/models/hestia/aboveGroundCropResidue.py +3 -3
- hestia_earth/models/hestia/seed_emissions.py +25 -21
- hestia_earth/models/mocking/search-results.json +1509 -1505
- hestia_earth/models/utils/background_emissions.py +24 -0
- hestia_earth/models/utils/pesticideAI.py +1 -1
- hestia_earth/models/version.py +1 -1
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/METADATA +2 -2
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/RECORD +17 -19
- tests/models/test_cache_nodes.py +31 -0
- hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +0 -100
- hestia_earth/models/ipcc2006/n2OToAirOrganicSoilCultivationDirect.py +0 -99
- tests/models/ipcc2006/test_co2ToAirOrganicSoilCultivation.py +0 -49
- tests/models/ipcc2006/test_n2OToAirOrganicSoilCultivationDirect.py +0 -32
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/LICENSE +0 -0
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/WHEEL +0 -0
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/top_level.txt +0 -0
hestia_earth/models/cache_nodes.py
@@ -0,0 +1,157 @@
+import os
+from functools import reduce
+from hestia_earth.schema import NodeType
+from hestia_earth.utils.tools import current_time_ms, flatten
+from hestia_earth.earth_engine import init_gee
+
+from .log import logger
+from .utils import CACHE_KEY
+from .utils.site import years_from_cycles
+from .utils.source import CACHE_SOURCES_KEY, find_sources
+from .cache_sites import run as cache_sites
+
+CACHE_RELATED_KEY = 'related'
+CACHE_NESTED_KEY = 'nested'
+
+_CACHE_BATCH_SIZE = int(os.getenv('CACHE_SITES_BATCH_SIZE', '5000'))
+_ENABLE_CACHE_YEARS = os.getenv('ENABLE_CACHE_YEARS', 'true') == 'true'
+_ENABLE_CACHE_RELATED_NODES = os.getenv('ENABLE_CACHE_RELATED_NODES', 'true') == 'true'
+_CACHE_NODE_TYPES = [
+    NodeType.SITE.value,
+    NodeType.CYCLE.value,
+    NodeType.IMPACTASSESSMENT.value
+]
+
+
+def _pop_items(values: list, nb_items: int):
+    if len(values) < nb_items:
+        removed_items = values[:]  # Get a copy of the entire array
+        values.clear()  # Remove all items from the original array
+    else:
+        removed_items = values[:nb_items]  # Get the first N items
+        del values[:nb_items]  # Remove the first N items from the original array
+
+    return removed_items
+
+
+def _filter_by_type(nodes: list, type: str): return [n for n in nodes if n.get('@type', n.get('type')) == type]
+
+
+def _node_key(node: dict): return '/'.join([node.get('type', node.get('@type')), node.get('id', node.get('@id'))])
+
+
+def _years_from_cycles(nodes: dict): return years_from_cycles(_filter_by_type(nodes, NodeType.CYCLE.value))
+
+
+def _linked_node(data: dict): return {'type': data.get('type'), 'id': data.get('id')}
+
+
+def _find_nested_nodes(data) -> list[dict]:
+    if isinstance(data, dict):
+        if data.get('type') in _CACHE_NODE_TYPES and data.get('id'):
+            return [_linked_node(data)]
+        return flatten(_find_nested_nodes(list(data.values())))
+    if isinstance(data, list):
+        return flatten(map(_find_nested_nodes, data))
+    return []
+
+
+def _nested_nodes(node_keys: list[str]):
+    def exec(group: dict, node: dict):
+        nested_nodes = _find_nested_nodes(list(node.values()))
+
+        for nested_node in nested_nodes:
+            group_id = _node_key(nested_node)
+            group[group_id] = group.get(group_id, {})
+            group[group_id][CACHE_RELATED_KEY] = group.get(group_id, {}).get(CACHE_RELATED_KEY, []) + [
+                _linked_node(node)
+            ]
+
+            # cache nodes that the current node refers to (nesting)
+            if group_id in node_keys:
+                group_id = _node_key(node)
+                group[group_id] = group.get(group_id, {})
+                group[group_id][CACHE_NESTED_KEY] = group.get(group_id, {}).get(CACHE_NESTED_KEY, []) + [
+                    _linked_node(nested_node)
+                ]
+
+        return group
+    return exec
+
+
+def _cache_related_nodes(nodes: list):
+    # only cache nodes included in the file
+    nodes_keys = list(map(_node_key, nodes))
+    # for each node, compile list of nested nodes
+    nested_nodes_mapping = reduce(_nested_nodes(nodes_keys), nodes, {})
+
+    def cache_related_node(node: dict):
+        nodes_mapping = nested_nodes_mapping.get(_node_key(node), {})
+        related_nodes = nodes_mapping.get(CACHE_RELATED_KEY) or []
+        nested_nodes = nodes_mapping.get(CACHE_NESTED_KEY) or []
+        # save in cache
+        cached_data = node.get(CACHE_KEY, {}) | {
+            CACHE_RELATED_KEY: related_nodes,
+            CACHE_NESTED_KEY: nested_nodes
+        }
+        return node | {CACHE_KEY: cached_data}
+
+    return list(map(cache_related_node, nodes))
+
+
+def _cache_sources(nodes: list):
+    sources = find_sources()
+    return [
+        n | ({
+            CACHE_KEY: n.get(CACHE_KEY, {}) | {CACHE_SOURCES_KEY: sources}
+        } if n.get('type', n.get('@type')) in _CACHE_NODE_TYPES else {})
+        for n in nodes
+    ]
+
+
+def _safe_cache_sites(sites: list, years: list):
+    try:
+        return cache_sites(sites, years)
+    except Exception as e:
+        logger.error(f"An error occurred while caching nodes on EE: {str(e)}")
+        if 'exceeded' in str(e):
+            logger.debug('Fallback to caching sites one by one')
+            # run one by one in case the batching does not work
+            return flatten([cache_sites([site], years) for site in sites])
+        else:
+            raise e
+
+
+def _cache_sites(nodes: list, batch_size: int = _CACHE_BATCH_SIZE):
+    start = current_time_ms()
+
+    # build list of nodes by key to update as sites are processed
+    nodes_mapping = {_node_key(n): n for n in nodes}
+
+    years = _years_from_cycles(nodes) if _ENABLE_CACHE_YEARS else []
+    sites = _filter_by_type(nodes, 'Site')
+
+    while len(sites) > 0:
+        batch_values = _pop_items(sites, batch_size)
+        logger.info(f"Processing {len(batch_values)} sites / {len(sites)} remaining.")
+        results = _safe_cache_sites(batch_values, years)
+        for result in results:
+            nodes_mapping[_node_key(result)] = result
+
+    logger.info(f"Done caching sites in {current_time_ms() - start} ms")
+
+    # replace original sites with new cached sites
+    return list(nodes_mapping.values())
+
+
+def run(nodes: list):
+    init_gee()
+
+    # cache sites data
+    cached_nodes = _cache_sites(nodes)
+
+    # cache related nodes
+    cached_nodes = _cache_related_nodes(cached_nodes) if _ENABLE_CACHE_RELATED_NODES else cached_nodes
+
+    # cache sources
+    return _cache_sources(cached_nodes)
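The new cache_nodes module batches Sites through cache_sites, then attaches related/nested node links and the shared sources under CACHE_KEY. A minimal usage sketch (the node payloads are invented, and configured Google Earth Engine credentials are assumed since run() calls init_gee()):

from hestia_earth.models.cache_nodes import run

# Illustrative nodes only; real Site/Cycle nodes carry many more fields.
nodes = [
    {'type': 'Site', 'id': 'site-1', 'siteType': 'cropland'},
    {'type': 'Cycle', 'id': 'cycle-1', 'site': {'type': 'Site', 'id': 'site-1'}}
]

# Each returned node of a cached type gains an entry under CACHE_KEY holding
# its 'related' nodes, its 'nested' nodes and the shared sources.
cached_nodes = run(nodes)

Batch size and the optional steps are driven by the CACHE_SITES_BATCH_SIZE, ENABLE_CACHE_YEARS and ENABLE_CACHE_RELATED_NODES environment variables defined at the top of the module.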
hestia_earth/models/cache_sites.py
@@ -81,7 +81,7 @@ def _run_values(
     site_cache = merge(
         site.get(CACHE_KEY, {}),
         {CACHE_GEOSPATIAL_KEY: cached_data},
-        ({CACHE_YEARS_KEY: list(set(cached_value(site, CACHE_YEARS_KEY, []) + years))} if years else {})
+        ({CACHE_YEARS_KEY: sorted(list(set(cached_value(site, CACHE_YEARS_KEY, []) + years)))} if years else {})
     )
     return merge(site, {CACHE_KEY: site_cache})
 
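The only functional change in cache_sites.py is wrapping the merged years in sorted(...), which makes the cached list deterministic instead of dependent on set iteration order. A tiny illustration:

# Duplicate years are dropped by set(); sorted() then fixes the order,
# so repeated runs cache an identical list.
existing_years = [2021, 2019]
new_years = [2020, 2019]
sorted(list(set(existing_years + new_years)))  # -> [2019, 2020, 2021]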
hestia_earth/models/config/Cycle.json
@@ -2054,36 +2054,6 @@
       },
       "stage": 2
     },
-    {
-      "key": "emissions",
-      "model": "ipcc2006",
-      "value": "n2OToAirOrganicSoilCultivationDirect",
-      "runStrategy": "add_blank_node_if_missing",
-      "runArgs": {
-        "runNonMeasured": true,
-        "runNonAddedTerm": true
-      },
-      "mergeStrategy": "list",
-      "mergeArgs": {
-        "replaceThreshold": ["value", 0.01]
-      },
-      "stage": 2
-    },
-    {
-      "key": "emissions",
-      "model": "ipcc2006",
-      "value": "co2ToAirOrganicSoilCultivation",
-      "runStrategy": "add_blank_node_if_missing",
-      "runArgs": {
-        "runNonMeasured": true,
-        "runNonAddedTerm": true
-      },
-      "mergeStrategy": "list",
-      "mergeArgs": {
-        "replaceThreshold": ["value", 0.01]
-      },
-      "stage": 2
-    },
     {
       "key": "emissions",
       "model": "ipcc2006",
hestia_earth/models/data/ecoinventV3/__init__.py
@@ -5,17 +5,19 @@ from hestia_earth.utils.tools import non_empty_list
 
 from hestia_earth.models.log import logger
 
-
-
+_CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+_ENV_NAME = 'ECOINVENT_V3_FILEPATH'
 
 
 @lru_cache()
 def _get_file():
-
-
+    filepath = os.getenv(_ENV_NAME, f"{os.path.join(_CURRENT_DIR, 'ecoinventV3_excerpt')}.csv")
+
+    if not os.path.exists(filepath):
+        logger.warning('Ecoinvent file not found. Please make sure to set env variable "%s".', _ENV_NAME)
         return None
 
-    return load_lookup(filepath=
+    return load_lookup(filepath=filepath, keep_in_memory=True)
 
 
 def ecoinventV3_emissions(ecoinventName: str):
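The loader now resolves the lookup path from the ECOINVENT_V3_FILEPATH environment variable, falls back to a bundled ecoinventV3_excerpt.csv, and warns when the file is missing. A usage sketch (the path and activity name are illustrative):

import os

# Must be set before the first call: _get_file() is wrapped in @lru_cache(),
# so the resolved path is memoised. The path here is an example only.
os.environ['ECOINVENT_V3_FILEPATH'] = '/data/lookups/ecoinventV3_full.csv'

from hestia_earth.models.data.ecoinventV3 import ecoinventV3_emissions

# Returns the emissions mapped to an ecoinvent activity name; the name is
# invented and the return shape is not shown in this diff.
emissions = ecoinventV3_emissions('market for diesel')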
hestia_earth/models/ecoinventV3/__init__.py
@@ -5,7 +5,11 @@ from hestia_earth.utils.tools import flatten, list_sum
 from hestia_earth.models.log import debugValues, logShouldRun, logRequirements
 from hestia_earth.models.data.ecoinventV3 import ecoinventV3_emissions
 from hestia_earth.models.utils.emission import _new_emission
-from hestia_earth.models.utils.background_emissions import
+from hestia_earth.models.utils.background_emissions import (
+    get_background_inputs,
+    no_gap_filled_background_emissions,
+    log_missing_emissions
+)
 from hestia_earth.models.utils.blank_node import group_by_keys
 from hestia_earth.models.utils.pesticideAI import get_pesticides_from_inputs
 from hestia_earth.models.utils.fertiliser import get_fertilisers_from_inputs
@@ -47,6 +51,7 @@ RETURNS = {
     }]
 }
 LOOKUPS = {
+    "emission": "inputProductionGroupId",
     "electricity": "ecoinventMapping",
     "fuel": "ecoinventMapping",
     "inorganicFertiliser": "ecoinventMapping",
@@ -97,6 +102,7 @@ def _add_emission(cycle: dict, input: dict):
 
 def _run_input(cycle: dict):
     no_gap_filled_background_emissions_func = no_gap_filled_background_emissions(cycle)
+    log_missing_emissions_func = log_missing_emissions(cycle, model=MODEL, methodTier=TIER)
 
     def run(inputs: list):
         input = inputs[0]
@@ -118,6 +124,7 @@ def _run_input(cycle: dict):
         logShouldRun(cycle, MODEL, input_term_id, should_run, methodTier=TIER)
 
         grouped_emissions = reduce(_add_emission(cycle, input), mappings, {}) if should_run else {}
+        log_missing_emissions_func(input_term_id, list(grouped_emissions.keys()))
         return [
             _emission(term_id, value * input_value, input)
             for term_id, value in grouped_emissions.items()
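log_missing_emissions is implemented in hestia_earth/models/utils/background_emissions.py (+24 lines, not shown in this diff). From the two call sites above it is a curried helper: called once with the cycle and the logging context, it returns a function taking an input term id and the emission term ids that were actually grouped. A sketch of that inferred shape, purely an assumption:

# Hypothetical shape inferred from the call sites; the real body is not
# part of this diff.
def log_missing_emissions(cycle: dict, model: str, methodTier: str):
    def log(input_term_id: str, grouped_emission_term_ids: list):
        # presumably compares the grouped ids against the ids required by
        # the new "emission" -> "inputProductionGroupId" lookup and logs
        # whichever are missing
        ...
    return log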
hestia_earth/models/hestia/aboveGroundCropResidue.py
@@ -105,9 +105,9 @@ def _run(cycle: dict, total_values: list):
         term_id = model.get('product')
         value = _run_model(model, cycle, total_value)
         debugValues(cycle, model=MODEL, term=term_id,
-
-
-
+                    total_above_ground_crop_residue=total_value,
+                    remaining_crop_residue_value=remaining_value,
+                    allocated_value=value)
 
         if value == 0:
             values.extend([_product(term_id, value)])
hestia_earth/models/hestia/seed_emissions.py
@@ -98,6 +98,23 @@ def _run(cycle: dict, economicValueShare: float, total_yield: float, seed_input:
     ]
 
 
+def _map_group_emissions(group_id: str, required_emission_term_ids: list, emission_ids: list):
+    lookup = download_lookup('emission.csv')
+    emissions = list(filter(
+        lambda id: id in required_emission_term_ids,
+        find_term_ids_by(lookup, column_name('inputProductionGroupId'), group_id)
+    ))
+    included_emissions = list(filter(lambda v: v in emission_ids, emissions))
+    missing_emissions = list(filter(lambda v: v not in emission_ids, emissions))
+    return {
+        'id': group_id,
+        'total-emissions': len(emissions),
+        'included-emissions': len(included_emissions),
+        'missing-emissions': '-'.join(missing_emissions),
+        'is-valid': len(emissions) == len(included_emissions)
+    }
+
+
 def _filter_emissions(cycle: dict):
     required_emission_term_ids = cycle_emissions_in_system_boundary(cycle)
 
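_map_group_emissions collapses what were two passes over each group into a single summary dict and introduces the 'is-valid' flag consumed below. An illustrative input/output pair (all term ids invented; the real ones come from the inputProductionGroupId column of emission.csv):

# Suppose the lookup yields two required emissions for the group but only
# one is present on the Cycle:
#   _map_group_emissions(
#       'dieselGroup',
#       required_emission_term_ids=['co2ToAirInputsProduction', 'n2OToAirInputsProductionDirect'],
#       emission_ids=['co2ToAirInputsProduction']
#   )
# would return:
#   {'id': 'dieselGroup', 'total-emissions': 2, 'included-emissions': 1,
#    'missing-emissions': 'n2OToAirInputsProductionDirect', 'is-valid': False}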
@@ -117,31 +134,12 @@ def _filter_emissions(cycle: dict):
     group_ids = set([v.get('group-id') for v in emissions if v.get('group-id')])
 
     # for each group, get the list of all required emissions
-    lookup = download_lookup('emission.csv')
     emissions_per_group = [
-        {
-            'id': group_id,
-            'emissions': list(filter(
-                lambda id: id in required_emission_term_ids,
-                find_term_ids_by(lookup, column_name('inputProductionGroupId'), group_id)
-            ))
-        }
+        _map_group_emissions(group_id, required_emission_term_ids, emission_ids)
         for group_id in group_ids
     ]
-    emissions_per_group = [
-        {
-            'id': group.get('id'),
-            'total-emissions': len(group.get('emissions', [])),
-            'included-emissions': len(list(filter(lambda v: v in emission_ids, group.get('emissions', [])))),
-            'missing-emissions': '-'.join(list(filter(lambda v: v not in emission_ids, group.get('emissions', []))))
-        }
-        for group in emissions_per_group
-    ]
     # only keep groups that have all emissions present in the Cycle
-    valid_groups = list(filter(
-        lambda group: group.get('total-emissions') == group.get('included-emissions'),
-        emissions_per_group
-    ))
+    valid_groups = list(filter(lambda group: group.get('is-valid'), emissions_per_group))
     valid_group_ids = set([v.get('id') for v in valid_groups])
 
     # finally, only return emissions which groups are valid
@@ -273,6 +271,12 @@ def _should_run(cycle: dict):
 
     logShouldRun(cycle, MODEL, term_id, should_run, methodTier=TIER, model_key=MODEL_KEY)
 
+    # log missing emissions to show in the logs
+    for group in emissions_per_group:
+        if not group.get('is-valid'):
+            logShouldRun(cycle, MODEL, term_id, False,
+                         methodTier=TIER, model_key=MODEL_KEY, emission_id=group.get('id'))
+
     return should_run, total_economicValueShare, total_yield, grouped_seed_inputs, grouped_emissions
 
 