hestia-earth-models 0.70.6__py3-none-any.whl → 0.72.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. hestia_earth/models/cache_nodes.py +157 -0
  2. hestia_earth/models/cache_sites.py +1 -1
  3. hestia_earth/models/config/Cycle.json +0 -30
  4. hestia_earth/models/config/Site.json +8 -0
  5. hestia_earth/models/data/ecoinventV3/__init__.py +7 -5
  6. hestia_earth/models/ecoinventV3/__init__.py +8 -1
  7. hestia_earth/models/geospatialDatabase/histosol.py +14 -7
  8. hestia_earth/models/hestia/aboveGroundCropResidue.py +3 -3
  9. hestia_earth/models/hestia/histosol.py +53 -0
  10. hestia_earth/models/hestia/seed_emissions.py +25 -21
  11. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_1.py +22 -7
  12. hestia_earth/models/mocking/search-results.json +1172 -1168
  13. hestia_earth/models/utils/aggregated.py +3 -3
  14. hestia_earth/models/utils/background_emissions.py +24 -0
  15. hestia_earth/models/utils/measurement.py +16 -3
  16. hestia_earth/models/utils/pesticideAI.py +1 -1
  17. hestia_earth/models/version.py +1 -1
  18. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/METADATA +3 -3
  19. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/RECORD +26 -26
  20. tests/models/geospatialDatabase/test_histosol.py +21 -20
  21. tests/models/hestia/test_histosol.py +24 -0
  22. tests/models/ipcc2019/test_organicCarbonPerHa_tier_1.py +4 -3
  23. tests/models/test_cache_nodes.py +31 -0
  24. hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +0 -100
  25. hestia_earth/models/ipcc2006/n2OToAirOrganicSoilCultivationDirect.py +0 -99
  26. tests/models/ipcc2006/test_co2ToAirOrganicSoilCultivation.py +0 -49
  27. tests/models/ipcc2006/test_n2OToAirOrganicSoilCultivationDirect.py +0 -32
  28. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/LICENSE +0 -0
  29. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/WHEEL +0 -0
  30. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,157 @@
1
+ import os
2
+ from functools import reduce
3
+ from hestia_earth.schema import NodeType
4
+ from hestia_earth.utils.tools import current_time_ms, flatten
5
+ from hestia_earth.earth_engine import init_gee
6
+
7
+ from .log import logger
8
+ from .utils import CACHE_KEY
9
+ from .utils.site import years_from_cycles
10
+ from .utils.source import CACHE_SOURCES_KEY, find_sources
11
+ from .cache_sites import run as cache_sites
12
+
13
+ CACHE_RELATED_KEY = 'related'
14
+ CACHE_NESTED_KEY = 'nested'
15
+
16
+ _CACHE_BATCH_SIZE = int(os.getenv('CACHE_SITES_BATCH_SIZE', '5000'))
17
+ _ENABLE_CACHE_YEARS = os.getenv('ENABLE_CACHE_YEARS', 'true') == 'true'
18
+ _ENABLE_CACHE_RELATED_NODES = os.getenv('ENABLE_CACHE_RELATED_NODES', 'true') == 'true'
19
+ _CACHE_NODE_TYPES = [
20
+ NodeType.SITE.value,
21
+ NodeType.CYCLE.value,
22
+ NodeType.IMPACTASSESSMENT.value
23
+ ]
24
+
25
+
26
+ def _pop_items(values: list, nb_items: int):
27
+ if len(values) < nb_items:
28
+ removed_items = values[:] # Get a copy of the entire array
29
+ values.clear() # Remove all items from the original array
30
+ else:
31
+ removed_items = values[:nb_items] # Get the first N items
32
+ del values[:nb_items] # Remove the first N items from the original array
33
+
34
+ return removed_items
35
+
36
+
37
+ def _filter_by_type(nodes: list, type: str): return [n for n in nodes if n.get('@type', n.get('type')) == type]
38
+
39
+
40
+ def _node_key(node: dict): return '/'.join([node.get('type', node.get('@type')), node.get('id', node.get('@id'))])
41
+
42
+
43
def _years_from_cycles(nodes: list):
    """Collect the years covered by the Cycle nodes among *nodes*.

    Note: the parameter was previously annotated as ``dict`` but a list of
    nodes is what both callers pass and what ``_filter_by_type`` expects.
    """
    return years_from_cycles(_filter_by_type(nodes, NodeType.CYCLE.value))
44
+
45
+
46
+ def _linked_node(data: dict): return {'type': data.get('type'), 'id': data.get('id')}
47
+
48
+
49
def _find_nested_nodes(data) -> list[dict]:
    """Recursively collect {'type', 'id'} references to cacheable nodes inside *data*.

    A dict that is itself a cacheable node (known type + non-empty id) yields a
    single reference and is not traversed further; other containers are walked.
    """
    if isinstance(data, list):
        return flatten(map(_find_nested_nodes, data))
    if not isinstance(data, dict):
        return []
    is_cacheable = data.get('type') in _CACHE_NODE_TYPES and bool(data.get('id'))
    return [_linked_node(data)] if is_cacheable else flatten(_find_nested_nodes(list(data.values())))
57
+
58
+
59
def _nested_nodes(node_keys: list[str]):
    """Build a reducer that links every node with the nodes it references.

    For each node reference found inside *node*, the reducer records *node*
    under the referenced node's 'related' list; when the referenced node is
    part of the current file (its key appears in *node_keys*), the reference
    is also recorded under *node*'s own 'nested' list.
    """
    def exec(group: dict, node: dict):
        for nested_node in _find_nested_nodes(list(node.values())):
            nested_key = _node_key(nested_node)
            nested_entry = group.setdefault(nested_key, {})
            nested_entry[CACHE_RELATED_KEY] = nested_entry.get(CACHE_RELATED_KEY, []) + [
                _linked_node(node)
            ]

            # cache nodes that the current node refers to (nesting)
            if nested_key in node_keys:
                parent_key = _node_key(node)
                parent_entry = group.setdefault(parent_key, {})
                parent_entry[CACHE_NESTED_KEY] = parent_entry.get(CACHE_NESTED_KEY, []) + [
                    _linked_node(nested_node)
                ]

        return group
    return exec
80
+
81
+
82
def _cache_related_nodes(nodes: list):
    """Attach 'related' and 'nested' node references to each node's cache."""
    # restrict linking to nodes actually present in the file
    keys = [_node_key(node) for node in nodes]
    # node key -> {'related': [...], 'nested': [...]}
    mapping = reduce(_nested_nodes(keys), nodes, {})

    def with_cache(node: dict):
        entry = mapping.get(_node_key(node), {})
        cache = node.get(CACHE_KEY, {}) | {
            CACHE_RELATED_KEY: entry.get(CACHE_RELATED_KEY) or [],
            CACHE_NESTED_KEY: entry.get(CACHE_NESTED_KEY) or []
        }
        return node | {CACHE_KEY: cache}

    return [with_cache(node) for node in nodes]
100
+
101
+
102
def _cache_sources(nodes: list):
    """Store the shared list of sources in the cache of every cacheable node."""
    sources = find_sources()

    def with_sources(node: dict):
        node_type = node.get('type', node.get('@type'))
        extra = {
            CACHE_KEY: node.get(CACHE_KEY, {}) | {CACHE_SOURCES_KEY: sources}
        } if node_type in _CACHE_NODE_TYPES else {}
        return node | extra

    return [with_sources(node) for node in nodes]
110
+
111
+
112
def _safe_cache_sites(sites: list, years: list):
    """Cache geospatial data for *sites*, falling back to per-site calls on quota errors.

    Any other error is re-raised unchanged.
    """
    try:
        return cache_sites(sites, years)
    except Exception as e:
        # fixed typo: "occured" -> "occurred"
        logger.error(f"An error occurred while caching nodes on EE: {str(e)}")
        if 'exceeded' in str(e):
            logger.debug('Fallback to caching sites one by one')
            # run one by one in case the batching does not work
            return flatten([cache_sites([site], years) for site in sites])
        # bare raise keeps the original traceback intact (idiomatic vs `raise e`)
        raise
123
+
124
+
125
def _cache_sites(nodes: list, batch_size: int = _CACHE_BATCH_SIZE):
    """Cache geospatial data for every Site in *nodes*, processed in batches.

    Returns all nodes, with cached sites replacing the original ones.
    """
    start = current_time_ms()

    # index every node by its key so cached sites can be swapped in place
    by_key = {_node_key(node): node for node in nodes}

    years = _years_from_cycles(nodes) if _ENABLE_CACHE_YEARS else []
    pending_sites = _filter_by_type(nodes, 'Site')

    while pending_sites:
        batch = _pop_items(pending_sites, batch_size)
        logger.info(f"Processing {len(batch)} sites / {len(pending_sites)} remaining.")
        for cached_site in _safe_cache_sites(batch, years):
            by_key[_node_key(cached_site)] = cached_site

    logger.info(f"Done caching sites in {current_time_ms() - start} ms")

    # replace original sites with new cached sites
    return list(by_key.values())
145
+
146
+
147
def run(nodes: list):
    """Cache geospatial data, node relations and sources for *nodes*."""
    init_gee()

    # 1. cache sites data
    result = _cache_sites(nodes)

    # 2. cache related nodes (optional, controlled by env flag)
    if _ENABLE_CACHE_RELATED_NODES:
        result = _cache_related_nodes(result)

    # 3. cache sources
    return _cache_sources(result)
@@ -81,7 +81,7 @@ def _run_values(
81
81
  site_cache = merge(
82
82
  site.get(CACHE_KEY, {}),
83
83
  {CACHE_GEOSPATIAL_KEY: cached_data},
84
- ({CACHE_YEARS_KEY: list(set(cached_value(site, CACHE_YEARS_KEY, []) + years))} if years else {})
84
+ ({CACHE_YEARS_KEY: sorted(list(set(cached_value(site, CACHE_YEARS_KEY, []) + years)))} if years else {})
85
85
  )
86
86
  return merge(site, {CACHE_KEY: site_cache})
87
87
 
@@ -2054,36 +2054,6 @@
2054
2054
  },
2055
2055
  "stage": 2
2056
2056
  },
2057
- {
2058
- "key": "emissions",
2059
- "model": "ipcc2006",
2060
- "value": "n2OToAirOrganicSoilCultivationDirect",
2061
- "runStrategy": "add_blank_node_if_missing",
2062
- "runArgs": {
2063
- "runNonMeasured": true,
2064
- "runNonAddedTerm": true
2065
- },
2066
- "mergeStrategy": "list",
2067
- "mergeArgs": {
2068
- "replaceThreshold": ["value", 0.01]
2069
- },
2070
- "stage": 2
2071
- },
2072
- {
2073
- "key": "emissions",
2074
- "model": "ipcc2006",
2075
- "value": "co2ToAirOrganicSoilCultivation",
2076
- "runStrategy": "add_blank_node_if_missing",
2077
- "runArgs": {
2078
- "runNonMeasured": true,
2079
- "runNonAddedTerm": true
2080
- },
2081
- "mergeStrategy": "list",
2082
- "mergeArgs": {
2083
- "replaceThreshold": ["value", 0.01]
2084
- },
2085
- "stage": 2
2086
- },
2087
2057
  {
2088
2058
  "key": "emissions",
2089
2059
  "model": "ipcc2006",
@@ -88,6 +88,14 @@
88
88
  "runStrategy": "add_blank_node_if_missing",
89
89
  "mergeStrategy": "list",
90
90
  "stage": 1
91
+ },
92
+ {
93
+ "key": "measurements",
94
+ "model": "hestia",
95
+ "value": "histosol",
96
+ "runStrategy": "add_blank_node_if_missing",
97
+ "mergeStrategy": "list",
98
+ "stage": 1
91
99
  }
92
100
  ],
93
101
  [
@@ -5,17 +5,19 @@ from hestia_earth.utils.tools import non_empty_list
5
5
 
6
6
  from hestia_earth.models.log import logger
7
7
 
8
- CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
9
- _FILEPATH = os.getenv('ECOINVENT_V3_FILEPATH', f"{os.path.join(CURRENT_DIR, 'ecoinventV3_excerpt')}.csv")
8
+ _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
9
+ _ENV_NAME = 'ECOINVENT_V3_FILEPATH'
10
10
 
11
11
 
12
12
  @lru_cache()
13
13
  def _get_file():
14
- if not os.path.exists(_FILEPATH):
15
- logger.warning('Ecoinvent file not found. Please make sure to set env variable "ECOINVENT_V3_FILEPATH".')
14
+ filepath = os.getenv(_ENV_NAME, f"{os.path.join(_CURRENT_DIR, 'ecoinventV3_excerpt')}.csv")
15
+
16
+ if not os.path.exists(filepath):
17
+ logger.warning('Ecoinvent file not found. Please make sure to set env variable "%s".', _ENV_NAME)
16
18
  return None
17
19
 
18
- return load_lookup(filepath=_FILEPATH, keep_in_memory=True)
20
+ return load_lookup(filepath=filepath, keep_in_memory=True)
19
21
 
20
22
 
21
23
  def ecoinventV3_emissions(ecoinventName: str):
@@ -5,7 +5,11 @@ from hestia_earth.utils.tools import flatten, list_sum
5
5
  from hestia_earth.models.log import debugValues, logShouldRun, logRequirements
6
6
  from hestia_earth.models.data.ecoinventV3 import ecoinventV3_emissions
7
7
  from hestia_earth.models.utils.emission import _new_emission
8
- from hestia_earth.models.utils.background_emissions import get_background_inputs, no_gap_filled_background_emissions
8
+ from hestia_earth.models.utils.background_emissions import (
9
+ get_background_inputs,
10
+ no_gap_filled_background_emissions,
11
+ log_missing_emissions
12
+ )
9
13
  from hestia_earth.models.utils.blank_node import group_by_keys
10
14
  from hestia_earth.models.utils.pesticideAI import get_pesticides_from_inputs
11
15
  from hestia_earth.models.utils.fertiliser import get_fertilisers_from_inputs
@@ -47,6 +51,7 @@ RETURNS = {
47
51
  }]
48
52
  }
49
53
  LOOKUPS = {
54
+ "emission": "inputProductionGroupId",
50
55
  "electricity": "ecoinventMapping",
51
56
  "fuel": "ecoinventMapping",
52
57
  "inorganicFertiliser": "ecoinventMapping",
@@ -97,6 +102,7 @@ def _add_emission(cycle: dict, input: dict):
97
102
 
98
103
  def _run_input(cycle: dict):
99
104
  no_gap_filled_background_emissions_func = no_gap_filled_background_emissions(cycle)
105
+ log_missing_emissions_func = log_missing_emissions(cycle, model=MODEL, methodTier=TIER)
100
106
 
101
107
  def run(inputs: list):
102
108
  input = inputs[0]
@@ -118,6 +124,7 @@ def _run_input(cycle: dict):
118
124
  logShouldRun(cycle, MODEL, input_term_id, should_run, methodTier=TIER)
119
125
 
120
126
  grouped_emissions = reduce(_add_emission(cycle, input), mappings, {}) if should_run else {}
127
+ log_missing_emissions_func(input_term_id, list(grouped_emissions.keys()))
121
128
  return [
122
129
  _emission(term_id, value * input_value, input)
123
130
  for term_id, value in grouped_emissions.items()
@@ -1,7 +1,7 @@
1
- from hestia_earth.schema import MeasurementMethodClassification, TermTermType
1
+ from hestia_earth.schema import MeasurementMethodClassification
2
2
 
3
3
  from hestia_earth.models.log import logRequirements, logShouldRun
4
- from hestia_earth.models.utils.measurement import _new_measurement
4
+ from hestia_earth.models.utils.measurement import _new_measurement, total_other_soilType_value
5
5
  from hestia_earth.models.utils.source import get_source
6
6
  from .utils import download, has_geospatial_data, should_download
7
7
  from . import MODEL
@@ -14,7 +14,13 @@ REQUIREMENTS = {
14
14
  {"region": {"@type": "Term", "termType": "region"}}
15
15
  ],
16
16
  "none": {
17
- "measurements": [{"@type": "Measurement", "value": "", "term.termType": "soilType"}]
17
+ "measurements": [{
18
+ "@type": "Measurement",
19
+ "value": "100",
20
+ "depthUpper": "0",
21
+ "depthLower": "30",
22
+ "term.termType": "soilType"
23
+ }]
18
24
  }
19
25
  }
20
26
  }
@@ -50,17 +56,18 @@ def _run(site: dict):
50
56
 
51
57
 
52
58
  def _should_run(site: dict):
53
- measurements = site.get('measurements', [])
54
- no_soil_type = all([m.get('term', {}).get('termType') != TermTermType.SOILTYPE.value for m in measurements])
55
59
  contains_geospatial_data = has_geospatial_data(site)
56
60
  below_max_area_size = should_download(TERM_ID, site)
57
61
 
62
+ total_measurements_value = total_other_soilType_value(site.get('measurements', []), TERM_ID)
63
+
58
64
  logRequirements(site, model=MODEL, term=TERM_ID,
59
65
  contains_geospatial_data=contains_geospatial_data,
60
66
  below_max_area_size=below_max_area_size,
61
- no_soil_type=no_soil_type)
67
+ total_soilType_measurements_value=total_measurements_value,
68
+ total_soilType_measurements_value_is_0=total_measurements_value == 0)
62
69
 
63
- should_run = all([contains_geospatial_data, below_max_area_size, no_soil_type])
70
+ should_run = all([contains_geospatial_data, below_max_area_size, total_measurements_value == 0])
64
71
  logShouldRun(site, MODEL, TERM_ID, should_run)
65
72
  return should_run
66
73
 
@@ -105,9 +105,9 @@ def _run(cycle: dict, total_values: list):
105
105
  term_id = model.get('product')
106
106
  value = _run_model(model, cycle, total_value)
107
107
  debugValues(cycle, model=MODEL, term=term_id,
108
- total_value=total_value,
109
- remaining_value=remaining_value,
110
- value=value)
108
+ total_above_ground_crop_residue=total_value,
109
+ remaining_crop_residue_value=remaining_value,
110
+ allocated_value=value)
111
111
 
112
112
  if value == 0:
113
113
  values.extend([_product(term_id, value)])
@@ -0,0 +1,53 @@
1
+ from hestia_earth.schema import MeasurementMethodClassification
2
+
3
+ from hestia_earth.models.log import logRequirements, logShouldRun
4
+ from hestia_earth.models.utils.measurement import _new_measurement, total_other_soilType_value
5
+ from . import MODEL
6
+
7
+ REQUIREMENTS = {
8
+ "Site": {
9
+ "measurements": [{
10
+ "@type": "Measurement",
11
+ "value": "100",
12
+ "depthUpper": "0",
13
+ "depthLower": "30",
14
+ "term.termType": "soilType"
15
+ }]
16
+ }
17
+ }
18
+ RETURNS = {
19
+ "Measurement": [{
20
+ "value": "0",
21
+ "depthUpper": "0",
22
+ "depthLower": "30",
23
+ "methodClassification": "modelled using other measurements"
24
+ }]
25
+ }
26
+ LOOKUPS = {
27
+ "soilType": "sumMax100Group"
28
+ }
29
+ TERM_ID = 'histosol'
30
+
31
+
32
def _measurement():
    """Build a histosol measurement of 0% over the 0-30cm depth interval."""
    measurement = _new_measurement(TERM_ID)
    measurement.update({
        'value': [0],
        'depthUpper': 0,
        'depthLower': 30,
        'methodClassification': MeasurementMethodClassification.MODELLED_USING_OTHER_MEASUREMENTS.value
    })
    return measurement
39
+
40
+
41
def _should_run(site: dict):
    """Run only when the other soilType measurements already total 100%."""
    total_measurements_value = total_other_soilType_value(site.get('measurements', []), TERM_ID)

    logRequirements(site, model=MODEL, term=TERM_ID,
                    total_soilType_measurements_value=total_measurements_value,
                    total_soilType_measurements_value_is_100=total_measurements_value == 100)

    # single condition: the `all([...])` wrapper was redundant
    should_run = total_measurements_value == 100
    logShouldRun(site, MODEL, TERM_ID, should_run)
    return should_run
51
+
52
+
53
def run(site: dict):
    """Return the gap-filled histosol measurement when the site qualifies."""
    if not _should_run(site):
        return []
    return [_measurement()]
@@ -98,6 +98,23 @@ def _run(cycle: dict, economicValueShare: float, total_yield: float, seed_input:
98
98
  ]
99
99
 
100
100
 
101
def _map_group_emissions(group_id: str, required_emission_term_ids: list, emission_ids: list):
    """Summarise, for one input-production group, which required emissions are present.

    Returns a dict with the group id, emission counters, a '-'-joined list of
    missing emission term ids and an 'is-valid' flag (True when none missing).
    """
    lookup = download_lookup('emission.csv')
    emissions = [
        term_id for term_id in find_term_ids_by(lookup, column_name('inputProductionGroupId'), group_id)
        if term_id in required_emission_term_ids
    ]
    # set gives O(1) membership tests; iteration order of `emissions` is preserved
    present_ids = set(emission_ids)
    included_emissions = [v for v in emissions if v in present_ids]
    missing_emissions = [v for v in emissions if v not in present_ids]
    return {
        'id': group_id,
        'total-emissions': len(emissions),
        'included-emissions': len(included_emissions),
        'missing-emissions': '-'.join(missing_emissions),
        'is-valid': len(emissions) == len(included_emissions)
    }
116
+
117
+
101
118
  def _filter_emissions(cycle: dict):
102
119
  required_emission_term_ids = cycle_emissions_in_system_boundary(cycle)
103
120
 
@@ -117,31 +134,12 @@ def _filter_emissions(cycle: dict):
117
134
  group_ids = set([v.get('group-id') for v in emissions if v.get('group-id')])
118
135
 
119
136
  # for each group, get the list of all required emissions
120
- lookup = download_lookup('emission.csv')
121
137
  emissions_per_group = [
122
- {
123
- 'id': group_id,
124
- 'emissions': list(filter(
125
- lambda id: id in required_emission_term_ids,
126
- find_term_ids_by(lookup, column_name('inputProductionGroupId'), group_id)
127
- ))
128
- }
138
+ _map_group_emissions(group_id, required_emission_term_ids, emission_ids)
129
139
  for group_id in group_ids
130
140
  ]
131
- emissions_per_group = [
132
- {
133
- 'id': group.get('id'),
134
- 'total-emissions': len(group.get('emissions', [])),
135
- 'included-emissions': len(list(filter(lambda v: v in emission_ids, group.get('emissions', [])))),
136
- 'missing-emissions': '-'.join(list(filter(lambda v: v not in emission_ids, group.get('emissions', []))))
137
- }
138
- for group in emissions_per_group
139
- ]
140
141
  # only keep groups that have all emissions present in the Cycle
141
- valid_groups = list(filter(
142
- lambda group: group.get('total-emissions') == group.get('included-emissions'),
143
- emissions_per_group
144
- ))
142
+ valid_groups = list(filter(lambda group: group.get('is-valid'), emissions_per_group))
145
143
  valid_group_ids = set([v.get('id') for v in valid_groups])
146
144
 
147
145
  # finally, only return emissions which groups are valid
@@ -273,6 +271,12 @@ def _should_run(cycle: dict):
273
271
 
274
272
  logShouldRun(cycle, MODEL, term_id, should_run, methodTier=TIER, model_key=MODEL_KEY)
275
273
 
274
+ # log missing emissions to show in the logs
275
+ for group in emissions_per_group:
276
+ if not group.get('is-valid'):
277
+ logShouldRun(cycle, MODEL, term_id, False,
278
+ methodTier=TIER, model_key=MODEL_KEY, emission_id=group.get('id'))
279
+
276
280
  return should_run, total_economicValueShare, total_yield, grouped_seed_inputs, grouped_emissions
277
281
 
278
282
 
@@ -3,17 +3,18 @@ from functools import reduce
3
3
  from numpy import empty_like, random, vstack
4
4
  from numpy.typing import NDArray
5
5
  from pydash.objects import merge
6
- from typing import Callable, Optional, Union
6
+ from typing import Callable, Literal, Optional, Union
7
7
 
8
8
  from hestia_earth.schema import MeasurementMethodClassification, SiteSiteType, TermTermType
9
- from hestia_earth.utils.model import find_term_match, filter_list_term_type
10
9
  from hestia_earth.utils.blank_node import get_node_value
10
+ from hestia_earth.utils.model import find_term_match, filter_list_term_type
11
+ from hestia_earth.utils.tools import non_empty_list
11
12
 
12
13
  from hestia_earth.models.utils import split_on_condition
13
14
  from hestia_earth.models.utils.array_builders import gen_seed
14
15
  from hestia_earth.models.utils.blank_node import (
15
- cumulative_nodes_match, cumulative_nodes_lookup_match, cumulative_nodes_term_match, node_lookup_match,
16
- node_term_match, group_nodes_by_year, validate_start_date_end_date
16
+ cumulative_nodes_match, cumulative_nodes_lookup_match, cumulative_nodes_term_match, group_by_term,
17
+ node_lookup_match, node_term_match, group_nodes_by_year, validate_start_date_end_date
17
18
  )
18
19
  from hestia_earth.models.utils.ecoClimateZone import EcoClimateZone, get_eco_climate_zone_value
19
20
  from hestia_earth.models.utils.descriptive_stats import calc_descriptive_stats
@@ -1031,8 +1032,8 @@ def _assign_ipcc_soil_category(
1031
1032
  IpccSoilCategory
1032
1033
  The assigned IPCC soil category.
1033
1034
  """
1034
- soil_types = filter_list_term_type(measurement_nodes, TermTermType.SOILTYPE)
1035
- usda_soil_types = filter_list_term_type(measurement_nodes, TermTermType.USDASOILTYPE)
1035
+ soil_types = _get_soil_type_measurements(measurement_nodes, TermTermType.SOILTYPE)
1036
+ usda_soil_types = _get_soil_type_measurements(measurement_nodes, TermTermType.USDASOILTYPE)
1036
1037
 
1037
1038
  clay_content = get_node_value(find_term_match(measurement_nodes, _CLAY_CONTENT_TERM_ID))
1038
1039
  sand_content = get_node_value(find_term_match(measurement_nodes, _SAND_CONTENT_TERM_ID))
@@ -1053,6 +1054,20 @@ def _assign_ipcc_soil_category(
1053
1054
  ) if len(soil_types) > 0 or len(usda_soil_types) > 0 else default
1054
1055
 
1055
1056
 
1057
def _get_soil_type_measurements(
    nodes: list[dict], term_type: Literal[TermTermType.SOILTYPE, TermTermType.USDASOILTYPE]
) -> list[dict]:
    """For each soil-type term, keep the single measurement whose depth interval
    is closest to the target (DEPTH_UPPER, DEPTH_LOWER) interval."""
    grouped = group_by_term(filter_list_term_type(nodes, term_type))

    def depth_distance(node):
        # missing depths default to the full 0-100 interval
        upper = node.get("depthUpper", 0)
        lower = node.get("depthLower", 100)
        return abs(upper - DEPTH_UPPER) + abs(lower - DEPTH_LOWER)

    closest = [
        min(group_nodes, key=depth_distance)
        for group_nodes in grouped.values()
        if group_nodes
    ]
    return non_empty_list(closest)
1069
+
1070
+
1056
1071
  def _check_soil_category(
1057
1072
  *,
1058
1073
  key: IpccSoilCategory,
@@ -1461,7 +1476,7 @@ Value: Corresponding decision tree for IPCC management categories based on land
1461
1476
  """
1462
1477
 
1463
1478
  _IPCC_LAND_USE_CATEGORY_TO_DEFAULT_IPCC_MANAGEMENT_CATEGORY = {
1464
- IpccLandUseCategory.GRASSLAND: IpccManagementCategory.NOMINALLY_MANAGED,
1479
+ IpccLandUseCategory.GRASSLAND: IpccManagementCategory.UNKNOWN,
1465
1480
  IpccLandUseCategory.ANNUAL_CROPS_WET: IpccManagementCategory.UNKNOWN,
1466
1481
  IpccLandUseCategory.ANNUAL_CROPS: IpccManagementCategory.UNKNOWN
1467
1482
  }