hestia-earth-models 0.74.14__py3-none-any.whl → 0.74.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hestia-earth-models might be problematic.
- hestia_earth/models/cache_nodes.py +9 -6
- hestia_earth/models/config/ImpactAssessment.json +0 -22
- hestia_earth/models/config/Site.json +11 -3
- hestia_earth/models/cycle/completeness/material.py +2 -3
- hestia_earth/models/emepEea2019/fuelCombustion_utils.py +21 -21
- hestia_earth/models/hestia/landOccupationDuringCycle.py +9 -27
- hestia_earth/models/hestia/resourceUse_utils.py +49 -20
- hestia_earth/models/hestia/soilClassification.py +314 -0
- hestia_earth/models/ipcc2019/aboveGroundBiomass.py +5 -15
- hestia_earth/models/ipcc2019/belowGroundBiomass.py +5 -15
- hestia_earth/models/ipcc2019/biocharOrganicCarbonPerHa.py +5 -39
- hestia_earth/models/ipcc2019/ch4ToAirOrganicSoilCultivation.py +5 -5
- hestia_earth/models/ipcc2019/co2ToAirAboveGroundBiomassStockChange.py +10 -15
- hestia_earth/models/ipcc2019/co2ToAirBelowGroundBiomassStockChange.py +11 -16
- hestia_earth/models/ipcc2019/co2ToAirBiocharStockChange.py +7 -17
- hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +123 -74
- hestia_earth/models/ipcc2019/co2ToAirOrganicSoilCultivation.py +4 -5
- hestia_earth/models/ipcc2019/co2ToAirSoilOrganicCarbonStockChange.py +10 -15
- hestia_earth/models/ipcc2019/n2OToAirOrganicSoilCultivationDirect.py +5 -5
- hestia_earth/models/ipcc2019/nonCo2EmissionsToAirNaturalVegetationBurning.py +18 -47
- hestia_earth/models/ipcc2019/organicCarbonPerHa.py +10 -10
- hestia_earth/models/ipcc2019/organicCarbonPerHa_utils.py +4 -19
- hestia_earth/models/ipcc2019/organicSoilCultivation_utils.py +0 -9
- hestia_earth/models/log.py +75 -1
- hestia_earth/models/mocking/search-results.json +1 -1
- hestia_earth/models/utils/blank_node.py +12 -4
- hestia_earth/models/version.py +1 -1
- {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info}/METADATA +15 -7
- {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info}/RECORD +37 -34
- {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info}/WHEEL +1 -1
- tests/models/ecoalimV9/test_cycle.py +2 -2
- tests/models/hestia/test_landTransformation20YearAverageDuringCycle.py +4 -8
- tests/models/hestia/test_soilClassification.py +72 -0
- tests/models/ipcc2019/test_organicCarbonPerHa_utils.py +4 -48
- tests/models/test_log.py +128 -0
- {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info/licenses}/LICENSE +0 -0
- {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info}/top_level.txt +0 -0
hestia_earth/models/cache_nodes.py
@@ -144,14 +144,17 @@ def _cache_sites(nodes: list, batch_size: int = _CACHE_BATCH_SIZE):
     return list(nodes_mapping.values())
 
 
-def
-    init_gee()
-
+def cache_nodes(nodes: list):
     # cache sites data
-
+    nodes = _cache_sites(nodes)
 
     # cache related nodes
-
+    nodes = _cache_related_nodes(nodes) if _ENABLE_CACHE_RELATED_NODES else nodes
 
     # cache sources
-    return _cache_sources(
+    return _cache_sources(nodes)
+
+
+def run(nodes: list):
+    init_gee()
+    return cache_nodes(nodes)
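The hunk above splits the old single entry point in two: `cache_nodes` runs the caching steps on their own, while `run` keeps the previous behaviour of initialising Google Earth Engine first. A minimal sketch of the two entry points; the node list is hypothetical and `run` still assumes GEE credentials are configured:

from hestia_earth.models.cache_nodes import cache_nodes, run

# hypothetical nodes to pre-cache before running models
nodes = [{"@type": "Site", "@id": "site-1"}, {"@type": "Cycle", "@id": "cycle-1"}]

cached = run(nodes)          # initialises Google Earth Engine, then caches
cached = cache_nodes(nodes)  # reuses the caching steps when GEE is already initialised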
hestia_earth/models/config/ImpactAssessment.json
@@ -133,17 +133,6 @@
           "replaceThreshold": ["value", 0.01]
         },
         "stage": 1
-      },
-      {
-        "key": "emissionsResourceUse",
-        "model": "linkedImpactAssessment",
-        "value": "landTransformation100YearAverageInputsProduction",
-        "runStrategy": "add_blank_node_if_missing",
-        "mergeStrategy": "list",
-        "mergeArgs": {
-          "replaceThreshold": ["value", 0.01]
-        },
-        "stage": 1
       }
     ],
     [
@@ -158,17 +147,6 @@
         },
         "stage": 1
       },
-      {
-        "key": "emissionsResourceUse",
-        "model": "hestia",
-        "value": "landTransformation100YearAverageDuringCycle",
-        "runStrategy": "always",
-        "mergeStrategy": "list",
-        "mergeArgs": {
-          "replaceThreshold": ["value", 0.01]
-        },
-        "stage": 1
-      },
       {
         "key": "emissionsResourceUse",
         "model": "cml2001Baseline",
hestia_earth/models/config/Site.json
@@ -416,6 +416,14 @@
         "mergeStrategy": "list",
         "stage": 1
       },
+      {
+        "key": "measurements",
+        "model": "hestia",
+        "value": "soilClassification",
+        "runStrategy": "add_blank_node_if_missing",
+        "mergeStrategy": "list",
+        "stage": 1
+      },
       {
         "key": "management",
         "model": "hestia",
@@ -469,7 +477,7 @@
         "value": "brackishWater",
         "runStrategy": "add_blank_node_if_missing",
         "mergeStrategy": "list",
-        "stage":
+        "stage": 2
       },
       {
         "key": "measurements",
@@ -477,7 +485,7 @@
         "value": "freshWater",
         "runStrategy": "add_blank_node_if_missing",
         "mergeStrategy": "list",
-        "stage":
+        "stage": 2
       },
       {
         "key": "measurements",
@@ -485,7 +493,7 @@
         "value": "salineWater",
         "runStrategy": "add_blank_node_if_missing",
         "mergeStrategy": "list",
-        "stage":
+        "stage": 2
       }
     ],
     [
hestia_earth/models/cycle/completeness/material.py
@@ -9,7 +9,7 @@ REQUIREMENTS = {
         "inputs": [{"@type": "Input", "value": "", "term.@id": "machineryInfrastructureDepreciatedAmountPerCycle"}],
         "site": {
             "@type": "Site",
-            "siteType": ["cropland"
+            "siteType": ["cropland"]
         }
     }
 }
@@ -20,8 +20,7 @@ RETURNS = {
 }
 MODEL_KEY = 'material'
 ALLOWED_SITE_TYPES = [
-    SiteSiteType.CROPLAND.value
-    SiteSiteType.GLASS_OR_HIGH_ACCESSIBLE_COVER.value
+    SiteSiteType.CROPLAND.value
 ]
 
 
hestia_earth/models/emepEea2019/fuelCombustion_utils.py
@@ -15,7 +15,7 @@ _TIER = EmissionMethodTier.TIER_1.value
 
 def _run_inputs(inputs: list, tier: str, term_id: str):
     total_value = list_sum([
-        (i.get('input-value') or 0) * (i.get('operation-factor') or i.get('input-
+        (i.get('input-value') or 0) * (i.get('operation-factor') or i.get('input-factor') or 0)
         for i in inputs
     ])
     input_term = {
@@ -45,37 +45,35 @@ def _fuel_input_data(term_id: str, lookup_col: str, input: dict):
     operation_term = input.get('operation', {})
     input_value = list_sum(input.get('value', []), None)
 
-    operation_factor =
-
-
-
-
+    operation_factor = safe_parse_float(
+        extract_grouped_data(
+            data=get_lookup_value(operation_term, lookup_col, model=MODEL, term=term_id),
+            key=input_term_id
+        ) if operation_term else None,
+        default=None
+    )
+    input_factor = safe_parse_float(get_lookup_value(input_term, lookup_col, model=MODEL, term=term_id), default=None)
 
     return {
         'input-id': input_term_id,
         'input-termType': input_term.get('termType'),
         'input-units': input_term.get('units'),
         'input-value': input_value,
-        'input-
+        'input-factor': input_factor,
+        'is-valid': all([input_value is not None, (input_factor or operation_factor) is not None])
+    } | ({
         'operation-id': operation_term.get('@id'),
         'operation-termType': operation_term.get('termType'),
         'operation-units': operation_term.get('units'),
-        'operation-factor':
-    }
+        'operation-factor': operation_factor,
+    } if operation_term else {})
 
 
 def get_fuel_inputs(term_id: str, cycle: dict, lookup_col: str):
-
+    return [
         _fuel_input_data(term_id, lookup_col, i)
         for i in filter_list_term_type(cycle.get('inputs', []), TermTermType.FUEL)
     ]
-    valid_inputs = [
-        i for i in inputs if all([
-            i.get('input-value') is not None,
-            (i.get('operation-factor') or i.get('input-default-factor')) is not None
-        ])
-    ]
-    return inputs, valid_inputs
 
 
 def group_fuel_inputs(inputs: list):
@@ -84,15 +82,17 @@ def group_fuel_inputs(inputs: list):
 
 def _should_run(cycle: dict, term_id: str, lookup_prefix: str = None):
     electricity_complete = _is_term_type_complete(cycle, 'electricityFuel')
-    fuel_inputs
+    fuel_inputs = get_fuel_inputs(term_id, cycle, f"{lookup_prefix or term_id}EmepEea2019")
+    all_valid_inputs = all([v['is-valid'] for v in fuel_inputs])
 
     logRequirements(cycle, model=MODEL, term=term_id,
                     termType_electricityFuel_complete=electricity_complete,
-                    fuel_inputs=log_as_table(fuel_inputs)
+                    fuel_inputs=log_as_table(fuel_inputs),
+                    all_valid_inputs=all_valid_inputs)
 
-    should_run =
+    should_run = all([all_valid_inputs, electricity_complete])
     logShouldRun(cycle, MODEL, term_id, should_run, methodTier=_TIER)
-    return should_run, group_fuel_inputs(
+    return should_run, group_fuel_inputs(fuel_inputs)
 
 
 def run(cycle: dict, term_id: str, lookup_prefix: str = None):
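With this change `get_fuel_inputs` no longer returns a separate `(inputs, valid_inputs)` pair; every entry now carries an `is-valid` flag and `_should_run` requires all entries to be valid. A rough sketch of the new shape, with purely illustrative term ids and values:

# illustrative result of get_fuel_inputs(term_id, cycle, lookup_col)
fuel_inputs = [{
    "input-id": "diesel",
    "input-value": 120.0,
    "input-factor": 3.2,    # factor read from the <lookup_prefix or term_id>EmepEea2019 lookup
    "is-valid": True,       # input value present and at least one factor resolved
    # operation-* keys are only included when the Input has an operation
}]

all_valid_inputs = all(i["is-valid"] for i in fuel_inputs)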
hestia_earth/models/hestia/landOccupationDuringCycle.py
@@ -2,7 +2,7 @@ from functools import reduce
 from itertools import zip_longest
 from typing import NamedTuple
 
-from hestia_earth.models.log import logRequirements, logShouldRun, log_as_table
+from hestia_earth.models.log import format_float, format_str, logRequirements, logShouldRun, log_as_table
 
 from hestia_earth.models.utils import hectar_to_square_meter
 from hestia_earth.models.utils.constant import DAYS_IN_YEAR
@@ -178,33 +178,15 @@ def _should_run_site_data(site_data: SiteData) -> bool:
     ])
 
 
-def _format_float(value: float, unit: str = "", default: str = "None") -> str:
-    return " ".join(
-        string for string in [f"{value}", unit] if string
-    ) if isinstance(value, (float, int)) else default
-
-
-_INVALID_CHARS = {"_", ":", ",", "="}
-_REPLACEMENT_CHAR = "-"
-
-
-def _format_str(value: str, default: str = "None") -> str:
-    """Format a string for logging in a table. Remove all characters used to render the table on the front end."""
-    return (
-        reduce(lambda x, char: x.replace(char, _REPLACEMENT_CHAR), _INVALID_CHARS, str(value))
-        if value else default
-    )
-
-
 def _format_inventory(inventory: list[SiteData], default: str = "None") -> str:
     return log_as_table(
         {
-            "site-id":
-            "site-area":
-            "site-duration":
-            "site-unused-duration":
-            "land-cover-id":
-            "country-id":
+            "site-id": format_str(site_data.id),
+            "site-area": format_float(site_data.area, "ha"),
+            "site-duration": format_float(site_data.duration, "days"),
+            "site-unused-duration": format_float(site_data.unused_duration, "days"),
+            "land-cover-id": format_str(site_data.land_cover_id),
+            "country-id": format_str(site_data.country_id)
         } for site_data in inventory
     ) if inventory else default
 
@@ -238,8 +220,8 @@ def _should_run(impact_assessment: dict):
         model=MODEL,
         term=TERM_ID,
         functional_unit=functional_unit,
-        product_yield=
-        economic_value_share=
+        product_yield=format_float(product_yield, product.get("term", {}).get("units")),
+        economic_value_share=format_float(economic_value_share, "pct"),
         valid_inventory=valid_inventory,
         site_data_is_valid=site_data_is_valid,
         **site_logs,
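The two private helpers removed here were promoted to shared `format_float` and `format_str` functions in `hestia_earth.models.log` (the `log.py +75 -1` entry in the file list). A sketch of the expected behaviour, inferred from the removed implementations rather than from the new `log.py` code:

from hestia_earth.models.log import format_float, format_str, log_as_table

format_float(12.5, "ha")    # expected "12.5 ha"; non-numeric values fall back to "None"
format_str("site_1")        # expected "site-1"; characters used to render log tables are replaced

row = {"site-id": format_str("site_1"), "site-area": format_float(12.5, "ha")}
print(log_as_table([row]))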
hestia_earth/models/hestia/resourceUse_utils.py
@@ -1,10 +1,12 @@
 from datetime import datetime
 from dateutil.relativedelta import relativedelta
 from hestia_earth.schema import TermTermType
-from hestia_earth.utils.tools import list_sum
+from hestia_earth.utils.tools import list_sum, flatten
 
 from hestia_earth.models.log import logRequirements, logShouldRun, log_as_table
-from hestia_earth.models.utils.blank_node import
+from hestia_earth.models.utils.blank_node import (
+    _gapfill_datestr, DatestrGapfillMode, DatestrFormat, _str_dates_match
+)
 from hestia_earth.models.utils.impact_assessment import get_site
 from hestia_earth.models.utils.indicator import _new_indicator
 from .utils import LAND_USE_TERMS_FOR_TRANSFORMATION, crop_ipcc_land_use_category
@@ -42,7 +44,17 @@ def _find_closest_node_date(
     return filtered_dates[min(filtered_dates.keys())] if filtered_dates else ""
 
 
-def
+def _get_current_nodes(management_nodes: list, ia_date_str: str) -> list:
+    return [
+        node for node in management_nodes
+        if (
+            node.get("startDate") and node.get("endDate")
+            and node.get("startDate") <= ia_date_str <= node.get("endDate")
+        )
+    ]
+
+
+def should_run(impact_assessment: dict, term_id: str, historic_date_offset: int) -> tuple[bool, list]:
     cycle = impact_assessment.get('cycle', {})
     has_otherSites = len(cycle.get('otherSites') or []) != 0
 
@@ -64,26 +76,45 @@ def should_run(impact_assessment: dict, term_id: str, historic_date_offset: int)
         node_date_field=match_date
     )
     closest_start_date, closest_end_date = (closest_date, None) if match_date == "startDate" else (None, closest_date)
-    current_node_index = next(
-        (i for i, node in enumerate(filtered_management_nodes)
-         if _str_dates_match(
-            date_str_one=node.get(match_date, ""),
-            date_str_two=impact_assessment.get(match_date, ""),
-            mode=match_mode
-        )),
-        None
-    )
-    current_node = filtered_management_nodes.pop(current_node_index) if current_node_index is not None else None
-    landCover_term_id = (current_node or {}).get('term', {}).get('@id')
-
     prior_management_nodes = [
         node for node in filtered_management_nodes
         if _str_dates_match(node.get("endDate", ""), closest_end_date) or
         _str_dates_match(node.get("startDate", ""), closest_start_date)
     ]
 
-
+    current_nodes = _get_current_nodes(
+        management_nodes=filtered_management_nodes,
+        ia_date_str=_gapfill_datestr(impact_assessment.get(match_date, ""), mode=match_mode)[:10],
+    )
 
+    should_run_node_results = [
+        should_run_node(
+            current_node=node,
+            closest_end_date=closest_end_date,
+            closest_start_date=closest_start_date,
+            has_otherSites=has_otherSites,
+            impact_assessment=impact_assessment,
+            prior_management_nodes=prior_management_nodes,
+            term_id=term_id
+        )
+        for node in current_nodes
+    ]
+    should_run_result = all([n[0] for n in should_run_node_results])
+    logShouldRun(impact_assessment, MODEL, term=term_id, should_run=should_run_result)
+    return should_run_result, flatten([n[1] for n in should_run_node_results])
+
+
+def should_run_node(
+    current_node,
+    closest_end_date,
+    closest_start_date,
+    has_otherSites: bool,
+    impact_assessment: dict,
+    prior_management_nodes: list,
+    term_id: str
+) -> tuple[bool, list]:
+    landCover_term_id = (current_node or {}).get('term', {}).get('@id')
+    ipcc_land_use_category = crop_ipcc_land_use_category(landCover_term_id)
     total_landOccupationDuringCycle = list_sum([
         node.get("value") for node in impact_assessment.get("emissionsResourceUse", [])
         if node.get("term", {}).get("@id", "") == _RESOURCE_USE_TERM_ID
@@ -112,15 +143,13 @@ def should_run(impact_assessment: dict, term_id: str, historic_date_offset: int)
         ipcc_land_use_category=ipcc_land_use_category,
         indicators=log_as_table(indicators))
 
-
+    should_run_node_result = all([
         not has_otherSites,
         ipcc_land_use_category,
         total_landOccupationDuringCycle is not None,
         valid_indicators
     ])
-
-
-    return should_run_result, valid_indicators
+    return should_run_node_result, valid_indicators
 
 
 def run_resource_use(
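Rather than popping the single management node that exactly matches the impact-assessment date, `should_run` now gathers every land-cover node whose `startDate`/`endDate` window contains the gap-filled date and evaluates each one with `should_run_node`, flattening the resulting indicators. A small sketch of the windowing step, using hypothetical dates and term ids:

management_nodes = [
    {"term": {"@id": "annualCropland"}, "startDate": "2019-01-01", "endDate": "2019-12-31"},
    {"term": {"@id": "forest"}, "startDate": "1999-01-01", "endDate": "2018-12-31"}
]
ia_date_str = "2019-12-31"  # impact-assessment date after gap-filling, truncated to YYYY-MM-DD

current_nodes = [
    node for node in management_nodes
    if node.get("startDate") and node.get("endDate")
    and node["startDate"] <= ia_date_str <= node["endDate"]
]
# only the annualCropland node is "current"; each current node is then passed to should_run_node()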
hestia_earth/models/hestia/soilClassification.py (new file)
@@ -0,0 +1,314 @@
+from functools import reduce
+from typing import NamedTuple, Optional
+from pydash import merge
+
+from hestia_earth.schema import MeasurementMethodClassification, TermTermType
+from hestia_earth.utils.blank_node import get_node_value, flatten
+from hestia_earth.utils.model import filter_list_term_type
+
+from hestia_earth.models.hestia.soilMeasurement import STANDARD_DEPTHS
+from hestia_earth.models.ipcc2019.organicCarbonPerHa_utils import (
+    IPCC_SOIL_CATEGORY_TO_SOIL_TYPE_LOOKUP_VALUE, IpccSoilCategory
+)
+from hestia_earth.models.log import format_bool, format_float, format_str, log_as_table, logRequirements, logShouldRun
+from hestia_earth.models.utils import split_on_condition
+from hestia_earth.models.utils.blank_node import node_lookup_match, split_nodes_by_dates
+from hestia_earth.models.utils.measurement import _new_measurement
+from . import MODEL
+
+REQUIREMENTS = {
+    "Site": {
+        "optional": {
+            "measurements": [{
+                "@type": "Measurement",
+                "value": "",
+                "depthUpper": "",
+                "depthLower": "",
+                "term.termType": "soilType",
+                "optional": {
+                    "dates": ""
+                }
+            }]
+        }
+    }
+}
+RETURNS = {
+    "Measurement": [{
+        "value": "",
+        "depthUpper": "",
+        "depthLower": "",
+        "methodClassification": "modelled using other measurements"
+    }]
+}
+LOOKUPS = {
+    "soilType": "IPCC_SOIL_CATEGORY"
+}
+TERM_ID = 'organicSoils,mineralSoils'
+
+MEASUREMENT_TERM_IDS = TERM_ID.split(',')
+ORGANIC_SOILS_TERM_ID = MEASUREMENT_TERM_IDS[0]
+MINERAL_SOILS_TERM_ID = MEASUREMENT_TERM_IDS[1]
+METHOD = MeasurementMethodClassification.MODELLED_USING_OTHER_MEASUREMENTS.value
+
+TARGET_LOOKUP_VALUE = IPCC_SOIL_CATEGORY_TO_SOIL_TYPE_LOOKUP_VALUE[IpccSoilCategory.ORGANIC_SOILS]
+
+IS_100_THRESHOLD = 99.5
+
+
+def _measurement(term_id: str, **kwargs):
+    measurement = _new_measurement(term_id)
+    return measurement | {
+        **{k: v for k, v in kwargs.items()},
+        "methodClassification": METHOD
+    }
+
+
+class _SoilTypeDatum(NamedTuple):
+    term_id: str
+    depth_upper: float
+    depth_lower: float
+    dates: list[str]
+    value: float
+    is_organic: bool
+    is_complete_depth: bool
+    is_standard_depth: bool
+
+
+class _InventoryKey(NamedTuple):
+    depth_upper: float
+    depth_lower: float
+    date: Optional[str]
+
+
+_InventoryGroup = dict[str, float]
+
+_SoilTypeInventory = dict[_InventoryKey, _InventoryGroup]
+
+
+_DEFAULT_INVENTORY: _SoilTypeInventory = {
+    _InventoryKey(None, None, None): {
+        "organicSoils": 0,
+        "mineralSoils": 100
+    }
+}
+
+
+def _soil_type_data_to_inventory_keys(datum: _SoilTypeDatum):
+    return (
+        [_InventoryKey(datum.depth_upper, datum.depth_lower, date) for date in dates]
+        if len((dates := datum.dates)) > 0
+        else [_InventoryKey(datum.depth_upper, datum.depth_lower, None)]
+    )
+
+
+def _extract_soil_type_data(node: dict) -> _SoilTypeDatum:
+    depth_upper = node.get("depthUpper")
+    depth_lower = node.get("depthLower")
+    depth_interval = (depth_upper, depth_lower)
+
+    return _SoilTypeDatum(
+        term_id=node.get("term", {}).get("@id"),
+        depth_upper=depth_upper,
+        depth_lower=depth_lower,
+        dates=node.get("dates", []),
+        value=get_node_value(node),
+        is_organic=node_lookup_match(node, LOOKUPS["soilType"], TARGET_LOOKUP_VALUE),
+        is_complete_depth=all(depth is not None for depth in depth_interval),
+        is_standard_depth=depth_interval in STANDARD_DEPTHS,
+    )
+
+
+def _classify_soil_type_data(soil_type_data: list[_SoilTypeDatum]):
+    """
+    Calculate the values of `organicSoils` and `mineralSoils` from `soilType` measurements for each unique combination
+    of depth interval and date.
+    """
+
+    def classify(inventory: _SoilTypeInventory, datum: _SoilTypeDatum) -> _SoilTypeInventory:
+        """
+        Sum the values of organic and mineral `soilType` Measurements by depth interval and date.
+        """
+        keys = _soil_type_data_to_inventory_keys(datum)
+
+        inner_key = ORGANIC_SOILS_TERM_ID if datum.is_organic else MINERAL_SOILS_TERM_ID
+
+        update_dict = {
+            key: (inner := inventory.get(key, {})) | {
+                inner_key: min(inner.get(inner_key, 0) + datum.value, 100)
+            } for key in keys
+        }
+
+        return merge(dict(), inventory, update_dict)
+
+    inventory = _select_most_complete_groups(reduce(classify, soil_type_data, {}))
+
+    return {
+        key: {
+            ORGANIC_SOILS_TERM_ID: (org := group.get(ORGANIC_SOILS_TERM_ID, 0)),
+            MINERAL_SOILS_TERM_ID: 100 - org
+        } for key, group in inventory.items()
+    }
+
+
+def _group_keys_by_depth(inventory: _SoilTypeInventory) -> dict[tuple, list[_InventoryKey]]:
+
+    def group(result: dict[tuple, list[_InventoryKey]], key: _InventoryKey) -> dict[tuple, list[_InventoryKey]]:
+        depth_interval = (key.depth_upper, key.depth_lower)
+        update_dict = {depth_interval: result.get(depth_interval, []) + [key]}
+        return result | update_dict
+
+    return reduce(group, inventory.keys(), {})
+
+
+def _select_most_complete_groups(inventory: _SoilTypeInventory):
+    """
+    For each depth interval, we need to choose the inventory items that have the most complete information.
+
+    Items should be prioritised in the following order:
+
+    - If only dated items are available, use dated
+    - If only undated items are available, use undated
+    - If there are a mix of dated and undated items:
+        - If dated items include organic soils measurements, use dated
+        - If undated items include organic soils measurements, use undated
+        - Otherwise, use dated
+    """
+    grouped = _group_keys_by_depth(inventory)
+
+    def select(result: set[_InventoryKey], keys: list[_InventoryKey]) -> set[_InventoryKey]:
+        with_dates, without_dates = split_on_condition(set(keys), lambda k: k.date is not None)
+
+        with_dates_have_org_value = any(
+            (
+                ORGANIC_SOILS_TERM_ID in (group := inventory.get(key, {}))
+                or group.get(MINERAL_SOILS_TERM_ID, 0) >= IS_100_THRESHOLD
+            ) for key in with_dates
+        )
+
+        without_dates_have_org_value = any(
+            (
+                ORGANIC_SOILS_TERM_ID in (group := inventory.get(key, {}))
+                or group.get(MINERAL_SOILS_TERM_ID, 0) >= IS_100_THRESHOLD
+            ) for key in without_dates
+        )
+
+        run_with_dates = (
+            with_dates_have_org_value
+            or (with_dates and not without_dates_have_org_value)
+        )
+
+        return result | (with_dates if run_with_dates else without_dates)
+
+    selected_keys = reduce(select, grouped.values(), set())
+
+    return {k: v for k, v in inventory.items() if k in selected_keys}
+
+
+def _format_dates(dates: list[str]):
+    """Format a list of datestrings for logging."""
+    return " ".join(format_str(date) for date in dates) if isinstance(dates, list) and len(dates) else "None"
+
+
+_DATUM_KEY_TO_FORMAT_FUNC = {
+    "depth_upper": lambda x: format_float(x, "cm"),
+    "depth_lower": lambda x: format_float(x, "cm"),
+    "dates": _format_dates,
+    "value": lambda x: format_float(x, "pct area"),
+    "is_organic": format_bool,
+    "is_complete_depth": format_bool,
+    "is_standard_depth": format_bool,
+}
+DEFAULT_FORMAT_FUNC = format_str
+
+
+def _format_soil_data(data: list[_SoilTypeDatum]):
+    return log_as_table(
+        {
+            format_str(k): _DATUM_KEY_TO_FORMAT_FUNC.get(k, DEFAULT_FORMAT_FUNC)(v) for k, v in datum._asdict().items()
+        } for datum in data
+    ) if data else "None"
+
+
+_FILTER_BY = (
+    "is_standard_depth",
+    "is_complete_depth"
+)
+
+
+def _filter_data_by_depth_availability(data: list[_SoilTypeDatum]):
+    """
+    If measurements with depth available -> discard measurements without depth
+    If measurements with standard depth available -> discard non-standard depths
+    Else, use measurements with depth
+    """
+    return next(
+        (
+            (filter_, result) for filter_ in _FILTER_BY
+            if (result := [datum for datum in data if datum.__getattribute__(filter_)])
+        ),
+        (None, data)
+    )
+
+
+def _should_run(site: dict):
+    soil_type_nodes = split_nodes_by_dates(
+        filter_list_term_type(site.get("measurements", []), TermTermType.SOILTYPE)
+    )
+
+    filtered_by, soil_type_data = _filter_data_by_depth_availability(
+        [_extract_soil_type_data(node) for node in soil_type_nodes]
+    )
+
+    inventory = _classify_soil_type_data(soil_type_data) if soil_type_data else _DEFAULT_INVENTORY
+
+    should_run = all([
+        inventory
+    ])
+
+    for term_id in MEASUREMENT_TERM_IDS:
+
+        logRequirements(
+            site,
+            model=MODEL,
+            term=term_id,
+            soil_type_data=_format_soil_data(soil_type_data),
+            filtered_by=format_str(filtered_by)
+        )
+
+        logShouldRun(site, MODEL, term_id, should_run)
+
+    return should_run, inventory
+
+
+_INVENTORY_KEY_TO_FIELD_KEY = {
+    "depth_upper": "depthUpper",
+    "depth_lower": "depthLower",
+    "date": "dates"
+}
+_INVENTORY_KEY_TO_FIELD_VALUE = {
+    "date": lambda x: [x]
+}
+
+
+def _key_to_measurement_fields(key: _InventoryKey):
+    return {
+        _INVENTORY_KEY_TO_FIELD_KEY.get(k, k): _INVENTORY_KEY_TO_FIELD_VALUE.get(k, lambda x: x)(v)
+        for k, v in key._asdict().items() if v is not None
+    }
+
+
+def _run(inventory: _SoilTypeInventory) -> list[dict]:
+    return flatten(
+        [
+            _measurement(
+                term_id,
+                value=[value],
+                **_key_to_measurement_fields(key)
+            ) for term_id, value in value.items()
+        ] for key, value in inventory.items()
+    )
+
+
+def run(site: dict):
+    should_run, valid_inventory = _should_run(site)
+    return _run(valid_inventory) if should_run else []
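The new `soilClassification` model gap-fills `organicSoils` and `mineralSoils` measurements from existing `soilType` measurements, defaulting to 0% organic / 100% mineral when no usable `soilType` data is present. A rough usage sketch; the Site below is hypothetical, and whether a given `soilType` term counts as organic depends on its `IPCC_SOIL_CATEGORY` lookup value:

from hestia_earth.models.hestia.soilClassification import run

site = {
    "@type": "Site",
    "measurements": [{
        "@type": "Measurement",
        "term": {"@type": "Term", "@id": "histosols", "termType": "soilType"},
        "value": [40],
        "depthUpper": 0,
        "depthLower": 30
    }]
}

# returns organicSoils / mineralSoils Measurement nodes per depth interval (and date, when given);
# e.g. organicSoils = 40 and mineralSoils = 60 if "histosols" maps to the organic soils category
measurements = run(site)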