hestia-earth-models 0.74.14__py3-none-any.whl → 0.74.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hestia-earth-models might be problematic; see the release notes on the package registry for more details.

Files changed (37)
  1. hestia_earth/models/cache_nodes.py +9 -6
  2. hestia_earth/models/config/ImpactAssessment.json +0 -22
  3. hestia_earth/models/config/Site.json +11 -3
  4. hestia_earth/models/cycle/completeness/material.py +2 -3
  5. hestia_earth/models/emepEea2019/fuelCombustion_utils.py +21 -21
  6. hestia_earth/models/hestia/landOccupationDuringCycle.py +9 -27
  7. hestia_earth/models/hestia/resourceUse_utils.py +49 -20
  8. hestia_earth/models/hestia/soilClassification.py +314 -0
  9. hestia_earth/models/ipcc2019/aboveGroundBiomass.py +5 -15
  10. hestia_earth/models/ipcc2019/belowGroundBiomass.py +5 -15
  11. hestia_earth/models/ipcc2019/biocharOrganicCarbonPerHa.py +5 -39
  12. hestia_earth/models/ipcc2019/ch4ToAirOrganicSoilCultivation.py +5 -5
  13. hestia_earth/models/ipcc2019/co2ToAirAboveGroundBiomassStockChange.py +10 -15
  14. hestia_earth/models/ipcc2019/co2ToAirBelowGroundBiomassStockChange.py +11 -16
  15. hestia_earth/models/ipcc2019/co2ToAirBiocharStockChange.py +7 -17
  16. hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +123 -74
  17. hestia_earth/models/ipcc2019/co2ToAirOrganicSoilCultivation.py +4 -5
  18. hestia_earth/models/ipcc2019/co2ToAirSoilOrganicCarbonStockChange.py +10 -15
  19. hestia_earth/models/ipcc2019/n2OToAirOrganicSoilCultivationDirect.py +5 -5
  20. hestia_earth/models/ipcc2019/nonCo2EmissionsToAirNaturalVegetationBurning.py +18 -47
  21. hestia_earth/models/ipcc2019/organicCarbonPerHa.py +10 -10
  22. hestia_earth/models/ipcc2019/organicCarbonPerHa_utils.py +4 -19
  23. hestia_earth/models/ipcc2019/organicSoilCultivation_utils.py +0 -9
  24. hestia_earth/models/log.py +75 -1
  25. hestia_earth/models/mocking/search-results.json +1 -1
  26. hestia_earth/models/utils/blank_node.py +12 -4
  27. hestia_earth/models/version.py +1 -1
  28. {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info}/METADATA +15 -7
  29. {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info}/RECORD +37 -34
  30. {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info}/WHEEL +1 -1
  31. tests/models/ecoalimV9/test_cycle.py +2 -2
  32. tests/models/hestia/test_landTransformation20YearAverageDuringCycle.py +4 -8
  33. tests/models/hestia/test_soilClassification.py +72 -0
  34. tests/models/ipcc2019/test_organicCarbonPerHa_utils.py +4 -48
  35. tests/models/test_log.py +128 -0
  36. {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info/licenses}/LICENSE +0 -0
  37. {hestia_earth_models-0.74.14.dist-info → hestia_earth_models-0.74.16.dist-info}/top_level.txt +0 -0
@@ -144,14 +144,17 @@ def _cache_sites(nodes: list, batch_size: int = _CACHE_BATCH_SIZE):
144
144
  return list(nodes_mapping.values())
145
145
 
146
146
 
147
- def run(nodes: list):
148
- init_gee()
149
-
147
+ def cache_nodes(nodes: list):
150
148
  # cache sites data
151
- cached_nodes = _cache_sites(nodes)
149
+ nodes = _cache_sites(nodes)
152
150
 
153
151
  # cache related nodes
154
- cached_nodes = _cache_related_nodes(cached_nodes) if _ENABLE_CACHE_RELATED_NODES else cached_nodes
152
+ nodes = _cache_related_nodes(nodes) if _ENABLE_CACHE_RELATED_NODES else nodes
155
153
 
156
154
  # cache sources
157
- return _cache_sources(cached_nodes)
155
+ return _cache_sources(nodes)
156
+
157
+
158
+ def run(nodes: list):
159
+ init_gee()
160
+ return cache_nodes(nodes)
@@ -133,17 +133,6 @@
133
133
  "replaceThreshold": ["value", 0.01]
134
134
  },
135
135
  "stage": 1
136
- },
137
- {
138
- "key": "emissionsResourceUse",
139
- "model": "linkedImpactAssessment",
140
- "value": "landTransformation100YearAverageInputsProduction",
141
- "runStrategy": "add_blank_node_if_missing",
142
- "mergeStrategy": "list",
143
- "mergeArgs": {
144
- "replaceThreshold": ["value", 0.01]
145
- },
146
- "stage": 1
147
136
  }
148
137
  ],
149
138
  [
@@ -158,17 +147,6 @@
158
147
  },
159
148
  "stage": 1
160
149
  },
161
- {
162
- "key": "emissionsResourceUse",
163
- "model": "hestia",
164
- "value": "landTransformation100YearAverageDuringCycle",
165
- "runStrategy": "always",
166
- "mergeStrategy": "list",
167
- "mergeArgs": {
168
- "replaceThreshold": ["value", 0.01]
169
- },
170
- "stage": 1
171
- },
172
150
  {
173
151
  "key": "emissionsResourceUse",
174
152
  "model": "cml2001Baseline",
@@ -416,6 +416,14 @@
416
416
  "mergeStrategy": "list",
417
417
  "stage": 1
418
418
  },
419
+ {
420
+ "key": "measurements",
421
+ "model": "hestia",
422
+ "value": "soilClassification",
423
+ "runStrategy": "add_blank_node_if_missing",
424
+ "mergeStrategy": "list",
425
+ "stage": 1
426
+ },
419
427
  {
420
428
  "key": "management",
421
429
  "model": "hestia",
@@ -469,7 +477,7 @@
469
477
  "value": "brackishWater",
470
478
  "runStrategy": "add_blank_node_if_missing",
471
479
  "mergeStrategy": "list",
472
- "stage": 1
480
+ "stage": 2
473
481
  },
474
482
  {
475
483
  "key": "measurements",
@@ -477,7 +485,7 @@
477
485
  "value": "freshWater",
478
486
  "runStrategy": "add_blank_node_if_missing",
479
487
  "mergeStrategy": "list",
480
- "stage": 1
488
+ "stage": 2
481
489
  },
482
490
  {
483
491
  "key": "measurements",
@@ -485,7 +493,7 @@
485
493
  "value": "salineWater",
486
494
  "runStrategy": "add_blank_node_if_missing",
487
495
  "mergeStrategy": "list",
488
- "stage": 1
496
+ "stage": 2
489
497
  }
490
498
  ],
491
499
  [
@@ -9,7 +9,7 @@ REQUIREMENTS = {
9
9
  "inputs": [{"@type": "Input", "value": "", "term.@id": "machineryInfrastructureDepreciatedAmountPerCycle"}],
10
10
  "site": {
11
11
  "@type": "Site",
12
- "siteType": ["cropland", "glass or high accessible cover"]
12
+ "siteType": ["cropland"]
13
13
  }
14
14
  }
15
15
  }
@@ -20,8 +20,7 @@ RETURNS = {
20
20
  }
21
21
  MODEL_KEY = 'material'
22
22
  ALLOWED_SITE_TYPES = [
23
- SiteSiteType.CROPLAND.value,
24
- SiteSiteType.GLASS_OR_HIGH_ACCESSIBLE_COVER.value
23
+ SiteSiteType.CROPLAND.value
25
24
  ]
26
25
 
27
26
 
@@ -15,7 +15,7 @@ _TIER = EmissionMethodTier.TIER_1.value
15
15
 
16
16
  def _run_inputs(inputs: list, tier: str, term_id: str):
17
17
  total_value = list_sum([
18
- (i.get('input-value') or 0) * (i.get('operation-factor') or i.get('input-default-factor') or 0)
18
+ (i.get('input-value') or 0) * (i.get('operation-factor') or i.get('input-factor') or 0)
19
19
  for i in inputs
20
20
  ])
21
21
  input_term = {
@@ -45,37 +45,35 @@ def _fuel_input_data(term_id: str, lookup_col: str, input: dict):
45
45
  operation_term = input.get('operation', {})
46
46
  input_value = list_sum(input.get('value', []), None)
47
47
 
48
- operation_factor = extract_grouped_data(
49
- data=get_lookup_value(operation_term, lookup_col, model=MODEL, term=term_id),
50
- key=input_term_id
51
- ) if operation_term else None
52
- input_factor = get_lookup_value(input_term, lookup_col, model=MODEL, term=term_id)
48
+ operation_factor = safe_parse_float(
49
+ extract_grouped_data(
50
+ data=get_lookup_value(operation_term, lookup_col, model=MODEL, term=term_id),
51
+ key=input_term_id
52
+ ) if operation_term else None,
53
+ default=None
54
+ )
55
+ input_factor = safe_parse_float(get_lookup_value(input_term, lookup_col, model=MODEL, term=term_id), default=None)
53
56
 
54
57
  return {
55
58
  'input-id': input_term_id,
56
59
  'input-termType': input_term.get('termType'),
57
60
  'input-units': input_term.get('units'),
58
61
  'input-value': input_value,
59
- 'input-default-factor': safe_parse_float(input_factor, default=None),
62
+ 'input-factor': input_factor,
63
+ 'is-valid': all([input_value is not None, (input_factor or operation_factor) is not None])
64
+ } | ({
60
65
  'operation-id': operation_term.get('@id'),
61
66
  'operation-termType': operation_term.get('termType'),
62
67
  'operation-units': operation_term.get('units'),
63
- 'operation-factor': safe_parse_float(operation_factor, default=None)
64
- }
68
+ 'operation-factor': operation_factor,
69
+ } if operation_term else {})
65
70
 
66
71
 
67
72
  def get_fuel_inputs(term_id: str, cycle: dict, lookup_col: str):
68
- inputs = [
73
+ return [
69
74
  _fuel_input_data(term_id, lookup_col, i)
70
75
  for i in filter_list_term_type(cycle.get('inputs', []), TermTermType.FUEL)
71
76
  ]
72
- valid_inputs = [
73
- i for i in inputs if all([
74
- i.get('input-value') is not None,
75
- (i.get('operation-factor') or i.get('input-default-factor')) is not None
76
- ])
77
- ]
78
- return inputs, valid_inputs
79
77
 
80
78
 
81
79
  def group_fuel_inputs(inputs: list):
@@ -84,15 +82,17 @@ def group_fuel_inputs(inputs: list):
84
82
 
85
83
  def _should_run(cycle: dict, term_id: str, lookup_prefix: str = None):
86
84
  electricity_complete = _is_term_type_complete(cycle, 'electricityFuel')
87
- fuel_inputs, valid_inputs = get_fuel_inputs(term_id, cycle, f"{lookup_prefix or term_id}EmepEea2019")
85
+ fuel_inputs = get_fuel_inputs(term_id, cycle, f"{lookup_prefix or term_id}EmepEea2019")
86
+ all_valid_inputs = all([v['is-valid'] for v in fuel_inputs])
88
87
 
89
88
  logRequirements(cycle, model=MODEL, term=term_id,
90
89
  termType_electricityFuel_complete=electricity_complete,
91
- fuel_inputs=log_as_table(fuel_inputs))
90
+ fuel_inputs=log_as_table(fuel_inputs),
91
+ all_valid_inputs=all_valid_inputs)
92
92
 
93
- should_run = any([bool(valid_inputs), electricity_complete])
93
+ should_run = all([all_valid_inputs, electricity_complete])
94
94
  logShouldRun(cycle, MODEL, term_id, should_run, methodTier=_TIER)
95
- return should_run, group_fuel_inputs(valid_inputs)
95
+ return should_run, group_fuel_inputs(fuel_inputs)
96
96
 
97
97
 
98
98
  def run(cycle: dict, term_id: str, lookup_prefix: str = None):
@@ -2,7 +2,7 @@ from functools import reduce
2
2
  from itertools import zip_longest
3
3
  from typing import NamedTuple
4
4
 
5
- from hestia_earth.models.log import logRequirements, logShouldRun, log_as_table
5
+ from hestia_earth.models.log import format_float, format_str, logRequirements, logShouldRun, log_as_table
6
6
 
7
7
  from hestia_earth.models.utils import hectar_to_square_meter
8
8
  from hestia_earth.models.utils.constant import DAYS_IN_YEAR
@@ -178,33 +178,15 @@ def _should_run_site_data(site_data: SiteData) -> bool:
178
178
  ])
179
179
 
180
180
 
181
- def _format_float(value: float, unit: str = "", default: str = "None") -> str:
182
- return " ".join(
183
- string for string in [f"{value}", unit] if string
184
- ) if isinstance(value, (float, int)) else default
185
-
186
-
187
- _INVALID_CHARS = {"_", ":", ",", "="}
188
- _REPLACEMENT_CHAR = "-"
189
-
190
-
191
- def _format_str(value: str, default: str = "None") -> str:
192
- """Format a string for logging in a table. Remove all characters used to render the table on the front end."""
193
- return (
194
- reduce(lambda x, char: x.replace(char, _REPLACEMENT_CHAR), _INVALID_CHARS, str(value))
195
- if value else default
196
- )
197
-
198
-
199
181
  def _format_inventory(inventory: list[SiteData], default: str = "None") -> str:
200
182
  return log_as_table(
201
183
  {
202
- "site-id": _format_str(site_data.id),
203
- "site-area": _format_float(site_data.area, "ha"),
204
- "site-duration": _format_float(site_data.duration, "days"),
205
- "site-unused-duration": _format_float(site_data.unused_duration, "days"),
206
- "land-cover-id": _format_str(site_data.land_cover_id),
207
- "country-id": _format_str(site_data.country_id)
184
+ "site-id": format_str(site_data.id),
185
+ "site-area": format_float(site_data.area, "ha"),
186
+ "site-duration": format_float(site_data.duration, "days"),
187
+ "site-unused-duration": format_float(site_data.unused_duration, "days"),
188
+ "land-cover-id": format_str(site_data.land_cover_id),
189
+ "country-id": format_str(site_data.country_id)
208
190
  } for site_data in inventory
209
191
  ) if inventory else default
210
192
 
@@ -238,8 +220,8 @@ def _should_run(impact_assessment: dict):
238
220
  model=MODEL,
239
221
  term=TERM_ID,
240
222
  functional_unit=functional_unit,
241
- product_yield=_format_float(product_yield, product.get("term", {}).get("units")),
242
- economic_value_share=_format_float(economic_value_share, "pct"),
223
+ product_yield=format_float(product_yield, product.get("term", {}).get("units")),
224
+ economic_value_share=format_float(economic_value_share, "pct"),
243
225
  valid_inventory=valid_inventory,
244
226
  site_data_is_valid=site_data_is_valid,
245
227
  **site_logs,
@@ -1,10 +1,12 @@
1
1
  from datetime import datetime
2
2
  from dateutil.relativedelta import relativedelta
3
3
  from hestia_earth.schema import TermTermType
4
- from hestia_earth.utils.tools import list_sum
4
+ from hestia_earth.utils.tools import list_sum, flatten
5
5
 
6
6
  from hestia_earth.models.log import logRequirements, logShouldRun, log_as_table
7
- from hestia_earth.models.utils.blank_node import _gapfill_datestr, DatestrGapfillMode, DatestrFormat, _str_dates_match
7
+ from hestia_earth.models.utils.blank_node import (
8
+ _gapfill_datestr, DatestrGapfillMode, DatestrFormat, _str_dates_match
9
+ )
8
10
  from hestia_earth.models.utils.impact_assessment import get_site
9
11
  from hestia_earth.models.utils.indicator import _new_indicator
10
12
  from .utils import LAND_USE_TERMS_FOR_TRANSFORMATION, crop_ipcc_land_use_category
@@ -42,7 +44,17 @@ def _find_closest_node_date(
42
44
  return filtered_dates[min(filtered_dates.keys())] if filtered_dates else ""
43
45
 
44
46
 
45
- def should_run(impact_assessment: dict, term_id: str, historic_date_offset: int) -> tuple[bool, dict, str, str]:
47
+ def _get_current_nodes(management_nodes: list, ia_date_str: str) -> list:
48
+ return [
49
+ node for node in management_nodes
50
+ if (
51
+ node.get("startDate") and node.get("endDate")
52
+ and node.get("startDate") <= ia_date_str <= node.get("endDate")
53
+ )
54
+ ]
55
+
56
+
57
+ def should_run(impact_assessment: dict, term_id: str, historic_date_offset: int) -> tuple[bool, list]:
46
58
  cycle = impact_assessment.get('cycle', {})
47
59
  has_otherSites = len(cycle.get('otherSites') or []) != 0
48
60
 
@@ -64,26 +76,45 @@ def should_run(impact_assessment: dict, term_id: str, historic_date_offset: int)
64
76
  node_date_field=match_date
65
77
  )
66
78
  closest_start_date, closest_end_date = (closest_date, None) if match_date == "startDate" else (None, closest_date)
67
- current_node_index = next(
68
- (i for i, node in enumerate(filtered_management_nodes)
69
- if _str_dates_match(
70
- date_str_one=node.get(match_date, ""),
71
- date_str_two=impact_assessment.get(match_date, ""),
72
- mode=match_mode
73
- )),
74
- None
75
- )
76
- current_node = filtered_management_nodes.pop(current_node_index) if current_node_index is not None else None
77
- landCover_term_id = (current_node or {}).get('term', {}).get('@id')
78
-
79
79
  prior_management_nodes = [
80
80
  node for node in filtered_management_nodes
81
81
  if _str_dates_match(node.get("endDate", ""), closest_end_date) or
82
82
  _str_dates_match(node.get("startDate", ""), closest_start_date)
83
83
  ]
84
84
 
85
- ipcc_land_use_category = crop_ipcc_land_use_category(landCover_term_id)
85
+ current_nodes = _get_current_nodes(
86
+ management_nodes=filtered_management_nodes,
87
+ ia_date_str=_gapfill_datestr(impact_assessment.get(match_date, ""), mode=match_mode)[:10],
88
+ )
86
89
 
90
+ should_run_node_results = [
91
+ should_run_node(
92
+ current_node=node,
93
+ closest_end_date=closest_end_date,
94
+ closest_start_date=closest_start_date,
95
+ has_otherSites=has_otherSites,
96
+ impact_assessment=impact_assessment,
97
+ prior_management_nodes=prior_management_nodes,
98
+ term_id=term_id
99
+ )
100
+ for node in current_nodes
101
+ ]
102
+ should_run_result = all([n[0] for n in should_run_node_results])
103
+ logShouldRun(impact_assessment, MODEL, term=term_id, should_run=should_run_result)
104
+ return should_run_result, flatten([n[1] for n in should_run_node_results])
105
+
106
+
107
+ def should_run_node(
108
+ current_node,
109
+ closest_end_date,
110
+ closest_start_date,
111
+ has_otherSites: bool,
112
+ impact_assessment: dict,
113
+ prior_management_nodes: list,
114
+ term_id: str
115
+ ) -> tuple[bool, list]:
116
+ landCover_term_id = (current_node or {}).get('term', {}).get('@id')
117
+ ipcc_land_use_category = crop_ipcc_land_use_category(landCover_term_id)
87
118
  total_landOccupationDuringCycle = list_sum([
88
119
  node.get("value") for node in impact_assessment.get("emissionsResourceUse", [])
89
120
  if node.get("term", {}).get("@id", "") == _RESOURCE_USE_TERM_ID
@@ -112,15 +143,13 @@ def should_run(impact_assessment: dict, term_id: str, historic_date_offset: int)
112
143
  ipcc_land_use_category=ipcc_land_use_category,
113
144
  indicators=log_as_table(indicators))
114
145
 
115
- should_run_result = all([
146
+ should_run_node_result = all([
116
147
  not has_otherSites,
117
148
  ipcc_land_use_category,
118
149
  total_landOccupationDuringCycle is not None,
119
150
  valid_indicators
120
151
  ])
121
- logShouldRun(impact_assessment, MODEL, term=term_id, should_run=should_run_result)
122
-
123
- return should_run_result, valid_indicators
152
+ return should_run_node_result, valid_indicators
124
153
 
125
154
 
126
155
  def run_resource_use(
@@ -0,0 +1,314 @@
1
+ from functools import reduce
2
+ from typing import NamedTuple, Optional
3
+ from pydash import merge
4
+
5
+ from hestia_earth.schema import MeasurementMethodClassification, TermTermType
6
+ from hestia_earth.utils.blank_node import get_node_value, flatten
7
+ from hestia_earth.utils.model import filter_list_term_type
8
+
9
+ from hestia_earth.models.hestia.soilMeasurement import STANDARD_DEPTHS
10
+ from hestia_earth.models.ipcc2019.organicCarbonPerHa_utils import (
11
+ IPCC_SOIL_CATEGORY_TO_SOIL_TYPE_LOOKUP_VALUE, IpccSoilCategory
12
+ )
13
+ from hestia_earth.models.log import format_bool, format_float, format_str, log_as_table, logRequirements, logShouldRun
14
+ from hestia_earth.models.utils import split_on_condition
15
+ from hestia_earth.models.utils.blank_node import node_lookup_match, split_nodes_by_dates
16
+ from hestia_earth.models.utils.measurement import _new_measurement
17
+ from . import MODEL
18
+
19
+ REQUIREMENTS = {
20
+ "Site": {
21
+ "optional": {
22
+ "measurements": [{
23
+ "@type": "Measurement",
24
+ "value": "",
25
+ "depthUpper": "",
26
+ "depthLower": "",
27
+ "term.termType": "soilType",
28
+ "optional": {
29
+ "dates": ""
30
+ }
31
+ }]
32
+ }
33
+ }
34
+ }
35
+ RETURNS = {
36
+ "Measurement": [{
37
+ "value": "",
38
+ "depthUpper": "",
39
+ "depthLower": "",
40
+ "methodClassification": "modelled using other measurements"
41
+ }]
42
+ }
43
+ LOOKUPS = {
44
+ "soilType": "IPCC_SOIL_CATEGORY"
45
+ }
46
+ TERM_ID = 'organicSoils,mineralSoils'
47
+
48
+ MEASUREMENT_TERM_IDS = TERM_ID.split(',')
49
+ ORGANIC_SOILS_TERM_ID = MEASUREMENT_TERM_IDS[0]
50
+ MINERAL_SOILS_TERM_ID = MEASUREMENT_TERM_IDS[1]
51
+ METHOD = MeasurementMethodClassification.MODELLED_USING_OTHER_MEASUREMENTS.value
52
+
53
+ TARGET_LOOKUP_VALUE = IPCC_SOIL_CATEGORY_TO_SOIL_TYPE_LOOKUP_VALUE[IpccSoilCategory.ORGANIC_SOILS]
54
+
55
+ IS_100_THRESHOLD = 99.5
56
+
57
+
58
+ def _measurement(term_id: str, **kwargs):
59
+ measurement = _new_measurement(term_id)
60
+ return measurement | {
61
+ **{k: v for k, v in kwargs.items()},
62
+ "methodClassification": METHOD
63
+ }
64
+
65
+
66
+ class _SoilTypeDatum(NamedTuple):
67
+ term_id: str
68
+ depth_upper: float
69
+ depth_lower: float
70
+ dates: list[str]
71
+ value: float
72
+ is_organic: bool
73
+ is_complete_depth: bool
74
+ is_standard_depth: bool
75
+
76
+
77
+ class _InventoryKey(NamedTuple):
78
+ depth_upper: float
79
+ depth_lower: float
80
+ date: Optional[str]
81
+
82
+
83
+ _InventoryGroup = dict[str, float]
84
+
85
+ _SoilTypeInventory = dict[_InventoryKey, _InventoryGroup]
86
+
87
+
88
+ _DEFAULT_INVENTORY: _SoilTypeInventory = {
89
+ _InventoryKey(None, None, None): {
90
+ "organicSoils": 0,
91
+ "mineralSoils": 100
92
+ }
93
+ }
94
+
95
+
96
+ def _soil_type_data_to_inventory_keys(datum: _SoilTypeDatum):
97
+ return (
98
+ [_InventoryKey(datum.depth_upper, datum.depth_lower, date) for date in dates]
99
+ if len((dates := datum.dates)) > 0
100
+ else [_InventoryKey(datum.depth_upper, datum.depth_lower, None)]
101
+ )
102
+
103
+
104
+ def _extract_soil_type_data(node: dict) -> _SoilTypeDatum:
105
+ depth_upper = node.get("depthUpper")
106
+ depth_lower = node.get("depthLower")
107
+ depth_interval = (depth_upper, depth_lower)
108
+
109
+ return _SoilTypeDatum(
110
+ term_id=node.get("term", {}).get("@id"),
111
+ depth_upper=depth_upper,
112
+ depth_lower=depth_lower,
113
+ dates=node.get("dates", []),
114
+ value=get_node_value(node),
115
+ is_organic=node_lookup_match(node, LOOKUPS["soilType"], TARGET_LOOKUP_VALUE),
116
+ is_complete_depth=all(depth is not None for depth in depth_interval),
117
+ is_standard_depth=depth_interval in STANDARD_DEPTHS,
118
+ )
119
+
120
+
121
+ def _classify_soil_type_data(soil_type_data: list[_SoilTypeDatum]):
122
+ """
123
+ Calculate the values of `organicSoils` and `mineralSoils` from `soilType` measurements for each unique combination
124
+ of depth interval and date.
125
+ """
126
+
127
+ def classify(inventory: _SoilTypeInventory, datum: _SoilTypeDatum) -> _SoilTypeInventory:
128
+ """
129
+ Sum the values of organic and mineral `soilType` Measurements by depth interval and date.
130
+ """
131
+ keys = _soil_type_data_to_inventory_keys(datum)
132
+
133
+ inner_key = ORGANIC_SOILS_TERM_ID if datum.is_organic else MINERAL_SOILS_TERM_ID
134
+
135
+ update_dict = {
136
+ key: (inner := inventory.get(key, {})) | {
137
+ inner_key: min(inner.get(inner_key, 0) + datum.value, 100)
138
+ } for key in keys
139
+ }
140
+
141
+ return merge(dict(), inventory, update_dict)
142
+
143
+ inventory = _select_most_complete_groups(reduce(classify, soil_type_data, {}))
144
+
145
+ return {
146
+ key: {
147
+ ORGANIC_SOILS_TERM_ID: (org := group.get(ORGANIC_SOILS_TERM_ID, 0)),
148
+ MINERAL_SOILS_TERM_ID: 100 - org
149
+ } for key, group in inventory.items()
150
+ }
151
+
152
+
153
+ def _group_keys_by_depth(inventory: _SoilTypeInventory) -> dict[tuple, list[_InventoryKey]]:
154
+
155
+ def group(result: dict[tuple, list[_InventoryKey]], key: _InventoryKey) -> dict[tuple, list[_InventoryKey]]:
156
+ depth_interval = (key.depth_upper, key.depth_lower)
157
+ update_dict = {depth_interval: result.get(depth_interval, []) + [key]}
158
+ return result | update_dict
159
+
160
+ return reduce(group, inventory.keys(), {})
161
+
162
+
163
+ def _select_most_complete_groups(inventory: _SoilTypeInventory):
164
+ """
165
+ For each depth interval, we need to choose the inventory items that have the most complete information.
166
+
167
+ Items should be prioritised in the following order:
168
+
169
+ - If only dated items are available, use dated
170
+ - If only undated items are available, use undated
171
+ - If there are a mix of dated and undated items:
172
+ - If dated items include organic soils measurements, use dated
173
+ - If undated items include organic soils measurements, use undated
174
+ - Otherwise, use dated
175
+ """
176
+ grouped = _group_keys_by_depth(inventory)
177
+
178
+ def select(result: set[_InventoryKey], keys: list[_InventoryKey]) -> set[_InventoryKey]:
179
+ with_dates, without_dates = split_on_condition(set(keys), lambda k: k.date is not None)
180
+
181
+ with_dates_have_org_value = any(
182
+ (
183
+ ORGANIC_SOILS_TERM_ID in (group := inventory.get(key, {}))
184
+ or group.get(MINERAL_SOILS_TERM_ID, 0) >= IS_100_THRESHOLD
185
+ ) for key in with_dates
186
+ )
187
+
188
+ without_dates_have_org_value = any(
189
+ (
190
+ ORGANIC_SOILS_TERM_ID in (group := inventory.get(key, {}))
191
+ or group.get(MINERAL_SOILS_TERM_ID, 0) >= IS_100_THRESHOLD
192
+ ) for key in without_dates
193
+ )
194
+
195
+ run_with_dates = (
196
+ with_dates_have_org_value
197
+ or (with_dates and not without_dates_have_org_value)
198
+ )
199
+
200
+ return result | (with_dates if run_with_dates else without_dates)
201
+
202
+ selected_keys = reduce(select, grouped.values(), set())
203
+
204
+ return {k: v for k, v in inventory.items() if k in selected_keys}
205
+
206
+
207
+ def _format_dates(dates: list[str]):
208
+ """Format a list of datestrings for logging."""
209
+ return " ".join(format_str(date) for date in dates) if isinstance(dates, list) and len(dates) else "None"
210
+
211
+
212
+ _DATUM_KEY_TO_FORMAT_FUNC = {
213
+ "depth_upper": lambda x: format_float(x, "cm"),
214
+ "depth_lower": lambda x: format_float(x, "cm"),
215
+ "dates": _format_dates,
216
+ "value": lambda x: format_float(x, "pct area"),
217
+ "is_organic": format_bool,
218
+ "is_complete_depth": format_bool,
219
+ "is_standard_depth": format_bool,
220
+ }
221
+ DEFAULT_FORMAT_FUNC = format_str
222
+
223
+
224
+ def _format_soil_data(data: list[_SoilTypeDatum]):
225
+ return log_as_table(
226
+ {
227
+ format_str(k): _DATUM_KEY_TO_FORMAT_FUNC.get(k, DEFAULT_FORMAT_FUNC)(v) for k, v in datum._asdict().items()
228
+ } for datum in data
229
+ ) if data else "None"
230
+
231
+
232
+ _FILTER_BY = (
233
+ "is_standard_depth",
234
+ "is_complete_depth"
235
+ )
236
+
237
+
238
+ def _filter_data_by_depth_availability(data: list[_SoilTypeDatum]):
239
+ """
240
+ If measurements with depth available -> discard measurements without depth
241
+ If measurements with standard depth available -> discard non-standard depths
242
+ Else, use measurements with depth
243
+ """
244
+ return next(
245
+ (
246
+ (filter_, result) for filter_ in _FILTER_BY
247
+ if (result := [datum for datum in data if datum.__getattribute__(filter_)])
248
+ ),
249
+ (None, data)
250
+ )
251
+
252
+
253
+ def _should_run(site: dict):
254
+ soil_type_nodes = split_nodes_by_dates(
255
+ filter_list_term_type(site.get("measurements", []), TermTermType.SOILTYPE)
256
+ )
257
+
258
+ filtered_by, soil_type_data = _filter_data_by_depth_availability(
259
+ [_extract_soil_type_data(node) for node in soil_type_nodes]
260
+ )
261
+
262
+ inventory = _classify_soil_type_data(soil_type_data) if soil_type_data else _DEFAULT_INVENTORY
263
+
264
+ should_run = all([
265
+ inventory
266
+ ])
267
+
268
+ for term_id in MEASUREMENT_TERM_IDS:
269
+
270
+ logRequirements(
271
+ site,
272
+ model=MODEL,
273
+ term=term_id,
274
+ soil_type_data=_format_soil_data(soil_type_data),
275
+ filtered_by=format_str(filtered_by)
276
+ )
277
+
278
+ logShouldRun(site, MODEL, term_id, should_run)
279
+
280
+ return should_run, inventory
281
+
282
+
283
+ _INVENTORY_KEY_TO_FIELD_KEY = {
284
+ "depth_upper": "depthUpper",
285
+ "depth_lower": "depthLower",
286
+ "date": "dates"
287
+ }
288
+ _INVENTORY_KEY_TO_FIELD_VALUE = {
289
+ "date": lambda x: [x]
290
+ }
291
+
292
+
293
+ def _key_to_measurement_fields(key: _InventoryKey):
294
+ return {
295
+ _INVENTORY_KEY_TO_FIELD_KEY.get(k, k): _INVENTORY_KEY_TO_FIELD_VALUE.get(k, lambda x: x)(v)
296
+ for k, v in key._asdict().items() if v is not None
297
+ }
298
+
299
+
300
+ def _run(inventory: _SoilTypeInventory) -> list[dict]:
301
+ return flatten(
302
+ [
303
+ _measurement(
304
+ term_id,
305
+ value=[value],
306
+ **_key_to_measurement_fields(key)
307
+ ) for term_id, value in value.items()
308
+ ] for key, value in inventory.items()
309
+ )
310
+
311
+
312
+ def run(site: dict):
313
+ should_run, valid_inventory = _should_run(site)
314
+ return _run(valid_inventory) if should_run else []