hestia-earth-models 0.74.8__py3-none-any.whl → 0.74.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hestia_earth/models/cache_sites.py +1 -1
  2. hestia_earth/models/faostat2018/liveweightPerHead.py +1 -1
  3. hestia_earth/models/faostat2018/product/price.py +1 -1
  4. hestia_earth/models/geospatialDatabase/ecoClimateZone.py +1 -1
  5. hestia_earth/models/geospatialDatabase/region.py +1 -1
  6. hestia_earth/models/geospatialDatabase/utils.py +1 -1
  7. hestia_earth/models/globalCropWaterModel2008/rootingDepth.py +2 -1
  8. hestia_earth/models/haversineFormula/transport/distance.py +1 -1
  9. hestia_earth/models/hestia/aboveGroundCropResidue.py +1 -3
  10. hestia_earth/models/hestia/cropResidueManagement.py +1 -0
  11. hestia_earth/models/hestia/excretaKgMass.py +1 -1
  12. hestia_earth/models/hestia/landCover.py +13 -6
  13. hestia_earth/models/hestia/landOccupationDuringCycle.py +1 -1
  14. hestia_earth/models/hestia/management.py +25 -11
  15. hestia_earth/models/hestia/pastureGrass.py +1 -1
  16. hestia_earth/models/impact_assessment/post_checks/__init__.py +3 -2
  17. hestia_earth/models/impact_assessment/post_checks/remove_no_value.py +13 -0
  18. hestia_earth/models/ipcc2019/biocharOrganicCarbonPerHa.py +2 -1
  19. hestia_earth/models/ipcc2019/ch4ToAirExcreta.py +4 -1
  20. hestia_earth/models/ipcc2019/organicCarbonPerHa.py +5 -1
  21. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_1.py +88 -101
  22. hestia_earth/models/ipcc2019/organicCarbonPerHa_utils.py +21 -0
  23. hestia_earth/models/mocking/search-results.json +1 -1
  24. hestia_earth/models/site/pre_checks/country.py +1 -2
  25. hestia_earth/models/utils/__init__.py +7 -5
  26. hestia_earth/models/utils/blank_node.py +7 -2
  27. hestia_earth/models/utils/completeness.py +1 -2
  28. hestia_earth/models/utils/emission.py +1 -1
  29. hestia_earth/models/utils/indicator.py +1 -1
  30. hestia_earth/models/utils/input.py +1 -1
  31. hestia_earth/models/utils/management.py +1 -1
  32. hestia_earth/models/utils/measurement.py +2 -1
  33. hestia_earth/models/utils/method.py +1 -2
  34. hestia_earth/models/utils/practice.py +1 -1
  35. hestia_earth/models/utils/product.py +2 -1
  36. hestia_earth/models/utils/property.py +2 -1
  37. hestia_earth/models/utils/term.py +1 -27
  38. hestia_earth/models/version.py +1 -1
  39. {hestia_earth_models-0.74.8.dist-info → hestia_earth_models-0.74.9.dist-info}/METADATA +2 -2
  40. {hestia_earth_models-0.74.8.dist-info → hestia_earth_models-0.74.9.dist-info}/RECORD +48 -45
  41. tests/models/hestia/test_aboveGroundCropResidue.py +13 -35
  42. tests/models/hestia/test_landOccupationDuringCycle.py +9 -2
  43. tests/models/impact_assessment/post_checks/test_remove_cache_fields.py +6 -0
  44. tests/models/impact_assessment/post_checks/test_remove_no_value.py +17 -0
  45. tests/models/ipcc2019/test_organicCarbonPerHa_tier_1.py +1 -1
  46. {hestia_earth_models-0.74.8.dist-info → hestia_earth_models-0.74.9.dist-info}/LICENSE +0 -0
  47. {hestia_earth_models-0.74.8.dist-info → hestia_earth_models-0.74.9.dist-info}/WHEEL +0 -0
  48. {hestia_earth_models-0.74.8.dist-info → hestia_earth_models-0.74.9.dist-info}/top_level.txt +0 -0
@@ -3,11 +3,11 @@ from enum import Enum
3
3
  from pydash.objects import merge
4
4
  from hestia_earth.schema import TermTermType
5
5
  from hestia_earth.utils.tools import flatten, non_empty_list
6
+ from hestia_earth.utils.term import download_term
6
7
 
7
8
  from .log import logger
8
9
  from .utils import CACHE_KEY, cached_value
9
10
  from .utils.site import CACHE_YEARS_KEY
10
- from .utils.term import download_term
11
11
  from .site.pre_checks.cache_geospatialDatabase import (
12
12
  list_vectors, list_rasters, cache_site_results, _should_run
13
13
  )
@@ -2,10 +2,10 @@ from hestia_earth.schema import TermTermType
2
2
  from hestia_earth.utils.lookup import extract_grouped_data_closest_date
3
3
  from hestia_earth.utils.model import filter_list_term_type
4
4
  from hestia_earth.utils.tools import non_empty_list, safe_parse_date, safe_parse_float
5
+ from hestia_earth.utils.term import download_term
5
6
 
6
7
  from hestia_earth.models.log import logRequirements, logShouldRun
7
8
  from hestia_earth.models.utils.constant import Units
8
- from hestia_earth.models.utils.term import download_term
9
9
  from hestia_earth.models.utils.property import _new_property, node_has_no_property
10
10
  from hestia_earth.models.utils.product import convert_product_to_unit
11
11
  from hestia_earth.models.utils.animalProduct import FAO_LOOKUP_COLUMN, get_animalProduct_lookup_value
@@ -1,10 +1,10 @@
1
1
  from hestia_earth.schema import TermTermType
2
2
  from hestia_earth.utils.lookup import extract_grouped_data
3
3
  from hestia_earth.utils.tools import non_empty_list, safe_parse_float, safe_parse_date
4
+ from hestia_earth.utils.term import download_term
4
5
 
5
6
  from hestia_earth.models.log import debugValues, logRequirements, logShouldRun
6
7
  from hestia_earth.models.utils.constant import Units
7
- from hestia_earth.models.utils.term import download_term
8
8
  from hestia_earth.models.utils.currency import DEFAULT_CURRENCY
9
9
  from hestia_earth.models.utils.crop import FAOSTAT_PRODUCTION_LOOKUP_COLUMN, get_crop_grouping_faostat_production
10
10
  from hestia_earth.models.utils.animalProduct import FAO_LOOKUP_COLUMN, get_animalProduct_grouping_fao
@@ -77,4 +77,4 @@ def _should_run(site: dict):
77
77
  return should_run
78
78
 
79
79
 
80
- def run(site: dict): return _run(site) if _should_run(site) else _run_default(site)
80
+ def run(site: dict): return (_run(site) if _should_run(site) else []) or _run_default(site)
@@ -1,9 +1,9 @@
1
1
  from hestia_earth.schema import TermTermType
2
2
  from hestia_earth.utils.model import linked_node
3
+ from hestia_earth.utils.term import download_term
3
4
 
4
5
  from hestia_earth.models.log import debugValues, logRequirements, logShouldRun
5
6
  from .utils import download, has_coordinates
6
- from hestia_earth.models.utils.term import download_term
7
7
  from . import MODEL
8
8
 
9
9
  REQUIREMENTS = {
@@ -4,10 +4,10 @@ from area import area
4
4
  from functools import reduce, lru_cache
5
5
  from hestia_earth.schema import TermTermType
6
6
  from hestia_earth.utils.tools import non_empty_list
7
+ from hestia_earth.utils.term import download_term
7
8
 
8
9
  from hestia_earth.models.log import debugValues, logErrorRun, logRequirements
9
10
  from hestia_earth.models.utils.site import cached_value, region_factor, region_level_1_id
10
- from hestia_earth.models.utils.term import download_term
11
11
  from . import MODEL
12
12
 
13
13
  MAX_AREA_SIZE = int(os.getenv('MAX_AREA_SIZE', '5000'))
@@ -1,10 +1,11 @@
1
1
  from hestia_earth.schema import CycleFunctionalUnit, TermTermType
2
2
  from hestia_earth.utils.model import find_term_match, filter_list_term_type
3
3
  from hestia_earth.utils.tools import list_sum, non_empty_list, safe_parse_float
4
+ from hestia_earth.utils.term import download_term
4
5
 
5
6
  from hestia_earth.models.log import logRequirements, logShouldRun
6
7
  from hestia_earth.models.utils.property import _new_property, node_has_no_property
7
- from hestia_earth.models.utils.term import get_irrigation_terms, download_term
8
+ from hestia_earth.models.utils.term import get_irrigation_terms
8
9
  from hestia_earth.models.utils.crop import get_crop_lookup_value
9
10
  from hestia_earth.models.utils.completeness import _is_term_type_complete
10
11
  from . import MODEL
@@ -1,10 +1,10 @@
1
1
  from haversine import haversine
2
2
  from hestia_earth.schema import TermTermType
3
3
  from hestia_earth.utils.tools import non_empty_list
4
+ from hestia_earth.utils.term import download_term
4
5
 
5
6
  from hestia_earth.models.log import logRequirements, logShouldRun, debugValues
6
7
  from hestia_earth.models.utils.method import include_methodModel
7
- from hestia_earth.models.utils.term import download_term
8
8
  from .. import MODEL
9
9
 
10
10
  REQUIREMENTS = {
@@ -112,12 +112,10 @@ def _run(cycle: dict, total_values: list):
112
112
 
113
113
  if value == 0:
114
114
  values.extend([_product(term_id, value)])
115
- elif remaining_value > 0 and value is not None and value >= 0:
115
+ elif remaining_value >= 0 and value is not None and value >= 0:
116
116
  value = value if value < remaining_value else remaining_value
117
117
  values.extend([_product(term_id, value)])
118
118
  remaining_value = remaining_value - value
119
- if remaining_value == 0:
120
- break
121
119
 
122
120
  return values + [
123
121
  # whatever remains is "left on field"
@@ -21,6 +21,7 @@ RETURNS = {
21
21
  }]
22
22
  }
23
23
  MODEL_KEY = 'cropResidueManagement'
24
+ TERM_ID = 'residueBurnt,residueIncorporated,residueLeftOnField,residueRemoved,residueIncorporatedLessThan30DaysBeforeCultivation,residueIncorporatedMoreThan30DaysBeforeCultivation' # noqa: E501
24
25
  PRACTICE_IDS = [
25
26
  residueBurnt.TERM_ID,
26
27
  residueIncorporated.TERM_ID,
@@ -1,10 +1,10 @@
1
1
  from hestia_earth.schema import NodeType, TermTermType
2
2
  from hestia_earth.utils.model import filter_list_term_type, find_term_match
3
3
  from hestia_earth.utils.tools import non_empty_list, list_sum
4
+ from hestia_earth.utils.term import download_term
4
5
 
5
6
  from hestia_earth.models.log import debugValues, logRequirements, logShouldRun
6
7
  from hestia_earth.models.utils import get_kg_term_id, get_kg_N_term_id, get_kg_VS_term_id, _filter_list_term_unit
7
- from hestia_earth.models.utils.term import download_term
8
8
  from hestia_earth.models.utils.constant import Units
9
9
  from hestia_earth.models.utils.product import _new_product, convert_product_to_unit
10
10
  from . import MODEL
@@ -10,7 +10,7 @@ from hestia_earth.utils.model import filter_list_term_type
10
10
  from hestia_earth.utils.tools import safe_parse_float, to_precision
11
11
 
12
12
  from hestia_earth.models.log import logRequirements, log_as_table, logShouldRun
13
- from hestia_earth.models.utils import _omit
13
+ from hestia_earth.models.utils import _omit, clamp
14
14
  from hestia_earth.models.utils.constant import DAYS_IN_YEAR
15
15
  from hestia_earth.models.utils.management import _new_management
16
16
  from hestia_earth.models.utils.term import get_lookup_value
@@ -896,11 +896,15 @@ def _should_run_historical_land_use_change_single_crop(
896
896
  land_use_type=land_use_type,
897
897
  permanent_crops_net_expansion=permanent_crops_net_expansion
898
898
  )
899
+ capped_expansion_factor = clamp(
900
+ value=expansion_factor * e9_net_expansion * net_expansion_cultivated_vs_harvested,
901
+ min_value=0,
902
+ max_value=1
903
+ )
899
904
 
900
905
  site_area = {
901
- land_type: (
902
- shares_of_expansion[land_type] * expansion_factor * e9_net_expansion * net_expansion_cultivated_vs_harvested
903
- ) for land_type in LAND_USE_TERMS_FOR_TRANSFORMATION.keys()
906
+ land_type: (shares_of_expansion[land_type] * capped_expansion_factor)
907
+ for land_type in LAND_USE_TERMS_FOR_TRANSFORMATION.keys()
904
908
  if land_type != land_use_type
905
909
  }
906
910
  site_area[land_use_type] = 1 - sum(site_area.values())
@@ -941,6 +945,9 @@ def _get_land_use_term_from_node(node: dict):
941
945
  return _get_lookup_with_cache(lookup_term=node.get("term", {}), column=LOOKUPS.get("landCover")[1])
942
946
 
943
947
 
948
+ def _date_strip(date: str): return date[:10] if date else None
949
+
950
+
944
951
  def _collect_land_use_types(nodes: list) -> list:
945
952
  """Look up the land use type from management nodes."""
946
953
  return [
@@ -949,8 +956,8 @@ def _collect_land_use_types(nodes: list) -> list:
949
956
  "term": node.get("term", {}),
950
957
  "id": node.get("term", {}).get("@id"),
951
958
  "land-use-type": _get_land_use_term_from_node(node),
952
- "endDate": _gapfill_datestr(datestr=node.get("endDate"), mode=DatestrGapfillMode.END)[:10],
953
- "startDate": _gapfill_datestr(datestr=node.get("startDate"), mode=DatestrGapfillMode.START)[:10]
959
+ "endDate": _date_strip(_gapfill_datestr(datestr=node.get("endDate"), mode=DatestrGapfillMode.END)),
960
+ "startDate": _date_strip(_gapfill_datestr(datestr=node.get("startDate"), mode=DatestrGapfillMode.START))
954
961
  } for node in nodes
955
962
  ]
956
963
 
@@ -212,7 +212,7 @@ def _format_inventory(inventory: list[SiteData], default: str = "None") -> str:
212
212
 
213
213
  def _should_run(impact_assessment: dict):
214
214
 
215
- cycle = impact_assessment.get("cycle")
215
+ cycle = impact_assessment.get("cycle", {})
216
216
  functional_unit = cycle.get("functionalUnit")
217
217
 
218
218
  product = get_product(impact_assessment)
@@ -286,7 +286,7 @@ def _dates_overlap(target_practice: dict, node: dict, cycle: dict, site_type_id:
286
286
  ])
287
287
 
288
288
 
289
- def _should_run_practice(management_nodes: list, cycle: dict, site_type_id: str):
289
+ def _should_run_practice(site: dict, management_nodes: list, cycle: dict, site_type_id: str):
290
290
  """
291
291
  Include only landUseManagement practices where GAP_FILL_TO_MANAGEMENT = True
292
292
  """
@@ -297,21 +297,34 @@ def _should_run_practice(management_nodes: list, cycle: dict, site_type_id: str)
297
297
  for node in filter_list_term_type(management_nodes, TermTermType.LANDCOVER)
298
298
  ]
299
299
 
300
- def run(practice: dict):
300
+ def exec(practice: dict):
301
301
  term = practice.get('term', {})
302
- target_group = get_lookup_value(practice.get("term", {}), 'sumIs100Group', skip_debug=True, model=MODEL)
303
- has_other_land_cover_in_same_group = next((
302
+ term_id = term['@id']
303
+ should_gap_fill = term.get('termType') != TermTermType.LANDUSEMANAGEMENT.value or _should_gap_fill(term)
304
+ target_group = get_lookup_value(term, 'sumIs100Group', skip_debug=True, model=MODEL)
305
+ no_other_land_cover_in_same_group = next((
304
306
  True for node in landCover_management_nodes
305
307
  if (
306
308
  node['sumIs100Group'] == target_group and
307
309
  _dates_overlap(target_practice=practice, node=node, cycle=cycle, site_type_id=site_type_id)
308
310
  )
309
- ), None) is not None
310
- return (
311
- (term.get('termType') != TermTermType.LANDUSEMANAGEMENT.value or _should_gap_fill(term)) and
312
- not has_other_land_cover_in_same_group
313
- )
314
- return run
311
+ ), None) is None
312
+ # cannot gap-fill landCover without a `startDate`
313
+ has_required_startDate = term.get('termType') != TermTermType.LANDCOVER.value or practice.get('startDate')
314
+
315
+ should_run = all([
316
+ should_gap_fill,
317
+ has_required_startDate,
318
+ no_other_land_cover_in_same_group
319
+ ])
320
+ if not should_run:
321
+ logRequirements(site, model=MODEL, term=term_id, model_key=MODEL_KEY,
322
+ should_gap_fill=should_gap_fill,
323
+ has_required_startDate=has_required_startDate,
324
+ no_other_land_cover_in_same_group=no_other_land_cover_in_same_group)
325
+ logShouldRun(site, MODEL, term_id, False, model_key=MODEL_KEY)
326
+ return should_run
327
+ return exec
315
328
 
316
329
 
317
330
  def _run_from_practices(site: dict, cycle: dict, site_type_id: str):
@@ -330,7 +343,8 @@ def _run_from_practices(site: dict, cycle: dict, site_type_id: str):
330
343
  ]
331
344
  management_nodes = site.get("management", [])
332
345
  return list(map(_map_to_value, filter(
333
- _should_run_practice(management_nodes=management_nodes, cycle=cycle, site_type_id=site_type_id), practices
346
+ _should_run_practice(site, management_nodes, cycle, site_type_id),
347
+ practices
334
348
  )))
335
349
 
336
350
 
@@ -1,9 +1,9 @@
1
1
  from hestia_earth.schema import SiteSiteType, TermTermType
2
2
  from hestia_earth.utils.model import linked_node
3
+ from hestia_earth.utils.term import download_term
3
4
 
4
5
  from hestia_earth.models.log import logRequirements, logShouldRun
5
6
  from hestia_earth.models.utils.practice import _new_practice
6
- from hestia_earth.models.utils.term import download_term
7
7
  from . import MODEL
8
8
 
9
9
  REQUIREMENTS = {
@@ -2,7 +2,7 @@ from os.path import dirname, abspath
2
2
  import sys
3
3
 
4
4
  from hestia_earth.models.utils import _run_in_serie
5
- from . import cycle, site, remove_cache_fields
5
+ from . import cycle, site, remove_cache_fields, remove_no_value
6
6
 
7
7
  CURRENT_DIR = dirname(abspath(__file__)) + '/'
8
8
  sys.path.append(CURRENT_DIR)
@@ -10,7 +10,8 @@ sys.path.append(CURRENT_DIR)
10
10
  MODELS = [
11
11
  cycle.run,
12
12
  site.run,
13
- remove_cache_fields.run
13
+ remove_cache_fields.run,
14
+ remove_no_value.run
14
15
  ]
15
16
 
16
17
 
@@ -0,0 +1,13 @@
1
+ _KEYS = ['impacts', 'endpoints']
2
+
3
+
4
+ def _has_value(blank_node: dict):
5
+ return blank_node.get('value') is not None
6
+
7
+
8
+ def _filter_has_value(impact: dict, key: str):
9
+ return list(filter(_has_value, impact[key]))
10
+
11
+
12
+ def run(impact: dict):
13
+ return impact | {key: _filter_has_value(impact, key) for key in _KEYS if impact.get(key)}
@@ -110,7 +110,7 @@ def _should_run(site: dict) -> tuple[bool, dict]:
110
110
  """
111
111
  cycles = related_cycles(site)
112
112
  site_type = site.get("siteType")
113
- ipcc_soil_category = _assign_ipcc_soil_category(site.get("measurements", []))
113
+ ipcc_soil_category, soil_logs = _assign_ipcc_soil_category(site.get("measurements", []))
114
114
 
115
115
  has_cycles = len(cycles) > 0
116
116
  has_valid_site_type = site_type in _VALID_SITE_TYPES
@@ -140,6 +140,7 @@ def _should_run(site: dict) -> tuple[bool, dict]:
140
140
  should_compile_inventory=should_compile_inventory,
141
141
  seed=seed,
142
142
  inventory=_format_inventory(inventory),
143
+ **soil_logs,
143
144
  **_format_logs(logs)
144
145
  )
145
146
 
@@ -120,7 +120,10 @@ def _get_ch4_conv_factor(cycle: dict):
120
120
  ecoClimateZone=ecoClimateZone,
121
121
  practice_id=practice_id)
122
122
 
123
- return _get_excretaManagement_MCF_from_lookup(practice_id, ecoClimateZone, duration_key) if practice_id else 0
123
+ return _get_excretaManagement_MCF_from_lookup(practice_id, ecoClimateZone, duration_key) if all([
124
+ practice_id,
125
+ ecoClimateZone is not None
126
+ ]) else 0
124
127
 
125
128
 
126
129
  def _should_run(cycle: dict):
@@ -23,7 +23,11 @@ REQUIREMENTS = {
23
23
  ],
24
24
  "optional": {
25
25
  "measurements": [
26
- {"@type": "Measurement", "value": "", "term.termType": ["soilType", "usdaSoilType"]}
26
+ {
27
+ "@doc": "The model cannot run on sites with more than 30 percent organic soils (`histols`, `histosol` and their subclasses).", # noqa: E501
28
+ "@type": "Measurement", "value": "",
29
+ "term.termType": ["soilType", "usdaSoilType"]
30
+ }
27
31
  ],
28
32
  "management": [
29
33
  {
@@ -19,14 +19,16 @@ from hestia_earth.models.utils.blank_node import (
19
19
  from hestia_earth.models.utils.ecoClimateZone import EcoClimateZone, get_eco_climate_zone_value
20
20
  from hestia_earth.models.utils.measurement import _new_measurement
21
21
  from hestia_earth.models.utils.property import get_node_property
22
- from hestia_earth.models.utils.term import get_residue_removed_or_burnt_terms, get_upland_rice_land_cover_terms
22
+ from hestia_earth.models.utils.term import (
23
+ get_lookup_value, get_residue_removed_or_burnt_terms, get_upland_rice_land_cover_terms
24
+ )
23
25
 
24
26
  from .organicCarbonPerHa_utils import (
25
- check_irrigation, DEPTH_LOWER, DEPTH_UPPER, IPCC_SOIL_CATEGORY_TO_SOIL_TYPE_LOOKUP_VALUE,
27
+ check_irrigation, DEPTH_LOWER, DEPTH_UPPER, format_soil_inventory, IPCC_SOIL_CATEGORY_TO_SOIL_TYPE_LOOKUP_VALUE,
26
28
  IPCC_LAND_USE_CATEGORY_TO_LAND_COVER_LOOKUP_VALUE, IPCC_MANAGEMENT_CATEGORY_TO_GRASSLAND_MANAGEMENT_TERM_ID,
27
29
  IPCC_MANAGEMENT_CATEGORY_TO_TILLAGE_MANAGEMENT_LOOKUP_VALUE, IpccSoilCategory, IpccCarbonInputCategory,
28
30
  IpccLandUseCategory, IpccManagementCategory, is_cover_crop, MIN_AREA_THRESHOLD, sample_constant,
29
- sample_plus_minus_error, sample_plus_minus_uncertainty, SITE_TYPE_TO_IPCC_LAND_USE_CATEGORY,
31
+ sample_plus_minus_error, sample_plus_minus_uncertainty, SITE_TYPE_TO_IPCC_LAND_USE_CATEGORY, SoilData,
30
32
  SUPER_MAJORITY_AREA_THRESHOLD, STATS_DEFINITION
31
33
  )
32
34
  from . import MODEL
@@ -45,7 +47,11 @@ REQUIREMENTS = {
45
47
  ],
46
48
  "optional": {
47
49
  "measurements": [
48
- {"@type": "Measurement", "value": "", "term.termType": ["soilType", "usdaSoilType"]}
50
+ {
51
+ "@doc": "This model cannot run on sites with more than 30 percent organic soils (`histols`, `histosol` and their subclasses).", # noqa: E501
52
+ "@type": "Measurement", "value": "",
53
+ "term.termType": ["soilType", "usdaSoilType"]
54
+ }
49
55
  ],
50
56
  "management": [
51
57
  {
@@ -606,16 +612,19 @@ def should_run(site: dict) -> tuple[bool, dict, dict]:
606
612
  measurement_nodes = site.get("measurements", [])
607
613
 
608
614
  eco_climate_zone = get_eco_climate_zone_value(site, as_enum=True)
609
- ipcc_soil_category = _assign_ipcc_soil_category(measurement_nodes)
615
+ ipcc_soil_category, soil_logs = _assign_ipcc_soil_category(measurement_nodes)
610
616
  soc_ref = _get_soc_ref_preview(ipcc_soil_category, eco_climate_zone)
611
617
 
618
+ valid_site_type = site_type in _VALID_SITE_TYPES
619
+ valid_eco_climate_zone = eco_climate_zone not in _EXCLUDED_ECO_CLIMATE_ZONES
620
+ valid_soc_ref = isinstance(soc_ref, (float, int)) and soc_ref > 0
612
621
  has_management = len(management_nodes) > 0
613
622
  has_measurements = len(measurement_nodes) > 0
614
623
 
615
624
  should_compile_inventory = all([
616
- site_type in _VALID_SITE_TYPES,
617
- eco_climate_zone not in _EXCLUDED_ECO_CLIMATE_ZONES,
618
- soc_ref or -9999 > 0,
625
+ valid_site_type,
626
+ valid_eco_climate_zone,
627
+ valid_soc_ref,
619
628
  has_management,
620
629
  has_measurements
621
630
  ])
@@ -638,9 +647,13 @@ def should_run(site: dict) -> tuple[bool, dict, dict]:
638
647
  year for year, group in inventory.items() if group.get(_InventoryKey.SHOULD_RUN)
639
648
  )
640
649
 
641
- logs = inventory_logs | {
650
+ logs = soil_logs | inventory_logs | {
642
651
  "site_type": site_type,
652
+ "soc_ref_available": valid_soc_ref,
643
653
  "soc_ref": soc_ref,
654
+ "valid_eco_climate_zone": valid_eco_climate_zone,
655
+ "valid_soil_category": ipcc_soil_category not in [IpccSoilCategory.ORGANIC_SOILS],
656
+ "valid_site_type": valid_site_type,
644
657
  "has_management": has_management,
645
658
  "has_measurements": has_measurements,
646
659
  "should_compile_inventory_tier_1": should_compile_inventory,
@@ -1034,24 +1047,34 @@ def _assign_ipcc_soil_category(
1034
1047
  soil_types = _get_soil_type_measurements(measurement_nodes, TermTermType.SOILTYPE)
1035
1048
  usda_soil_types = _get_soil_type_measurements(measurement_nodes, TermTermType.USDASOILTYPE)
1036
1049
 
1050
+ soil_data = [_unpack_soil_data(node) for node in soil_types]
1051
+ usda_soil_data = [_unpack_soil_data(node) for node in usda_soil_types]
1052
+
1037
1053
  clay_content = get_node_value(find_term_match(measurement_nodes, _CLAY_CONTENT_TERM_ID))
1038
1054
  sand_content = get_node_value(find_term_match(measurement_nodes, _SAND_CONTENT_TERM_ID))
1039
-
1040
1055
  has_sandy_soil = clay_content < _CLAY_CONTENT_MAX and sand_content > _SAND_CONTENT_MIN
1041
1056
 
1042
- return next(
1057
+ logs = {
1058
+ "soil_data": format_soil_inventory(soil_data),
1059
+ "usda_soil_data": format_soil_inventory(usda_soil_data),
1060
+ "has_sandy_soil_texture": has_sandy_soil
1061
+ }
1062
+
1063
+ category = next(
1043
1064
  (
1044
1065
  key for key in _SOIL_CATEGORY_DECISION_TREE
1045
- if _SOIL_CATEGORY_DECISION_TREE[key](
1066
+ if _check_soil_category(
1046
1067
  key=key,
1047
- soil_types=soil_types,
1048
- usda_soil_types=usda_soil_types,
1068
+ soil_data=soil_data,
1069
+ usda_soil_data=usda_soil_data,
1049
1070
  has_sandy_soil=has_sandy_soil
1050
1071
  )
1051
1072
  ),
1052
1073
  default
1053
1074
  ) if len(soil_types) > 0 or len(usda_soil_types) > 0 else default
1054
1075
 
1076
+ return category, logs
1077
+
1055
1078
 
1056
1079
  def _get_soil_type_measurements(
1057
1080
  nodes: list[dict], term_type: Literal[TermTermType.SOILTYPE, TermTermType.USDASOILTYPE]
@@ -1067,98 +1090,66 @@ def _get_soil_type_measurements(
1067
1090
  )
1068
1091
 
1069
1092
 
1070
- def _check_soil_category(
1071
- *,
1072
- key: IpccSoilCategory,
1073
- soil_types: list[dict],
1074
- usda_soil_types: list[dict],
1075
- **_
1076
- ) -> bool:
1077
- """
1078
- Check if the soil category matches the given key.
1079
-
1080
- Parameters
1081
- ----------
1082
- key : IpccSoilCategory
1083
- The IPCC soil category to check.
1084
- soil_types : list[dict]
1085
- List of soil type measurement nodes.
1086
- usda_soil_types : list[dict]
1087
- List of USDA soil type measurement nodes
1088
-
1089
- Returns
1090
- -------
1091
- bool
1092
- `True` if the soil category matches, `False` otherwise.
1093
- """
1094
- SOIL_TYPE_LOOKUP = LOOKUPS["soilType"]
1095
- USDA_SOIL_TYPE_LOOKUP = LOOKUPS["usdaSoilType"]
1093
+ def _unpack_soil_data(node):
1094
+ term = node.get("term", {})
1095
+ term_id = term.get("@id")
1096
+ term_type = term.get("termType")
1097
+ value = get_node_value(node)
1096
1098
 
1097
- target_lookup_values = IPCC_SOIL_CATEGORY_TO_SOIL_TYPE_LOOKUP_VALUE.get(key, None)
1099
+ lookup_value = get_lookup_value(term, LOOKUPS[term_type]) if term_type else None
1100
+ category = next(key for key, value in IPCC_SOIL_CATEGORY_TO_SOIL_TYPE_LOOKUP_VALUE.items() if value == lookup_value)
1098
1101
 
1099
- is_soil_type_match = cumulative_nodes_lookup_match(
1100
- soil_types,
1101
- lookup=SOIL_TYPE_LOOKUP,
1102
- target_lookup_values=target_lookup_values,
1103
- cumulative_threshold=MIN_AREA_THRESHOLD
1104
- )
1102
+ return SoilData(term_id, value, category)
1105
1103
 
1106
- is_usda_soil_type_match = cumulative_nodes_lookup_match(
1107
- usda_soil_types,
1108
- lookup=USDA_SOIL_TYPE_LOOKUP,
1109
- target_lookup_values=target_lookup_values,
1110
- cumulative_threshold=MIN_AREA_THRESHOLD
1111
- )
1112
1104
 
1113
- return is_soil_type_match or is_usda_soil_type_match
1105
+ _IPCC_SOIL_CATEGORY_TO_OVERRIDE_KWARGS = {
1106
+ IpccSoilCategory.SANDY_SOILS: {"has_sandy_soil"}
1107
+ }
1108
+ """
1109
+ Keyword arguments that can override the `soilType`/`usdaSoilType` lookup match for an `IpccSoilCategory`.
1110
+ """
1114
1111
 
1115
1112
 
1116
- def _check_sandy_soil_category(
1117
- *,
1118
- key: IpccSoilCategory,
1119
- soil_types: list[dict],
1120
- usda_soil_types: list[dict],
1121
- has_sandy_soil: bool,
1122
- **_
1113
+ def _check_soil_category(
1114
+ *, key: IpccSoilCategory, soil_data: list[SoilData], usda_soil_data: list[SoilData], **kwargs
1123
1115
  ) -> bool:
1124
1116
  """
1125
- Check if the soils are sandy.
1126
-
1127
- This function is special case of `_check_soil_category`.
1117
+ Check if the soil category matches the given key.
1128
1118
 
1129
1119
  Parameters
1130
1120
  ----------
1131
1121
  key : IpccSoilCategory
1132
1122
  The IPCC soil category to check.
1133
- soil_types : list[dict]
1134
- List of soil type measurement nodes.
1135
- usda_soil_types : list[dict]
1136
- List of USDA soil type measurement nodes
1137
- has_sandy_soil : bool
1138
- True if the soils are sandy, False otherwise.
1123
+ soil_data : list[SoilData]
1124
+ List of `SoilData` NamedEnums generated from `soilType` measurement nodes.
1125
+ usda_soil_data : list[SoilData]
1126
+ List of `SoilData` NamedEnums generated from `usdaSoilType` measurement nodes.
1139
1127
 
1140
1128
  Returns
1141
1129
  -------
1142
1130
  bool
1143
1131
  `True` if the soil category matches, `False` otherwise.
1144
1132
  """
1145
- return _check_soil_category(key=key, soil_types=soil_types, usda_soil_types=usda_soil_types) or has_sandy_soil
1133
+ override_kwargs = _IPCC_SOIL_CATEGORY_TO_OVERRIDE_KWARGS.get(key, set())
1134
+ valid_override = any(v for k, v in kwargs.items() if k in override_kwargs)
1146
1135
 
1136
+ is_soil_match = sum(data.value for data in soil_data if data.category == key) > MIN_AREA_THRESHOLD
1137
+ is_usda_soil_match = sum(data.value for data in usda_soil_data if data.category == key) > MIN_AREA_THRESHOLD
1138
+
1139
+ return valid_override or is_soil_match or is_usda_soil_match
1147
1140
 
1148
- _SOIL_CATEGORY_DECISION_TREE = {
1149
- IpccSoilCategory.ORGANIC_SOILS: _check_soil_category,
1150
- IpccSoilCategory.SANDY_SOILS: _check_sandy_soil_category,
1151
- IpccSoilCategory.WETLAND_SOILS: _check_soil_category,
1152
- IpccSoilCategory.VOLCANIC_SOILS: _check_soil_category,
1153
- IpccSoilCategory.SPODIC_SOILS: _check_soil_category,
1154
- IpccSoilCategory.HIGH_ACTIVITY_CLAY_SOILS: _check_soil_category,
1155
- IpccSoilCategory.LOW_ACTIVITY_CLAY_SOILS: _check_soil_category
1156
- }
1157
- """
1158
- A decision tree mapping IPCC soil categories to corresponding check functions.
1159
1141
 
1160
- Key: IpccSoilCategory
1161
- Value: Corresponding function for checking the match of the given soil category based on soil types.
1142
+ _SOIL_CATEGORY_DECISION_TREE = [
1143
+ IpccSoilCategory.ORGANIC_SOILS,
1144
+ IpccSoilCategory.SANDY_SOILS,
1145
+ IpccSoilCategory.WETLAND_SOILS,
1146
+ IpccSoilCategory.VOLCANIC_SOILS,
1147
+ IpccSoilCategory.SPODIC_SOILS,
1148
+ IpccSoilCategory.HIGH_ACTIVITY_CLAY_SOILS,
1149
+ IpccSoilCategory.LOW_ACTIVITY_CLAY_SOILS
1150
+ ]
1151
+ """
1152
+ A decision tree determining the order to check IPCC soil categories.
1162
1153
  """
1163
1154
 
1164
1155
 
@@ -1201,7 +1192,7 @@ def _assign_ipcc_land_use_category(
1201
1192
  return next(
1202
1193
  (
1203
1194
  key for key in DECISION_TREE
1204
- if DECISION_TREE[key](
1195
+ if _check_ipcc_land_use_category(
1205
1196
  key=key,
1206
1197
  land_cover_nodes=land_cover_nodes,
1207
1198
  has_long_fallow=has_long_fallow,
@@ -1317,23 +1308,19 @@ Keyword arguments that can override the `landCover` lookup match for specific `I
1317
1308
  """
1318
1309
 
1319
1310
 
1320
- _LAND_USE_CATEGORY_DECISION_TREE = {
1321
- IpccLandUseCategory.GRASSLAND: _check_ipcc_land_use_category,
1322
- IpccLandUseCategory.SET_ASIDE: _check_ipcc_land_use_category,
1323
- IpccLandUseCategory.PERENNIAL_CROPS: _check_ipcc_land_use_category,
1324
- IpccLandUseCategory.PADDY_RICE_CULTIVATION: _check_ipcc_land_use_category,
1325
- IpccLandUseCategory.ANNUAL_CROPS_WET: _check_ipcc_land_use_category,
1326
- IpccLandUseCategory.ANNUAL_CROPS: _check_ipcc_land_use_category,
1327
- IpccLandUseCategory.FOREST: _check_ipcc_land_use_category,
1328
- IpccLandUseCategory.NATIVE: _check_ipcc_land_use_category,
1329
- IpccLandUseCategory.OTHER: _check_ipcc_land_use_category
1330
- }
1311
+ _LAND_USE_CATEGORY_DECISION_TREE = [
1312
+ IpccLandUseCategory.GRASSLAND,
1313
+ IpccLandUseCategory.SET_ASIDE,
1314
+ IpccLandUseCategory.PERENNIAL_CROPS,
1315
+ IpccLandUseCategory.PADDY_RICE_CULTIVATION,
1316
+ IpccLandUseCategory.ANNUAL_CROPS_WET,
1317
+ IpccLandUseCategory.ANNUAL_CROPS,
1318
+ IpccLandUseCategory.FOREST,
1319
+ IpccLandUseCategory.NATIVE,
1320
+ IpccLandUseCategory.OTHER
1321
+ ]
1331
1322
  """
1332
- A decision tree mapping IPCC soil categories to corresponding check functions.
1333
-
1334
- Key: IpccLandUseCategory
1335
- Value: Corresponding function for checking the match of the given land use category based on land cover nodes
1336
- and additional kwargs.
1323
+ A decision tree determining the order to check IPCC land use categories.
1337
1324
  """
1338
1325
 
1339
1326