hestia-earth-models 0.65.10__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59):
  1. hestia_earth/models/cache_sites.py +7 -9
  2. hestia_earth/models/config/Cycle.json +34 -16
  3. hestia_earth/models/config/ImpactAssessment.json +12 -0
  4. hestia_earth/models/config/Site.json +4 -1
  5. hestia_earth/models/cycle/completeness/freshForage.py +10 -2
  6. hestia_earth/models/cycle/cropResidueManagement.py +3 -1
  7. hestia_earth/models/ecoinventV3/__init__.py +2 -1
  8. hestia_earth/models/environmentalFootprintV3/environmentalFootprintSingleOverallScore.py +135 -0
  9. hestia_earth/models/environmentalFootprintV3/soilQualityIndexLandTransformation.py +17 -6
  10. hestia_earth/models/geospatialDatabase/{aware.py → awareWaterBasinId.py} +1 -1
  11. hestia_earth/models/hestia/landCover.py +57 -39
  12. hestia_earth/models/hestia/residueRemoved.py +80 -0
  13. hestia_earth/models/hestia/resourceUse_utils.py +64 -38
  14. hestia_earth/models/hestia/utils.py +1 -2
  15. hestia_earth/models/ipcc2019/aboveGroundBiomass.py +33 -12
  16. hestia_earth/models/ipcc2019/animal/pastureGrass.py +1 -1
  17. hestia_earth/models/ipcc2019/belowGroundBiomass.py +32 -11
  18. hestia_earth/models/ipcc2019/ch4ToAirEntericFermentation.py +17 -8
  19. hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +5 -3
  20. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_2_utils.py +27 -17
  21. hestia_earth/models/ipcc2019/pastureGrass.py +1 -1
  22. hestia_earth/models/ipcc2019/pastureGrass_utils.py +8 -1
  23. hestia_earth/models/log.py +1 -1
  24. hestia_earth/models/mocking/search-results.json +34 -34
  25. hestia_earth/models/pooreNemecek2018/freshwaterWithdrawalsDuringCycle.py +0 -1
  26. hestia_earth/models/pooreNemecek2018/landOccupationDuringCycle.py +13 -10
  27. hestia_earth/models/site/defaultMethodClassification.py +9 -2
  28. hestia_earth/models/site/defaultMethodClassificationDescription.py +4 -2
  29. hestia_earth/models/site/management.py +49 -31
  30. hestia_earth/models/site/pre_checks/cache_geospatialDatabase.py +19 -14
  31. hestia_earth/models/utils/blank_node.py +10 -4
  32. hestia_earth/models/utils/crop.py +1 -1
  33. hestia_earth/models/utils/cycle.py +3 -3
  34. hestia_earth/models/utils/lookup.py +1 -1
  35. hestia_earth/models/version.py +1 -1
  36. hestia_earth/orchestrator/strategies/merge/merge_list.py +17 -6
  37. {hestia_earth_models-0.65.10.dist-info → hestia_earth_models-0.66.0.dist-info}/METADATA +1 -1
  38. {hestia_earth_models-0.65.10.dist-info → hestia_earth_models-0.66.0.dist-info}/RECORD +59 -54
  39. tests/models/environmentalFootprintV3/test_environmentalFootprintSingleOverallScore.py +92 -0
  40. tests/models/environmentalFootprintV3/test_soilQualityIndexLandTransformation.py +4 -19
  41. tests/models/faostat2018/product/test_price.py +1 -1
  42. tests/models/geospatialDatabase/{test_aware.py → test_awareWaterBasinId.py} +1 -1
  43. tests/models/hestia/test_landCover.py +4 -2
  44. tests/models/hestia/test_landTransformation20YearAverageDuringCycle.py +3 -1
  45. tests/models/hestia/test_residueRemoved.py +20 -0
  46. tests/models/ipcc2019/test_aboveGroundBiomass.py +3 -1
  47. tests/models/ipcc2019/test_belowGroundBiomass.py +4 -2
  48. tests/models/ipcc2019/test_organicCarbonPerHa.py +94 -1
  49. tests/models/pooreNemecek2018/test_landOccupationDuringCycle.py +1 -3
  50. tests/models/site/pre_checks/test_cache_geospatialDatabase.py +22 -0
  51. tests/models/site/test_defaultMethodClassification.py +6 -0
  52. tests/models/site/test_defaultMethodClassificationDescription.py +6 -0
  53. tests/models/site/test_management.py +4 -4
  54. tests/models/test_cache_sites.py +2 -2
  55. tests/models/utils/test_crop.py +14 -2
  56. tests/orchestrator/strategies/merge/test_merge_list.py +11 -1
  57. {hestia_earth_models-0.65.10.dist-info → hestia_earth_models-0.66.0.dist-info}/LICENSE +0 -0
  58. {hestia_earth_models-0.65.10.dist-info → hestia_earth_models-0.66.0.dist-info}/WHEEL +0 -0
  59. {hestia_earth_models-0.65.10.dist-info → hestia_earth_models-0.66.0.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,7 @@ from .log import logger
8
8
  from .utils import CACHE_KEY, cached_value
9
9
  from .utils.site import CACHE_YEARS_KEY
10
10
  from .site.pre_checks.cache_geospatialDatabase import (
11
- list_collections, cache_site_results, _should_run
11
+ list_vectors, list_rasters, cache_site_results, _should_run
12
12
  )
13
13
  from .geospatialDatabase.utils import (
14
14
  CACHE_VALUE as CACHE_GEOSPATIAL_KEY, CACHE_AREA_SIZE,
@@ -115,8 +115,9 @@ def _group_sites(sites: dict, check_has_cache: bool = True):
115
115
  }
116
116
 
117
117
 
118
- def _run(sites: list, years: list = [], include_region: bool = False, years_only: bool = False):
119
- rasters, vectors = list_collections(years, include_region, years_only)
118
+ def _run(sites: list, years: list = [], years_only: bool = False):
119
+ rasters = list_rasters(years=years, years_only=years_only)
120
+ vectors = [] if years_only else list_vectors(sites)
120
121
  filtered_data = _group_sites(sites, not years_only)
121
122
  return flatten([
122
123
  _run_values(filtered_data.get(param_type), param_type, rasters, vectors, years)
@@ -124,7 +125,7 @@ def _run(sites: list, years: list = [], include_region: bool = False, years_only
124
125
  ])
125
126
 
126
127
 
127
- def run(sites: list, years: list = None, include_region: bool = False):
128
+ def run(sites: list, years: list = None):
128
129
  """
129
130
  Run all queries at once for the list of provided Sites.
130
131
  Note: Earth Engine needs to be initiliased with `init_gee()` before running this function.
@@ -135,11 +136,8 @@ def run(sites: list, years: list = None, include_region: bool = False):
135
136
  List of Site as dict.
136
137
  years : list[int]
137
138
  List of related years to fetch annual data.
138
- include_region : bool
139
- Prefecth region IDs.
140
- This will cache region-level data and will make the request slower. Only use if needed.
141
139
  """
142
- sites = _run(sites, include_region=include_region)
140
+ sites = _run(sites)
143
141
 
144
142
  # avoid memory limit errors by running only a few years at a time
145
143
  unique_years = sorted(list(set(years)))
@@ -149,6 +147,6 @@ def run(sites: list, years: list = None, include_region: bool = False):
149
147
  for batch_index in batches:
150
148
  logger.info(f"Processing sites in batch {batch_index + 1} of {len(batches)}...")
151
149
  sub_years = unique_years[batch_index:batch_index + batch_size]
152
- sites = _run(sites, sub_years, include_region, years_only=True)
150
+ sites = _run(sites, sub_years, years_only=True)
153
151
 
154
152
  return sites
@@ -166,6 +166,14 @@
166
166
  "mergeStrategy": "list",
167
167
  "stage": 1
168
168
  },
169
+ {
170
+ "key": "practices",
171
+ "model": "hestia",
172
+ "value": "residueRemoved",
173
+ "runStrategy": "add_blank_node_if_missing",
174
+ "mergeStrategy": "list",
175
+ "stage": 1
176
+ },
169
177
  {
170
178
  "key": "practices",
171
179
  "model": "cycle",
@@ -287,22 +295,6 @@
287
295
  "mergeStrategy": "list",
288
296
  "stage": 1
289
297
  },
290
- {
291
- "key": "inputs",
292
- "model": "faostat2018",
293
- "value": "seed",
294
- "runStrategy": "add_blank_node_if_missing",
295
- "mergeStrategy": "list",
296
- "stage": 1
297
- },
298
- {
299
- "key": "inputs",
300
- "model": "pooreNemecek2018",
301
- "value": "saplingsDepreciatedAmountPerCycle",
302
- "runStrategy": "add_blank_node_if_missing",
303
- "mergeStrategy": "list",
304
- "stage": 1
305
- },
306
298
  {
307
299
  "key": "practices",
308
300
  "model": "cycle",
@@ -441,6 +433,32 @@
441
433
  "stage": 1
442
434
  }
443
435
  ],
436
+ [
437
+ {
438
+ "key": "inputs",
439
+ "model": "faostat2018",
440
+ "value": "seed",
441
+ "runStrategy": "add_blank_node_if_missing",
442
+ "mergeStrategy": "list",
443
+ "stage": 1
444
+ },
445
+ {
446
+ "key": "inputs",
447
+ "model": "pooreNemecek2018",
448
+ "value": "saplingsDepreciatedAmountPerCycle",
449
+ "runStrategy": "add_blank_node_if_missing",
450
+ "mergeStrategy": "list",
451
+ "stage": 1
452
+ }
453
+ ],
454
+ {
455
+ "key": "completeness",
456
+ "model": "cycle",
457
+ "value": "completeness",
458
+ "runStrategy": "always",
459
+ "mergeStrategy": "node",
460
+ "stage": 1
461
+ },
444
462
  [
445
463
  {
446
464
  "key": "animals",
@@ -1656,6 +1656,18 @@
1656
1656
  }
1657
1657
  ],
1658
1658
  [
1659
+ {
1660
+ "key": "endpoints",
1661
+ "model": "environmentalFootprintV3",
1662
+ "value": "environmentalFootprintSingleOverallScore",
1663
+ "runStrategy": "always",
1664
+ "mergeStrategy": "list",
1665
+ "mergeArgs": {
1666
+ "replaceThreshold": ["value", 0.01],
1667
+ "sameMethodModel": true
1668
+ },
1669
+ "stage": 1
1670
+ },
1659
1671
  {
1660
1672
  "key": "endpoints",
1661
1673
  "model": "lcImpactCertainEffects100Years",
@@ -128,7 +128,7 @@
128
128
  {
129
129
  "key": "awareWaterBasinId",
130
130
  "model": "geospatialDatabase",
131
- "value": "aware",
131
+ "value": "awareWaterBasinId",
132
132
  "runStrategy": "add_key_if_missing",
133
133
  "mergeStrategy": "default",
134
134
  "stage": 1
@@ -406,6 +406,9 @@
406
406
  "value": "management",
407
407
  "runStrategy": "always",
408
408
  "mergeStrategy": "list",
409
+ "mergeArgs": {
410
+ "matchDatesFormat": "%Y"
411
+ },
409
412
  "stage": 2
410
413
  },
411
414
  [
@@ -9,6 +9,7 @@ from hestia_earth.utils.tools import list_sum
9
9
 
10
10
  from hestia_earth.models.log import logRequirements
11
11
  from hestia_earth.models.utils import is_from_model
12
+ from hestia_earth.models.utils.blank_node import get_lookup_value
12
13
  from . import MODEL
13
14
 
14
15
  REQUIREMENTS = {
@@ -32,13 +33,17 @@ RETURNS = {
32
33
  "freshForage": ""
33
34
  }
34
35
  }
36
+ LOOKUPS = {
37
+ "liveAnimal": "isGrazingAnimal",
38
+ "liveAquaticSpecies": "isGrazingAnimal"
39
+ }
35
40
  MODEL_KEY = 'freshForage'
36
41
  ALLOWED_SITE_TYPES = [
37
42
  SiteSiteType.PERMANENT_PASTURE.value
38
43
  ]
39
44
 
40
45
 
41
- def _valid_input(input: dict): return is_from_model(input) and list_sum(input.get('value', [-1])) > 0
46
+ def _valid_input(input: dict): return is_from_model(input) and list_sum(input.get('value', [-1])) >= 0
42
47
 
43
48
 
44
49
  def run(cycle: dict):
@@ -47,7 +52,10 @@ def run(cycle: dict):
47
52
 
48
53
  cycle_has_added_forage_input = any(map(_valid_input, cycle.get('inputs', [])))
49
54
 
50
- animals = cycle.get('animals', [])
55
+ animals = [
56
+ a for a in cycle.get('animals', [])
57
+ if get_lookup_value(a.get('term', {}), 'isGrazingAnimal', model=MODEL, key=MODEL_KEY)
58
+ ]
51
59
  all_animals_have_added_forage_input = bool(animals) and all([
52
60
  any(map(_valid_input, animal.get('inputs', []))) for animal in animals
53
61
  ])
@@ -30,6 +30,8 @@ PRACTICE_IDS = [
30
30
  residueIncorporated.TERM_ID,
31
31
  residueLeftOnField.TERM_ID,
32
32
  residueRemoved.TERM_ID,
33
+ 'residueIncorporatedLessThan30DaysBeforeCultivation',
34
+ 'residueIncorporatedMoreThan30DaysBeforeCultivation',
33
35
  ]
34
36
 
35
37
 
@@ -50,7 +52,7 @@ def _should_run(cycle: dict):
50
52
  ])]
51
53
  missing_practices = [term_id for term_id in practice_ids if term_id not in existing_practices]
52
54
 
53
- should_run = all([sum_practices == 100])
55
+ should_run = all([99.5 <= sum_practices <= 100.5])
54
56
 
55
57
  for term_id in missing_practices:
56
58
  logRequirements(cycle, model=MODEL, term=term_id,
@@ -69,7 +69,8 @@ LOOKUPS = {
69
69
  "pesticideAI": "ecoinventMapping",
70
70
  "soilAmendment": "ecoinventMapping",
71
71
  "transport": "ecoinventMapping",
72
- "veterinaryDrugs": "ecoinventMapping"
72
+ "veterinaryDrugs": "ecoinventMapping",
73
+ "feedFoodAdditive": "ecoinventMapping"
73
74
  }
74
75
  MODEL = 'ecoinventV3'
75
76
  MODEL_KEY = 'impactAssessment' # keep to generate entry in "model-links.json"
@@ -0,0 +1,135 @@
1
+ """
2
+ The inputs and outputs from the life cycle inventory are aggregated in 16 midpoint
3
+ characterised impact categories. These impact categories are then normalised (i.e., the results are divided by
4
+ the overall inventory of a reference unit, e.g., the entire world, to convert the characterised impact categories in
5
+ relative shares of the impacts of the analysed system) and weighted (i.e., each impact category is multiplied by
6
+ a weighting factor to reflect their perceived relative importance). The weighted impact categories can then be
7
+ summed to obtain the EF single overall score. The number and the name of the impact categories is the same
8
+ in EF3.0 and EF3.1.
9
+ """
10
+ from typing import List, Optional, Tuple
11
+
12
+ from hestia_earth.schema import TermTermType
13
+ from hestia_earth.utils.lookup import get_table_value, download_lookup, column_name
14
+ from hestia_earth.utils.model import filter_list_term_type
15
+ from hestia_earth.utils.tools import list_sum
16
+
17
+ from hestia_earth.models.log import logRequirements, logShouldRun, log_as_table, debugMissingLookup
18
+ from hestia_earth.models.utils.indicator import _new_indicator
19
+ from hestia_earth.models.utils.lookup import _node_value
20
+ from . import MODEL
21
+
22
+ REQUIREMENTS = {
23
+ "ImpactAssessment": {
24
+ "impacts": [
25
+ {
26
+ "@type": "Indicator",
27
+ "value": "",
28
+ "term.name": "PEF indicators only"
29
+ }
30
+ ]
31
+ }
32
+ }
33
+
34
+ LOOKUPS = {
35
+ "@doc": "Normalisation factors in PEF v3.1 are calculated using a Global population number of 6,895,889,018",
36
+ "characterisedIndicator": ["pefTerm-normalisation-v3_1", "pefTerm-weighing-v3_1"]
37
+ }
38
+
39
+ RETURNS = {
40
+ "Indicator": {
41
+ "value": ""
42
+ }
43
+ }
44
+
45
+ TERM_ID = 'environmentalFootprintSingleOverallScore'
46
+
47
+ normalisation_column = LOOKUPS['characterisedIndicator'][0]
48
+ weighing_column = LOOKUPS['characterisedIndicator'][1]
49
+
50
+
51
+ def _is_a_PEF_indicator(indicator_id) -> bool:
52
+ return (_get_factor(indicator_id, normalisation_column) not in [None, 0, 0.0] and
53
+ _get_factor(indicator_id, weighing_column) not in [None, 0, 0.0])
54
+
55
+
56
+ def _get_factor(indicator_id: str, column) -> Optional[float]:
57
+ factor = get_table_value(download_lookup(f"{list(LOOKUPS.keys())[1]}.csv", keep_in_memory=True),
58
+ 'termid', indicator_id, column_name(column))
59
+ if factor is None:
60
+ debugMissingLookup(f"{list(LOOKUPS.keys())[1]}.csv", 'termid', indicator_id, column, None, model=MODEL,
61
+ term=TERM_ID)
62
+ return float(factor) if factor is not None else None
63
+
64
+
65
+ def _normalise(indicator: dict) -> Optional[float]:
66
+ return (_node_value(indicator) / _get_factor(indicator['term']['@id'], normalisation_column)) \
67
+ if (_node_value(indicator) is not None and
68
+ _get_factor(indicator['term']['@id'], normalisation_column) not in [None, 0, 0.0]) else None
69
+
70
+
71
+ def _weighted_normalise(indicator: dict) -> Optional[float]:
72
+ return (_normalise(indicator) * (_get_factor(indicator['term']['@id'], weighing_column) / 100)
73
+ ) if (_normalise(indicator) is not None and
74
+ _get_factor(indicator['term']['@id'], weighing_column) not in [None, 0, 0.0]) else None
75
+
76
+
77
+ def _indicator(value: float) -> dict:
78
+ indicator = _new_indicator(TERM_ID, MODEL)
79
+ indicator['value'] = value
80
+ return indicator
81
+
82
+
83
+ def _run(indicators: List[dict]):
84
+ return _indicator(value=list_sum([indicator["weighted-normalised"] for indicator in indicators]))
85
+
86
+
87
+ def _valid_indicator(indicator: Optional[dict]) -> bool:
88
+ return all([indicator is not None,
89
+ isinstance(_node_value(indicator), (int, float)),
90
+ _node_value(indicator) is not None,
91
+ _is_a_PEF_indicator(indicator.get('term', {}).get('@id', ''))])
92
+
93
+
94
+ def _should_run(impact_assessment: dict) -> Tuple[bool, list[dict]]:
95
+ indicators = [
96
+ indicator for indicator in
97
+ filter_list_term_type(impact_assessment.get('impacts', []), TermTermType.CHARACTERISEDINDICATOR)
98
+ if _is_a_PEF_indicator(indicator.get('term', {}).get('@id', ''))
99
+ ]
100
+
101
+ has_pef_indicators = bool(indicators)
102
+
103
+ processed_indicators = [{
104
+ "indicator": indicator,
105
+ "valid-indicator": _valid_indicator(indicator),
106
+ "one-indicator-for-category": sum(1 for i in indicators if i['term']['@id'] == indicator['term']['@id']) == 1,
107
+ "indicator-pef-category": indicator['term']['@id'],
108
+ "value": _node_value(indicator),
109
+ "normalised": _normalise(indicator),
110
+ "normalisation-used": _get_factor(indicator['term']['@id'], normalisation_column),
111
+ "weighted-normalised": _weighted_normalise(indicator),
112
+ "weighting-used": _get_factor(indicator['term']['@id'], weighing_column),
113
+ }
114
+ for indicator in indicators
115
+ ]
116
+
117
+ no_duplicate_indicators = all([indicator['one-indicator-for-category'] for indicator in processed_indicators])
118
+ valid_indicators = [indicator for indicator in processed_indicators if indicator['valid-indicator']]
119
+ all_indicators_valid = all([indicator['valid-indicator'] for indicator in processed_indicators])
120
+
121
+ logRequirements(impact_assessment, model=MODEL, term=TERM_ID,
122
+ has_pef_indicators=has_pef_indicators,
123
+ no_duplicate_indicators=no_duplicate_indicators,
124
+ all_indicators_valid=all_indicators_valid,
125
+ processed_indicators=log_as_table(processed_indicators),
126
+ )
127
+
128
+ should_run = all([has_pef_indicators, all_indicators_valid, no_duplicate_indicators])
129
+ logShouldRun(impact_assessment, MODEL, TERM_ID, should_run)
130
+ return should_run, valid_indicators
131
+
132
+
133
+ def run(impact_assessment: dict):
134
+ should_run, indicators = _should_run(impact_assessment)
135
+ return _run(indicators) if should_run else None
@@ -25,14 +25,25 @@ REQUIREMENTS = {
25
25
  {
26
26
  "@type": "Indicator",
27
27
  "term.termType": "resourceUse",
28
- "term.@id": ["landTransformation20YearAverageInputsProduction",
29
- "landTransformation20YearAverageDuringCycle"],
28
+ "term.@id": "landTransformation20YearAverageDuringCycle",
30
29
  "value": "> 0",
31
30
  "landCover": {"@type": "Term", "term.termType": "landCover"},
32
31
  "previousLandCover": {"@type": "Term", "term.termType": "landCover"}
33
32
  }
34
33
  ],
35
- "optional": {"country": {"@type": "Term", "termType": "region"}}
34
+ "optional": {
35
+ "country": {"@type": "Term", "termType": "region"},
36
+ "emissionsResourceUse": [
37
+ {
38
+ "@type": "Indicator",
39
+ "term.termType": "resourceUse",
40
+ "term.@id": "landTransformation20YearAverageInputsProduction",
41
+ "value": "> 0",
42
+ "landCover": {"@type": "Term", "term.termType": "landCover"},
43
+ "previousLandCover": {"@type": "Term", "term.termType": "landCover"}
44
+ }
45
+ ]
46
+ }
36
47
  }
37
48
  }
38
49
 
@@ -76,7 +87,8 @@ def _run(transformations: List[dict]):
76
87
 
77
88
 
78
89
  def _is_valid_indicator(indicator: dict) -> bool:
79
- return indicator['term']['@id'] in REQUIREMENTS['ImpactAssessment']['emissionsResourceUse'][0]['term.@id']
90
+ return indicator['term']['@id'] in ["landTransformation20YearAverageInputsProduction",
91
+ "landTransformation20YearAverageDuringCycle"]
80
92
 
81
93
 
82
94
  def _should_run(impact_assessment: dict) -> Tuple[bool, list]:
@@ -144,8 +156,7 @@ def _should_run(impact_assessment: dict) -> Tuple[bool, list]:
144
156
  found_transformations=log_as_table(found_transformations_with_coefficient)
145
157
  )
146
158
 
147
- should_run = has_land_transformation_indicators is False or all([has_land_transformation_indicators,
148
- all_transformations_are_valid])
159
+ should_run = all([has_land_transformation_indicators, all_transformations_are_valid])
149
160
 
150
161
  logShouldRun(impact_assessment, MODEL, TERM_ID, should_run)
151
162
  return should_run, valid_transformations_with_coef
@@ -19,7 +19,7 @@ REQUIREMENTS = {
19
19
  RETURNS = {
20
20
  "The AWARE water basin identifier as a `string`": ""
21
21
  }
22
- MODEL_KEY = 'aware'
22
+ MODEL_KEY = 'awareWaterBasinId'
23
23
  EE_PARAMS = {
24
24
  'collection': 'AWARE',
25
25
  'ee_type': 'vector',
@@ -1,8 +1,8 @@
1
1
  """
2
2
  Land Cover
3
3
 
4
- This model calculates historic land use change over a twenty-year period, extending the
5
- functionality of the Blonk model.
4
+ This model calculates historic land use change over a twenty-year period,
5
+ extending the functionality of the Blonk model.
6
6
  """
7
7
  import math
8
8
  from collections import defaultdict
@@ -10,15 +10,16 @@ from datetime import datetime, timedelta
10
10
 
11
11
  from hestia_earth.schema import SiteSiteType, TermTermType
12
12
  from hestia_earth.utils.lookup import (
13
- download_lookup, get_table_value, column_name,
14
- extract_grouped_data_closest_date, _is_missing_value, extract_grouped_data
13
+ download_lookup, get_table_value, column_name, _is_missing_value, extract_grouped_data
15
14
  )
16
15
  from hestia_earth.utils.model import filter_list_term_type
17
- from hestia_earth.utils.tools import safe_parse_float, to_precision, non_empty_value
16
+ from hestia_earth.utils.tools import safe_parse_float, to_precision
18
17
 
19
18
  from hestia_earth.models.log import logRequirements, log_as_table, logShouldRun
19
+ from hestia_earth.models.utils.constant import DAYS_IN_YEAR
20
20
  from hestia_earth.models.utils.management import _new_management
21
21
  from hestia_earth.models.utils.term import get_lookup_value
22
+ from hestia_earth.models.utils.blank_node import _node_date, DatestrFormat, _gapfill_datestr, DatestrGapfillMode
22
23
  from .utils import (
23
24
  IPCC_LAND_USE_CATEGORY_ANNUAL,
24
25
  IPCC_LAND_USE_CATEGORY_PERENNIAL,
@@ -34,8 +35,6 @@ from .utils import (
34
35
  crop_ipcc_land_use_category,
35
36
  )
36
37
  from . import MODEL
37
- from ..utils.blank_node import _node_date, DatestrFormat, _gapfill_datestr, DatestrGapfillMode
38
- from ..utils.constant import DAYS_IN_YEAR
39
38
 
40
39
  REQUIREMENTS = {
41
40
  "Site": {
@@ -51,7 +50,10 @@ REQUIREMENTS = {
51
50
  "@type": "Management",
52
51
  "value": "",
53
52
  "term.termType": "landCover",
54
- "endDate": ""
53
+ "or": {
54
+ "startDate": "",
55
+ "endDate": ""
56
+ }
55
57
  }
56
58
  ]
57
59
  }
@@ -82,7 +84,8 @@ LOOKUPS = {
82
84
  "Permanent meadows and pastures"
83
85
  ],
84
86
  "crop": ["cropGroupingFaostatArea", "IPCC_LAND_USE_CATEGORY"],
85
- "landCover": ["cropGroupingFaostatProduction", "FAOSTAT_LAND_AREA_CATEGORY"]
87
+ "landCover": ["cropGroupingFaostatProduction", "FAOSTAT_LAND_AREA_CATEGORY"],
88
+ "property": "CALCULATE_TOTAL_LAND_COVER_SHARE_SEPARATELY"
86
89
  }
87
90
  MODEL_KEY = 'landCover'
88
91
 
@@ -120,7 +123,14 @@ def site_area_sum_to_100(dict_of_percentages: dict):
120
123
  math.isclose(sum(dict_of_percentages.values()), 0.0, rel_tol=0.01))
121
124
 
122
125
 
123
- def get_changes(country_id: str, end_year: int) -> dict:
126
+ def _should_group_landCover(term: dict):
127
+ return any(
128
+ bool(get_lookup_value(lookup_term=prop.get("term", {}), column="CALCULATE_TOTAL_LAND_COVER_SHARE_SEPARATELY"))
129
+ for prop in term.get("properties", [])
130
+ )
131
+
132
+
133
+ def get_changes(country_id: str, end_year: int) -> tuple[dict, bool]:
124
134
  """
125
135
  For each entry in ALL_LAND_USE_TERMS, creates a key: value in output dictionary, also TOTAL
126
136
  """
@@ -129,14 +139,18 @@ def get_changes(country_id: str, end_year: int) -> dict:
129
139
  land_use_term: safe_parse_float(
130
140
  extract_grouped_data(
131
141
  get_table_value(lookup, 'termid', country_id, column_name(land_use_term)),
132
- str(end_year))
142
+ str(end_year)),
143
+ default=None
133
144
  )
134
145
  for land_use_term in ALL_LAND_USE_TERMS + [LAND_AREA]
135
146
  }
136
- changes_dict[TOTAL_AGRICULTURAL_CHANGE] = (float(changes_dict.get(TOTAL_CROPLAND, 0))
137
- + float(changes_dict.get(PERMANENT_PASTURE, 0)))
147
+ missing_changes = any(val is None for val in changes_dict.values())
148
+ changes_dict = {k: v if v is not None else 0 for k, v in changes_dict.items()}
149
+ changes_dict[TOTAL_AGRICULTURAL_CHANGE] = (
150
+ float(changes_dict.get(TOTAL_CROPLAND, 0)) + float(changes_dict.get(PERMANENT_PASTURE, 0))
151
+ )
138
152
 
139
- return changes_dict
153
+ return changes_dict, missing_changes
140
154
 
141
155
 
142
156
  def _get_ratio_start_and_end_values(
@@ -148,7 +162,7 @@ def _get_ratio_start_and_end_values(
148
162
  # expansion over twenty years / current area
149
163
  lookup = download_lookup('region-faostatArea.csv')
150
164
  table_value = get_table_value(lookup, 'termid', country_id, column_name(fao_name))
151
- end_value = safe_parse_float(value=extract_grouped_data_closest_date(table_value, end_year), default=None)
165
+ end_value = safe_parse_float(value=extract_grouped_data(table_value, str(end_year)), default=None)
152
166
  return max(0.0, _safe_divide(numerator=expansion, denominator=end_value))
153
167
 
154
168
 
@@ -379,9 +393,9 @@ def _get_harvested_area(country_id: str, year: int, faostat_name: str) -> float:
379
393
  """
380
394
  lookup = download_lookup("region-crop-cropGroupingFaostatProduction-areaHarvested.csv")
381
395
  return safe_parse_float(
382
- value=extract_grouped_data_closest_date(
396
+ value=extract_grouped_data(
383
397
  data=get_table_value(lookup, "termid", country_id, column_name(faostat_name)),
384
- year=year
398
+ key=str(year)
385
399
  ),
386
400
  default=None
387
401
  )
@@ -399,7 +413,9 @@ def _get_term_id_for_crop(nodes: set, land_type: str) -> str:
399
413
  )
400
414
 
401
415
 
402
- def _run_make_management_nodes(existing_nodes: list, percentage_transformed_from: dict, start_year: int) -> list:
416
+ def _run(site: dict, existing_nodes: list, percentage_transformed_from: dict):
417
+ start_year = _get_year_from_landCover(existing_nodes[0]) - DEFAULT_WINDOW_IN_YEARS
418
+
403
419
  """Creates a list of new management nodes, excluding any dates matching existing ones."""
404
420
  existing_nodes_set = {
405
421
  (node.get("term", {}).get("@id", ""), node.get("startDate"), node.get("endDate"))
@@ -421,6 +437,9 @@ def _run_make_management_nodes(existing_nodes: list, percentage_transformed_from
421
437
  ]
422
438
  values = [v for v in values if v.get("land_management_key") not in existing_nodes_set]
423
439
 
440
+ for value in values:
441
+ logShouldRun(site, MODEL, value.get("term_id"), True, model_key=MODEL_KEY)
442
+
424
443
  return [
425
444
  _management(
426
445
  term_id=value.get("term_id"),
@@ -507,13 +526,18 @@ def _get_net_expansion_cultivated_vs_harvested(annual_crops_net_expansion, chang
507
526
  return net_expansion_cultivated_vs_harvested
508
527
 
509
528
 
529
+ def _get_year_from_landCover(node: dict):
530
+ date = node.get('startDate') or node.get('endDate')
531
+ return int(date[:4])
532
+
533
+
510
534
  def _should_run_historical_land_use_change(site: dict, nodes: list, land_use_type: str) -> tuple[bool, dict]:
511
535
  # Assume a single management node for single-cropping.
512
536
  return _should_run_historical_land_use_change_single_crop(
513
537
  site=site,
514
538
  term=nodes[0].get("term", {}),
515
539
  country_id=site.get("country", {}).get("@id"),
516
- end_year=int(nodes[0].get("endDate")[:4]),
540
+ end_year=_get_year_from_landCover(nodes[0]),
517
541
  land_use_type=land_use_type
518
542
  )
519
543
 
@@ -527,7 +551,7 @@ def _should_run_historical_land_use_change_single_crop(
527
551
  ) -> tuple[bool, dict]:
528
552
  """Calculate land use change percentages for a single management node/crop."""
529
553
  # (C-H).
530
- changes = get_changes(country_id=country_id, end_year=end_year)
554
+ changes, missing_changes = get_changes(country_id=country_id, end_year=end_year)
531
555
 
532
556
  # (L). Estimate maximum forest loss
533
557
  forest_loss = _estimate_maximum_forest_change(
@@ -657,6 +681,8 @@ def _should_run_historical_land_use_change_single_crop(
657
681
  site_area[land_use_type] = 1 - sum(site_area.values())
658
682
 
659
683
  sum_of_site_areas_is_100 = site_area_sum_to_100(site_area)
684
+ site_type_allowed = site.get("siteType") in SITE_TYPES
685
+
660
686
  logRequirements(
661
687
  log_node=site,
662
688
  model=MODEL,
@@ -664,19 +690,13 @@ def _should_run_historical_land_use_change_single_crop(
664
690
  model_key=MODEL_KEY,
665
691
  land_use_type=land_use_type,
666
692
  country_id=country_id,
693
+ changes=log_as_table(changes),
667
694
  site_area=log_as_table(site_area),
668
- sum_of_site_areas_is_100=sum_of_site_areas_is_100
695
+ sum_of_site_areas_is_100=sum_of_site_areas_is_100,
696
+ site_type_allowed=site_type_allowed
669
697
  )
670
698
 
671
- should_run = all(
672
- [
673
- site.get("siteType"),
674
- country_id,
675
- non_empty_value(term),
676
- site.get("siteType") in SITE_TYPES,
677
- sum_of_site_areas_is_100
678
- ]
679
- )
699
+ should_run = all([not missing_changes, country_id, site_type_allowed, sum_of_site_areas_is_100])
680
700
  logShouldRun(site, MODEL, term.get("@id"), should_run, model_key=MODEL_KEY)
681
701
 
682
702
  return should_run, site_area
@@ -714,8 +734,7 @@ def _no_prior_land_cover_data(nodes: list, end_date: str) -> bool:
714
734
  return len(previous_nodes) == 0
715
735
 
716
736
 
717
- def _should_run(site: dict) -> tuple[bool, dict]:
718
- management_nodes = filter_list_term_type(site.get("management", []), TermTermType.LANDCOVER)
737
+ def _should_run(site: dict, management_nodes: list) -> tuple[bool, dict]:
719
738
  summarised_nodes = _collect_land_use_types(management_nodes)
720
739
  allowed_land_use_types = [ANNUAL_CROPLAND, PERMANENT_CROPLAND, PERMANENT_PASTURE]
721
740
  relevant_nodes = sorted(
@@ -751,10 +770,9 @@ def _should_run(site: dict) -> tuple[bool, dict]:
751
770
 
752
771
 
753
772
  def run(site: dict) -> list:
754
- should_run, site_area = _should_run(site)
755
- management_nodes = filter_list_term_type(site.get("management", []), TermTermType.LANDCOVER)
756
- return _run_make_management_nodes(
757
- existing_nodes=management_nodes,
758
- percentage_transformed_from=site_area,
759
- start_year=int(management_nodes[0].get("endDate")[:4]) - DEFAULT_WINDOW_IN_YEARS
760
- ) if should_run else []
773
+ management_nodes = [
774
+ node for node in filter_list_term_type(site.get("management", []), TermTermType.LANDCOVER)
775
+ if not _should_group_landCover(node)
776
+ ]
777
+ should_run, site_area = _should_run(site=site, management_nodes=management_nodes)
778
+ return _run(site, management_nodes, site_area) if should_run else []