hestia-earth-models 0.73.4__py3-none-any.whl → 0.73.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hestia-earth-models might be problematic. Click here for more details.

@@ -523,6 +523,17 @@
523
523
  "replaceThreshold": ["value", 0.01]
524
524
  },
525
525
  "stage": 2
526
+ },
527
+ {
528
+ "key": "measurements",
529
+ "model": "ipcc2019",
530
+ "value": "biocharOrganicCarbonPerHa",
531
+ "runStrategy": "always",
532
+ "mergeStrategy": "list",
533
+ "mergeArgs": {
534
+ "replaceThreshold": ["value", 0.01]
535
+ },
536
+ "stage": 2
526
537
  }
527
538
  ]
528
539
  ]
@@ -64,6 +64,11 @@ RETURNS = {
64
64
  }]
65
65
  }
66
66
  LOOKUPS = {
67
+ "region-crop-cropGroupingFAOSTAT-landCover-annualCropland": "",
68
+ "region-crop-cropGroupingFAOSTAT-landCover-forest": "",
69
+ "region-crop-cropGroupingFAOSTAT-landCover-otherLand": "",
70
+ "region-crop-cropGroupingFAOSTAT-landCover-permanentCropland": "",
71
+ "region-crop-cropGroupingFAOSTAT-landCover-permanentPasture": "",
67
72
  "region-crop-cropGroupingFaostatProduction-areaHarvestedUpTo20YearExpansion": "",
68
73
  "region-crop-cropGroupingFaostatProduction-areaHarvested": "",
69
74
  "region-faostatArea-UpTo20YearExpansion": "",
@@ -116,6 +121,36 @@ def _get_lookup_with_cache(lookup_term, column):
116
121
  )
117
122
 
118
123
 
124
def get_landCover_lookups(country_id: str, end_year: int, product_name: str):
    """
    Attempt to get the pre-calculated values for the landCover model calculation.

    One `region-crop-cropGroupingFAOSTAT-landCover-<suffix>.csv` lookup is queried for every key of
    `LAND_USE_TERMS_FOR_TRANSFORMATION`, using `country_id` as the lookup term, `product_name` as the column and
    `str(end_year)` as the key of the grouped data.

    Returns a dict with one entry per key of `LAND_USE_TERMS_FOR_TRANSFORMATION`. Parsed values are divided by 100;
    values for which `safe_parse_float` falls back to its default are returned as `None`.
    """
    def _lookup_share(suffix: str):
        grouped_data = get_region_lookup_value(
            lookup_name=f"region-crop-cropGroupingFAOSTAT-landCover-{suffix}.csv",
            term_id=country_id,
            column=product_name,
            model=MODEL,
            key=MODEL_KEY
        )
        parsed = safe_parse_float(
            value=extract_grouped_data(data=grouped_data, key=str(end_year)),
            default=None
        )
        # Divide by 100 to match site_area ratios
        return None if parsed is None else parsed / 100

    return {suffix: _lookup_share(suffix) for suffix in LAND_USE_TERMS_FOR_TRANSFORMATION.keys()}
152
+
153
+
119
154
  def _management(term_id: str, value: float, start_date: str, end_date: str):
120
155
  node = _new_management(term_id, MODEL)
121
156
  node['value'] = value
@@ -821,11 +856,21 @@ def _should_run(site: dict) -> tuple[bool, list, dict]:
821
856
  target_node=relevant_nodes[0]
822
857
  ) if relevant_nodes else None
823
858
 
824
- should_run_nodes, site_area = _should_run_historical_land_use_change(
825
- site=site,
826
- nodes=relevant_nodes,
827
- land_use_type=land_use_type
828
- ) if all([land_use_type, has_no_prior_land_cover_data]) else (False, {})
859
+ landCover_from_lookups = get_landCover_lookups(
860
+ country_id=site.get("country", {}).get("@id"),
861
+ end_year=_get_year_from_landCover(relevant_nodes[0]),
862
+ product_name=relevant_nodes[0].get("term", {}).get("name", "")
863
+ ) if relevant_nodes else {}
864
+
865
+ should_run_nodes, site_area = (
866
+ (False, {}) if not all([land_use_type, has_no_prior_land_cover_data])
867
+ else (True, landCover_from_lookups) if landCover_from_lookups and all(landCover_from_lookups.values())
868
+ else _should_run_historical_land_use_change(
869
+ site=site,
870
+ nodes=relevant_nodes,
871
+ land_use_type=land_use_type
872
+ )
873
+ )
829
874
 
830
875
  logRequirements(site, model=MODEL, model_key=MODEL_KEY,
831
876
  has_management_nodes=bool(relevant_nodes),
@@ -833,6 +878,7 @@ def _should_run(site: dict) -> tuple[bool, list, dict]:
833
878
  allowed_land_use_types=';'.join(ALLOWED_LAND_USE_TYPES),
834
879
  has_no_prior_land_cover_data=has_no_prior_land_cover_data,
835
880
  management_nodes=log_as_table([_omit(n, ['term']) for n in relevant_nodes]),
881
+ landCover_from_lookups=landCover_from_lookups,
836
882
  should_run_nodes=should_run_nodes)
837
883
 
838
884
  should_run = all([land_use_type, has_no_prior_land_cover_data, should_run_nodes])
@@ -0,0 +1,435 @@
1
+ from functools import reduce
2
+ import numpy as np
3
+ from typing import Union
4
+
5
+ from hestia_earth.schema import (
6
+ CycleFunctionalUnit, MeasurementMethodClassification, MeasurementStatsDefinition, SiteSiteType, TermTermType
7
+ )
8
+
9
+ from hestia_earth.utils.blank_node import get_node_value
10
+ from hestia_earth.utils.descriptive_stats import calc_descriptive_stats
11
+ from hestia_earth.utils.stats import gen_seed, truncated_normal_1d
12
+ from hestia_earth.utils.tools import non_empty_list
13
+
14
+ from hestia_earth.models.log import log_as_table, logRequirements, logShouldRun
15
+ from hestia_earth.models.utils.blank_node import group_nodes_by_year, filter_list_term_type
16
+ from hestia_earth.models.utils.measurement import _new_measurement
17
+ from hestia_earth.models.utils.property import get_node_property
18
+ from hestia_earth.models.utils.site import related_cycles
19
+ from hestia_earth.models.utils.term import get_lookup_value
20
+
21
+ from .organicCarbonPerHa_utils import IpccSoilCategory
22
+ from .organicCarbonPerHa_tier_1 import _assign_ipcc_soil_category
23
+
24
+ from . import MODEL
25
+
26
+
27
# Declarative description of the data the model reads from a Site and its related Cycles.
REQUIREMENTS = {
    "Site": {
        "siteType": ["cropland", "permanent pasture"],
        "related": {
            "Cycle": [
                {
                    "endDate": "",
                    "functionalUnit": "1 ha",
                    "optional": {
                        "startDate": "",
                        "inputs": [
                            {"@type": "Input", "term.termType": "biochar"}
                        ]
                    }
                }
            ]
        }
    }
}
# Lookup columns read from the `biochar` lookup: mean first, standard deviation second.
LOOKUPS = {
    "biochar": [
        "FRAC_OC_REMAINING_100_YEARS",
        "FRAC_OC_REMAINING_100_YEARS_SD"
    ]
}
# Declarative description of the Measurement node returned by the model.
RETURNS = {
    "Measurement": [
        {
            "value": "",
            "sd": "",
            "min": "",
            "max": "",
            "statsDefinition": "simulated",
            "observations": "",
            "dates": "",
            "methodClassification": "tier 1 model"
        }
    ]
}
TERM_ID = "biocharOrganicCarbonPerHa"
64
+
65
# Number of random samples drawn for each sampled factor (used as the sample array width).
_ITERATIONS = 1000
_METHOD_CLASSIFICATION = MeasurementMethodClassification.TIER_1_MODEL.value
_STATS_DEFINITION = MeasurementStatsDefinition.SIMULATED.value

# Site types and Cycle functional units for which the model is allowed to run.
_VALID_SITE_TYPES = [
    site_type.value for site_type in (SiteSiteType.CROPLAND, SiteSiteType.PERMANENT_PASTURE)
]
_VALID_FUNCTIONAL_UNITS = [
    functional_unit.value for functional_unit in (CycleFunctionalUnit._1_HA,)
]
71
+
72
+
73
def run(site: dict) -> list[dict]:
    """
    Run the model on a Site.

    Parameters
    ----------
    site : dict
        A valid HESTIA [Site](https://www.hestia.earth/schema/Site).

    Returns
    -------
    list[dict]
        A list of HESTIA [Measurement](https://www.hestia.earth/schema/Measurement) nodes with `term.@id` =
        `biocharOrganicCarbonPerHa`, or an empty list when the model should not run.
    """
    should_run, inventory = _should_run(site)
    if not should_run:
        return []
    return _run(inventory)
90
+
91
+
92
def _should_run(site: dict) -> tuple[bool, dict]:
    """
    Extract and organise required data from the input [Site](https://www.hestia.earth/schema/Site) node and determine
    whether the model should run.

    Parameters
    ----------
    site : dict
        A valid HESTIA [Site](https://www.hestia.earth/schema/Site).

    Returns
    -------
    tuple[bool, dict]
        `(should_run, inventory)`. The inventory is empty when the requirements are not met.
    """
    site_type = site.get("siteType")
    cycles = related_cycles(site)
    ipcc_soil_category = _assign_ipcc_soil_category(site.get("measurements", []))

    has_cycles = len(cycles) > 0
    has_valid_site_type = site_type in _VALID_SITE_TYPES
    has_functional_unit_1_ha = all(
        cycle.get("functionalUnit") in _VALID_FUNCTIONAL_UNITS for cycle in cycles
    )
    has_mineral_soils = ipcc_soil_category != IpccSoilCategory.ORGANIC_SOILS

    # The generator is seeded from the site, model and term so that sampling is reproducible.
    seed = gen_seed(site, MODEL, TERM_ID)
    rng = np.random.default_rng(seed)

    should_compile_inventory = (
        has_cycles
        and has_valid_site_type
        and has_functional_unit_1_ha
        and has_mineral_soils
    )

    if should_compile_inventory:
        inventory, logs = _compile_inventory(cycles, rng)
    else:
        inventory, logs = {}, {}

    logRequirements(
        site, model=MODEL, term=TERM_ID,
        has_cycles=has_cycles,
        site_type=site_type,
        has_valid_site_type=has_valid_site_type,
        has_functional_unit_1_ha=has_functional_unit_1_ha,
        has_mineral_soils=has_mineral_soils,
        ipcc_soil_category=ipcc_soil_category,
        should_compile_inventory=should_compile_inventory,
        seed=seed,
        inventory=_format_inventory(inventory),
        **_format_logs(logs)
    )

    # The model only runs when at least one year made it into the inventory.
    should_run = len(inventory) > 0

    logShouldRun(site, MODEL, TERM_ID, should_run)

    return should_run, inventory
149
+
150
+
151
def _compile_inventory(
    cycles: list[dict],
    rng: Union[int, np.random.Generator, None] = None
) -> tuple[dict, dict]:
    """
    Build an annual inventory of model input data.

    Parameters
    ----------
    cycles : list[dict]
        A list of HESTIA [Cycles](https://www.hestia.earth/schema/Cycle).
    rng : int | random.Generator | None
        The rng/seed for the random sampling of model parameters.

    Returns
    -------
    tuple[dict, dict]
        `(inventory, logs)`. `inventory` maps each year returned by `group_nodes_by_year` to the sum of
        `total_oc * fraction_of_node_duration` over the items grouped into that year. `logs` contains the sampled
        factors under the key `factor_cache`.
    """
    COPY_FIELDS = ("startDate", "endDate")

    # NOTE(review): cycles are keyed by `@id`, so cycles sharing an `@id` (or lacking one) overwrite each other.
    cycle_data = {
        cycle.get("@id"): {
            "biochar_nodes": filter_list_term_type(cycle.get("inputs", []), TermTermType.BIOCHAR),
            **{field: cycle.get(field) for field in COPY_FIELDS}
        } for cycle in cycles
    }

    all_term_ids = set().union(*(
        _get_unique_term_ids(data.get("biochar_nodes", [])) for data in cycle_data.values()
    ))
    # Nodes without a term id cannot be looked up, and `None` cannot be ordered against `str` by `sorted`.
    # Sorting keeps the order in which samples are drawn from `rng` stable between runs.
    biochar_term_ids = sorted(term_id for term_id in all_term_ids if term_id is not None)

    factor_cache = {
        term_id: {
            "oc_content": _sample_oc_content(term_id, rng),
            "frac_remaining": _sample_frac_remaining(term_id, rng),
        } for term_id in biochar_term_ids
    }

    total_oc = {
        cycle_id: sum(
            (_calc_total_oc(node, factor_cache) for node in data.get("biochar_nodes", [])),
            0
        ) for cycle_id, data in cycle_data.items()
    }

    grouped = group_nodes_by_year(
        [
            {
                "total_oc": total_oc.get(cycle_id, 0),
                **{field: data.get(field) for field in COPY_FIELDS}
            } for cycle_id, data in cycle_data.items()
        ],
        include_spillovers=True
    )

    # presumably `fraction_of_node_duration` is added to each item by `group_nodes_by_year` — TODO confirm
    inventory = {
        year: sum(
            (item.get("total_oc", 0) * item.get("fraction_of_node_duration", 0) for item in items),
            0
        ) for year, items in grouped.items()
    }

    logs = {
        "factor_cache": factor_cache
    }

    return inventory, logs
223
+
224
+
225
+ def _get_unique_term_ids(nodes: list[dict]) -> set[str]:
226
+ return set(node.get("term", {}).get("@id") for node in nodes)
227
+
228
+
229
def _sample_oc_content(term_id: str, rng):
    """
    Get an array of random samples based on the default organic carbon content of a biochar term.

    Returns `0` when either the `value` or the `sd` of the `organicCarbonContent` property is missing or zero.
    """
    biochar_node = {"term": {"@id": term_id, "termType": TermTermType.BIOCHAR.value}}
    oc_property = get_node_property(biochar_node, "organicCarbonContent")

    mean = oc_property.get("value")
    sd = oc_property.get("sd")

    if not (mean and sd):
        return 0

    # Both parameters are divided by 100 before sampling within [0, 1] (presumably percentages — TODO confirm).
    return truncated_normal_1d((1, _ITERATIONS), mean/100, sd/100, 0, 1, seed=rng)
240
+
241
+
242
def _sample_frac_remaining(term_id: str, rng):
    """
    Get an array of random samples based on the `FRAC_OC_REMAINING_100_YEARS` lookups of a biochar term.

    Returns `0` when either the mean or the standard deviation lookup is missing or zero.
    """
    mean_column, sd_column = LOOKUPS["biochar"][0], LOOKUPS["biochar"][1]
    biochar_term = {"@id": term_id, "termType": TermTermType.BIOCHAR.value}

    mean = get_lookup_value(biochar_term, mean_column)
    sd = get_lookup_value(biochar_term, sd_column)

    if not (mean and sd):
        return 0

    return truncated_normal_1d((1, _ITERATIONS), mean, sd, 0, 1, seed=rng)
252
+
253
+
254
def _calc_total_oc(biochar_node: dict, factor_cache: dict):
    """
    Calculate the total amount of stable organic carbon added to the soil from an application of biochar.

    The node value is multiplied by the cached `oc_content` and `frac_remaining` factors of the node's term; a
    factor that is missing from the cache counts as `0`.
    """
    term_id = biochar_node.get("term", {}).get("@id")
    applied_mass = get_node_value(biochar_node)

    term_factors = factor_cache.get(term_id, {})

    return applied_mass * term_factors.get("oc_content", 0) * term_factors.get("frac_remaining", 0)
265
+
266
+
267
def _format_inventory(inventory: dict) -> str:
    """
    Format the biochar inventory for logging as a table, one row per year in ascending order.

    Returns the string `"None"` when there is nothing to log.
    """
    years = sorted(set(non_empty_list(year for year in inventory.keys())))

    if not (inventory and len(years) > 0):
        return "None"

    return log_as_table(
        {
            "year": year,
            "stable-oc-from-biochar": _format_factor(inventory.get(year))
        } for year in years
    )
281
+
282
+
283
def _format_logs(logs: dict):
    """
    Format model logs. The format function is selected by dict key, with `_format_str` as the fallback; the keys
    themselves are always passed through `_format_str`.
    """
    formatted = {}
    for key, value in logs.items():
        formatter = _LOG_KEY_TO_FORMAT_FUNC.get(key, _format_str)
        formatted[_format_str(key)] = formatter(value)
    return formatted
290
+
291
+
292
def _format_factor_cache(factor_cache: dict) -> str:
    """
    Format the cache of sampled factors for logging as a table, one row per term id.

    Returns the string `"None"` when the cache is empty.
    """
    if not (factor_cache and len(factor_cache) > 0):
        return "None"

    return log_as_table(
        {
            "term-id": term_id,
            **{_format_str(name): _format_factor(factor) for name, factor in factors.items()}
        } for term_id, factors in factor_cache.items()
    )
304
+
305
+
306
def _format_factor(value) -> str:
    """
    Format a model factor. The format function is the first entry of `_TYPE_TO_FORMAT_FUNC` whose type(s) match
    `value`; the string `"None"` is returned when no entry matches.
    """
    for accepted_types, formatter in _TYPE_TO_FORMAT_FUNC.items():
        if isinstance(value, accepted_types):
            return formatter(value)
    return "None"
315
+
316
+
317
+ def _format_nd_array(value) -> str:
318
+ return f"{np.mean(value):.3g} ± {np.std(value):.3g}"
319
+
320
+
321
+ def _format_number(value) -> str:
322
+ return f"{value:.3g}"
323
+
324
+
325
+ _INVALID_CHARS = {"_", ":", ",", "="}
326
+ _REPLACEMENT_CHAR = "-"
327
+
328
+
329
+ def _format_str(value: str, *_) -> str:
330
+ """Format a string for logging in a table. Remove all characters used to render the table on the front end."""
331
+ return reduce(lambda x, char: x.replace(char, _REPLACEMENT_CHAR), _INVALID_CHARS, str(value))
332
+
333
+
334
# Maps a type (or tuple of types, as accepted by `isinstance`) to the function used to format such a value.
_TYPE_TO_FORMAT_FUNC = {
    np.ndarray: _format_nd_array,
    (float, int): _format_number,
}


# Maps a log key to the function used to format the value logged under that key.
_LOG_KEY_TO_FORMAT_FUNC = {
    "factor_cache": _format_factor_cache,
}
343
+
344
+
345
def _run(
    inventory: dict
) -> list[dict]:
    """
    Calculate the annual biochar organic carbon stock based on an inventory of biochar application data.

    The stock of each year is the running sum of the inventory values up to and including that year. The series
    starts one year before the first inventory year (where the stock is zero) and ends on the last inventory year;
    years missing from the inventory add nothing.

    Parameters
    ----------
    inventory : dict
        The annual inventory of biochar data, keyed by year.

    Returns
    -------
    list[dict]
        A list of HESTIA [Measurement](https://www.hestia.earth/schema/Measurement) nodes with `term.@id` =
        `biocharOrganicCarbonPerHa`. The list is empty when the inventory is empty.
    """
    if not inventory:
        # Nothing to accumulate; `min`/`max` below would raise a `ValueError` on an empty inventory.
        return []

    first_year = min(inventory) - 1  # the year before the first inventory year
    last_year = max(inventory)

    zeros = np.zeros((1, _ITERATIONS))

    # Adding to `zeros` broadcasts scalar inventory values to a full row of samples.
    running_total = zeros
    accumulated_oc = {}
    for year in range(first_year, last_year + 1):
        running_total = running_total + inventory.get(year, zeros)
        accumulated_oc[year] = running_total

    dates = [f"{year}-12-31" for year in accumulated_oc]
    values = np.vstack(tuple(accumulated_oc.values()))

    descriptive_stats = calc_descriptive_stats(
        values,
        _STATS_DEFINITION,
        axis=1,  # Calculate stats rowwise.
        decimals=6  # Round values to the nearest milligram.
    )
    return [_measurement(dates, **descriptive_stats)]
385
+
386
+
387
def _measurement(
    dates: list[str],
    value: list[float],
    sd: Union[list[float], None] = None,
    min: Union[list[float], None] = None,
    max: Union[list[float], None] = None,
    statsDefinition: Union[str, None] = None,
    observations: Union[list[int], None] = None
) -> dict:
    """
    Build a HESTIA `Measurement` node to contain a value and descriptive statistics calculated by the model.

    Parameters
    ----------
    dates : list[str]
        A list of dates associated with the calculated stocks, one per year of the inventory.
    value : list[float]
        A list of values representing the mean modelled stock for each year of the inventory.
    sd : list[float] | None
        A list of standard deviations of the modelled stock for each year of the inventory.
    min : list[float] | None
        A list of minimum values of the modelled stock for each year of the inventory.
    max : list[float] | None
        A list of maximum values of the modelled stock for each year of the inventory.
    statsDefinition : str | None
        The [statsDefinition](https://hestia.earth/schema/Measurement#statsDefinition) of the measurement.
    observations : list[int] | None
        The number of model iterations used to calculate the descriptive statistics.

    Returns
    -------
    dict
        A valid HESTIA `Measurement` node, see: https://www.hestia.earth/schema/Measurement.
    """
    fields = {
        "value": value,
        "sd": sd,
        "min": min,
        "max": max,
        "statsDefinition": statsDefinition,
        "observations": observations,
        "dates": dates,
        "methodClassification": _METHOD_CLASSIFICATION
    }
    # Fields that are empty or were not provided are left off the node.
    populated_fields = {key: field for key, field in fields.items() if field}
    return _new_measurement(TERM_ID, MODEL) | populated_fields