hestia-earth-models 0.58.0__py3-none-any.whl → 0.59.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hestia-earth-models might be problematic. Click here for more details.

Files changed (34) hide show
  1. hestia_earth/models/cycle/{irrigated.py → irrigatedTypeUnspecified.py} +4 -4
  2. hestia_earth/models/cycle/residueIncorporated.py +1 -1
  3. hestia_earth/models/emepEea2019/nh3ToAirInorganicFertiliser.py +2 -2
  4. hestia_earth/models/geospatialDatabase/clayContent.py +17 -4
  5. hestia_earth/models/geospatialDatabase/sandContent.py +17 -4
  6. hestia_earth/models/impact_assessment/irrigated.py +0 -3
  7. hestia_earth/models/ipcc2019/co2ToAirSoilCarbonStockChangeManagementChange.py +10 -9
  8. hestia_earth/models/ipcc2019/n2OToAirCropResidueDecompositionDirect.py +4 -51
  9. hestia_earth/models/ipcc2019/n2OToAirInorganicFertiliserDirect.py +104 -0
  10. hestia_earth/models/ipcc2019/n2OToAirOrganicFertiliserDirect.py +105 -0
  11. hestia_earth/models/ipcc2019/organicCarbonPerHa.py +1059 -1220
  12. hestia_earth/models/ipcc2019/utils.py +82 -1
  13. hestia_earth/models/mocking/search-results.json +161 -87
  14. hestia_earth/models/site/management.py +12 -9
  15. hestia_earth/models/site/organicCarbonPerHa.py +251 -89
  16. hestia_earth/models/utils/blank_node.py +157 -34
  17. hestia_earth/models/utils/cycle.py +6 -3
  18. hestia_earth/models/utils/measurement.py +1 -1
  19. hestia_earth/models/utils/term.py +46 -1
  20. hestia_earth/models/version.py +1 -1
  21. {hestia_earth_models-0.58.0.dist-info → hestia_earth_models-0.59.0.dist-info}/METADATA +4 -8
  22. {hestia_earth_models-0.58.0.dist-info → hestia_earth_models-0.59.0.dist-info}/RECORD +34 -30
  23. tests/models/cycle/{test_irrigated.py → test_irrigatedTypeUnspecified.py} +1 -1
  24. tests/models/geospatialDatabase/test_clayContent.py +9 -3
  25. tests/models/geospatialDatabase/test_sandContent.py +9 -3
  26. tests/models/ipcc2019/test_n2OToAirInorganicFertiliserDirect.py +74 -0
  27. tests/models/ipcc2019/test_n2OToAirOrganicFertiliserDirect.py +74 -0
  28. tests/models/ipcc2019/test_organicCarbonPerHa.py +303 -1044
  29. tests/models/site/test_organicCarbonPerHa.py +51 -5
  30. tests/models/utils/test_blank_node.py +102 -42
  31. tests/models/utils/test_term.py +17 -3
  32. {hestia_earth_models-0.58.0.dist-info → hestia_earth_models-0.59.0.dist-info}/LICENSE +0 -0
  33. {hestia_earth_models-0.58.0.dist-info → hestia_earth_models-0.59.0.dist-info}/WHEEL +0 -0
  34. {hestia_earth_models-0.58.0.dist-info → hestia_earth_models-0.59.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,7 @@ from functools import reduce
8
8
  from statistics import mode, mean
9
9
  from typing import (
10
10
  Any,
11
+ List,
11
12
  Callable,
12
13
  NamedTuple,
13
14
  Optional,
@@ -23,7 +24,7 @@ from hestia_earth.utils.tools import (
23
24
  )
24
25
 
25
26
  from ..log import debugValues, log_as_table
26
- from . import _filter_list_term_unit
27
+ from . import is_from_model, _filter_list_term_unit
27
28
  from .constant import Units
28
29
  from .property import get_node_property, get_node_property_value
29
30
  from .lookup import (
@@ -127,6 +128,16 @@ def find_terms_value(nodes: list, term_id: str, default: Union[int, None] = 0):
127
128
  return list_sum(get_total_value(filter(lambda node: node.get('term', {}).get('@id') == term_id, nodes)), default)
128
129
 
129
130
 
131
+ def has_gap_filled_by_ids(nodes: list, term_ids: List[str]):
132
+ nodes = [n for n in nodes if n.get('term', {}).get('@id') in term_ids]
133
+ return any([is_from_model(n) for n in nodes])
134
+
135
+
136
+ def has_original_by_ids(nodes: list, term_ids: List[str]):
137
+ nodes = [n for n in nodes if n.get('term', {}).get('@id') in term_ids]
138
+ return any([not is_from_model(n) for n in nodes])
139
+
140
+
130
141
  def get_total_value(nodes: list):
131
142
  """
132
143
  Get the total `value` of a list of Blank Nodes.
@@ -941,27 +952,27 @@ def _build_time_fraction_dict(
941
952
  Returns
942
953
  -------
943
954
  dict
944
- A dictionary containing "fraction_of_year" and "fraction_of_node_duration".
955
+ A dictionary containing "fraction_of_group_duration" and "fraction_of_node_duration".
945
956
  """
946
- year_duration = _datetime_range_duration(group_datetime_range, add_second=True)
957
+ group_duration = _datetime_range_duration(group_datetime_range, add_second=True)
947
958
  node_duration = _datetime_range_duration(node_datetime_range, add_second=True)
948
959
 
949
960
  intersection_duration = _calc_datetime_range_intersection_duration(
950
961
  node_datetime_range, group_datetime_range, add_second=True
951
962
  )
952
963
 
953
- fraction_of_year = intersection_duration / year_duration
964
+ fraction_of_group_duration = intersection_duration / group_duration
954
965
  fraction_of_node_duration = intersection_duration / node_duration
955
966
 
956
967
  return {
957
- "fraction_of_year": fraction_of_year,
968
+ "fraction_of_group_duration": fraction_of_group_duration,
958
969
  "fraction_of_node_duration": fraction_of_node_duration
959
970
  }
960
971
 
961
972
 
962
973
  def _validate_time_fraction_dict(
963
974
  time_fraction_dict: dict,
964
- is_final_year: bool
975
+ is_final_group: bool
965
976
  ) -> bool:
966
977
  """
967
978
  Return `True` if the node intersections with a year group by
@@ -973,30 +984,19 @@ def _validate_time_fraction_dict(
973
984
  be counted in the year group if the majority of that node takes place in
974
985
  that year.
975
986
  """
976
- FRACTION_OF_YEAR_THRESHOLD = 0.3
987
+ FRACTION_OF_GROUP_DURATION_THRESHOLD = 0.3
977
988
  FRACTION_OF_NODE_DURATION_THRESHOLD = 0.5
978
989
 
979
990
  return any([
980
- time_fraction_dict["fraction_of_year"] > FRACTION_OF_YEAR_THRESHOLD,
991
+ time_fraction_dict["fraction_of_group_duration"] > FRACTION_OF_GROUP_DURATION_THRESHOLD,
981
992
  time_fraction_dict["fraction_of_node_duration"] > FRACTION_OF_NODE_DURATION_THRESHOLD,
982
- is_final_year and time_fraction_dict["fraction_of_node_duration"] == FRACTION_OF_NODE_DURATION_THRESHOLD
993
+ is_final_group and time_fraction_dict["fraction_of_node_duration"] == FRACTION_OF_NODE_DURATION_THRESHOLD
983
994
  ])
984
995
 
985
996
 
986
- def _datestr_within_range(datestr: str, group_datetime_range: DatetimeRange) -> bool:
987
- """
988
- Validate if the date represented by the date string falls within the specified datetime range.
997
+ def _build_update_dict(node: dict, years: list, target_year: int) -> dict:
989
998
  """
990
- return _datetime_within_range(
991
- safe_parse_date(_gapfill_datestr(datestr)),
992
- group_datetime_range
993
- )
994
-
995
-
996
- def _build_update_dict(node: dict, group_datetime_range: DatetimeRange) -> dict:
997
- """
998
- Build an update dictionary containing values and dates from a node that fall within a given
999
- datetime range.
999
+ Build an update dictionary containing values and dates from a node that fall within a given year.
1000
1000
 
1001
1001
  This is only required when `group_nodes_by_year` `mode = GroupNodesByYearMode.DATES`
1002
1002
 
@@ -1004,23 +1004,20 @@ def _build_update_dict(node: dict, group_datetime_range: DatetimeRange) -> dict:
1004
1004
  ----------
1005
1005
  node : dict
1006
1006
  The node containing values and dates.
1007
- group_datetime_range : DatetimeRange
1008
- The datetime range to filter values and dates.
1007
+ target_year : int
1008
+ The year to be matched.
1009
1009
 
1010
1010
  Returns
1011
1011
  -------
1012
1012
  dict
1013
1013
  An update dictionary containing "value" and "dates" keys.
1014
1014
  """
1015
+ valid_indices = {
1016
+ i for i, y in enumerate(years) if y == target_year
1017
+ }
1015
1018
  return {
1016
- "value": [
1017
- val for val, datestr in zip(node.get("value"), node.get("dates"))
1018
- if _datestr_within_range(datestr, group_datetime_range)
1019
- ],
1020
- "dates": [
1021
- datestr for datestr in node.get("dates")
1022
- if _datestr_within_range(datestr, group_datetime_range)
1023
- ]
1019
+ "value": [node.get("value")[i] for i in valid_indices],
1020
+ "dates": [node.get("dates")[i] for i in valid_indices]
1024
1021
  }
1025
1022
 
1026
1023
 
@@ -1068,6 +1065,12 @@ def group_nodes_by_year(
1068
1065
  node, default_node_duration=default_node_duration
1069
1066
  )
1070
1067
 
1068
+ # pre-parse the "dates" field so it doesn't get re-calculated in each iteration of the for-loop
1069
+ years = (
1070
+ [safe_parse_date(datestr).year for datestr in node.get("dates", [])]
1071
+ if mode == GroupNodesByYearMode.DATES else []
1072
+ )
1073
+
1071
1074
  range_start = node_datetime_range.start.year if node_datetime_range else 0
1072
1075
  range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
1073
1076
 
@@ -1081,7 +1084,7 @@ def group_nodes_by_year(
1081
1084
  is_final_year = _datetime_within_range(node_datetime_range.end, group_datetime_range)
1082
1085
 
1083
1086
  time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
1084
- update_dict = _build_update_dict(node, group_datetime_range) if mode == GroupNodesByYearMode.DATES else {}
1087
+ update_dict = _build_update_dict(node, years, year) if mode == GroupNodesByYearMode.DATES else {}
1085
1088
 
1086
1089
  should_run = (
1087
1090
  mode == GroupNodesByYearMode.DATES
@@ -1104,4 +1107,124 @@ def group_nodes_by_year(
1104
1107
  for year, group in grouped.items()
1105
1108
  }
1106
1109
 
1107
- return dict(sorted(iterated.items())) if sort_result else dict(iterated)
1110
+ return dict(sorted(iterated.items())) if sort_result else iterated
1111
+
1112
+
1113
+ def group_nodes_by_year_and_month(
1114
+ nodes: list[dict],
1115
+ default_node_duration: int = 1,
1116
+ sort_result: bool = True,
1117
+ inner_key: Union[Any, None] = None
1118
+ ) -> dict[int, list[dict]]:
1119
+ """
1120
+ Group nodes by year and month based on their "startDate" and "endDate" fields. Incomplete date strings are gap-filled
1121
+ automatically using `_gapfill_datestr` function.
1122
+
1123
+ Returns a dict in the shape:
1124
+ ```
1125
+ {
1126
+ year (int): {
1127
+ month (int): nodes (list[dict]) # for each month 1 - 12
1128
+ }
1129
+ }
1130
+ ```
1131
+
1132
+ Parameters
1133
+ ----------
1134
+ nodes : list[dict]
1135
+ A list of nodes with start and end date information.
1136
+ default_node_duration : int, optional
1137
+ Default duration of a node in years if the start date is not available, by default 1.
1138
+ sort_result : bool, optional
1139
+ Flag to sort the result by year, by default True.
1140
+ inner_key: Any | None
1141
+ An optional inner dictionary key for the outputted annualised groups (can be used to merge annualised
1142
+ dictionaries together), default value: `None`.
1143
+
1144
+ Returns
1145
+ -------
1146
+ dict[int, list[dict]]
1147
+ A dictionary where keys are years and values are dicts mapping months to lists of nodes (nested under `inner_key` if set).
1148
+ """
1149
+ valid_nodes = [node for node in nodes if _should_run_node_by_end_date(node)]
1150
+
1151
+ def group_node(groups: dict, index: int):
1152
+ node = valid_nodes[index]
1153
+
1154
+ node_datetime_range = _get_node_datetime_range_from_start_and_end_date(
1155
+ node, default_node_duration=default_node_duration
1156
+ )
1157
+
1158
+ range_start = node_datetime_range.start.year if node_datetime_range else 0
1159
+ range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
1160
+
1161
+ for year in range(range_start, range_end):
1162
+ for month in range(1, 13):
1163
+
1164
+ group_datetime_range = DatetimeRange(
1165
+ start=safe_parse_date(_gapfill_datestr(f"{year}-{month:02}", DatestrGapfillMode.START)),
1166
+ end=safe_parse_date(_gapfill_datestr(f"{year}-{month}", DatestrGapfillMode.END))
1167
+ )
1168
+
1169
+ is_final_month = _datetime_within_range(node_datetime_range.end, group_datetime_range)
1170
+ time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
1171
+ should_run = _validate_time_fraction_dict(time_fraction_dict, is_final_month)
1172
+
1173
+ should_run and groups[year][month].append(node)
1174
+
1175
+ return groups
1176
+
1177
+ grouped = reduce(group_node, range(len(valid_nodes)), defaultdict(lambda: defaultdict(list)))
1178
+
1179
+ iterated = {
1180
+ year: {inner_key: dict(group)} if inner_key else dict(group)
1181
+ for year, group in grouped.items()
1182
+ }
1183
+
1184
+ return dict(sorted(iterated.items())) if sort_result else iterated
1185
+
1186
+
1187
+ # --- Group nodes by last date ---
1188
+
1189
+
1190
+ def _get_last_date(datestrs: list[str]) -> Optional[str]:
1191
+ """
1192
+ Reduce a list of datestrs down to a single datestr by selecting the last one.
1193
+
1194
+ Parameters
1195
+ ----------
1196
+ datestrs : list
1197
+ A list of datestrings, e.g. the value of a node's `dates` field.
1198
+
1199
+ Returns
1200
+ -------
1201
+ str | None
1202
+ Returns the latest datestr or `None` if no valid datestr in list.
1203
+
1204
+ """
1205
+ return sorted(datestrs)[-1] if len(datestrs) > 0 else None
1206
+
1207
+
1208
+ def group_nodes_by_last_date(nodes: list) -> dict[str, list[dict]]:
1209
+ """
1210
+ Group a list of nodes by the last date of their `dates` field. Nodes with no `dates` field will be sorted into
1211
+ the `no-dates` group.
1212
+
1213
+ Parameters
1214
+ ----------
1215
+ nodes : list[dict]
1216
+ A list of Hestia format nodes.
1217
+
1218
+ Return
1219
+ ------
1220
+ dict
1221
+ A dictionary of nodes grouped by latest date, in the format `{date: list[node]}`.
1222
+ """
1223
+ DEFAULT_KEY = 'no-dates'
1224
+
1225
+ def group_by(group: dict, node: dict):
1226
+ dates = node.get('dates', [])
1227
+ key = _get_last_date(dates) or DEFAULT_KEY
1228
+ return group | {key: group.get(key, []) + [node]}
1229
+
1230
+ return reduce(group_by, nodes, {})
@@ -1,6 +1,6 @@
1
1
  from hestia_earth.schema import CycleFunctionalUnit, SiteSiteType, TermTermType
2
2
  from hestia_earth.utils.model import filter_list_term_type, find_term_match, find_primary_product
3
- from hestia_earth.utils.tools import list_sum, safe_parse_float, safe_parse_date
3
+ from hestia_earth.utils.tools import flatten, list_sum, safe_parse_float, safe_parse_date
4
4
 
5
5
  from ..log import logRequirements, debugValues
6
6
  from .lookup import factor_value
@@ -370,7 +370,7 @@ def is_organic(cycle: dict):
370
370
 
371
371
  def is_irrigated(cycle: dict):
372
372
  """
373
- Check if the `Cycle` is irrigated, i.e. if it contains an irrigated `Practice`.
373
+ Check if the `Cycle` is irrigated, i.e. if it contains an irrigated `Practice` with a value above `0`.
374
374
 
375
375
  Parameters
376
376
  ----------
@@ -382,7 +382,10 @@ def is_irrigated(cycle: dict):
382
382
  bool
383
383
  `True` if the `Cycle` is irrigated, `False` otherwise.
384
384
  """
385
- return list_sum(find_term_match(cycle.get('practices', []), 'irrigated').get('value', [])) > 0
385
+ irrigated_practices = [
386
+ p for p in cycle.get('practices', []) if p.get('term', {}).get('@id', '').startswith('irrigated')
387
+ ]
388
+ return list_sum(flatten([p.get('value', []) for p in irrigated_practices])) > 0
386
389
 
387
390
 
388
391
  def cycle_end_year(cycle: dict):
@@ -17,7 +17,7 @@ from .term import get_lookup_value
17
17
  # TODO: verify those values
18
18
  MAX_DEPTH = 1000
19
19
  OLDEST_DATE = '1800'
20
-
20
+ SOIL_TEXTURE_IDS = ['sandContent', 'siltContent', 'clayContent']
21
21
  MEASUREMENT_REDUCE = {
22
22
  'mean': lambda value: mean(value),
23
23
  'mode': lambda value: mode(value),
@@ -544,7 +544,7 @@ def get_residue_removed_or_burnt_terms():
544
544
  return list(map(lambda n: n["@id"], terms))
545
545
 
546
546
 
547
- def get_rice_plant_upland_terms():
547
+ def get_upland_rice_land_cover_terms():
548
548
  """
549
549
  Find all `landCover` terms related to upland rice in the Glossary.
550
550
 
@@ -566,6 +566,28 @@ def get_rice_plant_upland_terms():
566
566
  return list(map(lambda n: n["@id"], terms))
567
567
 
568
568
 
569
+ def get_upland_rice_crop_terms():
570
+ """
571
+ Find all `crop` terms related to upland rice in the Glossary.
572
+
573
+ Returns
574
+ -------
575
+ list
576
+ List of matching term `@id` as `str`.
577
+ """
578
+ terms = search({
579
+ "bool": {
580
+ "must": [
581
+ {"match": {"@type": SchemaType.TERM.value}},
582
+ {"match": {"termType.keyword": TermTermType.CROP.value}},
583
+ {"match_phrase": {"name": "rice"}},
584
+ {"match": {"name": "upland"}}
585
+ ],
586
+ }
587
+ }, limit=LIMIT)
588
+ return list(map(lambda n: n["@id"], terms))
589
+
590
+
569
591
  def get_pasture_system_terms():
570
592
  """
571
593
  Find all `system` terms with the name `pasture`:
@@ -581,3 +603,26 @@ def get_pasture_system_terms():
581
603
  'name': 'pasture'
582
604
  }, limit=LIMIT)
583
605
  return list(map(lambda n: n["@id"], terms))
606
+
607
+
608
+ def get_long_fallow_land_cover_terms():
609
+ """
610
+ Find all `landCover` terms with the name `long fallow`:
611
+ https://hestia.earth/glossary?termType=landCover&query=long%20fallow
612
+
613
+ Returns
614
+ -------
615
+ list
616
+ List of matching term `@id` as `str`.
617
+ """
618
+ terms = search({
619
+ "bool": {
620
+ "must": [
621
+ {"match": {"@type": SchemaType.TERM.value}},
622
+ {"match": {"termType.keyword": TermTermType.LANDCOVER.value}},
623
+ {"match_phrase_prefix": {"name": "long"}},
624
+ {"match": {"name": "fallow"}}
625
+ ],
626
+ }
627
+ }, limit=LIMIT)
628
+ return list(map(lambda n: n["@id"], terms))
@@ -1 +1 @@
1
- VERSION = '0.58.0'
1
+ VERSION = '0.59.0'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hestia-earth-models
3
- Version: 0.58.0
3
+ Version: 0.59.0
4
4
  Summary: Hestia's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
5
5
  Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
6
6
  Author: Hestia Team
@@ -55,19 +55,15 @@ run('no3ToGroundwaterSoilFlux', cycle_data)
55
55
 
56
56
  ### Using Spatial Models
57
57
 
58
- We have models that can gap-fill geographical information on a `Site`.
59
- If you want to use thse models:
58
+ We have models that can gap-fill geographical information on a `Site`. If you want to use these models:
60
59
  1. Install the library: `pip install hestia_earth.earth_engine`
61
60
  2. Follow the [Getting Started instructions](https://gitlab.com/hestia-earth/hestia-earth-engine#getting-started).
62
61
 
63
- ### Using Ecoinvent Model
62
+ ### Using the ecoinventV3 model
64
63
 
65
64
  ecoinvent is a consistent, transparent, and well validated life cycle inventory database.
66
65
  We use ecoinvent data to ascertain the environmental impacts of activities that occur outside of our system boundary, for example data on the environmental impacts of extracting oil and producing diesel, or the impacts of manufacturing plastics.
67
- To include these data in your environmental impact assessments using Hestia, you must own a suitable [ecoinvent license](https://ecoinvent.org/offerings/licences/).
68
66
 
69
- Please contact us at community@hestia.earth for instructions to download the required file to run the model.
70
- Once downloaded, copy the file in the _hestia_earth/models/data/ecoinventV3_ folder.
71
- Thank you!
67
+ The `ecoinventV3` model requires a valid [license](https://ecoinvent.org/offerings/licences/) to run. We are currently working on a way to enable users of this code with a valid ecoinvent licence to run these models themselves, but for now, these models are only available on the public platform.
72
68
 
73
69