hestia-earth-models 0.65.11__py3-none-any.whl → 0.67.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. hestia_earth/models/cache_sites.py +7 -9
  2. hestia_earth/models/cml2001Baseline/abioticResourceDepletionFossilFuels.py +23 -54
  3. hestia_earth/models/cml2001Baseline/resourceUseEnergyDepletionDuringCycle.py +152 -0
  4. hestia_earth/models/cml2001Baseline/resourceUseEnergyDepletionInputsProduction.py +40 -0
  5. hestia_earth/models/cml2001Baseline/resourceUseMineralsAndMetalsDuringCycle.py +80 -0
  6. hestia_earth/models/cml2001Baseline/resourceUseMineralsAndMetalsInputsProduction.py +40 -0
  7. hestia_earth/models/config/Cycle.json +34 -16
  8. hestia_earth/models/config/ImpactAssessment.json +1867 -1832
  9. hestia_earth/models/config/Site.json +4 -1
  10. hestia_earth/models/cycle/completeness/freshForage.py +10 -2
  11. hestia_earth/models/cycle/cropResidueManagement.py +3 -1
  12. hestia_earth/models/cycle/input/hestiaAggregatedData.py +13 -10
  13. hestia_earth/models/ecoinventV3/__init__.py +2 -1
  14. hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/__init__.py +4 -3
  15. hestia_earth/models/environmentalFootprintV3_1/environmentalFootprintSingleOverallScore.py +135 -0
  16. hestia_earth/models/environmentalFootprintV3_1/marineEutrophicationPotential.py +36 -0
  17. hestia_earth/models/environmentalFootprintV3_1/scarcityWeightedWaterUse.py +40 -0
  18. hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/soilQualityIndexLandTransformation.py +17 -6
  19. hestia_earth/models/geospatialDatabase/{aware.py → awareWaterBasinId.py} +1 -1
  20. hestia_earth/models/hestia/landCover.py +42 -34
  21. hestia_earth/models/hestia/residueRemoved.py +80 -0
  22. hestia_earth/models/hestia/resourceUse_utils.py +43 -29
  23. hestia_earth/models/impact_assessment/product/value.py +1 -1
  24. hestia_earth/models/ipcc2019/aboveGroundBiomass.py +34 -13
  25. hestia_earth/models/ipcc2019/belowGroundBiomass.py +33 -12
  26. hestia_earth/models/ipcc2019/ch4ToAirEntericFermentation.py +17 -8
  27. hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +7 -4
  28. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_1_utils.py +2 -1
  29. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_2_utils.py +29 -18
  30. hestia_earth/models/ipcc2019/pastureGrass_utils.py +8 -1
  31. hestia_earth/models/log.py +1 -1
  32. hestia_earth/models/mocking/search-results.json +872 -872
  33. hestia_earth/models/site/defaultMethodClassification.py +9 -2
  34. hestia_earth/models/site/defaultMethodClassificationDescription.py +4 -2
  35. hestia_earth/models/site/management.py +48 -30
  36. hestia_earth/models/site/pre_checks/cache_geospatialDatabase.py +19 -14
  37. hestia_earth/models/utils/__init__.py +6 -0
  38. hestia_earth/models/utils/aggregated.py +13 -10
  39. hestia_earth/models/utils/array_builders.py +4 -3
  40. hestia_earth/models/utils/blank_node.py +23 -13
  41. hestia_earth/models/utils/lookup.py +4 -2
  42. hestia_earth/models/utils/property.py +5 -2
  43. hestia_earth/models/version.py +1 -1
  44. hestia_earth/orchestrator/log.py +11 -0
  45. hestia_earth/orchestrator/models/__init__.py +8 -3
  46. hestia_earth/orchestrator/strategies/merge/merge_list.py +17 -6
  47. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/METADATA +1 -1
  48. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/RECORD +86 -69
  49. tests/models/cml2001Baseline/test_abioticResourceDepletionFossilFuels.py +51 -87
  50. tests/models/cml2001Baseline/test_resourceUseEnergyDepletionDuringCycle.py +103 -0
  51. tests/models/cml2001Baseline/test_resourceUseEnergyDepletionInputsProduction.py +23 -0
  52. tests/models/cml2001Baseline/test_resourceUseMineralsAndMetalsDuringCycle.py +58 -0
  53. tests/models/cml2001Baseline/test_resourceUseMineralsAndMetalsInputsProduction.py +23 -0
  54. tests/models/environmentalFootprintV3_1/test_environmentalFootprintSingleOverallScore.py +93 -0
  55. tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/test_freshwaterEcotoxicityPotentialCtue.py +6 -5
  56. tests/models/environmentalFootprintV3_1/test_marineEutrophicationPotential.py +27 -0
  57. tests/models/environmentalFootprintV3_1/test_scarcityWeightedWaterUse.py +32 -0
  58. tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/test_soilQualityIndexLandOccupation.py +4 -3
  59. tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/test_soilQualityIndexLandTransformation.py +8 -22
  60. tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/test_soilQualityIndexTotalLandUseEffects.py +4 -4
  61. tests/models/faostat2018/product/test_price.py +1 -1
  62. tests/models/geospatialDatabase/{test_aware.py → test_awareWaterBasinId.py} +1 -1
  63. tests/models/hestia/test_landCover.py +2 -1
  64. tests/models/hestia/test_landTransformation20YearAverageDuringCycle.py +2 -1
  65. tests/models/hestia/test_residueRemoved.py +20 -0
  66. tests/models/impact_assessment/test_emissions.py +0 -1
  67. tests/models/ipcc2019/test_aboveGroundBiomass.py +3 -1
  68. tests/models/ipcc2019/test_belowGroundBiomass.py +4 -2
  69. tests/models/ipcc2019/test_organicCarbonPerHa.py +94 -1
  70. tests/models/site/pre_checks/test_cache_geospatialDatabase.py +22 -0
  71. tests/models/site/test_defaultMethodClassification.py +6 -0
  72. tests/models/site/test_defaultMethodClassificationDescription.py +6 -0
  73. tests/models/site/test_management.py +4 -4
  74. tests/models/test_cache_sites.py +2 -2
  75. tests/models/test_config.py +3 -3
  76. tests/models/test_ecoinventV3.py +0 -1
  77. tests/models/utils/test_array_builders.py +2 -2
  78. tests/orchestrator/strategies/merge/test_merge_list.py +11 -1
  79. /hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/freshwaterEcotoxicityPotentialCtue.py +0 -0
  80. /hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/soilQualityIndexLandOccupation.py +0 -0
  81. /hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/soilQualityIndexTotalLandUseEffects.py +0 -0
  82. /hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/utils.py +0 -0
  83. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/LICENSE +0 -0
  84. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/WHEEL +0 -0
  85. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/top_level.txt +0 -0
  86. /tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/__init__.py +0 -0
@@ -5,6 +5,8 @@ When gap-filling `management` node on Site, the
 `defaultMethodClassification` and `defaultMethodClassificationDescription` fields become required.
 This model will use the first value in the `management` node.
 """
+from hestia_earth.schema import SiteDefaultMethodClassification
+
 from hestia_earth.models.log import logRequirements, logShouldRun
 from . import MODEL
 
@@ -20,12 +22,17 @@ MODEL_KEY = 'defaultMethodClassification'
 
 
 def _should_run(site: dict):
-    methodClassification = next((n.get('methodClassification') for n in site.get('management', [])), None)
+    has_management = bool(site.get('management', []))
+    methodClassification = next(
+        (n.get('methodClassification') for n in site.get('management', [])),
+        None
+    ) or SiteDefaultMethodClassification.MODELLED.value
 
     logRequirements(site, model=MODEL, model_key=MODEL_KEY,
+                    has_management=has_management,
                     methodClassification=methodClassification)
 
-    should_run = all([methodClassification])
+    should_run = all([has_management, methodClassification])
     logShouldRun(site, MODEL, None, should_run, model_key=MODEL_KEY)
     return should_run, methodClassification
 
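
With this change, a Site that has `management` nodes but no explicit `methodClassification` is no longer skipped: the value now falls back to the schema's `MODELLED` enum, and the model only requires that some `management` data exists. A minimal sketch of the new fallback, using hypothetical site data rather than calling the model itself:

    from hestia_earth.schema import SiteDefaultMethodClassification

    # hypothetical Site: has management nodes but none declare a methodClassification
    site = {'management': [{'term': {'@id': 'croplandTermId'}}]}

    methodClassification = next(
        (n.get('methodClassification') for n in site.get('management', [])),
        None
    ) or SiteDefaultMethodClassification.MODELLED.value
    # methodClassification now holds the MODELLED enum value instead of None
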
@@ -20,16 +20,18 @@ MODEL_KEY = 'defaultMethodClassificationDescription'
 
 
 def _should_run(site: dict):
+    has_management = bool(site.get('management', []))
     methodClassificationDescription = next((
         n.get('methodClassificationDescription')
         for n in site.get('management', [])
         if n.get('methodClassification')
-    ), None)
+    ), None) or 'Data calculated by merging real land use histories and modelled land use histories for each Site.'
 
     logRequirements(site, model=MODEL, model_key=MODEL_KEY,
+                    has_management=has_management,
                     methodClassificationDescription=methodClassificationDescription)
 
-    should_run = all([methodClassificationDescription])
+    should_run = all([has_management, methodClassificationDescription])
     logShouldRun(site, MODEL, None, should_run, model_key=MODEL_KEY)
     return should_run, methodClassificationDescription
 
@@ -10,7 +10,8 @@ tillage, cropResidueManagement and landUseManagement.
 All values are copied from the source node, except for crop and forage terms in which case the dates are copied from the
 cycle.
 
-Where `startDate` is missing from landCover products, gap-filling is attempted using `endDate` - `maximumCycleDuration`.
+Where `startDate` is missing from landCover products, gap-filling is attempted using
+`endDate` - `cycleDuration` (or `maximumCycleDuration` lookup).
 This is the `endDate` of the `landCover` product.
 This ensures no overlapping date ranges.
 If both `endDate` and `startDate` are missing from the product, these will be gap-filled from the `Cycle`.
@@ -18,9 +19,10 @@ If both `endDate` and `startDate` are missing from the product, these will be ga
 When nodes are chronologically consecutive with "% area" or "boolean" units and the same term and value, they are
 condensed into a single node to aid readability.
 """
+from typing import List
 from datetime import timedelta, datetime
 from functools import reduce
-from hestia_earth.schema import TermTermType, SiteSiteType
+from hestia_earth.schema import SchemaType, TermTermType, SiteSiteType, COMPLETENESS_MAPPING
 from hestia_earth.utils.lookup import column_name, get_table_value, download_lookup
 from hestia_earth.utils.model import filter_list_term_type
 from hestia_earth.utils.tools import safe_parse_float, flatten
@@ -42,7 +44,6 @@ REQUIREMENTS = {
         "related": {
             "Cycle": [{
                 "@type": "Cycle",
-                "startDate": "",
                 "endDate": "",
                 "products": [
                     {
@@ -71,7 +72,11 @@ REQUIREMENTS = {
                             "soilAmendment"
                         ]
                     }
-                ]
+                ],
+                "optional": {
+                    "startDate": "",
+                    "cycleDuration": ""
+                }
             }]
         }
     }
@@ -98,6 +103,14 @@ LOOKUPS = {
 }
 MODEL_KEY = 'management'
 
+_PRACTICES_TERM_TYPES = [
+    TermTermType.WATERREGIME,
+    TermTermType.TILLAGE,
+    TermTermType.CROPRESIDUEMANAGEMENT,
+    TermTermType.LANDUSEMANAGEMENT,
+    TermTermType.SYSTEM
+]
+_PRACTICES_COMPLETENESS_MAPPING = COMPLETENESS_MAPPING.get(SchemaType.PRACTICE.value, {})
 _ANIMAL_MANURE_USED_TERM_ID = "animalManureUsed"
 _INORGANIC_NITROGEN_FERTILISER_USED_TERM_ID = "inorganicNitrogenFertiliserUsed"
 _ORGANIC_FERTILISER_USED_TERM_ID = "organicFertiliserUsed"
@@ -147,11 +160,13 @@ def management(data: dict):
     return node
 
 
-def _get_maximum_cycle_duration(land_cover_id: str):
-    lookup = download_lookup("crop.csv")
-    return safe_parse_float(
-        get_table_value(lookup, column_name('landCoverTermId'), land_cover_id, column_name('maximumCycleDuration'))
-    )
+def _get_cycle_duration(cycle: dict, land_cover_id: str):
+    return cycle.get('cycleDuration') or safe_parse_float(get_table_value(
+        download_lookup("crop.csv"),
+        column_name('landCoverTermId'),
+        land_cover_id,
+        column_name('maximumCycleDuration')
+    ))
 
 
 def _gap_filled_date_only_str(date_str: str, mode: str = DatestrGapfillMode.END) -> str:
@@ -166,16 +181,16 @@ def _gap_filled_date_obj(date_str: str, mode: str = DatestrGapfillMode.END) -> d
 
 
 def _gap_filled_start_date(land_cover_id: str, end_date: str, cycle: dict) -> dict:
-    """If possible, gap-fill the startDate based on the endDate - maximumCycleDuration"""
-    maximum_cycle_duration = _get_maximum_cycle_duration(land_cover_id)
+    """If possible, gap-fill the startDate based on the endDate - cycleDuration"""
+    cycle_duration = _get_cycle_duration(cycle, land_cover_id)
     return {
         "startDate": max(
-            _gap_filled_date_obj(end_date) - timedelta(days=maximum_cycle_duration)
-            if maximum_cycle_duration else datetime.fromtimestamp(0),
+            _gap_filled_date_obj(end_date) - timedelta(days=cycle_duration)
+            if cycle_duration else datetime.fromtimestamp(0),
            _gap_filled_date_obj(cycle.get("startDate"), mode=DatestrGapfillMode.START)
            if cycle.get("startDate") else datetime.fromtimestamp(0)
        )
-    } if any([maximum_cycle_duration, cycle.get("startDate")]) else {}
 
 
 def _include_with_date_gap_fill(value: dict, keys: list) -> dict:
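
The gap-filled `startDate` now prefers the Cycle's own `cycleDuration` and only falls back to the `maximumCycleDuration` lookup, and the result is still clamped so it never precedes the Cycle `startDate`. A rough illustration of the date arithmetic with invented values, using plain `datetime` rather than the module's helpers:

    from datetime import datetime, timedelta

    end_date = datetime(2020, 12, 31)
    cycle_duration = 120                 # days, e.g. taken from cycle['cycleDuration']
    cycle_start = datetime(2020, 10, 1)

    # gap-filled startDate = the later of (endDate - duration) and the Cycle startDate
    start_date = max(end_date - timedelta(days=cycle_duration), cycle_start)
    print(start_date.date())             # 2020-10-01, clamped to the Cycle startDate
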
@@ -217,12 +232,21 @@ def _copy_item_if_exists(source: dict, keys: list[str] = None, dest: dict = None
     return reduce(lambda p, c: p | ({c: source[c]} if source.get(c) else {}), keys or [], dest or {})
 
 
-def _get_relevant_items(cycle: dict, item_name: str, relevant_terms: list):
+def _get_relevant_items(cycle: dict, item_name: str, term_types: List[TermTermType], completeness_mapping: dict = {}):
     """
     Get items from the list of cycles with any of the relevant terms.
     Also adds dates from Cycle.
     """
-    items = [
+    # filter term types that are no complete
+    complete_term_types = term_types if not completeness_mapping else [
+        term_type for term_type in term_types
+        if any([
+            not completeness_mapping.get(term_type.value),
+            cycle.get('completeness', {}).get(completeness_mapping.get(term_type.value), False)
+        ])
+    ]
+    blank_nodes = filter_list_term_type(cycle.get(item_name, []), complete_term_types)
+    return [
         _include_with_date_gap_fill(cycle, ["startDate", "endDate"]) |
         _include(
             _gap_filled_start_date(
@@ -233,20 +257,19 @@ def _get_relevant_items(cycle: dict, item_name: str, relevant_terms: list):
                 "startDate"
             ) |
             item
-        for item in filter_list_term_type(cycle.get(item_name, []), relevant_terms)
+        for item in blank_nodes
     ]
-    return items
 
 
 def _process_rule(node: dict, term: dict) -> list:
-    relevant_terms = []
+    term_types = []
     for column, condition, new_term in _INPUT_RULES[term.get('termType')]:
         lookup_result = get_lookup_value(term, LOOKUPS[column], model=MODEL, term=term.get('@id'), model_key=MODEL_KEY)
 
         if condition(lookup_result):
-            relevant_terms.append(node | {'id': new_term})
+            term_types.append(node | {'id': new_term})
 
-    return relevant_terms
+    return term_types
 
 
 def _run_from_inputs(site: dict, cycle: dict) -> list:
@@ -307,7 +330,7 @@ def _run_from_landCover(cycle: dict, crop_forage_products: list):
         )) for product in _get_relevant_items(
             cycle=cycle,
             item_name="products",
-            relevant_terms=[TermTermType.LANDCOVER]
+            term_types=[TermTermType.LANDCOVER]
         )
     ]
     return land_cover_products + _run_products(
@@ -337,7 +360,7 @@ def _run_from_crop_forage(cycle: dict, site: dict):
     products = _get_relevant_items(
         cycle=cycle,
         item_name="products",
-        relevant_terms=[TermTermType.CROP, TermTermType.FORAGE]
+        term_types=[TermTermType.CROP, TermTermType.FORAGE]
     ) if site.get("siteType", "") == SiteSiteType.CROPLAND.value else []
     # only take products with a matching landCover term
     products = [p for p in products if get_landCover_term_id(p.get('term', {}))]
@@ -372,13 +395,8 @@ def _run_from_practices(cycle: dict):
         ) for practice in _get_relevant_items(
             cycle=cycle,
             item_name="practices",
-            relevant_terms=[
-                TermTermType.WATERREGIME,
-                TermTermType.TILLAGE,
-                TermTermType.CROPRESIDUEMANAGEMENT,
-                TermTermType.LANDUSEMANAGEMENT,
-                TermTermType.SYSTEM
-            ]
+            term_types=_PRACTICES_TERM_TYPES,
+            completeness_mapping=_PRACTICES_COMPLETENESS_MAPPING
         )
     ]
     practices = list(map(_map_to_value, filter(_should_run_practice, practices)))
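
`_run_from_practices` now honours the Cycle `completeness` flags: a practice term type that appears in the schema's `COMPLETENESS_MAPPING` is only used when its completeness field is `True`, while term types without a mapping are always kept. A hedged sketch of that gating rule with a made-up mapping and completeness block:

    from hestia_earth.schema import TermTermType

    # hypothetical mapping of practice termType -> Cycle completeness field
    completeness_mapping = {'tillage': 'tillage', 'waterRegime': 'water'}
    cycle = {'completeness': {'tillage': True, 'water': False}}

    term_types = [TermTermType.TILLAGE, TermTermType.WATERREGIME, TermTermType.SYSTEM]
    kept = [
        term_type for term_type in term_types
        if not completeness_mapping.get(term_type.value)  # unmapped term types are always kept
        or cycle['completeness'].get(completeness_mapping[term_type.value], False)
    ]
    # kept == [TermTermType.TILLAGE, TermTermType.SYSTEM]
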
@@ -93,41 +93,46 @@ def _is_type(value: dict, ee_type: str):
     ]) if isinstance(params, list) else params.get('ee_type') == ee_type
 
 
-def list_collections(years: list = [], include_region: bool = False, years_only: bool = False):
+def list_rasters(years: list = [], years_only: bool = False):
     ee_params = list_ee_params()
     # only cache `raster` results as can be combined in a single query
     rasters = [value for value in ee_params if _is_type(value, 'raster')]
     rasters = _extend_collections(rasters, years or [])
     rasters = [raster for raster in rasters if not years_only or _is_collection_by_year(raster)]
 
+    return rasters
+
+
+def list_vectors(sites: list):
+    ee_params = list_ee_params()
+
+    vectors = [value for value in ee_params if _is_type(value, 'vector')]
     vectors = [
-        value for value in ee_params if _is_type(value, 'vector') and (
-            include_region or not value.get('params').get('collection', '').startswith('gadm36')
-        )
+        value for value in vectors
+        # name of the model is the key in the data. If the key is present in all sites, we don't need to query
+        if all([not s.get(value.get('name')) for s in sites])
     ]
     # no vectors are running with specific years
-    vectors = [] if years_only else _extend_collections(vectors)
+    vectors = _extend_collections(vectors)
 
-    return (rasters, vectors)
+    return vectors
 
 
 def _cache_results(site: dict, area_size: float):
     # to fetch data related to the year
     years = cached_value(site, key=CACHE_YEARS_KEY, default=[])
-    include_region = all([has_coordinates(site), not site.get('region')])
-    rasters, vectors = list_collections(years, include_region=include_region)
+    rasters = list_rasters(years)
+    vectors = list_vectors([site])
 
     raster_results = _run_query({
         'ee_type': 'raster',
-        'collections': rasters,
-        **geospatial_data(site)
-    })
+        'collections': rasters
+    } | geospatial_data(site)) if rasters else []
 
     vector_results = _run_query({
         'ee_type': 'vector',
-        'collections': vectors,
-        **geospatial_data(site)
-    })
+        'collections': vectors
+    } | geospatial_data(site)) if vectors else []
 
     return cache_site_results(raster_results + vector_results, rasters + vectors, area_size)
 
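
`list_vectors` now receives the sites being cached and skips any vector collection whose result key (the model `name`) is already present on every site, and `_cache_results` skips the raster or vector query entirely when there is nothing left to fetch. A small sketch of the skip rule with invented data:

    # hypothetical vector params: 'name' is the key the cached result is stored under
    vectors = [{'name': 'awareWaterBasinId'}, {'name': 'region'}]
    sites = [{'awareWaterBasinId': '123'}, {'awareWaterBasinId': '456'}]

    to_query = [
        v for v in vectors
        # only query a collection when no site already carries its value
        if all(not site.get(v['name']) for site in sites)
    ]
    # to_query == [{'name': 'region'}]
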
@@ -165,6 +165,12 @@ def last_day_of_month(year: int, month: int):
     )
 
 
+def current_date(): return datetime.datetime.now().date().strftime('%Y-%m-%d')
+
+
+def current_year(): return int(current_date()[:4])
+
+
 def flatten_args(args) -> list:
     """
     Flatten the input args into a single list.
@@ -6,7 +6,8 @@ from hestia_earth.utils.model import find_term_match, linked_node
 from hestia_earth.utils.tools import safe_parse_date, non_empty_list
 
 from hestia_earth.models.log import debugValues, logShouldRun
-from hestia_earth.models.utils.cycle import is_organic
+from . import current_year
+from .cycle import is_organic
 
 MODEL_KEY = 'impactAssessment'
 MATCH_WORLD_QUERY = {'match': {'country.name.keyword': {'query': 'World', 'boost': 1}}}
@@ -14,7 +15,7 @@ MATCH_WORLD_QUERY = {'match': {'country.name.keyword': {'query': 'World', 'boost
 
 def aggregated_end_date(end_date: str):
     year = safe_parse_date(end_date).year
-    return round(math.floor(year / 10) * 10) + 9
+    return min([round(math.floor(year / 10) * 10) + 9, current_year()])
 
 
 def _match_region_country(region: dict, country: dict):
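
`aggregated_end_date` still rounds an end date up to the last year of its decade, but the result is now capped at the current year so aggregated impact data is never looked up for a future date. A simplified worked example (the cap naturally depends on when it is evaluated):

    import math
    from datetime import datetime

    def decade_end(year: int) -> int:
        return min(math.floor(year / 10) * 10 + 9, datetime.now().year)

    decade_end(2014)  # 2019, unchanged by the cap
    decade_end(2023)  # previously 2029; now capped at the current year
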
@@ -36,8 +37,7 @@ def _match_region_country(region: dict, country: dict):
     }
 
 
-def find_closest_impact(cycle: dict, end_date: str, input: dict, region: dict, country: dict, must_queries=[]):
-    term = input.get('term', {})
+def find_closest_impact(cycle: dict, end_date: str, term: dict, region: dict, country: dict, must_queries=[]):
     query = {
         'bool': {
             'must': non_empty_list([
@@ -74,21 +74,24 @@ def find_closest_impact(cycle: dict, end_date: str, input: dict, region: dict, c
 
 def _link_input_to_impact(model: str, cycle: dict, date: int):
     def run(input: dict):
-        term_id = input.get('term', {}).get('@id')
+        term = input.get('term', {})
+        term_id = term.get('@id')
         region = input.get('region')
         country = input.get('country')
-        impact = find_closest_impact(cycle, date, input, region, country)
+        impact = find_closest_impact(cycle, date, term, region, country)
 
+        search_by_region_id = (region or country or {}).get('@id') or 'region-world'
         debugValues(cycle, model=model, term=term_id, key=MODEL_KEY,
-                    input_region=(region or {}).get('@id'),
-                    input_country=(country or {}).get('@id'),
-                    impact=(impact or {}).get('@id'))
+                    search_by_input_term_id=term_id,
+                    search_by_region_id=search_by_region_id,
+                    search_by_end_date=str(date),
+                    impact_assessment_id_found=(impact or {}).get('@id'))
 
         should_run = all([impact is not None])
         logShouldRun(cycle, model, term_id, should_run)
        logShouldRun(cycle, model, term_id, should_run, key=MODEL_KEY) # show specifically under Input
 
-        return {**input, MODEL_KEY: linked_node(impact), 'impactAssessmentIsProxy': True} if impact else None
+        return input | {MODEL_KEY: linked_node(impact), 'impactAssessmentIsProxy': True} if impact else None
     return run
 
 
@@ -527,12 +527,13 @@ def avg_run_in_rowwise(arr: NDArray, n: int):
     return avg_run_in_columnwise(arr.transpose(), n).transpose()
 
 
-def gen_seed(node: dict) -> int:
+def gen_seed(node: dict, *args: tuple[str]) -> int:
     """
-    Generate a seed based on a node's `@id` so that rng is the same each time the model is re-run.
+    Generate a seed based on a node's `@id` and optional args so that rng is the same each time the model is re-run.
     """
     node_id = node.get("@id", "")
-    hashed = hashlib.shake_128(node_id.encode(), usedforsecurity=False).hexdigest(4)
+    seed_str = "".join([node_id] + [str(arg) for arg in args])
+    hashed = hashlib.shake_128(seed_str.encode(), usedforsecurity=False).hexdigest(4)
     return abs(int(hashed, 16))
 
 
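
`gen_seed` can now fold extra arguments into the hash, so models seeded from the same node but with different qualifiers (for example a model name or a year) get distinct yet still reproducible random streams. A standalone sketch mirroring the function body rather than importing it:

    import hashlib

    def gen_seed(node: dict, *args) -> int:
        seed_str = "".join([node.get("@id", "")] + [str(arg) for arg in args])
        hashed = hashlib.shake_128(seed_str.encode(), usedforsecurity=False).hexdigest(4)
        return abs(int(hashed, 16))

    node = {"@id": "site-123"}
    gen_seed(node, "organicCarbonPerHa", 2020) == gen_seed(node, "organicCarbonPerHa", 2020)  # True: reproducible
    gen_seed(node, 2020) != gen_seed(node, 2021)  # distinct seed strings give distinct seeds (barring a hash collision)
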
@@ -35,7 +35,7 @@ from .lookup import (
     is_product_id_allowed, is_product_termType_allowed,
     is_input_id_allowed, is_input_termType_allowed, _node_value
 )
-from .property import get_node_property, get_node_property_value, find_term_property
+from .property import get_node_property, get_node_property_value
 from .term import get_lookup_value
 from ..log import debugValues, log_as_table
 
@@ -270,7 +270,8 @@ def get_total_value_converted_with_min_ratio(
     model: str, term: str, node: dict = {},
     blank_nodes: list = [],
     prop_id: str = 'energyContentHigherHeatingValue',
-    min_ratio: float = 0.8
+    min_ratio: float = 0.8,
+    is_sum: bool = True
 ):
     values = [
         (
@@ -301,9 +302,14 @@
     debugValues(node, model=model, term=term,
                 **logs)
 
-    return list_sum([
+    total_converted_value = list_sum([
         value * prop_value for term_id, value, prop_value in values if all([value, prop_value])
-    ]) * total_value / total_value_with_property if total_value_ratio >= min_ratio else None
+    ])
+
+    return (
+        total_converted_value * total_value / total_value_with_property if is_sum
+        else total_converted_value / total_value_with_property
+    ) if total_value_ratio >= min_ratio else None
 
 
 def get_N_total(nodes: list) -> list:
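
With the default `is_sum=True` the helper behaves as before and scales the property-converted total back up to the full `total_value`; with `is_sum=False` it instead returns an average-style figure by dividing only by the value that actually carried the property. A simplified numeric sketch with invented numbers, ignoring the `min_ratio` guard:

    values = [(10, 2.0), (30, 4.0)]   # (value, property value) pairs that have the property
    total_value = 50                  # includes 10 units with no property data
    total_value_with_property = 40

    total_converted_value = sum(value * prop for value, prop in values)   # 10*2 + 30*4 = 140

    as_sum = total_converted_value * total_value / total_value_with_property   # 175.0
    as_average = total_converted_value / total_value_with_property             # 3.5
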
@@ -1462,16 +1468,20 @@ def _convert_via_property(node: dict, node_value: Union[int, float], property_fi
 
     Parameters
     ----------
-    node: a dict containing a term
-    node_value: value to be converted as float or int
-    property_field: str such as "density"
+    node: dict
+        Blank node containing a term
+    node_value: int | float
+        Value to be converted as float or int
+    property_field: str
+        E.g., "density"
 
-    Returns float or None
+    Returns
     -------
+    Float or None
     """
-    node_property = find_term_property(node, property_field, default={}, keep_in_memory=True)
-    node_property_value = safe_parse_float(node_property.get("value", 0))
-
+    node_property_value = get_node_property_value(
+        model=None, node=node, prop_id=property_field, default=0, handle_percents=False
+    )
     return node_value * node_property_value if node_value is not None and bool(node_property_value) else None
 
 
@@ -1480,7 +1490,7 @@ def convert_unit(node, dest_unit: Units, node_value: Union[int, float] = None) -
     Convert a number `value` inside a node or a optional `node_value` belonging to a term `node`, to unit `dest_unit`
     using the ATOMIC_WEIGHT_CONVERSIONS map or failing that, the PROPERTY_UNITS_CONVERSIONS map and lookups
     """
-    src_unit = node.get("units", "")
+    src_unit = node.get("units") or node.get('term', {}).get('units', "")
 
     node_value = _node_value(node) if node_value is None else node_value
 
@@ -1499,7 +1509,7 @@ def convert_unit_properties(node_value: Union[int, float], node: dict, dest_unit
     Uses cached calls to download_hestia() internally for speedup
     Returns None if no conversion possible.
     """
-    src_unit = node.get('units', '')
+    src_unit = node.get("units") or node.get('term', {}).get('units', "")
     conversions = PROPERTY_UNITS_CONVERSIONS.get(src_unit, {}).get(dest_unit.value, [])
     return reduce(
         lambda value, conversion_property_field: _convert_via_property(node, value, conversion_property_field),
@@ -49,22 +49,24 @@ def all_factor_value(
 ):
     values = list(map(_factor_value(model, term_id, lookup_name, lookup_col, grouped_key), blank_nodes))
 
+    has_values = len(values) > 0
     missing_values = set([v.get('id') for v in values if v.get('value') is not None and v.get('coefficient') is None])
     all_with_factors = all([v.get('coefficient') is not None for v in values if v.get('value') is not None])
 
     for missing_value in missing_values:
-        debugMissingLookup(lookup_name, 'termid', missing_value, lookup_col, None)
+        debugMissingLookup(lookup_name, 'termid', missing_value, lookup_col, None, model=model, term=term_id)
 
     debugValues(node, model=model, term=term_id,
                 all_with_factors=all_with_factors,
                 missing_lookup_factor=';'.join(missing_values),
+                has_values=has_values,
                 values_used=log_as_table(values))
 
     values = [float((v.get('value') or 0) * (v.get('coefficient') or 0)) for v in values]
 
     # fail if some factors are missing
     return None if not all_with_factors else (
-        list_sum(values) if len(values) > 0 else default_no_values
+        list_sum(values) if has_values else default_no_values
     )
 
 
@@ -57,7 +57,8 @@ def find_term_property(term, property: str, default=None, keep_in_memory=False)
     return find_term_match(props, property, default)
 
 
-def get_node_property(node: dict, property: str, find_default_property: bool = True):
+def get_node_property(node: dict, property: str, find_default_property: bool = True,
+                      keep_in_memory: bool = False) -> dict:
     """
     Get the property by `@id` linked to the Blank Node in the glossary.
 
@@ -73,6 +74,8 @@ def get_node_property(node: dict, property: str, find_default_property: bool = T
         The `term.@id` of the property. Example: `nitrogenContent`.
     find_default_property : bool
         Default to fetching the property from the `defaultProperties` of the `Term`.
+    keep_in_memory:
+        If True and find_default_property is True, will cache this term_id call to api
 
     Returns
     -------
@@ -80,7 +83,7 @@ def get_node_property(node: dict, property: str, find_default_property: bool = T
         The property if found, `None` otherwise.
     """
     prop = find_term_match(node.get('properties', []), property, None)
-    return find_term_property(node.get('term', {}), property, {}) if all([
+    return find_term_property(node.get('term', {}), property, {}, keep_in_memory) if all([
         find_default_property,
         prop is None
     ]) else (prop or {})
@@ -1 +1 @@
-VERSION = '0.65.11'
+VERSION = '0.67.0'
@@ -1,5 +1,7 @@
 import os
 import sys
+import platform
+import resource
 import logging
 
 LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
@@ -42,6 +44,15 @@ if LOG_FILENAME is not None:
 def _join_args(**kwargs): return ', '.join([f"{key}={value}" for key, value in kwargs.items()])
 
 
+def log_memory_usage(**kwargs):
+    factor = 1024 * (
+        1024 if platform.system() in ['Darwin', 'Windows'] else 1
+    )
+    value = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / factor
+    extra = (', ' + _join_args(**kwargs)) if len(kwargs.keys()) > 0 else ''
+    logger.info('memory used=%s, unit=MB' + extra, value)
+
+
 def _log_node_suffix(node: dict = {}):
     node_type = node.get('@type', node.get('type')) if node else None
     node_id = node.get('@id', node.get('id', node.get('term', {}).get('@id'))) if node else None
@@ -7,7 +7,7 @@ from copy import deepcopy
 from hestia_earth.utils.tools import non_empty_list
 
 from hestia_earth.models.version import VERSION
-from ..log import logger
+from ..log import logger, log_memory_usage
 from ..utils import get_required_model_param, _snakecase
 from ..strategies.run import should_run
 from ..strategies.merge import merge
@@ -76,10 +76,15 @@ def _run_post_checks(data: dict):
 
 
 def _run_model(data: dict, model: dict, all_models: list):
-    module = _import_model(get_required_model_param(model, 'model'))
-    # if no value is provided, use all the models but this one
+    model_id = get_required_model_param(model, 'model')
     model_value = model.get('value') or _list_except_item(all_models, model)
+    log_memory_usage(model_model=model_id, model_value=model_value, step='before')
+
+    module = _import_model(model_id.replace('-', '_'))
+    # if no value is provided, use all the models but this one
     result = module.get('run')(model_value, data)
+
+    log_memory_usage(model_model=model_id, model_value=model_value, step='after')
     return {'data': data, 'model': model, 'version': module.get('version'), 'result': result}
 
 
@@ -1,5 +1,7 @@
 import pydash
+from datetime import datetime
 from hestia_earth.schema import UNIQUENESS_FIELDS
+from hestia_earth.utils.tools import safe_parse_date
 
 from hestia_earth.orchestrator.utils import _non_empty_list, update_node_version
 from .merge_node import merge as merge_node
@@ -31,11 +33,17 @@ def _match_list_el(source: list, dest: list, key: str):
     return src_value == dest_value
 
 
-def _match_el(source: dict, dest: dict, keys: list):
+def _get_value(data: dict, key: str, merge_args: dict = {}):
+    value = pydash.objects.get(data, key)
+    date = safe_parse_date(value) if key in ['startDate', 'endDate'] else None
+    return datetime.strftime(date, merge_args.get('matchDatesFormat', '%Y-%m-%d')) if date else value
+
+
+def _match_el(source: dict, dest: dict, keys: list, merge_args: dict = {}):
     def match(key: str):
         keys = key.split('.')
-        src_value = pydash.objects.get(source, key)
-        dest_value = pydash.objects.get(dest, key)
+        src_value = _get_value(source, key, merge_args)
+        dest_value = _get_value(dest, key, merge_args)
         is_list = len(keys) >= 2 and (
             isinstance(pydash.objects.get(source, keys[0]), list) or
             isinstance(pydash.objects.get(dest, keys[0]), list)
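
When matching list elements on `startDate` or `endDate`, both sides are now parsed and re-formatted with the merge configuration's `matchDatesFormat` (default `%Y-%m-%d`), so for example "2020-01-01" and "2020" can be treated as the same node when the orchestrator is configured to match on year only. A small sketch of the normalisation, assuming `safe_parse_date` accepts both date forms:

    from datetime import datetime
    from hestia_earth.utils.tools import safe_parse_date

    def normalise(value: str, match_format: str = '%Y-%m-%d') -> str:
        date = safe_parse_date(value)
        return datetime.strftime(date, match_format) if date else value

    normalise('2020-01-01', '%Y') == normalise('2020', '%Y')  # both normalise to '2020'
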
@@ -68,7 +76,7 @@ def _handle_local_property(values: list, properties: list, local_id: str):
     return properties
 
 
-def _find_match_el_index(values: list, el: dict, same_methodModel: bool, model: dict, node_type: str):
+def _find_match_el_index(values: list, el: dict, same_methodModel: bool, model: dict, node_type: str, merge_args: dict):
     """
     Find an element in the values that match the new element, based on the unique properties.
     To find a matching element:
@@ -83,7 +91,10 @@
     ]
     properties = _handle_local_property(values, properties, 'impactAssessment.id')
 
-    return next((i for i in range(len(values)) if _match_el(values[i], el, properties)), None) if properties else None
+    return next(
+        (i for i in range(len(values)) if _match_el(values[i], el, properties, merge_args)),
+        None
+    ) if properties else None
 
 
 def merge(source: list, merge_with: list, version: str, model: dict = {}, merge_args: dict = {}, node_type: str = ''):
@@ -95,7 +106,7 @@ def merge(source: list, merge_with: list, version: str, model: dict = {}, merge_
     skip_same_term = merge_args.get('skipSameTerm', False)
 
     for el in _non_empty_list(merge_with):
-        source_index = _find_match_el_index(source, el, same_methodModel, model, node_type)
+        source_index = _find_match_el_index(source, el, same_methodModel, model, node_type, merge_args)
         if source_index is None:
             source.append(update_node_version(version, el))
         elif not skip_same_term:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hestia-earth-models
-Version: 0.65.11
+Version: 0.67.0
 Summary: HESTIA's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
 Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
 Author: HESTIA Team