hestia-earth-models 0.74.3__py3-none-any.whl → 0.74.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)
  1. hestia_earth/models/cml2001Baseline/abioticResourceDepletionMineralsAndMetals.py +0 -1
  2. hestia_earth/models/config/Cycle.json +15 -0
  3. hestia_earth/models/config/ImpactAssessment.json +30 -11
  4. hestia_earth/models/cycle/animal/input/hestiaAggregatedData.py +3 -3
  5. hestia_earth/models/cycle/completeness/seed.py +1 -1
  6. hestia_earth/models/cycle/input/hestiaAggregatedData.py +25 -16
  7. hestia_earth/models/data/hestiaAggregatedData/__init__.py +73 -0
  8. hestia_earth/models/environmentalFootprintV3_1/scarcityWeightedWaterUse.py +1 -1
  9. hestia_earth/models/environmentalFootprintV3_1/soilQualityIndexLandOccupation.py +5 -6
  10. hestia_earth/models/environmentalFootprintV3_1/soilQualityIndexLandTransformation.py +10 -13
  11. hestia_earth/models/fantkeEtAl2016/damageToHumanHealthParticulateMatterFormation.py +1 -1
  12. hestia_earth/models/hestia/landCover.py +24 -0
  13. hestia_earth/models/hestia/landOccupationDuringCycle.py +80 -51
  14. hestia_earth/models/hestia/landTransformation100YearAverageDuringCycle.py +7 -1
  15. hestia_earth/models/hestia/landTransformation20YearAverageDuringCycle.py +7 -1
  16. hestia_earth/models/hestia/resourceUse_utils.py +58 -119
  17. hestia_earth/models/hestia/waterSalinity.py +57 -12
  18. hestia_earth/models/impact_assessment/post_checks/__init__.py +3 -2
  19. hestia_earth/models/impact_assessment/post_checks/remove_cache_fields.py +9 -0
  20. hestia_earth/models/impact_assessment/pre_checks/cache_emissionsResourceUse.py +21 -0
  21. hestia_earth/models/impact_assessment/pre_checks/cycle.py +5 -0
  22. hestia_earth/models/ipcc2019/co2ToAirAboveGroundBiomassStockChange.py +6 -64
  23. hestia_earth/models/ipcc2019/co2ToAirBelowGroundBiomassStockChange.py +9 -87
  24. hestia_earth/models/ipcc2019/co2ToAirBiocharStockChange.py +140 -0
  25. hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +329 -217
  26. hestia_earth/models/ipcc2019/co2ToAirSoilOrganicCarbonStockChange.py +10 -87
  27. hestia_earth/models/mocking/__init__.py +2 -2
  28. hestia_earth/models/mocking/mock_search.py +20 -10
  29. hestia_earth/models/mocking/search-results.json +1 -7679
  30. hestia_earth/models/pooreNemecek2018/landOccupationDuringCycle.py +8 -7
  31. hestia_earth/models/poschEtAl2008/terrestrialAcidificationPotentialAccumulatedExceedance.py +1 -1
  32. hestia_earth/models/poschEtAl2008/terrestrialEutrophicationPotentialAccumulatedExceedance.py +1 -1
  33. hestia_earth/models/preload_requests.py +18 -4
  34. hestia_earth/models/schmidt2007/utils.py +3 -3
  35. hestia_earth/models/utils/__init__.py +4 -1
  36. hestia_earth/models/utils/aggregated.py +21 -68
  37. hestia_earth/models/utils/cycle.py +3 -3
  38. hestia_earth/models/utils/impact_assessment.py +45 -41
  39. hestia_earth/models/utils/lookup.py +92 -67
  40. hestia_earth/models/version.py +1 -1
  41. hestia_earth/orchestrator/models/__init__.py +47 -10
  42. hestia_earth/orchestrator/models/transformations.py +3 -1
  43. hestia_earth/orchestrator/strategies/merge/__init__.py +1 -2
  44. hestia_earth/orchestrator/strategies/merge/merge_list.py +31 -8
  45. hestia_earth/orchestrator/utils.py +29 -0
  46. {hestia_earth_models-0.74.3.dist-info → hestia_earth_models-0.74.5.dist-info}/METADATA +2 -3
  47. {hestia_earth_models-0.74.3.dist-info → hestia_earth_models-0.74.5.dist-info}/RECORD +62 -55
  48. tests/models/cycle/animal/input/test_hestiaAggregatedData.py +3 -3
  49. tests/models/cycle/input/test_hestiaAggregatedData.py +9 -18
  50. tests/models/data/__init__.py +0 -0
  51. tests/models/data/test_hestiaAggregatedData.py +32 -0
  52. tests/models/hestia/test_landCover.py +32 -1
  53. tests/models/hestia/test_waterSalinity.py +16 -4
  54. tests/models/ipcc2019/test_co2ToAirAboveGroundBiomassStockChange.py +1 -6
  55. tests/models/ipcc2019/test_co2ToAirBelowGroundBiomassStockChange.py +1 -6
  56. tests/models/ipcc2019/test_co2ToAirBiocharStockChange.py +90 -0
  57. tests/models/ipcc2019/test_co2ToAirSoilOrganicCarbonStockChange.py +1 -6
  58. tests/models/pooreNemecek2018/test_landOccupationDuringCycle.py +1 -0
  59. tests/orchestrator/strategies/merge/test_merge_list.py +5 -0
  60. {hestia_earth_models-0.74.3.dist-info → hestia_earth_models-0.74.5.dist-info}/LICENSE +0 -0
  61. {hestia_earth_models-0.74.3.dist-info → hestia_earth_models-0.74.5.dist-info}/WHEEL +0 -0
  62. {hestia_earth_models-0.74.3.dist-info → hestia_earth_models-0.74.5.dist-info}/top_level.txt +0 -0
hestia_earth/models/utils/lookup.py

@@ -5,8 +5,7 @@ from hestia_earth.utils.lookup import (
     get_table_value,
     column_name,
     extract_grouped_data,
-    _get_single_table_value,
-    lookup_term_ids
+    _get_single_table_value
 )
 from hestia_earth.utils.tools import list_sum, safe_parse_float, non_empty_list
 
@@ -18,59 +17,99 @@ def _node_value(node):
     return list_sum(value, default=None) if isinstance(value, list) else value
 
 
-def _factor_value(model: str, term_id: str, lookup_name: str, lookup_col: str, grouped_key: Optional[str] = None):
+def _log_value_coeff(log_node: dict, value: float, coefficient: float, **log_args):
+    if value is not None and coefficient:
+        debugValues(log_node, value=value, coefficient=coefficient, **log_args)
+
+
+def _factor_value(
+    log_node: dict,
+    model: str,
+    term_id: str,
+    lookup_name: str,
+    lookup_col: str,
+    group_key: Optional[str] = None,
+    default_world_value: Optional[bool] = False
+):
     @lru_cache()
     def get_coefficient(node_term_id: str, grouped_data_key: str):
-        coefficient = get_region_lookup_value(lookup_name, node_term_id, lookup_col, model=model, term=term_id)
+        coefficient = get_region_lookup_value(
+            lookup_name=lookup_name,
+            term_id=node_term_id,
+            column=lookup_col,
+            fallback_world=default_world_value,
+            model=model, term=term_id
+        )
         # value is either a number or matching between a model and a value (restrict value to specific model only)
         return safe_parse_float(
            extract_grouped_data(coefficient, grouped_data_key),
            default=None
        ) if ':' in str(coefficient) else safe_parse_float(coefficient, default=None)
 
-    def get_value(data: dict):
-        node_term_id = data.get('term', {}).get('@id')
-        grouped_data_key = grouped_key or data.get('methodModel', {}).get('@id')
-        value = _node_value(data)
+    def get_value(blank_node: dict):
+        node_term_id = blank_node.get('term', {}).get('@id')
+        grouped_data_key = group_key or blank_node.get('methodModel', {}).get('@id')
+        value = _node_value(blank_node)
         coefficient = get_coefficient(node_term_id, grouped_data_key)
-        if value is not None and coefficient is not None:
-            if model:
-                debugValues(data, model=model, term=term_id,
-                            node=node_term_id,
-                            operation=data.get('operation', {}).get('@id'),
-                            value=value,
-                            coefficient=coefficient)
+        if model:
+            _log_value_coeff(log_node=log_node, value=value, coefficient=coefficient,
+                             model=model,
+                             term=term_id,
+                             node=node_term_id,
+                             operation=blank_node.get('operation', {}).get('@id'))
         return {'id': node_term_id, 'value': value, 'coefficient': coefficient}
     return get_value
 
 
-def region_factor_value(model: str, term_id: str, lookup_name: str, lookup_term_id: str, group_key: str = None):
+def region_factor_value(
+    log_node: dict,
+    model: str,
+    term_id: str,
+    lookup_name: str,
+    lookup_term_id: str,
+    group_key: Optional[str] = None,
+    default_world_value: Optional[bool] = False
+):
     @lru_cache()
     def get_coefficient(node_term_id: str, region_term_id: str):
-        coefficient = get_region_lookup_value(lookup_name, region_term_id, node_term_id, model=model, term=term_id)
+        coefficient = get_region_lookup_value(
+            lookup_name=lookup_name,
+            term_id=region_term_id,
+            column=node_term_id,
+            fallback_world=default_world_value,
+            model=model, term=term_id
+        )
         return safe_parse_float(
             extract_grouped_data(coefficient, group_key) if group_key else coefficient,
             default=None
         )
 
-    def get_value(data: dict):
-        node_term_id = data.get('term', {}).get('@id')
-        value = _node_value(data)
+    def get_value(blank_node: dict):
+        node_term_id = blank_node.get('term', {}).get('@id')
+        value = _node_value(blank_node)
         # when getting data for a `region`, we can try to get the `region` on the node first, in case it is set
         region_term_id = (
-            (data.get('region') or data.get('country') or {'@id': lookup_term_id}).get('@id')
+            (blank_node.get('region') or blank_node.get('country') or {'@id': lookup_term_id}).get('@id')
         ) if lookup_term_id.startswith('GADM-') else lookup_term_id
         coefficient = get_coefficient(node_term_id, region_term_id)
-        if value is not None and coefficient is not None:
-            debugValues(data, model=model, term=term_id,
-                        node=node_term_id,
-                        value=value,
-                        coefficient=coefficient)
+        _log_value_coeff(log_node=log_node, value=value, coefficient=coefficient,
+                         model=model,
+                         term=term_id,
+                         node=node_term_id,
+                         operation=blank_node.get('operation', {}).get('@id'))
         return {'id': node_term_id, 'region-id': region_term_id, 'value': value, 'coefficient': coefficient}
     return get_value
 
 
-def aware_factor_value(model: str, term_id: str, lookup_name: str, aware_id: str, group_key: str = None):
+def aware_factor_value(
+    log_node: dict,
+    model: str,
+    term_id: str,
+    lookup_name: str,
+    aware_id: str,
+    group_key: Optional[str] = None,
+    default_world_value: Optional[bool] = False
+):
     lookup = download_lookup(lookup_name, False)  # avoid saving in memory as there could be many different files used
     lookup_col = column_name('awareWaterBasinId')
 
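All three helpers share the same closure-factory pattern: an outer call binds the lookup parameters and the new dedicated `log_node` logging target, `lru_cache` memoises per-term coefficients, and the returned `get_value` is mapped over blank nodes by `all_factor_value`. A minimal, self-contained sketch of that pattern (the dict lookup and term ids here are illustrative, not the package's API):

    from functools import lru_cache

    def make_factor_value(factors: dict):
        # cache per-term lookups: many blank nodes share the same term id
        @lru_cache()
        def get_coefficient(node_term_id: str):
            return factors.get(node_term_id)

        def get_value(blank_node: dict):
            node_term_id = blank_node.get('term', {}).get('@id')
            return {
                'id': node_term_id,
                'value': blank_node.get('value'),
                'coefficient': get_coefficient(node_term_id)
            }
        return get_value

    values = list(map(make_factor_value({'seed': 4.4}), [
        {'term': {'@id': 'seed'}, 'value': 10}
    ]))
    # [{'id': 'seed', 'value': 10, 'coefficient': 4.4}]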
@@ -82,17 +121,16 @@ def aware_factor_value(model: str, term_id: str, lookup_name: str, aware_id: str
             default=None
         ) if group_key else coefficient
 
-    def get_value(data: dict):
-        node_term_id = data.get('term', {}).get('@id')
-        value = _node_value(data)
+    def get_value(blank_node: dict):
+        node_term_id = blank_node.get('term', {}).get('@id')
+        value = _node_value(blank_node)
 
         try:
             coefficient = get_coefficient(node_term_id)
-            if value is not None and coefficient is not None:
-                debugValues(data, model=model, term=term_id,
-                            node=node_term_id,
-                            value=value,
-                            coefficient=coefficient)
+            _log_value_coeff(log_node=log_node, value=value, coefficient=coefficient,
+                             model=model,
+                             term=term_id,
+                             node=node_term_id)
         except Exception:  # factor does not exist
             coefficient = None
 
@@ -101,17 +139,21 @@ def aware_factor_value(model: str, term_id: str, lookup_name: str, aware_id: str
 
 
 def all_factor_value(
-    logs_model: str,
-    logs_term_id: str,
-    node: dict,
+    log_model: str,
+    log_term_id: str,
+    log_node: dict,
     lookup_name: str,
     lookup_col: str,
     blank_nodes: List[dict],
-    grouped_key: Optional[str] = None,
+    group_key: Optional[str] = None,
     default_no_values=0,
-    factor_value_func=_factor_value
+    factor_value_func=_factor_value,
+    default_world_value: bool = False
 ):
-    values = list(map(factor_value_func(logs_model, logs_term_id, lookup_name, lookup_col, grouped_key), blank_nodes))
+    values = list(map(
+        factor_value_func(log_node, log_model, log_term_id, lookup_name, lookup_col, group_key, default_world_value),
+        blank_nodes
+    ))
 
     has_values = len(values) > 0
     missing_values = set([
@@ -129,11 +171,11 @@ def all_factor_value(
             row_value=debug_values[1] if len(debug_values) == 2 else debug_values[0],
             col=debug_values[0] if len(debug_values) == 2 else lookup_col,
             value=None,
-            model=logs_model,
-            term=logs_term_id
+            model=log_model,
+            term=log_term_id
         )
 
-    debugValues(node, model=logs_model, term=logs_term_id,
+    debugValues(log_node, model=log_model, term=log_term_id,
                 all_with_factors=all_with_factors,
                 missing_lookup_factor=log_as_table([
                     {
@@ -144,7 +186,7 @@ def all_factor_value(
                     for v in missing_values
                 ]),
                 has_values=has_values,
-                values_used=log_as_table(values))
+                values_used=log_as_table([v for v in values if v.get('coefficient')]))
 
     values = [float((v.get('value') or 0) * (v.get('coefficient') or 0)) for v in values]
 
@@ -152,37 +194,20 @@ def all_factor_value(
     return None if not all_with_factors else (list_sum(values) if has_values else default_no_values)
 
 
-def _country_in_lookup(country_id: str):
-    def in_lookup(lookup_name: str):
-        return (
-            download_lookup(lookup_name.replace('region', country_id)) is not None or
-            country_id in lookup_term_ids(download_lookup(lookup_name))
-        )
-    return in_lookup
-
-
-def fallback_country(country_id: str, lookups: List[str]) -> str:
-    """
-    Given a country `@id`, and lookup tables, checks if a location can be used in lookup file
-    else fallback to the default "region-world".
-    """
-    is_in_lookup = lambda v: all(map(_country_in_lookup(v), lookups))  # noqa: E731
-    fallback_id = 'region-world'
-    return country_id if country_id and is_in_lookup(country_id) else fallback_id if is_in_lookup(fallback_id) else None
-
-
 def get_region_lookup(lookup_name: str, term_id: str):
     # for performance, try to load the region specific lookup if exists
     return (
-        download_lookup(lookup_name.replace('region-', f"{term_id}-"))
+        download_lookup(lookup_name.replace('region-', f"{term_id}-"), build_index=True)
         if lookup_name and lookup_name.startswith('region-') else None
-    ) or download_lookup(lookup_name)
+    ) or download_lookup(lookup_name, build_index=True)
 
 
 @lru_cache()
-def get_region_lookup_value(lookup_name: str, term_id: str, column: str, **log_args):
+def get_region_lookup_value(lookup_name: str, term_id: str, column: str, fallback_world: bool = False, **log_args):
     # for performance, try to load the region specific lookup if exists
     lookup = get_region_lookup(lookup_name, term_id)
     value = get_table_value(lookup, 'termid', term_id, column_name(column))
+    if value is None and fallback_world:
+        return get_region_lookup_value(lookup_name, 'region-world', column, **log_args)
     debugMissingLookup(lookup_name, 'termid', term_id, column, value, **log_args)
     return value
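The new `fallback_world` flag replaces the removed `fallback_country` helper: a factor missing for a specific region now falls back to the `region-world` row instead of yielding `None`. A rough sketch of that control flow, with a plain dict standing in for the downloaded lookup table (hypothetical data):

    # hypothetical in-memory table standing in for the downloaded lookup
    _TABLE = {('region-world', 'factor'): 0.5}

    def get_lookup_value(term_id: str, column: str, fallback_world: bool = False):
        value = _TABLE.get((term_id, column))
        if value is None and fallback_world:
            # one level of recursion: retry with the world-level row
            return get_lookup_value('region-world', column)
        return value

    assert get_lookup_value('GADM-FRA', 'factor') is None
    assert get_lookup_value('GADM-FRA', 'factor', fallback_world=True) == 0.5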
hestia_earth/models/version.py

@@ -1 +1 @@
-VERSION = '0.74.3'
+VERSION = '0.74.5'
hestia_earth/orchestrator/models/__init__.py

@@ -9,10 +9,10 @@ from copy import deepcopy
 from hestia_earth.utils.tools import non_empty_list, current_time_ms
 
 from hestia_earth.models.version import VERSION
-from ..log import logger
-from ..utils import get_required_model_param, _snakecase
-from ..strategies.run import should_run
-from ..strategies.merge import merge
+from hestia_earth.orchestrator.log import logger
+from hestia_earth.orchestrator.utils import get_required_model_param, _snakecase, reset_index
+from hestia_earth.orchestrator.strategies.run import should_run
+from hestia_earth.orchestrator.strategies.merge import merge
 
 
 def _memory_usage():
@@ -29,6 +29,23 @@ def _max_workers(type: str):
     return None
 
 
+# do not deep copy to improve performance, only set on low risk keys
+_SKIP_DEEPCOPY_KEYS = [
+    'emissions',
+    'emissionsResourceUse'
+]
+
+
+def _node_copy(node: dict):
+    skip_keys = [key for key in _SKIP_DEEPCOPY_KEYS if key in node and isinstance(node[key], list)]
+    new_node = deepcopy(node | {
+        key: [] for key in skip_keys
+    }) | {
+        key: node[key] for key in skip_keys
+    }
+    return new_node
+
+
 def _list_except_item(list, item):
     idx = list.index(item)
     return list[:idx] + list[idx+1:]
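`_node_copy` replaces the blanket `deepcopy` calls used when forking a node for each model run: the listed keys are swapped for `[]` before the deep copy, then reattached by reference, so the copy is independent everywhere except those (potentially very large) lists. Running the function on a small node shows the semantics:

    from copy import deepcopy

    # copied from the diff above
    _SKIP_DEEPCOPY_KEYS = ['emissions', 'emissionsResourceUse']

    def _node_copy(node: dict):
        skip_keys = [key for key in _SKIP_DEEPCOPY_KEYS if key in node and isinstance(node[key], list)]
        return deepcopy(node | {key: [] for key in skip_keys}) | {key: node[key] for key in skip_keys}

    node = {'name': 'cycle', 'emissions': [{'term': {'@id': 'ch4ToAirEntericFermentation'}}]}
    copy = _node_copy(node)
    assert copy is not node                        # top level is a new dict
    assert copy['emissions'] is node['emissions']  # large lists shared by reference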
@@ -66,22 +83,40 @@ def _import_model(name: str):
 
 def _run_pre_checks(data: dict):
     node_type = _snakecase(data.get('@type', data.get('type')))
+
+    now = current_time_ms()
+    memory_usage = _memory_usage()
+
     try:
         pre_checks = _import_model('.'.join([node_type, 'pre_checks'])).get('run')
         logger.info('running pre checks for %s', node_type)
-        return pre_checks(data)
+        data = pre_checks(data)
     except Exception:
-        return data
+        pass
+
+    logger.info('model_model=%s, model_value=%s, time=%s, memory_used=%s',
+                node_type, 'pre_checks', current_time_ms() - now, _memory_usage() - memory_usage)
+
+    return data
 
 
 def _run_post_checks(data: dict):
     node_type = _snakecase(data.get('@type', data.get('type')))
+
+    now = current_time_ms()
+    memory_usage = _memory_usage()
+
     try:
         post_checks = _import_model('.'.join([node_type, 'post_checks'])).get('run')
         logger.info('running post checks for %s', node_type)
-        return post_checks(data)
+        data = post_checks(data)
     except Exception:
-        return data
+        pass
+
+    logger.info('model_model=%s, model_value=%s, time=%s, memory_used=%s',
+                node_type, 'post_checks', current_time_ms() - now, _memory_usage() - memory_usage)
+
+    return data
 
 
 def _run_model(data: dict, model: dict, all_models: list):
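Both check runners now follow the same measure-around-a-step pattern as model runs: capture `current_time_ms()` and `_memory_usage()` up front, run the step inside a `try`, and log the deltas afterwards even when the step raised. A hedged sketch of the same idea as a reusable context manager (not part of the package):

    import logging
    import time
    from contextlib import contextmanager

    logger = logging.getLogger(__name__)

    @contextmanager
    def timed_step(name: str):
        start = time.time()
        try:
            yield
        finally:
            # runs whether or not the step raised
            logger.info('model_value=%s, time=%sms', name, round((time.time() - start) * 1000))

    with timed_step('pre_checks'):
        pass  # run the checks here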
@@ -108,7 +143,7 @@ def _run(data: dict, model: dict, all_models: list):
 def _run_serie(data: dict, models: list, stage: Union[int, List[int]] = None):
     return reduce(
         lambda prev, m: merge(
-            prev, _run_parallel(prev, m, models) if isinstance(m, list) else [_run(deepcopy(prev), m, models)]
+            prev, _run_parallel(prev, m, models) if isinstance(m, list) else [_run(_node_copy(prev), m, models)]
         ),
         _filter_models_stage(models, stage=stage),
         data
@@ -120,7 +155,7 @@ def _run_parallel(data: dict, model: list, all_models: list):
 
     max_workers = _max_workers(data.get('@type', data.get('type')))
     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-        futures = [executor.submit(_run, deepcopy(data), m, all_models) for m in model]
+        futures = [executor.submit(_run, _node_copy(data), m, all_models) for m in model]
 
         for future in concurrent.futures.as_completed(futures):
             results.append(future.result())
@@ -129,6 +164,8 @@ def _run_parallel(data: dict, model: list, all_models: list):
 
 
 def run(data: dict, models: list, stage: Union[int, List[int]] = None):
+    # make sure we reset before recalculating the node
+    reset_index()
     # run pre-checks if exist
     data = _run_pre_checks(data)
     data = _run_serie(data, models, stage=stage)
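For context, `_run_serie` is a left fold: each model (or parallel group of models) runs against a copy of the accumulated node, and its output is merged back before the next step. Schematically, with toy `run` and `merge` callables standing in for the orchestrator's:

    from functools import reduce

    def run_serie(data: dict, models: list, run, merge):
        # each step sees the state accumulated by all previous steps
        return reduce(lambda prev, m: merge(prev, run(dict(prev), m)), models, data)

    result = run_serie(
        {'value': 0},
        [1, 2, 3],
        run=lambda node, m: {'value': node['value'] + m},
        merge=lambda prev, res: prev | res
    )
    assert result == {'value': 6}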
hestia_earth/orchestrator/models/transformations.py

@@ -7,7 +7,7 @@ from hestia_earth.models.utils.transformation import previous_transformation
 from hestia_earth.models.utils.product import find_by_product
 
 from . import run as run_node, _import_model
-from hestia_earth.orchestrator.utils import new_practice, _filter_by_keys
+from hestia_earth.orchestrator.utils import new_practice, _filter_by_keys, reset_index
 
 
 def _full_completeness():
@@ -94,6 +94,8 @@ def _run_transformation(cycle: dict, models: list):
         transformation = _add_excreta_inputs(previous, transformation)
         transformation = _apply_transformation_share(previous, transformation)
         transformation = _run_models(cycle, transformation, models)
+        # reset the index between 2 transformations, as they dont share the same values
+        reset_index()
         return transformations + [transformation]
     return run
 
hestia_earth/orchestrator/strategies/merge/__init__.py

@@ -1,5 +1,4 @@
 from functools import reduce
-import pydash
 
 from hestia_earth.orchestrator.utils import _non_empty, _non_empty_list, update_node_version
 from . import merge_append
@@ -34,7 +33,7 @@ def _merge_result(data: dict, result: dict):
     node_type = data.get('type', data.get('@type'))
     values = [values] if not isinstance(values, list) and merge_type == 'list' else values
     new_value = _STRATEGIES[merge_type](current, values, version, model, merge_args, node_type)
-    new_data = pydash.objects.merge({}, data, {key: new_value})
+    new_data = data | {key: new_value}
     return update_node_version(version, new_data, data)
 
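Note the behavioural difference behind this simplification: `pydash.objects.merge` merges nested dicts recursively, while the dict union operator replaces the value at `key` wholesale, which appears sufficient here because the merge strategy already returns the complete new value for that key. For comparison (example data is illustrative):

    import pydash

    data = {'site': {'country': 'GADM-FRA', 'siteType': 'cropland'}}
    update = {'site': {'country': 'GADM-DEU'}}

    # pydash deep-merges nested dicts, keeping untouched keys
    assert pydash.objects.merge({}, data, update) == {
        'site': {'country': 'GADM-DEU', 'siteType': 'cropland'}
    }

    # dict union replaces the whole value under the key
    assert data | update == {'site': {'country': 'GADM-DEU'}}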
hestia_earth/orchestrator/strategies/merge/merge_list.py

@@ -3,7 +3,12 @@ from datetime import datetime
 from hestia_earth.schema import UNIQUENESS_FIELDS
 from hestia_earth.utils.tools import safe_parse_date, flatten
 
-from hestia_earth.orchestrator.utils import _non_empty_list, update_node_version
+from hestia_earth.orchestrator.utils import (
+    _non_empty_list,
+    update_node_version,
+    wrap_index,
+    update_index
+)
 from .merge_node import merge as merge_node
 
 _METHOD_MODEL_KEY = 'methodModel.@id'
@@ -74,7 +79,14 @@ def _build_matching_properties(values: list, model: dict = {}, merge_args: dict
     return _handle_local_property(values, properties, 'impactAssessment.id')
 
 
-def merge(source: list, new_values: list, version: str, model: dict = {}, merge_args: dict = {}, node_type: str = ''):
+def merge(
+    source: list,
+    new_values: list,
+    version: str,
+    model: dict = {},
+    merge_args: dict = {},
+    node_type: str = ''
+):
     source = [] if source is None else source
 
     # only merge if the
@@ -83,19 +95,30 @@ def merge(source: list, new_values: list, version: str, model: dict = {}, merge_
     # build list of properties used to do the matching
     properties = _build_matching_properties(source, model, merge_args, node_type)
 
-    source_index_keys = {
-        _value_index_key(value, properties, merge_args): index
-        for index, value in enumerate(source)
-    } if properties else None
+    # store previous identical index to speed merging
+    index_key = '-'.join([node_type, model.get('key', '')])
+    # when the subkey changes, we need to completely rebuild the index
+    index_sub_key = '-'.join(properties + [str(merge_args)])
+
+    def build_index():
+        return {
+            _value_index_key(value, properties, merge_args): index
+            for index, value in enumerate(source)
+        } if properties else None
+
+    source_index_keys = wrap_index(index_key, index_sub_key, build_index)
 
     for el in _non_empty_list(new_values):
         new_value_index_key = _value_index_key(el, properties, merge_args)
         source_index = source_index_keys.get(new_value_index_key) if source_index_keys else None
         if source_index is None:
             # add to index keys for next elements
-            if source_index_keys:
-                source_index_keys[new_value_index_key] = len(source)
+            source_index_keys = source_index_keys or {}
+            source_index_keys[new_value_index_key] = len(source)
             source.append(update_node_version(version, el))
         elif not skip_same_term:
             source[source_index] = merge_node(source[source_index], el, version, model, merge_args)
+
+    update_index(index_key, index_sub_key, source_index_keys)
+
     return source
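The index maps each element's uniqueness-field fingerprint to its position in `source`, turning the match step into a dict lookup instead of a list scan; the change above additionally caches that index across merge calls via `wrap_index`/`update_index`. The dedupe-or-merge loop, reduced to its essentials (a sketch, not the package code):

    def merge_by_key(source: list, new_values: list, key=lambda v: v['term']['@id']):
        # fingerprint -> position in source, built once in O(len(source))
        index = {key(value): position for position, value in enumerate(source)}
        for el in new_values:
            position = index.get(key(el))
            if position is None:
                index[key(el)] = len(source)  # keep the index valid for later elements
                source.append(el)
            else:
                source[position] = source[position] | el  # merge matching nodes in place
        return source

    merged = merge_by_key(
        [{'term': {'@id': 'seed'}, 'value': [10]}],
        [{'term': {'@id': 'seed'}, 'value': [20]}, {'term': {'@id': 'saplings'}, 'value': [1]}]
    )
    assert [v['value'] for v in merged] == [[20], [1]]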
hestia_earth/orchestrator/utils.py

@@ -6,6 +6,35 @@ from functools import reduce
 EXCLUDED_VERSION_KEYS = [
     '@type'
 ]
+_memory = {}
+
+
+def wrap_index(key: str, sub_key: str, func):
+    global _memory  # noqa: F824
+    memory_data = _memory.get(key, {})
+    data = (memory_data.get('data') if memory_data.get('sub_key') == sub_key else None) or func()
+    _memory[key] = {
+        'sub_key': sub_key,
+        'data': data
+    }
+    return data
+
+
+def update_index(key: str, sub_key: str, data):
+    global _memory  # noqa: F824
+    _memory[key] = {
+        'sub_key': sub_key,
+        'data': data
+    }
+    return data
+
+
+def reset_index():
+    """
+    Reset the merging index between different nodes.
+    """
+    global _memory
+    _memory = {}
 
 
 def get_required_model_param(model, key: str):
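These helpers give the orchestrator a process-wide memo: `wrap_index` only calls the builder when the `(key, sub_key)` pair has not been seen, `update_index` writes the mutated index back, and `reset_index` clears everything between nodes. Exercising the functions exactly as defined above (assuming they are in scope; the key strings are illustrative):

    builds = []

    def build_index():
        builds.append(1)  # count how often the expensive rebuild runs
        return {'seed-0': 0}

    wrap_index('Cycle-input', 'term.@id', build_index)  # miss: builds
    wrap_index('Cycle-input', 'term.@id', build_index)  # hit: cached
    assert len(builds) == 1

    wrap_index('Cycle-input', 'term.@id-x', build_index)  # sub_key changed: rebuild
    assert len(builds) == 2

    reset_index()  # called at the start of run() so indexes never leak across nodes
    wrap_index('Cycle-input', 'term.@id', build_index)
    assert len(builds) == 3

One subtlety: the cache test is `... or func()`, so a falsy stored index (the `None` produced when there are no matching properties) is rebuilt on every call.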
{hestia_earth_models-0.74.3.dist-info → hestia_earth_models-0.74.5.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hestia-earth-models
-Version: 0.74.3
+Version: 0.74.5
 Summary: HESTIA's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
 Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
 Author: HESTIA Team
@@ -12,8 +12,7 @@ Classifier: Programming Language :: Python :: 3.6
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: hestia-earth-schema<34.0.0,>=33.5.0
-Requires-Dist: hestia-earth-utils>=0.15.1
-Requires-Dist: python-dateutil>=2.8.1
+Requires-Dist: hestia-earth-utils>=0.15.3
 Requires-Dist: CurrencyConverter==0.16.8
 Requires-Dist: haversine>=2.7.0
 Requires-Dist: pydash