hestia-earth-aggregation 0.21.2__tar.gz → 0.21.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {hestia_earth_aggregation-0.21.2/hestia_earth_aggregation.egg-info → hestia_earth_aggregation-0.21.4}/PKG-INFO +1 -1
  2. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/__init__.py +22 -13
  3. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/aggregate_cycles.py +102 -50
  4. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/log.py +20 -14
  5. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/recalculate_cycles.py +1 -1
  6. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/__init__.py +226 -0
  7. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/aggregate_country_nodes.py +784 -0
  8. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/aggregate_weighted.py +197 -0
  9. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/blank_node.py +519 -0
  10. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/utils/combine.py +46 -23
  11. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/utils/completeness.py +62 -32
  12. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/utils/covariance.py +68 -40
  13. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/cycle.py +474 -0
  14. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/distribution.py +208 -0
  15. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/emission.py +70 -0
  16. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/group.py +160 -0
  17. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/input.py +28 -0
  18. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/lookup.py +64 -0
  19. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/management.py +78 -0
  20. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/measurement.py +38 -0
  21. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/practice.py +58 -0
  22. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/product.py +24 -0
  23. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/utils/property.py +13 -9
  24. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/quality_score.py +265 -0
  25. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/queries.py +503 -0
  26. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/site.py +100 -0
  27. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/source.py +22 -0
  28. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/term.py +95 -0
  29. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/weights.py +184 -0
  30. hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/version.py +1 -0
  31. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4/hestia_earth_aggregation.egg-info}/PKG-INFO +1 -1
  32. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/setup.py +4 -4
  33. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/tests/test_aggregation.py +1 -1
  34. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/__init__.py +0 -177
  35. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/aggregate_country_nodes.py +0 -599
  36. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/aggregate_weighted.py +0 -133
  37. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/blank_node.py +0 -401
  38. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/cycle.py +0 -327
  39. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/distribution.py +0 -148
  40. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/emission.py +0 -55
  41. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/group.py +0 -107
  42. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/input.py +0 -25
  43. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/lookup.py +0 -46
  44. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/management.py +0 -61
  45. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/measurement.py +0 -32
  46. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/practice.py +0 -56
  47. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/product.py +0 -22
  48. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/quality_score.py +0 -199
  49. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/queries.py +0 -427
  50. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/site.py +0 -82
  51. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/source.py +0 -16
  52. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/term.py +0 -75
  53. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/weights.py +0 -140
  54. hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/version.py +0 -1
  55. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/LICENSE +0 -0
  56. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/MANIFEST.in +0 -0
  57. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/README.md +0 -0
  58. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/config/Cycle/processedFood.json +0 -0
  59. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth_aggregation.egg-info/SOURCES.txt +0 -0
  60. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth_aggregation.egg-info/dependency_links.txt +0 -0
  61. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth_aggregation.egg-info/requires.txt +0 -0
  62. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth_aggregation.egg-info/top_level.txt +0 -0
  63. {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hestia_earth_aggregation
-Version: 0.21.2
+Version: 0.21.4
 Summary: HESTIA's aggregation engine.
 Home-page: https://gitlab.com/hestia-earth/hestia-aggregation-engine
 Author: HESTIA Team
@@ -9,8 +9,10 @@ from .utils.quality_score import calculate_score
 
 def _mock_nb_distribution(include_distribution: bool):
     original_func = distribution._nb_iterations
-    distribution._nb_iterations = lambda *args: original_func(*args) if include_distribution else 0
-    not include_distribution and logger.warning('Not generating distribution.')
+    distribution._nb_iterations = lambda *args: (
+        original_func(*args) if include_distribution else 0
+    )
+    not include_distribution and logger.warning("Not generating distribution.")
 
 
 def aggregate(
@@ -20,7 +22,7 @@ def aggregate(
     end_year: int,
     source: dict = None,
     include_distribution: bool = True,
-    filter_by_country: bool = True
+    filter_by_country: bool = True,
 ):
     """
     Aggregates data from HESTIA.
@@ -53,16 +55,23 @@ def aggregate(
     _mock_nb_distribution(include_distribution)
 
     now = current_time_ms()
-    logger.info('Aggregating %s in %s for period %s to %s' + (' with distribution' if include_distribution else ''),
-                product.get('name'),
-                country.get('name'),
-                start_year,
-                end_year)
-    aggregations, countries = run_aggregate(country, product, source, start_year, end_year, filter_by_country)
-    logger.info('time=%s, unit=ms', current_time_ms() - now)
-    aggregations = [
-        recalculate(agg, product) for agg in aggregations
-    ] if should_recalculate(product) else aggregations
+    logger.info(
+        "Aggregating %s in %s for period %s to %s"
+        + (" with distribution" if include_distribution else ""),
+        product.get("name"),
+        country.get("name"),
+        start_year,
+        end_year,
+    )
+    aggregations, countries = run_aggregate(
+        country, product, source, start_year, end_year, filter_by_country
+    )
+    logger.info("time=%s, unit=ms", current_time_ms() - now)
+    aggregations = (
+        [recalculate(agg, product) for agg in aggregations]
+        if should_recalculate(product)
+        else aggregations
+    )
     aggregations = [
        calculate_score(cycle=agg, countries=countries) for agg in aggregations
     ]
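For orientation, a hypothetical call to the reworked `aggregate()` entry point could look like the sketch below. The leading `country`/`product` keyword names and the return value are inferred from the function body shown in this hunk (the full signature is not part of the diff), and the term dicts are illustrative placeholders rather than real HESTIA glossary nodes.

from hestia_earth.aggregation import aggregate

# Illustrative Term-like nodes; real inputs come from the HESTIA glossary.
country = {"@type": "Term", "@id": "GADM-FRA", "name": "France"}
product = {"@type": "Term", "@id": "wheatGrain", "name": "Wheat, grain"}

# Return value assumed to be the list of aggregated cycles.
aggregations = aggregate(
    country=country,
    product=product,
    start_year=2010,
    end_year=2019,
    source=None,
    include_distribution=False,  # disables the distribution step via _mock_nb_distribution
    filter_by_country=True,
)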
@@ -3,23 +3,36 @@ from hestia_earth.utils.tools import non_empty_list
 
 from hestia_earth.aggregation.log import logger, log_memory_usage
 from hestia_earth.aggregation.utils import CYCLE_AGGREGATION_KEYS, SITE_AGGREGATION_KEYS
-from hestia_earth.aggregation.utils.queries import find_global_nodes, find_country_nodes, download_site
+from hestia_earth.aggregation.utils.queries import (
+    find_global_nodes,
+    find_country_nodes,
+    download_site,
+)
 from hestia_earth.aggregation.utils.term import _is_global
 from hestia_earth.aggregation.utils.group import group_blank_nodes
 from hestia_earth.aggregation.utils.blank_node import cleanup_node_blank_nodes
-from hestia_earth.aggregation.utils.aggregate_weighted import aggregate as aggregate_weighted
+from hestia_earth.aggregation.utils.aggregate_weighted import (
+    aggregate as aggregate_weighted,
+)
 from hestia_earth.aggregation.utils.aggregate_country_nodes import aggregate_cycles
 from hestia_earth.aggregation.utils.weights import (
-    country_weights, country_weight_node_id, world_weights, world_weight_node_id
+    country_weights,
+    country_weight_node_id,
+    world_weights,
+    world_weight_node_id,
 )
 from hestia_earth.aggregation.utils.site import format_site
 from hestia_earth.aggregation.utils.cycle import (
-    aggregate_with_matrix, format_for_grouping, format_terms_results, format_country_results, update_cycle
+    aggregate_with_matrix,
+    format_for_grouping,
+    format_terms_results,
+    format_country_results,
+    update_cycle,
 )
 from hestia_earth.aggregation.utils.covariance import (
     init_covariance_files,
     remove_covariance_files,
-    generate_covariance_country
+    generate_covariance_country,
 )
 
 
@@ -31,37 +44,44 @@ def _aggregate_country(
     start_year: int,
     end_year: int,
     generate_weights_func=None,
-    missing_weights_node_id_func=None
+    missing_weights_node_id_func=None,
 ) -> Tuple[dict, dict]:
-    functional_unit = cycles[0].get('functionalUnit')
-    site_type = cycles[0].get('site', {}).get('siteType')
+    functional_unit = cycles[0].get("functionalUnit")
+    site_type = cycles[0].get("site", {}).get("siteType")
 
     # aggregate cycles with weights
     cycles_formatted = format_for_grouping(cycles)
     cycle_data = group_blank_nodes(
-        cycles_formatted, CYCLE_AGGREGATION_KEYS, start_year, end_year, product=product, site_type=site_type
+        cycles_formatted,
+        CYCLE_AGGREGATION_KEYS,
+        start_year,
+        end_year,
+        product=product,
+        site_type=site_type,
     )
     weights = generate_weights_func(cycle_data)
     cycle_data = cycle_data | aggregate_weighted(
         aggregate_keys=CYCLE_AGGREGATION_KEYS,
         data=cycle_data,
         weights=weights,
-        missing_weights_node_id_func=missing_weights_node_id_func
+        missing_weights_node_id_func=missing_weights_node_id_func,
     )
 
     # aggregate sites with weights
-    sites = [c.get('site') for c in cycles]
+    sites = [c.get("site") for c in cycles]
     site_data = group_blank_nodes(sites, SITE_AGGREGATION_KEYS)
     site_data = aggregate_weighted(
         aggregate_keys=SITE_AGGREGATION_KEYS,
         data=site_data,
         weights=weights,
-        missing_weights_node_id_func=missing_weights_node_id_func
+        missing_weights_node_id_func=missing_weights_node_id_func,
     )
     aggregated_site = format_site(site_data, sites)
 
     cycle_data = format_country_results(cycle_data, product, country, aggregated_site)
-    aggregated_cycle = update_cycle(country, start_year, end_year, source, functional_unit, False)(cycle_data)
+    aggregated_cycle = update_cycle(
+        country, start_year, end_year, source, functional_unit, False
+    )(cycle_data)
     return (aggregated_cycle, weights)
 
 
@@ -71,7 +91,7 @@ def aggregate_country(
     source: dict,
     start_year: int,
     end_year: int,
-    filter_by_country: bool = True
+    filter_by_country: bool = True,
 ) -> Tuple[list, list]:
     """
     Create 1 to many country-level aggregations.
@@ -100,52 +120,70 @@ def aggregate_country(
     """
     init_covariance_files()
 
-    cycles = find_country_nodes(product, start_year, end_year, country if filter_by_country else None)
+    cycles = find_country_nodes(
+        product, start_year, end_year, country if filter_by_country else None
+    )
     if not cycles:
-        logger.info('1 - No cycles to run aggregation.')
+        logger.info("1 - No cycles to run aggregation.")
         return ([], [])
 
     # combine cycles into a "master" cycle with multiple values
     cycles_aggregated = aggregate_cycles(
-        cycles=cycles,
-        product=product,
-        start_year=start_year,
-        end_year=end_year
+        cycles=cycles, product=product, start_year=start_year, end_year=end_year
     )
     if not cycles_aggregated:
-        logger.info('2 - No aggregated cycles.')
+        logger.info("2 - No aggregated cycles.")
         return ([], [])
 
-    logger.info('Cycles aggregated, generating final country aggregation...')
+    logger.info("Cycles aggregated, generating final country aggregation...")
     log_memory_usage()
 
-    functional_unit = cycles_aggregated[0].get('functionalUnit')
+    functional_unit = cycles_aggregated[0].get("functionalUnit")
     include_matrix = aggregate_with_matrix(product)
-    cycles_aggregated = non_empty_list([
-        format_terms_results(cycle, product, country) for cycle in cycles_aggregated
-    ])
-    cycles_aggregated = non_empty_list(map(
-        update_cycle(country, start_year, end_year, source, functional_unit, include_matrix),
-        cycles_aggregated
-    ))
+    cycles_aggregated = non_empty_list(
+        [format_terms_results(cycle, product, country) for cycle in cycles_aggregated]
+    )
+    cycles_aggregated = non_empty_list(
+        map(
+            update_cycle(
+                country, start_year, end_year, source, functional_unit, include_matrix
+            ),
+            cycles_aggregated,
+        )
+    )
     logger.info(f"Found {len(cycles_aggregated)} cycles at sub-country level")
     if len(cycles_aggregated) == 0:
-        logger.info('3 - No cycles to run aggregation.')
+        logger.info("3 - No cycles to run aggregation.")
         return []
 
     # step 2: use aggregated cycles to calculate country-level cycles
-    country_cycle, weights = _aggregate_country(
-        country, product, cycles_aggregated, source, start_year, end_year,
-        generate_weights_func=country_weights,
-        missing_weights_node_id_func=country_weight_node_id
-    ) if all([
-        cycles_aggregated,
-        # when not including matrix, cycles and country_cycles will be the same
-        include_matrix
-    ]) else (None, {})
+    country_cycle, weights = (
+        _aggregate_country(
+            country,
+            product,
+            cycles_aggregated,
+            source,
+            start_year,
+            end_year,
+            generate_weights_func=country_weights,
+            missing_weights_node_id_func=country_weight_node_id,
+        )
+        if all(
+            [
+                cycles_aggregated,
+                # when not including matrix, cycles and country_cycles will be the same
+                include_matrix,
+            ]
+        )
+        else (None, {})
+    )
     log_memory_usage()
 
-    country_cycle = (country_cycle | generate_covariance_country(weights=weights)) if country_cycle else None
+    country_cycle = (
+        (country_cycle | generate_covariance_country(weights=weights))
+        if country_cycle
+        else None
+    )
 
     log_memory_usage()
 
@@ -163,7 +201,7 @@ def aggregate_global(
     start_year: int,
     end_year: int,
     *args,
-    **kwargs
+    **kwargs,
 ) -> Tuple[list, list]:
     """
     Aggregate World and other regions level 0 (like `region-easter-europe`).
@@ -188,14 +226,28 @@ def aggregate_global(
         The list of countries that were used to aggregate.
     """
     cycles = find_global_nodes(product, start_year, end_year, country)
-    cycles = [cycle | {'site': download_site(cycle.get('site'), data_state='original')} for cycle in cycles]
-    countries = non_empty_list([cycle.get('site', {}).get('country') for cycle in cycles])
-
-    aggregated_cycle, *args = _aggregate_country(
-        country, product, cycles, source, start_year, end_year,
-        generate_weights_func=world_weights,
-        missing_weights_node_id_func=world_weight_node_id
-    ) if cycles else (None, {})
+    cycles = [
+        cycle | {"site": download_site(cycle.get("site"), data_state="original")}
+        for cycle in cycles
+    ]
+    countries = non_empty_list(
+        [cycle.get("site", {}).get("country") for cycle in cycles]
+    )
+
+    aggregated_cycle, *args = (
+        _aggregate_country(
+            country,
+            product,
+            cycles,
+            source,
+            start_year,
+            end_year,
+            generate_weights_func=world_weights,
+            missing_weights_node_id_func=world_weight_node_id,
+        )
+        if cycles
+        else (None, {})
+    )
     return (non_empty_list([cleanup_node_blank_nodes(aggregated_cycle)]), countries)
 
 
@@ -4,14 +4,14 @@ import platform
 import resource
 import logging
 
-LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
 
 # disable root logger
 root_logger = logging.getLogger()
 root_logger.disabled = True
 
 # create custom logger
-logger = logging.getLogger('hestia_earth.aggregation')
+logger = logging.getLogger("hestia_earth.aggregation")
 logger.removeHandler(sys.stdout)
 logger.setLevel(logging.getLevelName(LOG_LEVEL))
 
@@ -29,28 +29,28 @@ def log_to_file(filepath: str):
     formatter = logging.Formatter(
         '{"timestamp": "%(asctime)s", "level": "%(levelname)s", "logger": "%(name)s", '
         '"filename": "%(filename)s", "message": "%(message)s"}',
-        '%Y-%m-%dT%H:%M:%S%z')
-    handler = logging.FileHandler(filepath, encoding='utf-8')
+        "%Y-%m-%dT%H:%M:%S%z",
+    )
+    handler = logging.FileHandler(filepath, encoding="utf-8")
     handler.setFormatter(formatter)
     handler.setLevel(logging.getLevelName(LOG_LEVEL))
     logger.addHandler(handler)
 
 
-LOG_FILENAME = os.getenv('LOG_FILENAME')
+LOG_FILENAME = os.getenv("LOG_FILENAME")
 if LOG_FILENAME is not None:
     log_to_file(LOG_FILENAME)
 
 
-def _join_args(**kwargs): return ', '.join([f"{key}={value}" for key, value in kwargs.items()])
+def _join_args(**kwargs):
+    return ", ".join([f"{key}={value}" for key, value in kwargs.items()])
 
 
 def log_memory_usage(**kwargs):
-    factor = 1024 * (
-        1024 if platform.system() in ['Darwin', 'Windows'] else 1
-    )
+    factor = 1024 * (1024 if platform.system() in ["Darwin", "Windows"] else 1)
     value = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / factor
-    extra = (', ' + _join_args(**kwargs)) if len(kwargs.keys()) > 0 else ''
-    logger.info('memory_used=%s, unit=MB' + extra, value)
+    extra = (", " + _join_args(**kwargs)) if len(kwargs.keys()) > 0 else ""
+    logger.info("memory_used=%s, unit=MB" + extra, value)
 
 
 def debugRequirements(**kwargs):
@@ -67,7 +67,13 @@ def _sum_values(values: list):
 
 
 def debugWeights(weights: dict):
-    total_weight = _sum_values(v.get('weight') for v in weights.values()) or 100
+    total_weight = _sum_values(v.get("weight") for v in weights.values()) or 100
     for id, weight in weights.items():
-        value = weight.get('weight')
-        logger.debug('id=%s, weight=%s, ratio=%s/%s', id, value * 100 / total_weight, value, total_weight)
+        value = weight.get("weight")
+        logger.debug(
+            "id=%s, weight=%s, ratio=%s/%s",
+            id,
+            value * 100 / total_weight,
+            value,
+            total_weight,
+        )
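The `log_memory_usage` change above keeps the same unit conversion: `ru_maxrss` is reported in kilobytes on Linux but in bytes on macOS, so the divisor differs by platform. A minimal standalone sketch of that conversion (the helper name is made up and not part of the package; `resource` is POSIX-only):

import platform
import resource  # POSIX-only; unavailable on Windows

def peak_memory_mb() -> float:
    # hypothetical helper reproducing the divisor logic from log_memory_usage
    factor = 1024 * (1024 if platform.system() == "Darwin" else 1)
    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / factor

print(f"memory_used={peak_memory_mb():.1f}, unit=MB")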
@@ -3,7 +3,7 @@ import json
 from hestia_earth.orchestrator import run
 
 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
-CONFIG_PATH = os.path.join(CURRENT_DIR, 'config', 'Cycle')
+CONFIG_PATH = os.path.join(CURRENT_DIR, "config", "Cycle")
 
 
 def should_recalculate(product: dict):
@@ -0,0 +1,226 @@
+import os
+import json
+from decimal import Decimal
+from statistics import stdev, mean
+from hestia_earth.utils.model import linked_node
+from hestia_earth.utils.tools import non_empty_list, flatten, safe_parse_date
+
+from ..version import VERSION
+
+MIN_NB_OBSERVATIONS = 20
+CYCLE_AGGREGATION_KEYS = ["inputs", "practices", "products", "emissions"]
+SITE_AGGREGATION_KEYS = ["measurements", "management"]
+
+
+class HestiaError(Exception):
+    def __init__(self, message: str, data: dict = {}):
+        super().__init__(message)
+        self.error = {"message": message} | data
+
+    def __str__(self):
+        return f"Error downloading nodes: {json.dumps(self.error or {})}"
+
+
+def create_folders(filepath: str):
+    return os.makedirs(os.path.dirname(filepath), exist_ok=True)
+
+
+def pick(value: dict, keys: list):
+    return {k: value.get(k) for k in keys if k in value}
+
+
+def is_empty(value):
+    return value is None or (
+        value in [None, "", "-"]
+        if isinstance(value, str)
+        else (
+            len(value) == 0
+            if isinstance(value, list)
+            else len(value.keys()) == 0 if isinstance(value, dict) else False
+        )
+    )
+
+
+def remove_empty_fields(value: dict):
+    return {key: value for key, value in value.items() if not is_empty(value)}
+
+
+def _save_json(data: dict, filename: str):
+    should_run = os.getenv("DEBUG", "false") == "true"
+    if not should_run:
+        return
+    dir = os.getenv("TMP_DIR", "/tmp")
+    filepath = f"{dir}/{filename}.jsonld"
+    create_folders(filepath)
+    with open(filepath, "w") as f:
+        return json.dump(data, f, indent=2)
+
+
+def sum_data(nodes: list, key: str):
+    return sum([node.get(key, 1) for node in nodes])
+
+
+def format_aggregated_list(node_type: str, values: list):
+    nodes = non_empty_list(
+        flatten(
+            [
+                {"@id": v} if isinstance(v, str) else v.get(f"aggregated{node_type}s")
+                for v in non_empty_list(values)
+            ]
+        )
+    )
+    # build sorted list of ids
+    ids = sorted(list(set(map(lambda x: x["@id"], nodes))))
+    nodes = [{"@type": node_type, "@id": v} for v in ids]
+    return list(map(linked_node, nodes))
+
+
+def match_dates(blank_node: dict, start_year: int, end_year: int):
+    dates = blank_node.get("dates", [])
+    start_date = safe_parse_date(blank_node.get("startDate"), default=None)
+    end_date = safe_parse_date(blank_node.get("endDate"), default=None)
+    return all(
+        [
+            not dates
+            or any(
+                [
+                    int(start_year) <= safe_parse_date(date).year <= int(end_year)
+                    for date in dates
+                    if safe_parse_date(date, default=None)
+                ]
+            ),
+            not start_date
+            or not end_date
+            or any(
+                [
+                    int(start_year) <= start_date.year <= int(end_year),
+                    int(start_year) <= end_date.year <= int(end_year),
+                ]
+            ),
+        ]
+    )
+
+
+def _aggregated_node(node: dict):
+    return node | {"aggregated": True, "aggregatedVersion": VERSION}
+
+
+def _aggregated_version(node: dict):
+    keys = list(node.keys())
+    keys.remove("@type") if "@type" in keys else None
+    node["aggregated"] = node.get("aggregated", [])
+    node["aggregatedVersion"] = node.get("aggregatedVersion", [])
+    for key in keys:
+        if node.get(key) is None:
+            continue
+        if key in node["aggregated"]:
+            node.get("aggregatedVersion")[node["aggregated"].index(key)] = VERSION
+        else:
+            node["aggregated"].append(key)
+            node["aggregatedVersion"].append(VERSION)
+    return node
+
+
+def _min(values, observations: int = 0, min_observations: int = MIN_NB_OBSERVATIONS):
+    has_boolean = any([isinstance(v, bool) for v in values])
+    return (
+        None
+        if has_boolean
+        else min(values) if (observations or len(values)) >= min_observations else None
+    )
+
+
+def _max(values, observations: int = 0, min_observations: int = MIN_NB_OBSERVATIONS):
+    has_boolean = any([isinstance(v, bool) for v in values])
+    return (
+        None
+        if has_boolean
+        else max(values) if (observations or len(values)) >= min_observations else None
+    )
+
+
+def _sd(values):
+    return stdev(values) if len(values) >= 2 else None
+
+
+def _all_boolean(values: list):
+    return all([isinstance(v, bool) for v in values])
+
+
+def _numeric_weighted_average(values: list):
+    total_weight = (
+        sum(Decimal(str(weight)) for _v, weight in values) if values else Decimal(0)
+    )
+    weighted_values = [
+        Decimal(str(value)) * Decimal(str(weight)) for value, weight in values
+    ]
+    average = (
+        sum(weighted_values) / (total_weight if total_weight else 1)
+        if weighted_values
+        else None
+    )
+    return None if average is None else float(average)
+
+
+def _bool_weighted_average(values: list):
+    return mean(map(int, values)) >= 0.5
+
+
+def weighted_average(weighted_values: list):
+    values = [v for v, _w in weighted_values]
+    all_boolean = _all_boolean(values)
+    return (
+        None
+        if not values
+        else (
+            _bool_weighted_average(values)
+            if all_boolean
+            else _numeric_weighted_average(weighted_values)
+        )
+    )
+
+
+def _unique_nodes(nodes: list):
+    return sorted(
+        list({n.get("@id"): n for n in nodes}.values()), key=lambda n: n.get("@id")
+    )
+
+
+def _set_dict_single(data: dict, key: str, value, strict=False):
+    if data is not None and value is not None and (not strict or not is_empty(value)):
+        data[key] = value
+    return data
+
+
+def _set_dict_array(data: dict, key: str, value, strict=False):
+    if data is not None and value is not None and (not strict or value != 0):
+        data[key] = [value]
+    return data
+
+
+def format_evs(value: float):
+    return min([100, round(value, 2)]) if value else value
+
+
+def value_difference(value: float, expected_value: float):
+    """
+    Get the difference in percentage between a value and the expected value.
+
+    Parameters
+    ----------
+    value : float
+        The value to check.
+    expected_value : float
+        The expected value.
+
+    Returns
+    -------
+    bool
+        The difference in percentage between the value and the expected value.
+    """
+    return (
+        0
+        if (isinstance(expected_value, list) and len(expected_value) == 0)
+        or expected_value == 0
+        else (round(abs(value - expected_value) / expected_value, 4))
+    )
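As a quick, illustrative exercise of a few of the pure helpers added in the new utils/__init__.py above (values are made up; the module is internal to the package, so the import path may change between releases):

from hestia_earth.aggregation.utils import (
    pick,
    is_empty,
    weighted_average,
    value_difference,
    format_evs,
)

node = {"@id": "n1", "value": [10], "description": ""}
print(pick(node, ["@id", "value"]))        # {'@id': 'n1', 'value': [10]}
print(is_empty(node["description"]))       # True: empty strings count as empty

# weighted_average takes (value, weight) pairs; an all-boolean list
# falls back to a simple majority vote (weights ignored).
print(weighted_average([(10, 1), (20, 3)]))        # 17.5
print(weighted_average([(True, 1), (False, 1)]))   # True (mean of ints >= 0.5)

print(value_difference(110, 100))          # 0.1 -> 10% relative difference
print(format_evs(123.456))                 # 100: result is capped at 100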