hestia-earth-utils 0.16.9__tar.gz → 0.16.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {hestia_earth_utils-0.16.9/hestia_earth_utils.egg-info → hestia_earth_utils-0.16.11}/PKG-INFO +1 -1
  2. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/api.py +78 -36
  3. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/blank_node.py +100 -60
  4. hestia_earth_utils-0.16.11/hestia_earth/utils/calculation_status.py +75 -0
  5. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/cycle.py +7 -7
  6. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/date.py +7 -2
  7. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/descriptive_stats.py +10 -6
  8. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/emission.py +26 -15
  9. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/lookup.py +62 -28
  10. hestia_earth_utils-0.16.11/hestia_earth/utils/lookup_utils.py +206 -0
  11. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/model.py +45 -40
  12. hestia_earth_utils-0.16.11/hestia_earth/utils/pipeline.py +377 -0
  13. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/pivot/_shared.py +16 -12
  14. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/pivot/pivot_csv.py +35 -18
  15. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/pivot/pivot_json.py +34 -18
  16. hestia_earth_utils-0.16.11/hestia_earth/utils/request.py +31 -0
  17. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/stats.py +89 -68
  18. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/storage/_azure_client.py +17 -6
  19. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/storage/_local_client.py +8 -3
  20. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/storage/_s3_client.py +27 -22
  21. hestia_earth_utils-0.16.11/hestia_earth/utils/storage/_sns_client.py +17 -0
  22. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/term.py +5 -5
  23. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/tools.py +50 -21
  24. hestia_earth_utils-0.16.11/hestia_earth/utils/version.py +1 -0
  25. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11/hestia_earth_utils.egg-info}/PKG-INFO +1 -1
  26. hestia_earth_utils-0.16.11/setup.py +33 -0
  27. hestia_earth_utils-0.16.11/tests/test_api.py +171 -0
  28. hestia_earth_utils-0.16.11/tests/test_blank_node.py +73 -0
  29. hestia_earth_utils-0.16.11/tests/test_date.py +17 -0
  30. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/tests/test_descriptive_stats.py +10 -10
  31. hestia_earth_utils-0.16.11/tests/test_emission.py +51 -0
  32. hestia_earth_utils-0.16.11/tests/test_lookup.py +142 -0
  33. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/tests/test_lookup_utils.py +32 -32
  34. hestia_earth_utils-0.16.11/tests/test_model.py +57 -0
  35. hestia_earth_utils-0.16.11/tests/test_pipeline.py +250 -0
  36. hestia_earth_utils-0.16.11/tests/test_request.py +9 -0
  37. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/tests/test_stats.py +46 -22
  38. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/tests/test_term.py +3 -3
  39. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/tests/test_tools.py +29 -42
  40. hestia_earth_utils-0.16.9/hestia_earth/utils/calculation_status.py +0 -65
  41. hestia_earth_utils-0.16.9/hestia_earth/utils/lookup_utils.py +0 -180
  42. hestia_earth_utils-0.16.9/hestia_earth/utils/pipeline.py +0 -288
  43. hestia_earth_utils-0.16.9/hestia_earth/utils/request.py +0 -20
  44. hestia_earth_utils-0.16.9/hestia_earth/utils/storage/_sns_client.py +0 -12
  45. hestia_earth_utils-0.16.9/hestia_earth/utils/version.py +0 -1
  46. hestia_earth_utils-0.16.9/setup.py +0 -36
  47. hestia_earth_utils-0.16.9/tests/test_api.py +0 -169
  48. hestia_earth_utils-0.16.9/tests/test_blank_node.py +0 -59
  49. hestia_earth_utils-0.16.9/tests/test_date.py +0 -17
  50. hestia_earth_utils-0.16.9/tests/test_emission.py +0 -62
  51. hestia_earth_utils-0.16.9/tests/test_lookup.py +0 -128
  52. hestia_earth_utils-0.16.9/tests/test_model.py +0 -69
  53. hestia_earth_utils-0.16.9/tests/test_pipeline.py +0 -212
  54. hestia_earth_utils-0.16.9/tests/test_request.py +0 -9
  55. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/MANIFEST.in +0 -0
  56. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/README.md +0 -0
  57. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/bin/hestia-format-upload +0 -0
  58. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/bin/hestia-pivot-csv +0 -0
  59. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/__init__.py +0 -0
  60. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/pivot/__init__.py +0 -0
  61. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/storage/__init__.py +0 -0
  62. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth/utils/table.py +0 -0
  63. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth_utils.egg-info/SOURCES.txt +0 -0
  64. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth_utils.egg-info/dependency_links.txt +0 -0
  65. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth_utils.egg-info/requires.txt +0 -0
  66. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/hestia_earth_utils.egg-info/top_level.txt +0 -0
  67. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/setup.cfg +0 -0
  68. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/tests/test_calculation_status.py +0 -0
  69. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/tests/test_cycle.py +0 -0
  70. {hestia_earth_utils-0.16.9 → hestia_earth_utils-0.16.11}/tests/test_table.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hestia_earth_utils
3
- Version: 0.16.9
3
+ Version: 0.16.11
4
4
  Summary: HESTIA's utils library
5
5
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
6
6
  Author: HESTIA Team
@@ -9,9 +9,13 @@ from .request import request_url, api_url, api_access_token
9
9
 
10
10
 
11
11
  def _match_key_value(key: str, value):
12
- first_key = key.split('.')[0]
13
- query = {'match': {key: value}}
14
- return {'nested': {'path': first_key, 'query': query}} if first_key in NESTED_SEARCHABLE_KEYS else query
12
+ first_key = key.split(".")[0]
13
+ query = {"match": {key: value}}
14
+ return (
15
+ {"nested": {"path": first_key, "query": query}}
16
+ if first_key in NESTED_SEARCHABLE_KEYS
17
+ else query
18
+ )
15
19
 
16
20
 
17
21
  def _retry_request_error(func, retry_max: int = 5):
@@ -30,26 +34,28 @@ def _retry_request_error(func, retry_max: int = 5):
30
34
  def _safe_get_request(url: str, res_error=None):
31
35
  def exec():
32
36
  try:
33
- headers = {'Content-Type': 'application/json'}
37
+ headers = {"Content-Type": "application/json"}
34
38
  access_token = api_access_token()
35
39
  if access_token:
36
- headers['X-Access-Token'] = access_token
40
+ headers["X-Access-Token"] = access_token
37
41
  return requests.get(url, headers=headers).json()
38
42
  except requests.exceptions.RequestException:
39
43
  return res_error
44
+
40
45
  return _retry_request_error(exec)
41
46
 
42
47
 
43
48
  def _safe_post_request(url: str, body: dict, res_error={}):
44
49
  def exec():
45
50
  try:
46
- headers = {'Content-Type': 'application/json'}
51
+ headers = {"Content-Type": "application/json"}
47
52
  access_token = api_access_token()
48
53
  if access_token:
49
- headers['X-Access-Token'] = access_token
54
+ headers["X-Access-Token"] = access_token
50
55
  return requests.post(url, json.dumps(body), headers=headers).json()
51
56
  except requests.exceptions.RequestException:
52
57
  return res_error
58
+
53
59
  return _retry_request_error(exec)
54
60
 
55
61
 
@@ -63,11 +69,21 @@ def node_type_to_url(node_type: SchemaType):
63
69
 
64
70
  def node_to_path(node_type: SchemaType, node_id: str, data_state=None):
65
71
  jsonld_path = os.path.join(_parse_node_type(node_type), f"{node_id}.jsonld")
66
- return jsonld_path if data_state is None or data_state == 'original' or len(data_state) == 0 else \
67
- os.path.join(data_state, jsonld_path)
68
-
69
-
70
- def find_related(node_type: SchemaType, id: str, related_type: SchemaType, limit=100, offset=0, relationship=None):
72
+ return (
73
+ jsonld_path
74
+ if data_state is None or data_state == "original" or len(data_state) == 0
75
+ else os.path.join(data_state, jsonld_path)
76
+ )
77
+
78
+
79
+ def find_related(
80
+ node_type: SchemaType,
81
+ id: str,
82
+ related_type: SchemaType,
83
+ limit=100,
84
+ offset=0,
85
+ relationship=None,
86
+ ):
71
87
  """
72
88
  Return the list of related Nodes by going through a "relationship".
73
89
  You can navigate the HESTIA Graph Database using this method.
@@ -88,16 +104,26 @@ def find_related(node_type: SchemaType, id: str, related_type: SchemaType, limit
88
104
  relationship
89
105
  The relationship used to connect both Node. See the API for more information.
90
106
  """
91
- url = request_url(f"{api_url()}/{node_type_to_url(node_type)}/{id}/{node_type_to_url(related_type)}",
92
- limit=limit, offset=offset, relationship=relationship)
107
+ url = request_url(
108
+ f"{api_url()}/{node_type_to_url(node_type)}/{id}/{node_type_to_url(related_type)}",
109
+ limit=limit,
110
+ offset=offset,
111
+ relationship=relationship,
112
+ )
93
113
  response = _safe_get_request(url)
94
114
  # handle errors
95
- return response.get('results', []) if isinstance(response, dict) else response
115
+ return response.get("results", []) if isinstance(response, dict) else response
96
116
 
97
117
 
98
- def _exec_download_hestia(node_id: str, node_type=SchemaType.TERM, data_state='', mode='') -> dict:
118
+ def _exec_download_hestia(
119
+ node_id: str, node_type=SchemaType.TERM, data_state="", mode=""
120
+ ) -> dict:
99
121
  def fallback():
100
- url = request_url(f"{api_url()}/{node_type_to_url(node_type)}/{node_id}", dataState=data_state, mode=mode)
122
+ url = request_url(
123
+ f"{api_url()}/{node_type_to_url(node_type)}/{node_id}",
124
+ dataState=data_state,
125
+ mode=mode,
126
+ )
101
127
  return _safe_get_request(url)
102
128
 
103
129
  try:
@@ -111,7 +137,9 @@ def _exec_download_hestia(node_id: str, node_type=SchemaType.TERM, data_state=''
111
137
  _exec_download_hestia_cached = cache(_exec_download_hestia)
112
138
 
113
139
 
114
- def download_hestia(node_id: str, node_type=SchemaType.TERM, data_state='', mode='') -> dict:
140
+ def download_hestia(
141
+ node_id: str, node_type=SchemaType.TERM, data_state="", mode=""
142
+ ) -> dict:
115
143
  """
116
144
  Download a Node from the HESTIA Database.
117
145
 
@@ -134,7 +162,11 @@ def download_hestia(node_id: str, node_type=SchemaType.TERM, data_state='', mode
134
162
  The `JSON` content of the Node.
135
163
  """
136
164
  # cache all requests to `Term` by default, as the values are not likely to change during a single execution
137
- download_func = _exec_download_hestia_cached if _parse_node_type(node_type) == 'Term' else _exec_download_hestia
165
+ download_func = (
166
+ _exec_download_hestia_cached
167
+ if _parse_node_type(node_type) == "Term"
168
+ else _exec_download_hestia
169
+ )
138
170
  return download_func(node_id, node_type, data_state, mode)
139
171
 
140
172
 
@@ -154,10 +186,11 @@ def node_exists(node_id: str, node_type=SchemaType.TERM) -> bool:
154
186
  bool
155
187
  True if the node exists, False otherwise.
156
188
  """
189
+
157
190
  def fallback():
158
191
  url = request_url(f"{api_url()}/{node_type_to_url(node_type)}/{node_id}")
159
192
  result = _safe_get_request(url)
160
- return result is not None and '@id' in result
193
+ return result is not None and "@id" in result
161
194
 
162
195
  try:
163
196
  return _exists(node_to_path(node_type, node_id))
@@ -165,7 +198,9 @@ def node_exists(node_id: str, node_type=SchemaType.TERM) -> bool:
165
198
  return fallback()
166
199
 
167
200
 
168
- def search(query: dict, fields=['@type', '@id', 'name'], limit=10, offset=0, sort=None) -> list:
201
+ def search(
202
+ query: dict, fields=["@type", "@id", "name"], limit=10, offset=0, sort=None
203
+ ) -> list:
169
204
  """
170
205
  Executes a raw search on the HESTIA Platform.
171
206
 
@@ -189,13 +224,16 @@ def search(query: dict, fields=['@type', '@id', 'name'], limit=10, offset=0, sor
189
224
  List[JSON]
190
225
  List of Nodes (as JSON) found.
191
226
  """
192
- return _safe_post_request(f"{api_url()}/search", {
193
- 'query': query,
194
- 'limit': limit,
195
- 'offset': offset,
196
- 'fields': fields,
197
- **({'sort': sort} if sort is not None else {})
198
- }).get('results', [])
227
+ return _safe_post_request(
228
+ f"{api_url()}/search",
229
+ {
230
+ "query": query,
231
+ "limit": limit,
232
+ "offset": offset,
233
+ "fields": fields,
234
+ **({"sort": sort} if sort is not None else {}),
235
+ },
236
+ ).get("results", [])
199
237
 
200
238
 
201
239
  def find_node(node_type: SchemaType, args: dict, limit=10) -> list:
@@ -217,10 +255,12 @@ def find_node(node_type: SchemaType, args: dict, limit=10) -> list:
217
255
  List[JSON]
218
256
  List of Nodes (as JSON) found.
219
257
  """
220
- query_args = list(map(lambda key: _match_key_value(key, args.get(key)), args.keys()))
221
- must = [{'match': {'@type': node_type.value}}]
258
+ query_args = list(
259
+ map(lambda key: _match_key_value(key, args.get(key)), args.keys())
260
+ )
261
+ must = [{"match": {"@type": node_type.value}}]
222
262
  must.extend(query_args)
223
- return search(query={'bool': {'must': must}}, limit=limit)
263
+ return search(query={"bool": {"must": must}}, limit=limit)
224
264
 
225
265
 
226
266
  def find_node_exact(node_type: SchemaType, args: dict) -> dict:
@@ -240,10 +280,12 @@ def find_node_exact(node_type: SchemaType, args: dict) -> dict:
240
280
  JSON
241
281
  JSON of the node if found, else `None`.
242
282
  """
243
- query_args = list(map(lambda key: _match_key_value(f"{key}.keyword", args.get(key)), args.keys()))
244
- must = [{'match': {'@type': node_type.value}}]
283
+ query_args = list(
284
+ map(lambda key: _match_key_value(f"{key}.keyword", args.get(key)), args.keys())
285
+ )
286
+ must = [{"match": {"@type": node_type.value}}]
245
287
  must.extend(query_args)
246
- results = search(query={'bool': {'must': must}}, limit=2)
288
+ results = search(query={"bool": {"must": must}}, limit=2)
247
289
  # do not return a duplicate
248
290
  return results[0] if len(results) == 1 else None
249
291
 
@@ -263,7 +305,7 @@ def find_term_ids_by_names(names, batch_size=1000):
263
305
  "must": [
264
306
  {
265
307
  "terms": {
266
- "name.keyword": unique_names[i: i + batch_size],
308
+ "name.keyword": unique_names[i : i + batch_size],
267
309
  }
268
310
  },
269
311
  {"term": {"@type.keyword": "Term"}},
@@ -274,7 +316,7 @@ def find_term_ids_by_names(names, batch_size=1000):
274
316
  }
275
317
  results = search(query=query, limit=batch_size, fields=["@id", "name"])
276
318
  for term in results:
277
- result[term.get('name')] = term.get('@id')
319
+ result[term.get("name")] = term.get("@id")
278
320
  missing_names = unique_names_set - set(result.keys())
279
321
  if len(missing_names):
280
322
  raise Exception(f"Failed to find ids for names: {'; '.join(missing_names)}")
@@ -12,26 +12,31 @@ from .model import filter_list_term_type
12
12
 
13
13
 
14
14
  def get_lookup_value(blank_node: dict, column: str):
15
- term = blank_node.get('term', {})
15
+ term = blank_node.get("term", {})
16
16
  table_name = f"{term.get('termType')}.csv" if term else None
17
- value = get_table_value(
18
- download_lookup(table_name), 'term.id', term.get('@id'), column
19
- ) if table_name else None
17
+ value = (
18
+ get_table_value(download_lookup(table_name), "term.id", term.get("@id"), column)
19
+ if table_name
20
+ else None
21
+ )
20
22
  return value
21
23
 
22
24
 
23
- def group_by_keys(values: list, group_keys: list = ['term']):
25
+ def group_by_keys(values: list, group_keys: list = ["term"]):
24
26
  def node_value(value):
25
27
  return (
26
- value.get('@id') if isinstance(value, dict) else
27
- list(map(node_value, value)) if isinstance(value, list) else
28
- value
28
+ value.get("@id")
29
+ if isinstance(value, dict)
30
+ else list(map(node_value, value)) if isinstance(value, list) else value
29
31
  )
30
32
 
31
33
  def run(group: dict, node: dict):
32
- group_key = '-'.join(flatten(non_empty_list([node_value(node.get(v)) for v in group_keys])))
34
+ group_key = "-".join(
35
+ flatten(non_empty_list([node_value(node.get(v)) for v in group_keys]))
36
+ )
33
37
  group[group_key] = group.get(group_key, []) + [node]
34
38
  return group
39
+
35
40
  return reduce(run, values, {})
36
41
 
37
42
 
@@ -39,11 +44,12 @@ class ArrayTreatment(Enum):
39
44
  """
40
45
  Enum representing different treatments for arrays of values.
41
46
  """
42
- MEAN = 'mean'
43
- MODE = 'mode'
44
- SUM = 'sum'
45
- FIRST = 'first'
46
- LAST = 'last'
47
+
48
+ MEAN = "mean"
49
+ MODE = "mode"
50
+ SUM = "sum"
51
+ FIRST = "first"
52
+ LAST = "last"
47
53
 
48
54
 
49
55
  def _should_run_array_treatment(value):
@@ -52,11 +58,21 @@ def _should_run_array_treatment(value):
52
58
 
53
59
  DEFAULT_ARRAY_TREATMENT = ArrayTreatment.MEAN
54
60
  ARRAY_TREATMENT_TO_REDUCER = {
55
- ArrayTreatment.MEAN: lambda value: mean(non_empty_list(value)) if _should_run_array_treatment(value) else None,
56
- ArrayTreatment.MODE: lambda value: mode(non_empty_list(value)) if _should_run_array_treatment(value) else None,
57
- ArrayTreatment.SUM: lambda value: sum(non_empty_list(value)) if _should_run_array_treatment(value) else None,
58
- ArrayTreatment.FIRST: lambda value: value[0] if _should_run_array_treatment(value) else None,
59
- ArrayTreatment.LAST: lambda value: value[-1] if _should_run_array_treatment(value) else None
61
+ ArrayTreatment.MEAN: lambda value: (
62
+ mean(non_empty_list(value)) if _should_run_array_treatment(value) else None
63
+ ),
64
+ ArrayTreatment.MODE: lambda value: (
65
+ mode(non_empty_list(value)) if _should_run_array_treatment(value) else None
66
+ ),
67
+ ArrayTreatment.SUM: lambda value: (
68
+ sum(non_empty_list(value)) if _should_run_array_treatment(value) else None
69
+ ),
70
+ ArrayTreatment.FIRST: lambda value: (
71
+ value[0] if _should_run_array_treatment(value) else None
72
+ ),
73
+ ArrayTreatment.LAST: lambda value: (
74
+ value[-1] if _should_run_array_treatment(value) else None
75
+ ),
60
76
  }
61
77
  """
62
78
  A dictionary mapping ArrayTreatment enums to corresponding reducer functions.
@@ -66,7 +82,7 @@ A dictionary mapping ArrayTreatment enums to corresponding reducer functions.
66
82
  def _retrieve_array_treatment(
67
83
  node: dict,
68
84
  is_larger_unit: bool = False,
69
- default: ArrayTreatment = ArrayTreatment.MEAN
85
+ default: ArrayTreatment = ArrayTreatment.MEAN,
70
86
  ) -> ArrayTreatment:
71
87
  """
72
88
  Retrieves the array treatment for a given node.
@@ -90,27 +106,26 @@ def _retrieve_array_treatment(
90
106
  The retrieved array treatment.
91
107
 
92
108
  """
93
- ARRAY_TREATMENT_LOOKUPS = [
94
- 'arrayTreatmentLargerUnitOfTime',
95
- 'arrayTreatment'
96
- ]
97
- lookup = ARRAY_TREATMENT_LOOKUPS[0] if is_larger_unit else ARRAY_TREATMENT_LOOKUPS[1]
109
+ ARRAY_TREATMENT_LOOKUPS = ["arrayTreatmentLargerUnitOfTime", "arrayTreatment"]
110
+ lookup = (
111
+ ARRAY_TREATMENT_LOOKUPS[0] if is_larger_unit else ARRAY_TREATMENT_LOOKUPS[1]
112
+ )
98
113
 
99
114
  lookup_value = get_lookup_value(node, lookup)
100
115
 
101
116
  return next(
102
117
  (treatment for treatment in ArrayTreatment if treatment.value == lookup_value),
103
- default
118
+ default,
104
119
  )
105
120
 
106
121
 
107
122
  def get_node_value(
108
123
  node: dict,
109
- key: str = 'value',
124
+ key: str = "value",
110
125
  is_larger_unit: bool = False,
111
126
  array_treatment: Optional[ArrayTreatment] = None,
112
127
  default_array_treatment: Optional[ArrayTreatment] = ArrayTreatment.MEAN,
113
- default: Any = 0
128
+ default: Any = 0,
114
129
  ) -> Union[float, bool]:
115
130
  """
116
131
  Get the value from the dictionary representing the node,
@@ -136,26 +151,44 @@ def get_node_value(
136
151
  float | bool
137
152
  The extracted value from the node.
138
153
  """
139
- value = node.get(key)
140
-
141
- reducer = ARRAY_TREATMENT_TO_REDUCER[(
142
- array_treatment or
143
- _retrieve_array_treatment(node, is_larger_unit=is_larger_unit, default=default_array_treatment)
144
- )] if isinstance(value, list) and len(value) > 0 else None
154
+ value = (node or {}).get(key)
155
+
156
+ reducer = (
157
+ ARRAY_TREATMENT_TO_REDUCER[
158
+ (
159
+ array_treatment
160
+ or _retrieve_array_treatment(
161
+ node, is_larger_unit=is_larger_unit, default=default_array_treatment
162
+ )
163
+ )
164
+ ]
165
+ if isinstance(value, list) and len(value) > 0
166
+ else None
167
+ )
145
168
 
146
- return reducer(value) if reducer else (
147
- value if any([isinstance(value, float), isinstance(value, int), isinstance(value, bool)]) else
148
- default if not non_empty_value(value) else
149
- value
169
+ return (
170
+ reducer(value)
171
+ if reducer
172
+ else (
173
+ value
174
+ if any(
175
+ [
176
+ isinstance(value, float),
177
+ isinstance(value, int),
178
+ isinstance(value, bool),
179
+ ]
180
+ )
181
+ else default if not non_empty_value(value) else value
182
+ )
150
183
  )
151
184
 
152
185
 
153
- _BLANK_NODE_GROUPING_KEYS = {
154
- TermTermType.EMISSION: ['methodModel']
155
- }
186
+ _BLANK_NODE_GROUPING_KEYS = {TermTermType.EMISSION: ["methodModel"]}
156
187
 
157
188
 
158
- def get_blank_nodes_calculation_status(node: dict, list_key: str, termType: TermTermType):
189
+ def get_blank_nodes_calculation_status(
190
+ node: dict, list_key: str, termType: TermTermType
191
+ ):
159
192
  """
160
193
  Get calculation status for a Node and a list of Blank node.
161
194
  Example: get the calculation status for all emissions included in the HESTIA system boundary.
@@ -178,31 +211,38 @@ def get_blank_nodes_calculation_status(node: dict, list_key: str, termType: Term
178
211
  """
179
212
  all_term_ids = cycle_emissions_in_system_boundary(node, termType=termType)
180
213
  blank_nodes = filter_list_term_type(node.get(list_key, []), termType)
181
- blank_nodes_by_term = group_by_keys(blank_nodes, ['term'])
214
+ blank_nodes_by_term = group_by_keys(blank_nodes, ["term"])
182
215
  blank_nodes_grouping_keys = _BLANK_NODE_GROUPING_KEYS.get(termType) or []
183
216
 
184
217
  def blank_node_data(blank_nodes: list):
185
- value = get_node_value({
186
- 'term': blank_nodes[0].get('term'),
187
- 'value': list(map(get_node_value, blank_nodes))
188
- })
189
- inputs = flatten(map(lambda v: v.get('inputs', []), blank_nodes))
190
- return {
191
- 'value': value
192
- } | ({
193
- 'inputs': sorted(list(map(lambda v: v.get('@id'), inputs)))
194
- } if inputs else {})
218
+ value = get_node_value(
219
+ {
220
+ "term": blank_nodes[0].get("term"),
221
+ "value": list(map(get_node_value, blank_nodes)),
222
+ }
223
+ )
224
+ inputs = flatten(map(lambda v: v.get("inputs", []), blank_nodes))
225
+ return {"value": value} | (
226
+ {"inputs": sorted(list(map(lambda v: v.get("@id"), inputs)))}
227
+ if inputs
228
+ else {}
229
+ )
195
230
 
196
231
  def map_blank_node(term_id: str):
197
232
  values = blank_nodes_by_term.get(term_id, [])
198
- grouped_blank_nodes = group_by_keys(values, blank_nodes_grouping_keys) if blank_nodes_grouping_keys else {}
233
+ grouped_blank_nodes = (
234
+ group_by_keys(values, blank_nodes_grouping_keys)
235
+ if blank_nodes_grouping_keys
236
+ else {}
237
+ )
199
238
  return (
200
- {} if not values else
201
- {
202
- k: blank_node_data(v)
203
- for k, v in grouped_blank_nodes.items()
204
- } if grouped_blank_nodes else
205
- blank_node_data([values[0]])
239
+ {}
240
+ if not values
241
+ else (
242
+ {k: blank_node_data(v) for k, v in grouped_blank_nodes.items()}
243
+ if grouped_blank_nodes
244
+ else blank_node_data([values[0]])
245
+ )
206
246
  )
207
247
 
208
248
  return {term_id: map_blank_node(term_id) for term_id in all_term_ids}
@@ -0,0 +1,75 @@
1
+ import pandas as pd
2
+
3
+ from .cycle import get_cycle_emissions_calculation_status
4
+
5
+
6
+ def _emissions_color(row):
7
+ color = (
8
+ "red"
9
+ if row["emissions-missing"] > 0
10
+ else "yellow" if row["emissions-incomplete"] > 0 else "lightgreen"
11
+ )
12
+ return [f"background-color: {color}"] * len(row)
13
+
14
+
15
+ def _emissions_with_status(cycle: dict):
16
+ emissions = get_cycle_emissions_calculation_status(cycle)
17
+ all_emissions = emissions.keys()
18
+ # an emission is missing if there is no value (ignore `missingInputs`)
19
+ missing_emissions = set(
20
+ [
21
+ k
22
+ for k, v in emissions.items()
23
+ if len((set(v.keys()) - set(["missingInputs"]))) == 0
24
+ ]
25
+ )
26
+ # an emission is incomplete if it has missing inputs
27
+ incomplete_emissions = set(
28
+ [
29
+ k
30
+ for k, v in emissions.items()
31
+ if all([len(v.get("missingInputs", [])) > 0, k not in missing_emissions])
32
+ ]
33
+ )
34
+ complete_emissions = set(
35
+ [
36
+ k
37
+ for k, v in emissions.items()
38
+ if all(
39
+ [
40
+ len(v.get("missingInputs", [])) == 0,
41
+ len((set(v.keys()) - set(["missingInputs"]))) > 0,
42
+ ]
43
+ )
44
+ ]
45
+ )
46
+ return {
47
+ "emissions-total": len(all_emissions),
48
+ "emissions-complete": len(complete_emissions),
49
+ "emissions-incomplete": len(incomplete_emissions),
50
+ "emissions-missing": len(missing_emissions),
51
+ "emissions": emissions,
52
+ }
53
+
54
+
55
+ def _handle_lists(df: pd.DataFrame, columns: list):
56
+ for col in columns:
57
+ df[col] = df[col].apply(lambda v: ";".join(v) if isinstance(v, list) else v)
58
+ return df
59
+
60
+
61
+ def get_nodes_calculations_status_dataframe(nodes: list, file_format: str = "excel"):
62
+ cycles_status = [
63
+ {"id": cycle.get("@id") or cycle.get("id")} | _emissions_with_status(cycle)
64
+ for cycle in nodes
65
+ if (cycle.get("@type") or cycle.get("type")) == "Cycle"
66
+ ]
67
+ df = pd.json_normalize(cycles_status, errors="ignore")
68
+ # convert list of inputs to semi-column strings
69
+ list_columns = [
70
+ col
71
+ for col in df.columns
72
+ if col.endswith(".inputs") or col.endswith(".missingInputs")
73
+ ]
74
+ df = _handle_lists(df, list_columns)
75
+ return df.style.apply(_emissions_color, axis=1) if file_format == "excel" else df
@@ -5,11 +5,9 @@ from .blank_node import get_blank_nodes_calculation_status
5
5
 
6
6
 
7
7
  def _extend_missing_inputs(value: dict, input_ids: set):
8
- included_inputs = set(flatten([
9
- v.get('inputs', []) for v in value.values()
10
- ]))
8
+ included_inputs = set(flatten([v.get("inputs", []) for v in value.values()]))
11
9
  missing_inputs = input_ids - included_inputs
12
- return {'missingInputs': sorted(list(missing_inputs))} if missing_inputs else {}
10
+ return {"missingInputs": sorted(list(missing_inputs))} if missing_inputs else {}
13
11
 
14
12
 
15
13
  def get_cycle_emissions_calculation_status(cycle: dict):
@@ -28,9 +26,11 @@ def get_cycle_emissions_calculation_status(cycle: dict):
28
26
  and the resulting calculation as value, containing the recalculated `value`, `method` and `methodTier`.
29
27
  Note: if a calculation fails for an emission, the `value` is an empty dictionary.
30
28
  """
31
- status = get_blank_nodes_calculation_status(cycle, 'emissions', TermTermType.EMISSION)
32
- input_ids = set([v.get('term', {}).get('@id') for v in cycle.get('inputs', [])])
29
+ status = get_blank_nodes_calculation_status(
30
+ cycle, "emissions", TermTermType.EMISSION
31
+ )
32
+ input_ids = set([v.get("term", {}).get("@id") for v in cycle.get("inputs", [])])
33
33
  return {
34
- k: v | (_extend_missing_inputs(v, input_ids) if 'InputsProduction' in k else {})
34
+ k: v | (_extend_missing_inputs(v, input_ids) if "InputsProduction" in k else {})
35
35
  for k, v in status.items()
36
36
  }
@@ -61,7 +61,10 @@ def is_in_days(date: str) -> bool:
61
61
  bool
62
62
  True if the date contains the year, month and day.
63
63
  """
64
- return date is not None and re.compile(r'^[\d]{4}\-[\d]{2}\-[\d]{2}').match(date) is not None
64
+ return (
65
+ date is not None
66
+ and re.compile(r"^[\d]{4}\-[\d]{2}\-[\d]{2}").match(date) is not None
67
+ )
65
68
 
66
69
 
67
70
  def is_in_months(date: str) -> bool:
@@ -78,4 +81,6 @@ def is_in_months(date: str) -> bool:
78
81
  bool
79
82
  True if the date contains the year, month but no day.
80
83
  """
81
- return date is not None and re.compile(r'^[\d]{4}\-[\d]{2}$').match(date) is not None
84
+ return (
85
+ date is not None and re.compile(r"^[\d]{4}\-[\d]{2}$").match(date) is not None
86
+ )
@@ -9,7 +9,7 @@ def calc_descriptive_stats(
9
9
  arr: NDArray,
10
10
  stats_definition: Union[Enum, str],
11
11
  axis: Optional[int] = None,
12
- decimals: int = 6
12
+ decimals: int = 6,
13
13
  ) -> dict:
14
14
  """
15
15
  Calculate the descriptive stats for an array row-wise, round them to specified number of decimal places and return
@@ -34,9 +34,9 @@ def calc_descriptive_stats(
34
34
  max_ = around(arr.max(axis=axis), decimals)
35
35
 
36
36
  observations = (
37
- [arr.shape[0]] * arr.shape[1] if axis == 0
38
- else [arr.shape[1]] * arr.shape[0] if axis == 1
39
- else [arr.size]
37
+ [arr.shape[0]] * arr.shape[1]
38
+ if axis == 0
39
+ else [arr.shape[1]] * arr.shape[0] if axis == 1 else [arr.size]
40
40
  )
41
41
 
42
42
  return {
@@ -44,6 +44,10 @@ def calc_descriptive_stats(
44
44
  "sd": list(sd) if isinstance(sd, Iterable) else [sd],
45
45
  "min": list(min_) if isinstance(min_, Iterable) else [min_],
46
46
  "max": list(max_) if isinstance(max_, Iterable) else [max_],
47
- "statsDefinition": stats_definition.value if isinstance(stats_definition, Enum) else stats_definition,
48
- "observations": observations
47
+ "statsDefinition": (
48
+ stats_definition.value
49
+ if isinstance(stats_definition, Enum)
50
+ else stats_definition
51
+ ),
52
+ "observations": observations,
49
53
  }