hestia-earth-utils 0.16.9__py3-none-any.whl → 0.16.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/utils/api.py +78 -36
- hestia_earth/utils/blank_node.py +101 -60
- hestia_earth/utils/calculation_status.py +45 -35
- hestia_earth/utils/cycle.py +7 -7
- hestia_earth/utils/date.py +7 -2
- hestia_earth/utils/descriptive_stats.py +10 -6
- hestia_earth/utils/emission.py +26 -15
- hestia_earth/utils/lookup.py +62 -28
- hestia_earth/utils/lookup_utils.py +89 -63
- hestia_earth/utils/model.py +45 -40
- hestia_earth/utils/pipeline.py +179 -90
- hestia_earth/utils/pivot/_shared.py +16 -12
- hestia_earth/utils/pivot/pivot_csv.py +35 -18
- hestia_earth/utils/pivot/pivot_json.py +34 -18
- hestia_earth/utils/request.py +17 -6
- hestia_earth/utils/stats.py +89 -68
- hestia_earth/utils/storage/_azure_client.py +17 -6
- hestia_earth/utils/storage/_local_client.py +8 -3
- hestia_earth/utils/storage/_s3_client.py +27 -22
- hestia_earth/utils/storage/_sns_client.py +7 -2
- hestia_earth/utils/term.py +5 -5
- hestia_earth/utils/tools.py +50 -21
- hestia_earth/utils/version.py +1 -1
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/METADATA +1 -1
- hestia_earth_utils-0.16.10.dist-info/RECORD +33 -0
- hestia_earth_utils-0.16.9.dist-info/RECORD +0 -33
- {hestia_earth_utils-0.16.9.data → hestia_earth_utils-0.16.10.data}/scripts/hestia-format-upload +0 -0
- {hestia_earth_utils-0.16.9.data → hestia_earth_utils-0.16.10.data}/scripts/hestia-pivot-csv +0 -0
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/WHEEL +0 -0
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/top_level.txt +0 -0
hestia_earth/utils/api.py
CHANGED
@@ -9,9 +9,13 @@ from .request import request_url, api_url, api_access_token
 
 
 def _match_key_value(key: str, value):
-    first_key = key.split('.')[0]
-    query = {'match': {key: value}}
-    return {'nested': {'path': first_key, 'query': query}} if first_key in NESTED_SEARCHABLE_KEYS else query
+    first_key = key.split(".")[0]
+    query = {"match": {key: value}}
+    return (
+        {"nested": {"path": first_key, "query": query}}
+        if first_key in NESTED_SEARCHABLE_KEYS
+        else query
+    )
 
 
 def _retry_request_error(func, retry_max: int = 5):
@@ -30,26 +34,28 @@ def _retry_request_error(func, retry_max: int = 5):
 def _safe_get_request(url: str, res_error=None):
     def exec():
         try:
-            headers = {'Content-Type': 'application/json'}
+            headers = {"Content-Type": "application/json"}
             access_token = api_access_token()
             if access_token:
-                headers['X-Access-Token'] = access_token
+                headers["X-Access-Token"] = access_token
             return requests.get(url, headers=headers).json()
         except requests.exceptions.RequestException:
            return res_error
+
     return _retry_request_error(exec)
 
 
 def _safe_post_request(url: str, body: dict, res_error={}):
     def exec():
         try:
-            headers = {'Content-Type': 'application/json'}
+            headers = {"Content-Type": "application/json"}
             access_token = api_access_token()
             if access_token:
-                headers['X-Access-Token'] = access_token
+                headers["X-Access-Token"] = access_token
             return requests.post(url, json.dumps(body), headers=headers).json()
         except requests.exceptions.RequestException:
             return res_error
+
     return _retry_request_error(exec)
 
 
@@ -63,11 +69,21 @@ def node_type_to_url(node_type: SchemaType):
 
 def node_to_path(node_type: SchemaType, node_id: str, data_state=None):
     jsonld_path = os.path.join(_parse_node_type(node_type), f"{node_id}.jsonld")
-    return jsonld_path if data_state is None or data_state == 'original' or len(data_state) == 0 \
-        else os.path.join(data_state, jsonld_path)
-
-
-def find_related(node_type: SchemaType, id: str, related_type: SchemaType, limit=100, offset=0, relationship=None):
+    return (
+        jsonld_path
+        if data_state is None or data_state == "original" or len(data_state) == 0
+        else os.path.join(data_state, jsonld_path)
+    )
+
+
+def find_related(
+    node_type: SchemaType,
+    id: str,
+    related_type: SchemaType,
+    limit=100,
+    offset=0,
+    relationship=None,
+):
     """
     Return the list of related Nodes by going through a "relationship".
     You can navigate the HESTIA Graph Database using this method.
@@ -88,16 +104,26 @@ def find_related(node_type: SchemaType, id: str, related_type: SchemaType, limit
     relationship
         The relationship used to connect both Node. See the API for more information.
     """
-    url = request_url(f"{api_url()}/{node_type_to_url(node_type)}/{id}/{node_type_to_url(related_type)}",
-                      limit=limit, offset=offset, relationship=relationship)
+    url = request_url(
+        f"{api_url()}/{node_type_to_url(node_type)}/{id}/{node_type_to_url(related_type)}",
+        limit=limit,
+        offset=offset,
+        relationship=relationship,
+    )
     response = _safe_get_request(url)
     # handle errors
-    return response.get('results', []) if isinstance(response, dict) else response
+    return response.get("results", []) if isinstance(response, dict) else response
 
 
-def _exec_download_hestia(node_id: str, node_type=SchemaType.TERM, data_state='', mode='') -> dict:
+def _exec_download_hestia(
+    node_id: str, node_type=SchemaType.TERM, data_state="", mode=""
+) -> dict:
     def fallback():
-        url = request_url(f"{api_url()}/{node_type_to_url(node_type)}/{node_id}", dataState=data_state, mode=mode)
+        url = request_url(
+            f"{api_url()}/{node_type_to_url(node_type)}/{node_id}",
+            dataState=data_state,
+            mode=mode,
+        )
         return _safe_get_request(url)
 
     try:
@@ -111,7 +137,9 @@ def _exec_download_hestia(node_id: str, node_type=SchemaType.TERM, data_state=''
 _exec_download_hestia_cached = cache(_exec_download_hestia)
 
 
-def download_hestia(node_id: str, node_type=SchemaType.TERM, data_state='', mode='') -> dict:
+def download_hestia(
+    node_id: str, node_type=SchemaType.TERM, data_state="", mode=""
+) -> dict:
     """
     Download a Node from the HESTIA Database.
 
@@ -134,7 +162,11 @@ def download_hestia(node_id: str, node_type=SchemaType.TERM, data_state='', mode
         The `JSON` content of the Node.
     """
     # cache all requests to `Term` by default, as the values are not likely to change during a single execution
-    download_func = _exec_download_hestia_cached if _parse_node_type(node_type) == 'Term' else _exec_download_hestia
+    download_func = (
+        _exec_download_hestia_cached
+        if _parse_node_type(node_type) == "Term"
+        else _exec_download_hestia
+    )
     return download_func(node_id, node_type, data_state, mode)
 
 
@@ -154,10 +186,11 @@ def node_exists(node_id: str, node_type=SchemaType.TERM) -> bool:
     bool
         True if the node exists, False otherwise.
     """
+
     def fallback():
         url = request_url(f"{api_url()}/{node_type_to_url(node_type)}/{node_id}")
         result = _safe_get_request(url)
-        return result is not None and '@id' in result
+        return result is not None and "@id" in result
 
     try:
         return _exists(node_to_path(node_type, node_id))
@@ -165,7 +198,9 @@ def node_exists(node_id: str, node_type=SchemaType.TERM) -> bool:
         return fallback()
 
 
-def search(query: dict, fields=['@type', '@id', 'name'], limit=10, offset=0, sort=None) -> list:
+def search(
+    query: dict, fields=["@type", "@id", "name"], limit=10, offset=0, sort=None
+) -> list:
     """
     Executes a raw search on the HESTIA Platform.
 
@@ -189,13 +224,16 @@ def search(query: dict, fields=['@type', '@id', 'name'], limit=10, offset=0, sor
     List[JSON]
         List of Nodes (as JSON) found.
     """
-    return _safe_post_request(f"{api_url()}/search", {
-        'query': query,
-        'limit': limit,
-        'offset': offset,
-        'fields': fields,
-        **({'sort': sort} if sort is not None else {})
-    }).get('results', [])
+    return _safe_post_request(
+        f"{api_url()}/search",
+        {
+            "query": query,
+            "limit": limit,
+            "offset": offset,
+            "fields": fields,
+            **({"sort": sort} if sort is not None else {}),
+        },
+    ).get("results", [])
 
 
 def find_node(node_type: SchemaType, args: dict, limit=10) -> list:
@@ -217,10 +255,12 @@ def find_node(node_type: SchemaType, args: dict, limit=10) -> list:
     List[JSON]
         List of Nodes (as JSON) found.
     """
-    query_args = list(map(lambda key: _match_key_value(key, args.get(key)), args.keys()))
-    must = [{'match': {'@type': node_type.value}}]
+    query_args = list(
+        map(lambda key: _match_key_value(key, args.get(key)), args.keys())
+    )
+    must = [{"match": {"@type": node_type.value}}]
     must.extend(query_args)
-    return search(query={'bool': {'must': must}}, limit=limit)
+    return search(query={"bool": {"must": must}}, limit=limit)
 
 
 def find_node_exact(node_type: SchemaType, args: dict) -> dict:
@@ -240,10 +280,12 @@ def find_node_exact(node_type: SchemaType, args: dict) -> dict:
     JSON
         JSON of the node if found, else `None`.
     """
-    query_args = list(map(lambda key: _match_key_value(f"{key}.keyword", args.get(key)), args.keys()))
-    must = [{'match': {'@type': node_type.value}}]
+    query_args = list(
+        map(lambda key: _match_key_value(f"{key}.keyword", args.get(key)), args.keys())
+    )
+    must = [{"match": {"@type": node_type.value}}]
     must.extend(query_args)
-    results = search(query={'bool': {'must': must}}, limit=2)
+    results = search(query={"bool": {"must": must}}, limit=2)
     # do not return a duplicate
     return results[0] if len(results) == 1 else None
 
@@ -263,7 +305,7 @@ def find_term_ids_by_names(names, batch_size=1000):
                 "must": [
                     {
                         "terms": {
-                            "name.keyword": unique_names[i: i + batch_size],
+                            "name.keyword": unique_names[i : i + batch_size],
                         }
                     },
                     {"term": {"@type.keyword": "Term"}},
@@ -274,7 +316,7 @@ def find_term_ids_by_names(names, batch_size=1000):
         }
         results = search(query=query, limit=batch_size, fields=["@id", "name"])
         for term in results:
-            result[term.get('name')] = term.get('@id')
+            result[term.get("name")] = term.get("@id")
     missing_names = unique_names_set - set(result.keys())
     if len(missing_names):
         raise Exception(f"Failed to find ids for names: {'; '.join(missing_names)}")
hestia_earth/utils/blank_node.py
CHANGED
@@ -12,26 +12,31 @@ from .model import filter_list_term_type
 
 
 def get_lookup_value(blank_node: dict, column: str):
-    term = blank_node.get('term', {})
+    term = blank_node.get("term", {})
     table_name = f"{term.get('termType')}.csv" if term else None
-    value = get_table_value(
-        download_lookup(table_name),
-        'term.id', term.get('@id'), column) if table_name else None
+    value = (
+        get_table_value(download_lookup(table_name), "term.id", term.get("@id"), column)
+        if table_name
+        else None
+    )
     return value
 
 
-def group_by_keys(values: list, group_keys: list = ['term']):
+def group_by_keys(values: list, group_keys: list = ["term"]):
     def node_value(value):
         return (
-            value.get('@id') if isinstance(value, dict) else
-            list(map(node_value, value)) if isinstance(value, list) else
-            value
+            value.get("@id")
+            if isinstance(value, dict)
+            else list(map(node_value, value)) if isinstance(value, list) else value
         )
 
     def run(group: dict, node: dict):
-        group_key = '-'.join(flatten(non_empty_list([node_value(node.get(v)) for v in group_keys])))
+        group_key = "-".join(
+            flatten(non_empty_list([node_value(node.get(v)) for v in group_keys]))
+        )
         group[group_key] = group.get(group_key, []) + [node]
         return group
+
     return reduce(run, values, {})
 
 
@@ -39,11 +44,12 @@ class ArrayTreatment(Enum):
     """
     Enum representing different treatments for arrays of values.
     """
-    MEAN = 'mean'
-    MODE = 'mode'
-    SUM = 'sum'
-    FIRST = 'first'
-    LAST = 'last'
+
+    MEAN = "mean"
+    MODE = "mode"
+    SUM = "sum"
+    FIRST = "first"
+    LAST = "last"
 
 
 def _should_run_array_treatment(value):
@@ -52,11 +58,21 @@ def _should_run_array_treatment(value):
 
 DEFAULT_ARRAY_TREATMENT = ArrayTreatment.MEAN
 ARRAY_TREATMENT_TO_REDUCER = {
-    ArrayTreatment.MEAN: lambda value: mean(non_empty_list(value)) if _should_run_array_treatment(value) else None,
-    ArrayTreatment.MODE: lambda value: mode(non_empty_list(value)) if _should_run_array_treatment(value) else None,
-    ArrayTreatment.SUM: lambda value: sum(non_empty_list(value)) if _should_run_array_treatment(value) else None,
-    ArrayTreatment.FIRST: lambda value: value[0] if _should_run_array_treatment(value) else None,
-    ArrayTreatment.LAST: lambda value: value[-1] if _should_run_array_treatment(value) else None,
+    ArrayTreatment.MEAN: lambda value: (
+        mean(non_empty_list(value)) if _should_run_array_treatment(value) else None
+    ),
+    ArrayTreatment.MODE: lambda value: (
+        mode(non_empty_list(value)) if _should_run_array_treatment(value) else None
+    ),
+    ArrayTreatment.SUM: lambda value: (
+        sum(non_empty_list(value)) if _should_run_array_treatment(value) else None
+    ),
+    ArrayTreatment.FIRST: lambda value: (
+        value[0] if _should_run_array_treatment(value) else None
+    ),
+    ArrayTreatment.LAST: lambda value: (
+        value[-1] if _should_run_array_treatment(value) else None
+    ),
 }
 """
 A dictionary mapping ArrayTreatment enums to corresponding reducer functions.
@@ -66,7 +82,7 @@ A dictionary mapping ArrayTreatment enums to corresponding reducer functions.
 def _retrieve_array_treatment(
     node: dict,
     is_larger_unit: bool = False,
-    default: ArrayTreatment = ArrayTreatment.MEAN
+    default: ArrayTreatment = ArrayTreatment.MEAN,
 ) -> ArrayTreatment:
     """
     Retrieves the array treatment for a given node.
@@ -90,27 +106,26 @@ def _retrieve_array_treatment(
        The retrieved array treatment.
 
     """
-    ARRAY_TREATMENT_LOOKUPS = [
-        'arrayTreatmentLargerUnitOfTime',
-        'arrayTreatment'
-    ]
-    lookup = ARRAY_TREATMENT_LOOKUPS[0] if is_larger_unit else ARRAY_TREATMENT_LOOKUPS[1]
+    ARRAY_TREATMENT_LOOKUPS = ["arrayTreatmentLargerUnitOfTime", "arrayTreatment"]
+    lookup = (
+        ARRAY_TREATMENT_LOOKUPS[0] if is_larger_unit else ARRAY_TREATMENT_LOOKUPS[1]
+    )
 
     lookup_value = get_lookup_value(node, lookup)
 
     return next(
         (treatment for treatment in ArrayTreatment if treatment.value == lookup_value),
-        default
+        default,
     )
 
 
 def get_node_value(
     node: dict,
-    key: str = 'value',
+    key: str = "value",
     is_larger_unit: bool = False,
     array_treatment: Optional[ArrayTreatment] = None,
     default_array_treatment: Optional[ArrayTreatment] = ArrayTreatment.MEAN,
-    default: Any = 0
+    default: Any = 0,
 ) -> Union[float, bool]:
     """
     Get the value from the dictionary representing the node,
@@ -136,26 +151,45 @@ def get_node_value(
     float | bool
         The extracted value from the node.
     """
-    value = node.get(key)
-
-    reducer = ARRAY_TREATMENT_TO_REDUCER[
-        array_treatment or
-        _retrieve_array_treatment(node, is_larger_unit=is_larger_unit, default=default_array_treatment)
-    ] if isinstance(value, list) and len(value) > 0 else None
+    value = (node or {}).get(key)
+
+    reducer = (
+        ARRAY_TREATMENT_TO_REDUCER[
+            (
+                array_treatment
+                or _retrieve_array_treatment(
+                    node, is_larger_unit=is_larger_unit, default=default_array_treatment
+                )
+            )
+        ]
+        if isinstance(value, list) and len(value) > 0
+        else None
+    )
 
-    return reducer(value) if reducer else (
-        value if any([isinstance(value, float), isinstance(value, int),
-                      isinstance(value, bool), value is None])
-        else default if not non_empty_value(value) else value
-    )
+    return (
+        reducer(value)
+        if reducer
+        else (
+            value
+            if any(
+                [
+                    isinstance(value, float),
+                    isinstance(value, int),
+                    isinstance(value, bool),
+                    value is None,
+                ]
+            )
+            else default if not non_empty_value(value) else value
+        )
    )
 
 
-_BLANK_NODE_GROUPING_KEYS = {
-    TermTermType.EMISSION: ['methodModel']
-}
+_BLANK_NODE_GROUPING_KEYS = {TermTermType.EMISSION: ["methodModel"]}
 
 
-def get_blank_nodes_calculation_status(node: dict, list_key: str, termType: TermTermType):
+def get_blank_nodes_calculation_status(
+    node: dict, list_key: str, termType: TermTermType
+):
     """
     Get calculation status for a Node and a list of Blank node.
     Example: get the calculation status for all emissions included in the HESTIA system boundary.
@@ -178,31 +212,38 @@ def get_blank_nodes_calculation_status(node: dict, list_key: str, termType: Term
     """
     all_term_ids = cycle_emissions_in_system_boundary(node, termType=termType)
     blank_nodes = filter_list_term_type(node.get(list_key, []), termType)
-    blank_nodes_by_term = group_by_keys(blank_nodes, ['term'])
+    blank_nodes_by_term = group_by_keys(blank_nodes, ["term"])
     blank_nodes_grouping_keys = _BLANK_NODE_GROUPING_KEYS.get(termType) or []
 
     def blank_node_data(blank_nodes: list):
-        value = get_node_value({
-            'term': blank_nodes[0].get('term'),
-            'value': list(map(get_node_value, blank_nodes))
-        })
-        inputs = flatten(map(lambda v: v.get('inputs', []), blank_nodes))
-        return {
-            'value': value
-        } | (
-            {'inputs': sorted(list(map(lambda v: v.get('@id'), inputs)))} if inputs else {}
-        )
+        value = get_node_value(
+            {
+                "term": blank_nodes[0].get("term"),
+                "value": list(map(get_node_value, blank_nodes)),
+            }
+        )
+        inputs = flatten(map(lambda v: v.get("inputs", []), blank_nodes))
+        return {"value": value} | (
+            {"inputs": sorted(list(map(lambda v: v.get("@id"), inputs)))}
+            if inputs
+            else {}
+        )
 
     def map_blank_node(term_id: str):
         values = blank_nodes_by_term.get(term_id, [])
-        grouped_blank_nodes = group_by_keys(values, blank_nodes_grouping_keys) if blank_nodes_grouping_keys else {}
+        grouped_blank_nodes = (
+            group_by_keys(values, blank_nodes_grouping_keys)
+            if blank_nodes_grouping_keys
+            else {}
+        )
         return (
-            {} if not values else
-            {
-                k: blank_node_data(v)
-                for k, v in grouped_blank_nodes.items()
-            } if grouped_blank_nodes
-            else blank_node_data([values[0]])
+            {}
+            if not values
+            else (
+                {k: blank_node_data(v) for k, v in grouped_blank_nodes.items()}
+                if grouped_blank_nodes
+                else blank_node_data([values[0]])
+            )
        )
 
     return {term_id: map_blank_node(term_id) for term_id in all_term_ids}
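
A short sketch of `get_node_value` with an explicit `ArrayTreatment` (passing the treatment skips the lookup-table fetch done by `_retrieve_array_treatment`); the blank node below is hypothetical:

    from hestia_earth.utils.blank_node import ArrayTreatment, get_node_value

    node = {"term": {"@id": "someEmissionTermId", "termType": "emission"}, "value": [1.0, 2.0, 3.0]}

    get_node_value(node, array_treatment=ArrayTreatment.MEAN)  # expected 2.0
    get_node_value(node, array_treatment=ArrayTreatment.SUM)   # expected 6.0
    get_node_value(node, array_treatment=ArrayTreatment.LAST)  # expected 3.0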
hestia_earth/utils/calculation_status.py
CHANGED
@@ -5,9 +5,9 @@ from .cycle import get_cycle_emissions_calculation_status
 
 def _emissions_color(row):
     color = (
-        'red' if row['emissions-missing'] > 0
-        else 'yellow' if row['emissions-incomplete'] > 0
-        else 'lightgreen'
+        "red"
+        if row["emissions-missing"] > 0
+        else "yellow" if row["emissions-incomplete"] > 0 else "lightgreen"
     )
     return [f"background-color: {color}"] * len(row)
 
@@ -16,50 +16,60 @@ def _emissions_with_status(cycle: dict):
     emissions = get_cycle_emissions_calculation_status(cycle)
     all_emissions = emissions.keys()
     # an emission is missing if there is no value (ignore `missingInputs`)
-    missing_emissions = set([
-        k for k, v in emissions.items()
-        if len((set(v.keys()) - set(['missingInputs']))) == 0
-    ])
+    missing_emissions = set(
+        [
+            k
+            for k, v in emissions.items()
+            if len((set(v.keys()) - set(["missingInputs"]))) == 0
+        ]
+    )
     # an emission is incomplete if it has missing inputs
-    incomplete_emissions = set([
-        k for k, v in emissions.items()
-        if all([
-            len(v.get('missingInputs', [])) > 0,
-            k not in missing_emissions
-        ])
-    ])
-    complete_emissions = set([
-        k for k, v in emissions.items()
-        if all([
-            len(v.get('missingInputs', [])) == 0,
-            len((set(v.keys()) - set(['missingInputs']))) > 0
-        ])
-    ])
+    incomplete_emissions = set(
+        [
+            k
+            for k, v in emissions.items()
+            if all([len(v.get("missingInputs", [])) > 0, k not in missing_emissions])
+        ]
+    )
+    complete_emissions = set(
+        [
+            k
+            for k, v in emissions.items()
+            if all(
+                [
+                    len(v.get("missingInputs", [])) == 0,
+                    len((set(v.keys()) - set(["missingInputs"]))) > 0,
+                ]
+            )
+        ]
+    )
     return {
-        'emissions-total': len(all_emissions),
-        'emissions-complete': len(complete_emissions),
-        'emissions-incomplete': len(incomplete_emissions),
-        'emissions-missing': len(missing_emissions),
-        'emissions': emissions
+        "emissions-total": len(all_emissions),
+        "emissions-complete": len(complete_emissions),
+        "emissions-incomplete": len(incomplete_emissions),
+        "emissions-missing": len(missing_emissions),
+        "emissions": emissions,
     }
 
 
 def _handle_lists(df: pd.DataFrame, columns: list):
     for col in columns:
-        df[col] = df[col].apply(lambda v: ';'.join(v) if isinstance(v, list) else v)
+        df[col] = df[col].apply(lambda v: ";".join(v) if isinstance(v, list) else v)
     return df
 
 
-def get_nodes_calculations_status_dataframe(nodes: list, file_format: str = 'excel'):
+def get_nodes_calculations_status_dataframe(nodes: list, file_format: str = "excel"):
     cycles_status = [
-        {
-            'id': cycle.get('@id') or cycle.get('id')
-        } | _emissions_with_status(cycle)
+        {"id": cycle.get("@id") or cycle.get("id")} | _emissions_with_status(cycle)
         for cycle in nodes
-        if (cycle.get('@type') or cycle.get('type')) == 'Cycle'
+        if (cycle.get("@type") or cycle.get("type")) == "Cycle"
     ]
-    df = pd.json_normalize(cycles_status, errors='ignore')
+    df = pd.json_normalize(cycles_status, errors="ignore")
     # convert list of inputs to semi-column strings
-    list_columns = [col for col in df.columns if col.endswith('.inputs') or col.endswith('.missingInputs')]
+    list_columns = [
+        col
+        for col in df.columns
+        if col.endswith(".inputs") or col.endswith(".missingInputs")
+    ]
     df = _handle_lists(df, list_columns)
-    return df.style.apply(_emissions_color, axis=1) if file_format == 'excel' else df
+    return df.style.apply(_emissions_color, axis=1) if file_format == "excel" else df
hestia_earth/utils/cycle.py
CHANGED
@@ -5,11 +5,9 @@ from .blank_node import get_blank_nodes_calculation_status
 
 
 def _extend_missing_inputs(value: dict, input_ids: set):
-    included_inputs = set(flatten([
-        v.get('inputs', []) for v in value.values()
-    ]))
+    included_inputs = set(flatten([v.get("inputs", []) for v in value.values()]))
     missing_inputs = input_ids - included_inputs
-    return {'missingInputs': sorted(list(missing_inputs))} if missing_inputs else {}
+    return {"missingInputs": sorted(list(missing_inputs))} if missing_inputs else {}
 
 
 def get_cycle_emissions_calculation_status(cycle: dict):
@@ -28,9 +26,11 @@ def get_cycle_emissions_calculation_status(cycle: dict):
     and the resulting calculation as value, containing the recalculated `value`, `method` and `methodTier`.
     Note: if a calculation fails for an emission, the `value` is an empty dictionary.
     """
-    status = get_blank_nodes_calculation_status(cycle, 'emissions', TermTermType.EMISSION)
-    input_ids = set([v.get('term', {}).get('@id') for v in cycle.get('inputs', [])])
+    status = get_blank_nodes_calculation_status(
+        cycle, "emissions", TermTermType.EMISSION
+    )
+    input_ids = set([v.get("term", {}).get("@id") for v in cycle.get("inputs", [])])
     return {
-        k: v | (_extend_missing_inputs(v, input_ids) if 'InputsProduction' in k else {})
+        k: v | (_extend_missing_inputs(v, input_ids) if "InputsProduction" in k else {})
         for k, v in status.items()
     }
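
From the code above, the returned mapping is keyed by emission term `@id` (with values grouped by `methodModel` via `_BLANK_NODE_GROUPING_KEYS`), and `InputsProduction` emissions gain a `missingInputs` list when some cycle inputs contributed no value. An illustrative, hypothetical shape (all ids and numbers invented):

    {
        "ch4ToAirEntericFermentation": {
            "someMethodModelId": {"value": 12.5}
        },
        "co2ToAirInputsProduction": {
            "otherMethodModelId": {"value": 0.8, "inputs": ["diesel"]},
            "missingInputs": ["seed"]
        }
    }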
hestia_earth/utils/date.py
CHANGED
@@ -61,7 +61,10 @@ def is_in_days(date: str) -> bool:
     bool
         True if the date contains the year, month and day.
     """
-    return date is not None and re.compile(r'^[\d]{4}\-[\d]{2}\-[\d]{2}').match(date) is not None
+    return (
+        date is not None
+        and re.compile(r"^[\d]{4}\-[\d]{2}\-[\d]{2}").match(date) is not None
+    )
 
 
 def is_in_months(date: str) -> bool:
@@ -78,4 +81,6 @@ def is_in_months(date: str) -> bool:
     bool
         True if the date contains the year, month but no day.
     """
-    return date is not None and re.compile(r'^[\d]{4}\-[\d]{2}$').match(date) is not None
+    return (
+        date is not None and re.compile(r"^[\d]{4}\-[\d]{2}$").match(date) is not None
+    )
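
Both predicates reduce to anchored regular expressions; for instance:

    from hestia_earth.utils.date import is_in_days, is_in_months

    is_in_days("2020-01-15")    # True: year, month and day
    is_in_days("2020-01")       # False: no day component
    is_in_months("2020-01")     # True: year and month only
    is_in_months("2020-01-15")  # False: the `$` anchor rejects a trailing day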
hestia_earth/utils/descriptive_stats.py
CHANGED
@@ -9,7 +9,7 @@ def calc_descriptive_stats(
     arr: NDArray,
     stats_definition: Union[Enum, str],
     axis: Optional[int] = None,
-    decimals: int = 6
+    decimals: int = 6,
 ) -> dict:
     """
     Calculate the descriptive stats for an array row-wise, round them to specified number of decimal places and return
@@ -34,9 +34,9 @@ def calc_descriptive_stats(
     max_ = around(arr.max(axis=axis), decimals)
 
     observations = (
-        [arr.shape[0]] * arr.shape[1] if axis == 0
-        else [arr.shape[1]] * arr.shape[0] if axis == 1
-        else [arr.size]
+        [arr.shape[0]] * arr.shape[1]
+        if axis == 0
+        else [arr.shape[1]] * arr.shape[0] if axis == 1 else [arr.size]
     )
 
     return {
@@ -44,6 +44,10 @@ def calc_descriptive_stats(
         "sd": list(sd) if isinstance(sd, Iterable) else [sd],
         "min": list(min_) if isinstance(min_, Iterable) else [min_],
         "max": list(max_) if isinstance(max_, Iterable) else [max_],
-        "statsDefinition": stats_definition.value if isinstance(stats_definition, Enum) else stats_definition,
-        "observations": observations
+        "statsDefinition": (
+            stats_definition.value
+            if isinstance(stats_definition, Enum)
+            else stats_definition
+        ),
+        "observations": observations,
     }