hestia-earth-aggregation 0.21.2__tar.gz → 0.21.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hestia_earth_aggregation-0.21.2/hestia_earth_aggregation.egg-info → hestia_earth_aggregation-0.21.4}/PKG-INFO +1 -1
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/__init__.py +22 -13
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/aggregate_cycles.py +102 -50
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/log.py +20 -14
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/recalculate_cycles.py +1 -1
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/__init__.py +226 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/aggregate_country_nodes.py +784 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/aggregate_weighted.py +197 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/blank_node.py +519 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/utils/combine.py +46 -23
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/utils/completeness.py +62 -32
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/utils/covariance.py +68 -40
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/cycle.py +474 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/distribution.py +208 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/emission.py +70 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/group.py +160 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/input.py +28 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/lookup.py +64 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/management.py +78 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/measurement.py +38 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/practice.py +58 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/product.py +24 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/utils/property.py +13 -9
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/quality_score.py +265 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/queries.py +503 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/site.py +100 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/source.py +22 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/term.py +95 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/utils/weights.py +184 -0
- hestia_earth_aggregation-0.21.4/hestia_earth/aggregation/version.py +1 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4/hestia_earth_aggregation.egg-info}/PKG-INFO +1 -1
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/setup.py +4 -4
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/tests/test_aggregation.py +1 -1
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/__init__.py +0 -177
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/aggregate_country_nodes.py +0 -599
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/aggregate_weighted.py +0 -133
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/blank_node.py +0 -401
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/cycle.py +0 -327
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/distribution.py +0 -148
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/emission.py +0 -55
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/group.py +0 -107
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/input.py +0 -25
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/lookup.py +0 -46
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/management.py +0 -61
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/measurement.py +0 -32
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/practice.py +0 -56
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/product.py +0 -22
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/quality_score.py +0 -199
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/queries.py +0 -427
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/site.py +0 -82
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/source.py +0 -16
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/term.py +0 -75
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/utils/weights.py +0 -140
- hestia_earth_aggregation-0.21.2/hestia_earth/aggregation/version.py +0 -1
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/LICENSE +0 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/MANIFEST.in +0 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/README.md +0 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/config/Cycle/processedFood.json +0 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth_aggregation.egg-info/SOURCES.txt +0 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth_aggregation.egg-info/dependency_links.txt +0 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth_aggregation.egg-info/requires.txt +0 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth_aggregation.egg-info/top_level.txt +0 -0
- {hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/setup.cfg +0 -0
|
@@ -9,8 +9,10 @@ from .utils.quality_score import calculate_score
|
|
|
9
9
|
|
|
10
10
|
def _mock_nb_distribution(include_distribution: bool):
    """
    Enable or disable distribution generation by patching `distribution._nb_iterations`.

    Parameters
    ----------
    include_distribution : bool
        When `True`, calls are forwarded to the genuine `_nb_iterations`.
        When `False`, the patched function returns `0` and a warning is logged.
    """
    # Always wrap the genuine implementation, never a previous patch: otherwise a call with
    # `include_distribution=False` followed by one with `True` would keep returning 0.
    original_func = getattr(
        distribution._nb_iterations, "_original_func", distribution._nb_iterations
    )

    def _nb_iterations(*args):
        return original_func(*args) if include_distribution else 0

    # remember the genuine implementation so the next patch can find it
    _nb_iterations._original_func = original_func
    distribution._nb_iterations = _nb_iterations

    if not include_distribution:
        logger.warning("Not generating distribution.")
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
def aggregate(
|
|
@@ -20,7 +22,7 @@ def aggregate(
|
|
|
20
22
|
end_year: int,
|
|
21
23
|
source: dict = None,
|
|
22
24
|
include_distribution: bool = True,
|
|
23
|
-
filter_by_country: bool = True
|
|
25
|
+
filter_by_country: bool = True,
|
|
24
26
|
):
|
|
25
27
|
"""
|
|
26
28
|
Aggregates data from HESTIA.
|
|
@@ -53,16 +55,23 @@ def aggregate(
|
|
|
53
55
|
_mock_nb_distribution(include_distribution)
|
|
54
56
|
|
|
55
57
|
now = current_time_ms()
|
|
56
|
-
logger.info(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
58
|
+
logger.info(
|
|
59
|
+
"Aggregating %s in %s for period %s to %s"
|
|
60
|
+
+ (" with distribution" if include_distribution else ""),
|
|
61
|
+
product.get("name"),
|
|
62
|
+
country.get("name"),
|
|
63
|
+
start_year,
|
|
64
|
+
end_year,
|
|
65
|
+
)
|
|
66
|
+
aggregations, countries = run_aggregate(
|
|
67
|
+
country, product, source, start_year, end_year, filter_by_country
|
|
68
|
+
)
|
|
69
|
+
logger.info("time=%s, unit=ms", current_time_ms() - now)
|
|
70
|
+
aggregations = (
|
|
71
|
+
[recalculate(agg, product) for agg in aggregations]
|
|
72
|
+
if should_recalculate(product)
|
|
73
|
+
else aggregations
|
|
74
|
+
)
|
|
66
75
|
aggregations = [
|
|
67
76
|
calculate_score(cycle=agg, countries=countries) for agg in aggregations
|
|
68
77
|
]
|
|
@@ -3,23 +3,36 @@ from hestia_earth.utils.tools import non_empty_list
|
|
|
3
3
|
|
|
4
4
|
from hestia_earth.aggregation.log import logger, log_memory_usage
|
|
5
5
|
from hestia_earth.aggregation.utils import CYCLE_AGGREGATION_KEYS, SITE_AGGREGATION_KEYS
|
|
6
|
-
from hestia_earth.aggregation.utils.queries import
|
|
6
|
+
from hestia_earth.aggregation.utils.queries import (
|
|
7
|
+
find_global_nodes,
|
|
8
|
+
find_country_nodes,
|
|
9
|
+
download_site,
|
|
10
|
+
)
|
|
7
11
|
from hestia_earth.aggregation.utils.term import _is_global
|
|
8
12
|
from hestia_earth.aggregation.utils.group import group_blank_nodes
|
|
9
13
|
from hestia_earth.aggregation.utils.blank_node import cleanup_node_blank_nodes
|
|
10
|
-
from hestia_earth.aggregation.utils.aggregate_weighted import
|
|
14
|
+
from hestia_earth.aggregation.utils.aggregate_weighted import (
|
|
15
|
+
aggregate as aggregate_weighted,
|
|
16
|
+
)
|
|
11
17
|
from hestia_earth.aggregation.utils.aggregate_country_nodes import aggregate_cycles
|
|
12
18
|
from hestia_earth.aggregation.utils.weights import (
|
|
13
|
-
country_weights,
|
|
19
|
+
country_weights,
|
|
20
|
+
country_weight_node_id,
|
|
21
|
+
world_weights,
|
|
22
|
+
world_weight_node_id,
|
|
14
23
|
)
|
|
15
24
|
from hestia_earth.aggregation.utils.site import format_site
|
|
16
25
|
from hestia_earth.aggregation.utils.cycle import (
|
|
17
|
-
aggregate_with_matrix,
|
|
26
|
+
aggregate_with_matrix,
|
|
27
|
+
format_for_grouping,
|
|
28
|
+
format_terms_results,
|
|
29
|
+
format_country_results,
|
|
30
|
+
update_cycle,
|
|
18
31
|
)
|
|
19
32
|
from hestia_earth.aggregation.utils.covariance import (
|
|
20
33
|
init_covariance_files,
|
|
21
34
|
remove_covariance_files,
|
|
22
|
-
generate_covariance_country
|
|
35
|
+
generate_covariance_country,
|
|
23
36
|
)
|
|
24
37
|
|
|
25
38
|
|
|
@@ -31,37 +44,44 @@ def _aggregate_country(
|
|
|
31
44
|
start_year: int,
|
|
32
45
|
end_year: int,
|
|
33
46
|
generate_weights_func=None,
|
|
34
|
-
missing_weights_node_id_func=None
|
|
47
|
+
missing_weights_node_id_func=None,
|
|
35
48
|
) -> Tuple[dict, dict]:
|
|
36
|
-
functional_unit = cycles[0].get(
|
|
37
|
-
site_type = cycles[0].get(
|
|
49
|
+
functional_unit = cycles[0].get("functionalUnit")
|
|
50
|
+
site_type = cycles[0].get("site", {}).get("siteType")
|
|
38
51
|
|
|
39
52
|
# aggregate cycles with weights
|
|
40
53
|
cycles_formatted = format_for_grouping(cycles)
|
|
41
54
|
cycle_data = group_blank_nodes(
|
|
42
|
-
cycles_formatted,
|
|
55
|
+
cycles_formatted,
|
|
56
|
+
CYCLE_AGGREGATION_KEYS,
|
|
57
|
+
start_year,
|
|
58
|
+
end_year,
|
|
59
|
+
product=product,
|
|
60
|
+
site_type=site_type,
|
|
43
61
|
)
|
|
44
62
|
weights = generate_weights_func(cycle_data)
|
|
45
63
|
cycle_data = cycle_data | aggregate_weighted(
|
|
46
64
|
aggregate_keys=CYCLE_AGGREGATION_KEYS,
|
|
47
65
|
data=cycle_data,
|
|
48
66
|
weights=weights,
|
|
49
|
-
missing_weights_node_id_func=missing_weights_node_id_func
|
|
67
|
+
missing_weights_node_id_func=missing_weights_node_id_func,
|
|
50
68
|
)
|
|
51
69
|
|
|
52
70
|
# aggregate sites with weights
|
|
53
|
-
sites = [c.get(
|
|
71
|
+
sites = [c.get("site") for c in cycles]
|
|
54
72
|
site_data = group_blank_nodes(sites, SITE_AGGREGATION_KEYS)
|
|
55
73
|
site_data = aggregate_weighted(
|
|
56
74
|
aggregate_keys=SITE_AGGREGATION_KEYS,
|
|
57
75
|
data=site_data,
|
|
58
76
|
weights=weights,
|
|
59
|
-
missing_weights_node_id_func=missing_weights_node_id_func
|
|
77
|
+
missing_weights_node_id_func=missing_weights_node_id_func,
|
|
60
78
|
)
|
|
61
79
|
aggregated_site = format_site(site_data, sites)
|
|
62
80
|
|
|
63
81
|
cycle_data = format_country_results(cycle_data, product, country, aggregated_site)
|
|
64
|
-
aggregated_cycle = update_cycle(
|
|
82
|
+
aggregated_cycle = update_cycle(
|
|
83
|
+
country, start_year, end_year, source, functional_unit, False
|
|
84
|
+
)(cycle_data)
|
|
65
85
|
return (aggregated_cycle, weights)
|
|
66
86
|
|
|
67
87
|
|
|
@@ -71,7 +91,7 @@ def aggregate_country(
|
|
|
71
91
|
source: dict,
|
|
72
92
|
start_year: int,
|
|
73
93
|
end_year: int,
|
|
74
|
-
filter_by_country: bool = True
|
|
94
|
+
filter_by_country: bool = True,
|
|
75
95
|
) -> Tuple[list, list]:
|
|
76
96
|
"""
|
|
77
97
|
Create 1 to many country-level aggregations.
|
|
@@ -100,52 +120,70 @@ def aggregate_country(
|
|
|
100
120
|
"""
|
|
101
121
|
init_covariance_files()
|
|
102
122
|
|
|
103
|
-
cycles = find_country_nodes(
|
|
123
|
+
cycles = find_country_nodes(
|
|
124
|
+
product, start_year, end_year, country if filter_by_country else None
|
|
125
|
+
)
|
|
104
126
|
if not cycles:
|
|
105
|
-
logger.info(
|
|
127
|
+
logger.info("1 - No cycles to run aggregation.")
|
|
106
128
|
return ([], [])
|
|
107
129
|
|
|
108
130
|
# combine cycles into a "master" cycle with multiple values
|
|
109
131
|
cycles_aggregated = aggregate_cycles(
|
|
110
|
-
cycles=cycles,
|
|
111
|
-
product=product,
|
|
112
|
-
start_year=start_year,
|
|
113
|
-
end_year=end_year
|
|
132
|
+
cycles=cycles, product=product, start_year=start_year, end_year=end_year
|
|
114
133
|
)
|
|
115
134
|
if not cycles_aggregated:
|
|
116
|
-
logger.info(
|
|
135
|
+
logger.info("2 - No aggregated cycles.")
|
|
117
136
|
return ([], [])
|
|
118
137
|
|
|
119
|
-
logger.info(
|
|
138
|
+
logger.info("Cycles aggregated, generating final country aggregation...")
|
|
120
139
|
log_memory_usage()
|
|
121
140
|
|
|
122
|
-
functional_unit = cycles_aggregated[0].get(
|
|
141
|
+
functional_unit = cycles_aggregated[0].get("functionalUnit")
|
|
123
142
|
include_matrix = aggregate_with_matrix(product)
|
|
124
|
-
cycles_aggregated = non_empty_list(
|
|
125
|
-
format_terms_results(cycle, product, country) for cycle in cycles_aggregated
|
|
126
|
-
|
|
127
|
-
cycles_aggregated = non_empty_list(
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
143
|
+
cycles_aggregated = non_empty_list(
|
|
144
|
+
[format_terms_results(cycle, product, country) for cycle in cycles_aggregated]
|
|
145
|
+
)
|
|
146
|
+
cycles_aggregated = non_empty_list(
|
|
147
|
+
map(
|
|
148
|
+
update_cycle(
|
|
149
|
+
country, start_year, end_year, source, functional_unit, include_matrix
|
|
150
|
+
),
|
|
151
|
+
cycles_aggregated,
|
|
152
|
+
)
|
|
153
|
+
)
|
|
131
154
|
logger.info(f"Found {len(cycles_aggregated)} cycles at sub-country level")
|
|
132
155
|
if len(cycles_aggregated) == 0:
|
|
133
|
-
logger.info(
|
|
156
|
+
logger.info("3 - No cycles to run aggregation.")
|
|
134
157
|
return []
|
|
135
158
|
|
|
136
159
|
# step 2: use aggregated cycles to calculate country-level cycles
|
|
137
|
-
country_cycle, weights =
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
160
|
+
country_cycle, weights = (
|
|
161
|
+
_aggregate_country(
|
|
162
|
+
country,
|
|
163
|
+
product,
|
|
164
|
+
cycles_aggregated,
|
|
165
|
+
source,
|
|
166
|
+
start_year,
|
|
167
|
+
end_year,
|
|
168
|
+
generate_weights_func=country_weights,
|
|
169
|
+
missing_weights_node_id_func=country_weight_node_id,
|
|
170
|
+
)
|
|
171
|
+
if all(
|
|
172
|
+
[
|
|
173
|
+
cycles_aggregated,
|
|
174
|
+
# when not including matrix, cycles and country_cycles will be the same
|
|
175
|
+
include_matrix,
|
|
176
|
+
]
|
|
177
|
+
)
|
|
178
|
+
else (None, {})
|
|
179
|
+
)
|
|
146
180
|
log_memory_usage()
|
|
147
181
|
|
|
148
|
-
country_cycle = (
|
|
182
|
+
country_cycle = (
|
|
183
|
+
(country_cycle | generate_covariance_country(weights=weights))
|
|
184
|
+
if country_cycle
|
|
185
|
+
else None
|
|
186
|
+
)
|
|
149
187
|
|
|
150
188
|
log_memory_usage()
|
|
151
189
|
|
|
@@ -163,7 +201,7 @@ def aggregate_global(
|
|
|
163
201
|
start_year: int,
|
|
164
202
|
end_year: int,
|
|
165
203
|
*args,
|
|
166
|
-
**kwargs
|
|
204
|
+
**kwargs,
|
|
167
205
|
) -> Tuple[list, list]:
|
|
168
206
|
"""
|
|
169
207
|
Aggregate World and other regions level 0 (like `region-eastern-europe`).
|
|
@@ -188,14 +226,28 @@ def aggregate_global(
|
|
|
188
226
|
The list of countries that were used to aggregate.
|
|
189
227
|
"""
|
|
190
228
|
cycles = find_global_nodes(product, start_year, end_year, country)
|
|
191
|
-
cycles = [
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
229
|
+
cycles = [
|
|
230
|
+
cycle | {"site": download_site(cycle.get("site"), data_state="original")}
|
|
231
|
+
for cycle in cycles
|
|
232
|
+
]
|
|
233
|
+
countries = non_empty_list(
|
|
234
|
+
[cycle.get("site", {}).get("country") for cycle in cycles]
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
aggregated_cycle, *args = (
|
|
238
|
+
_aggregate_country(
|
|
239
|
+
country,
|
|
240
|
+
product,
|
|
241
|
+
cycles,
|
|
242
|
+
source,
|
|
243
|
+
start_year,
|
|
244
|
+
end_year,
|
|
245
|
+
generate_weights_func=world_weights,
|
|
246
|
+
missing_weights_node_id_func=world_weight_node_id,
|
|
247
|
+
)
|
|
248
|
+
if cycles
|
|
249
|
+
else (None, {})
|
|
250
|
+
)
|
|
199
251
|
return (non_empty_list([cleanup_node_blank_nodes(aggregated_cycle)]), countries)
|
|
200
252
|
|
|
201
253
|
|
{hestia_earth_aggregation-0.21.2 → hestia_earth_aggregation-0.21.4}/hestia_earth/aggregation/log.py
RENAMED
|
@@ -4,14 +4,14 @@ import platform
|
|
|
4
4
|
import resource
|
|
5
5
|
import logging
|
|
6
6
|
|
|
7
|
-
LOG_LEVEL = os.getenv(
|
|
7
|
+
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
|
|
8
8
|
|
|
9
9
|
# disable root logger
|
|
10
10
|
root_logger = logging.getLogger()
|
|
11
11
|
root_logger.disabled = True
|
|
12
12
|
|
|
13
13
|
# create custom logger
|
|
14
|
-
logger = logging.getLogger(
|
|
14
|
+
logger = logging.getLogger("hestia_earth.aggregation")
|
|
15
15
|
logger.removeHandler(sys.stdout)
|
|
16
16
|
logger.setLevel(logging.getLevelName(LOG_LEVEL))
|
|
17
17
|
|
|
@@ -29,28 +29,28 @@ def log_to_file(filepath: str):
|
|
|
29
29
|
formatter = logging.Formatter(
|
|
30
30
|
'{"timestamp": "%(asctime)s", "level": "%(levelname)s", "logger": "%(name)s", '
|
|
31
31
|
'"filename": "%(filename)s", "message": "%(message)s"}',
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
"%Y-%m-%dT%H:%M:%S%z",
|
|
33
|
+
)
|
|
34
|
+
handler = logging.FileHandler(filepath, encoding="utf-8")
|
|
34
35
|
handler.setFormatter(formatter)
|
|
35
36
|
handler.setLevel(logging.getLevelName(LOG_LEVEL))
|
|
36
37
|
logger.addHandler(handler)
|
|
37
38
|
|
|
38
39
|
|
|
39
|
-
LOG_FILENAME = os.getenv(
|
|
40
|
+
LOG_FILENAME = os.getenv("LOG_FILENAME")
|
|
40
41
|
if LOG_FILENAME is not None:
|
|
41
42
|
log_to_file(LOG_FILENAME)
|
|
42
43
|
|
|
43
44
|
|
|
44
|
-
def _join_args(**kwargs):
|
|
45
|
+
def _join_args(**kwargs):
|
|
46
|
+
return ", ".join([f"{key}={value}" for key, value in kwargs.items()])
|
|
45
47
|
|
|
46
48
|
|
|
47
49
|
def log_memory_usage(**kwargs):
    """
    Log the peak resident memory of the current process, in MB.

    Parameters
    ----------
    **kwargs
        Extra `key=value` pairs appended to the log message.
    """
    # NOTE(review): assumes `ru_maxrss` is reported in bytes on Darwin/Windows
    # and in kilobytes elsewhere - confirm against the platform documentation.
    factor = 1024 * (1024 if platform.system() in ["Darwin", "Windows"] else 1)
    value = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / factor
    extra = (", " + _join_args(**kwargs)) if kwargs else ""
    # pass `extra` as an argument rather than concatenating it into the format string,
    # so a `%` inside a kwarg value cannot break the message formatting
    logger.info("memory_used=%s, unit=MB%s", value, extra)
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
def debugRequirements(**kwargs):
|
|
@@ -67,7 +67,13 @@ def _sum_values(values: list):
|
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
def debugWeights(weights: dict):
    """
    Log every weight at debug level, together with its share of the total.

    Parameters
    ----------
    weights : dict
        Mapping of id to a dict holding a `weight` entry.
    """
    # fall back to 100 when the summed weight is falsy
    total_weight = _sum_values(w.get("weight") for w in weights.values()) or 100
    for node_id, weight in weights.items():
        value = weight.get("weight")
        ratio = value * 100 / total_weight
        logger.debug(
            "id=%s, weight=%s, ratio=%s/%s", node_id, ratio, value, total_weight
        )
|
|
@@ -3,7 +3,7 @@ import json
|
|
|
3
3
|
from hestia_earth.orchestrator import run
|
|
4
4
|
|
|
5
5
|
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
6
|
-
CONFIG_PATH = os.path.join(CURRENT_DIR,
|
|
6
|
+
CONFIG_PATH = os.path.join(CURRENT_DIR, "config", "Cycle")
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def should_recalculate(product: dict):
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
from decimal import Decimal
|
|
4
|
+
from statistics import stdev, mean
|
|
5
|
+
from hestia_earth.utils.model import linked_node
|
|
6
|
+
from hestia_earth.utils.tools import non_empty_list, flatten, safe_parse_date
|
|
7
|
+
|
|
8
|
+
from ..version import VERSION
|
|
9
|
+
|
|
10
|
+
MIN_NB_OBSERVATIONS = 20
|
|
11
|
+
CYCLE_AGGREGATION_KEYS = ["inputs", "practices", "products", "emissions"]
|
|
12
|
+
SITE_AGGREGATION_KEYS = ["measurements", "management"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class HestiaError(Exception):
    """
    Exception carrying a structured payload next to its message.

    Parameters
    ----------
    message : str
        The error message; also stored under the `message` key of `error`.
    data : dict, optional
        Extra fields merged into `error`. `None` is treated as no extra fields.
    """

    def __init__(self, message: str, data: dict = None):
        super().__init__(message)
        # `None` default instead of `{}`: avoids a mutable default shared between calls
        self.error = {"message": message} | (data or {})

    def __str__(self):
        return f"Error downloading nodes: {json.dumps(self.error or {})}"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def create_folders(filepath: str):
    """
    Create the parent folders of `filepath` if they do not exist yet.

    Parameters
    ----------
    filepath : str
        Path of a file; only its directory part is created.
    """
    folder = os.path.dirname(filepath)
    # a bare filename has no directory part, and `os.makedirs("")` would raise
    return os.makedirs(folder, exist_ok=True) if folder else None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def pick(value: dict, keys: list):
    """Return a new dict restricted to the entries of `value` whose key is listed in `keys`."""
    picked = {}
    for key in keys:
        if key in value:
            picked[key] = value.get(key)
    return picked
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_empty(value):
    """
    Tell whether `value` counts as empty.

    `None`, the strings `""` and `"-"`, an empty list and an empty dict are empty;
    every other value is not.
    """
    if value is None:
        return True
    if isinstance(value, str):
        return value in ("", "-")
    if isinstance(value, list):
        return len(value) == 0
    if isinstance(value, dict):
        return len(value.keys()) == 0
    return False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def remove_empty_fields(value: dict):
    """Return a copy of `value` without the entries that `is_empty` reports as empty."""
    cleaned = {}
    for key, field in value.items():
        if not is_empty(field):
            cleaned[key] = field
    return cleaned
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _save_json(data: dict, filename: str):
    """
    Dump `data` as indented JSON for debugging purposes.

    Nothing is written unless the `DEBUG` environment variable equals `"true"`.
    The file is `<TMP_DIR>/<filename>.jsonld`, with `TMP_DIR` defaulting to `/tmp`.
    """
    if os.getenv("DEBUG", "false") != "true":
        return

    tmp_dir = os.getenv("TMP_DIR", "/tmp")
    filepath = f"{tmp_dir}/{filename}.jsonld"
    create_folders(filepath)
    with open(filepath, "w") as f:
        json.dump(data, f, indent=2)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def sum_data(nodes: list, key: str):
    """Sum `key` over all `nodes`, counting `1` for each node that lacks the key."""
    total = 0
    for node in nodes:
        total += node.get(key, 1)
    return total
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def format_aggregated_list(node_type: str, values: list):
    """
    Build the sorted, de-duplicated list of linked nodes of type `node_type`.

    Parameters
    ----------
    node_type : str
        Used as `@type` of every result and to read the `aggregated<node_type>s` key.
    values : list
        Either ids (strings) or dicts holding an `aggregated<node_type>s` entry.
    """
    aggregated_key = f"aggregated{node_type}s"
    candidates = [
        {"@id": value} if isinstance(value, str) else value.get(aggregated_key)
        for value in non_empty_list(values)
    ]
    nodes = non_empty_list(flatten(candidates))
    # build sorted list of ids
    unique_ids = sorted({node["@id"] for node in nodes})
    return [linked_node({"@type": node_type, "@id": node_id}) for node_id in unique_ids]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def match_dates(blank_node: dict, start_year: int, end_year: int):
    """
    Check that the dates of a blank node fall within `start_year` and `end_year`.

    Two conditions must both hold:
    - the node has no `dates`, or at least one parsable date has its year in the period;
    - the node lacks a parsable `startDate` or `endDate`, or the year of at least one of
      them is in the period.
    """

    def _within_period(year: int):
        return int(start_year) <= year <= int(end_year)

    dates = blank_node.get("dates", [])
    start_date = safe_parse_date(blank_node.get("startDate"), default=None)
    end_date = safe_parse_date(blank_node.get("endDate"), default=None)

    dates_match = not dates or any(
        [
            _within_period(safe_parse_date(date).year)
            for date in dates
            if safe_parse_date(date, default=None)
        ]
    )
    period_match = (
        not start_date
        or not end_date
        or any([_within_period(start_date.year), _within_period(end_date.year)])
    )
    return all([dates_match, period_match])
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _aggregated_node(node: dict):
    """Return a copy of `node` flagged as aggregated with the current `VERSION`."""
    flags = {"aggregated": True, "aggregatedVersion": VERSION}
    return node | flags
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _aggregated_version(node: dict):
    """
    Record, in place, which fields of `node` were aggregated and with which version.

    Every key except `@type` whose value is not `None` is listed in `aggregated`, and
    the entry at the same index of `aggregatedVersion` is set to `VERSION`.
    Returns the same `node` object.
    """
    # snapshot the keys before the two bookkeeping fields are initialised below
    keys = [key for key in node if key != "@type"]
    node["aggregated"] = node.get("aggregated", [])
    node["aggregatedVersion"] = node.get("aggregatedVersion", [])
    aggregated, versions = node["aggregated"], node["aggregatedVersion"]

    for key in keys:
        if node.get(key) is None:
            continue
        if key in aggregated:
            # already listed: refresh its version
            versions[aggregated.index(key)] = VERSION
        else:
            aggregated.append(key)
            versions.append(VERSION)
    return node
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _min(values, observations: int = 0, min_observations: int = MIN_NB_OBSERVATIONS):
    """
    Return the minimum of `values` when enough observations back it, otherwise `None`.

    Parameters
    ----------
    values : list
        The values to consider.
    observations : int
        Number of observations; when `0`, `len(values)` is used instead.
    min_observations : int
        Minimum number of observations required to return a value.

    Returns
    -------
    The minimum, or `None` when `values` is empty, contains a boolean, or there are
    fewer than `min_observations` observations.
    """
    # `min([])` raises ValueError: an empty list has no minimum even if
    # `observations` alone satisfies the threshold
    if len(values) == 0 or any(isinstance(v, bool) for v in values):
        return None
    enough_observations = (observations or len(values)) >= min_observations
    return min(values) if enough_observations else None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _max(values, observations: int = 0, min_observations: int = MIN_NB_OBSERVATIONS):
    """
    Return the maximum of `values` when enough observations back it, otherwise `None`.

    Parameters
    ----------
    values : list
        The values to consider.
    observations : int
        Number of observations; when `0`, `len(values)` is used instead.
    min_observations : int
        Minimum number of observations required to return a value.

    Returns
    -------
    The maximum, or `None` when `values` is empty, contains a boolean, or there are
    fewer than `min_observations` observations.
    """
    # `max([])` raises ValueError: an empty list has no maximum even if
    # `observations` alone satisfies the threshold
    if len(values) == 0 or any(isinstance(v, bool) for v in values):
        return None
    enough_observations = (observations or len(values)) >= min_observations
    return max(values) if enough_observations else None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _sd(values):
|
|
143
|
+
return stdev(values) if len(values) >= 2 else None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _all_boolean(values: list):
|
|
147
|
+
return all([isinstance(v, bool) for v in values])
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _numeric_weighted_average(values: list):
|
|
151
|
+
total_weight = (
|
|
152
|
+
sum(Decimal(str(weight)) for _v, weight in values) if values else Decimal(0)
|
|
153
|
+
)
|
|
154
|
+
weighted_values = [
|
|
155
|
+
Decimal(str(value)) * Decimal(str(weight)) for value, weight in values
|
|
156
|
+
]
|
|
157
|
+
average = (
|
|
158
|
+
sum(weighted_values) / (total_weight if total_weight else 1)
|
|
159
|
+
if weighted_values
|
|
160
|
+
else None
|
|
161
|
+
)
|
|
162
|
+
return None if average is None else float(average)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _bool_weighted_average(values: list):
|
|
166
|
+
return mean(map(int, values)) >= 0.5
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def weighted_average(weighted_values: list):
    """
    Average a list of `(value, weight)` pairs.

    Returns `None` for an empty list, a boolean when every value is a `bool`,
    and the numeric weighted average otherwise.
    """
    values = [value for value, _weight in weighted_values]
    all_boolean = _all_boolean(values)
    if not values:
        return None
    if all_boolean:
        return _bool_weighted_average(values)
    return _numeric_weighted_average(weighted_values)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _unique_nodes(nodes: list):
|
|
184
|
+
return sorted(
|
|
185
|
+
list({n.get("@id"): n for n in nodes}.values()), key=lambda n: n.get("@id")
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _set_dict_single(data: dict, key: str, value, strict=False):
|
|
190
|
+
if data is not None and value is not None and (not strict or not is_empty(value)):
|
|
191
|
+
data[key] = value
|
|
192
|
+
return data
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _set_dict_array(data: dict, key: str, value, strict=False):
|
|
196
|
+
if data is not None and value is not None and (not strict or value != 0):
|
|
197
|
+
data[key] = [value]
|
|
198
|
+
return data
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def format_evs(value: float):
    """Round `value` to 2 decimals and cap it at 100; falsy values are returned unchanged."""
    if not value:
        return value
    rounded = round(value, 2)
    return min([100, rounded])
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def value_difference(value: float, expected_value: float):
    """
    Get the relative difference between a value and the expected value.

    Parameters
    ----------
    value : float
        The value to check.
    expected_value : float
        The expected value.

    Returns
    -------
    float
        The absolute difference divided by the magnitude of the expected value, rounded
        to 4 decimals (e.g. `0.1` for a 10% difference). `0` when the expected value is
        `0` or an empty list.
    """
    if (isinstance(expected_value, list) and len(expected_value) == 0) or (
        expected_value == 0
    ):
        return 0
    # divide by the magnitude so a negative expected value cannot yield a negative difference
    return round(abs(value - expected_value) / abs(expected_value), 4)
|