hestia-earth-utils 0.16.9__py3-none-any.whl → 0.16.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/utils/api.py +78 -36
- hestia_earth/utils/blank_node.py +101 -60
- hestia_earth/utils/calculation_status.py +45 -35
- hestia_earth/utils/cycle.py +7 -7
- hestia_earth/utils/date.py +7 -2
- hestia_earth/utils/descriptive_stats.py +10 -6
- hestia_earth/utils/emission.py +26 -15
- hestia_earth/utils/lookup.py +62 -28
- hestia_earth/utils/lookup_utils.py +89 -63
- hestia_earth/utils/model.py +45 -40
- hestia_earth/utils/pipeline.py +179 -90
- hestia_earth/utils/pivot/_shared.py +16 -12
- hestia_earth/utils/pivot/pivot_csv.py +35 -18
- hestia_earth/utils/pivot/pivot_json.py +34 -18
- hestia_earth/utils/request.py +17 -6
- hestia_earth/utils/stats.py +89 -68
- hestia_earth/utils/storage/_azure_client.py +17 -6
- hestia_earth/utils/storage/_local_client.py +8 -3
- hestia_earth/utils/storage/_s3_client.py +27 -22
- hestia_earth/utils/storage/_sns_client.py +7 -2
- hestia_earth/utils/term.py +5 -5
- hestia_earth/utils/tools.py +50 -21
- hestia_earth/utils/version.py +1 -1
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/METADATA +1 -1
- hestia_earth_utils-0.16.10.dist-info/RECORD +33 -0
- hestia_earth_utils-0.16.9.dist-info/RECORD +0 -33
- {hestia_earth_utils-0.16.9.data → hestia_earth_utils-0.16.10.data}/scripts/hestia-format-upload +0 -0
- {hestia_earth_utils-0.16.9.data → hestia_earth_utils-0.16.10.data}/scripts/hestia-pivot-csv +0 -0
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/WHEEL +0 -0
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/top_level.txt +0 -0
hestia_earth/utils/request.py
CHANGED
@@ -3,18 +3,29 @@ import os

 from .tools import non_empty_value


-def api_url() -> str: return os.getenv('API_URL', 'https://api.hestia.earth')
+def api_url() -> str:
+    return os.getenv("API_URL", "https://api.hestia.earth")


-def api_access_token() -> str: return os.getenv('API_ACCESS_TOKEN')
+def api_access_token() -> str:
+    return os.getenv("API_ACCESS_TOKEN")


-def web_url() -> str: return os.getenv('WEB_URL', 'https://www.hestia.earth')
+def web_url() -> str:
+    return os.getenv("WEB_URL", "https://www.hestia.earth")


-def join_args(values) -> str: return '&'.join(list(filter(non_empty_value, values))).strip()
+def join_args(values) -> str:
+    return "&".join(list(filter(non_empty_value, values))).strip()


 def request_url(base_url: str, **kwargs) -> str:
-    args = list(map(lambda key: '='.join([key, str(kwargs.get(key))]) if kwargs.get(key) else None, kwargs.keys()))
-    return '?'.join(list(filter(non_empty_value, [base_url, join_args(args)]))).strip()
+    args = list(
+        map(
+            lambda key: (
+                "=".join([key, str(kwargs.get(key))]) if kwargs.get(key) else None
+            ),
+            kwargs.keys(),
+        )
+    )
+    return "?".join(list(filter(non_empty_value, [base_url, join_args(args)]))).strip()
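The reformatted request_url builds query strings exactly as before: each keyword argument with a truthy value becomes a key=value pair, empty entries are filtered out by non_empty_value, and the result is appended to the base URL with "?". A quick usage sketch (the endpoint and parameters are illustrative):

# Illustrative use of request_url as defined above.
from hestia_earth.utils.request import request_url

# "limit" is kept; "search" is dropped because its value is falsy.
url = request_url("https://api.hestia.earth/cycles", limit=10, search=None)
print(url)  # https://api.hestia.earth/cycles?limit=10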
hestia_earth/utils/stats.py
CHANGED
@@ -1,11 +1,28 @@
 """
 Based on code by Cool Farm Tool: https://gitlab.com/MethodsCFT/coolfarm-soc/-/blob/main/src/cfasoc/builders.py
 """
+
 import hashlib
 from functools import reduce
 from numpy import (
-    cumsum,
-
+    cumsum,
+    dot,
+    full,
+    linalg,
+    hstack,
+    random,
+    mean,
+    vstack,
+    abs,
+    array,
+    concatenate,
+    exp,
+    float64,
+    inf,
+    pi,
+    prod,
+    sign,
+    sqrt,
 )
 from numpy.typing import NDArray, DTypeLike
 from typing import Union
@@ -90,7 +107,10 @@ def repeat_1d_array_as_columns(n_columns: int, column: NDArray) -> NDArray:


 def discrete_uniform_1d(
-    shape: tuple, low: float, high: float, seed: Union[int, random.Generator, None] = None
+    shape: tuple,
+    low: float,
+    high: float,
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Sample from a discrete uniform distribution and produce an array of a specified shape.
@@ -116,14 +136,14 @@ def discrete_uniform_1d(
     """
     n_rows, n_columns = shape
     rng = random.default_rng(seed)
-    return repeat_array_as_rows(
-        n_rows,
-        rng.uniform(low=low, high=high, size=n_columns)
-    )
+    return repeat_array_as_rows(n_rows, rng.uniform(low=low, high=high, size=n_columns))


 def discrete_uniform_2d(
-    shape: tuple, low: float, high: float, seed: Union[int, random.Generator, None] = None
+    shape: tuple,
+    low: float,
+    high: float,
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Sample from a discrete uniform distribution and produce an array of a specified shape.
@@ -151,7 +171,11 @@ def discrete_uniform_2d(


 def triangular_1d(
-    shape: tuple, low: float, high: float, mode: float, seed: Union[int, random.Generator, None] = None
+    shape: tuple,
+    low: float,
+    high: float,
+    mode: float,
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Sample from a triangular distribution and produce an array of a specified shape.
@@ -180,13 +204,16 @@ def triangular_1d(
     n_rows, n_columns = shape
     rng = random.default_rng(seed)
     return repeat_array_as_rows(
-        n_rows,
-        rng.triangular(left=low, mode=mode, right=high, size=n_columns)
+        n_rows, rng.triangular(left=low, mode=mode, right=high, size=n_columns)
     )


 def triangular_2d(
-    shape: tuple, low: float, high: float, mode: float, seed: Union[int, random.Generator, None] = None
+    shape: tuple,
+    low: float,
+    high: float,
+    mode: float,
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Sample from a triangular distribution and produce an array of a specified shape.
@@ -216,7 +243,10 @@ def triangular_2d(


 def normal_1d(
-    shape: tuple, mu: float, sigma: float, seed: Union[int, random.Generator, None] = None
+    shape: tuple,
+    mu: float,
+    sigma: float,
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Sample from a normal distribution and produce an array of a specified shape.
@@ -242,14 +272,14 @@ def normal_1d(
     """
     n_rows, n_columns = shape
     rng = random.default_rng(seed)
-    return repeat_array_as_rows(
-        n_rows,
-        rng.normal(loc=mu, scale=sigma, size=n_columns)
-    )
+    return repeat_array_as_rows(n_rows, rng.normal(loc=mu, scale=sigma, size=n_columns))


 def normal_2d(
-    shape: tuple, mu: float, sigma: float, seed: Union[int, random.Generator, None] = None
+    shape: tuple,
+    mu: float,
+    sigma: float,
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Sample from a normal distribution and produce an array of a specified shape.
@@ -277,7 +307,12 @@ def normal_2d(


 def truncated_normal_1d(
-    shape: tuple, mu: float, sigma: float, low: float, high: float, seed: Union[int, random.Generator, None] = None
+    shape: tuple,
+    mu: float,
+    sigma: float,
+    low: float,
+    high: float,
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Sample from a truncated normal distribution and produce an array of a specified shape.
@@ -308,12 +343,17 @@ def truncated_normal_1d(
     n_rows, n_columns = shape
     return repeat_array_as_rows(
         n_rows,
-        truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=n_columns, seed=seed)
+        truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=n_columns, seed=seed),
     )


 def truncated_normal_2d(
-    shape: tuple, mu: float, sigma: float, low: float, high: float, seed: Union[int, random.Generator, None] = None
+    shape: tuple,
+    mu: float,
+    sigma: float,
+    low: float,
+    high: float,
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Sample from a truncated normal distribution and produce an array of a specified shape.
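The signature changes above are purely cosmetic: every sampler still takes a (n_rows, n_columns) shape tuple, the distribution parameters, and an optional seed, and the _1d variants draw a single row and repeat it n_rows times. A sketch of the behaviour that follows from the bodies shown above:

from hestia_earth.utils.stats import discrete_uniform_1d, normal_1d

# The same integer seed reproduces the same draw.
a = normal_1d(shape=(1000, 10), mu=5.0, sigma=1.0, seed=42)
b = normal_1d(shape=(1000, 10), mu=5.0, sigma=1.0, seed=42)
assert (a == b).all()

# A _1d sampler draws one row of 10 values and repeats it 1000 times.
c = discrete_uniform_1d(shape=(1000, 10), low=0.0, high=1.0, seed=7)
assert (c[0] == c[999]).all()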
@@ -348,7 +388,7 @@ def plus_minus_uncertainty_to_normal_1d(
     value: float,
     uncertainty: float,
     confidence_interval: float = 95,
-    seed: Union[int, random.Generator, None] = None
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Return a normally distributed sample given a value and uncertainty expressed as +/- a percentage.
@@ -390,8 +430,7 @@ def plus_minus_uncertainty_to_normal_1d(
     n_sds = calc_z_critical(confidence_interval)
     sigma = (value * (uncertainty / 100)) / n_sds
     return repeat_array_as_rows(
-        n_rows,
-        normal_1d(shape=(1, n_columns), mu=value, sigma=sigma, seed=seed)
+        n_rows, normal_1d(shape=(1, n_columns), mu=value, sigma=sigma, seed=seed)
     )


@@ -400,7 +439,7 @@ def plus_minus_uncertainty_to_normal_2d(
     value: float,
     uncertainty: float,
     confidence_interval: float = 95,
-    seed: Union[int, random.Generator, None] = None
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Return a normally distributed sample given a value and uncertainty expressed as +/- a percentage.
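Both plus_minus_uncertainty_to_normal_* helpers convert a +/- percentage uncertainty into a normal standard deviation, as the body shown above makes explicit: sigma = value * (uncertainty / 100) / calc_z_critical(confidence_interval). A hedged sketch, assuming the leading shape parameter that the body's n_rows, n_columns unpacking implies:

from hestia_earth.utils.stats import plus_minus_uncertainty_to_normal_1d

# value=100 with ±10% uncertainty at 95% confidence:
# sigma = (100 * 0.10) / 1.96 ≈ 5.1
samples = plus_minus_uncertainty_to_normal_1d(
    shape=(500, 12), value=100.0, uncertainty=10.0, confidence_interval=95, seed=1
)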
@@ -443,7 +482,7 @@ def plus_minus_uncertainty_to_normal_2d(


 def grouped_avg(arr: NDArray, n: int = 12) -> NDArray:
-    """
+    """Row-wise averaging of numpy arrays. For example:
     1 2 3
     4 5 6
     7 8 9
@@ -482,7 +521,7 @@ def grouped_avg(arr: NDArray, n: int = 12) -> NDArray:
     NDArray
         Output array
     """
-    result = cumsum(arr, 0)[n-1::n] / float(n)
+    result = cumsum(arr, 0)[n - 1 :: n] / float(n)
     result[1:] = result[1:] - result[:-1]
     return result

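grouped_avg block-averages every n consecutive rows using the cumulative-sum trick in the body above: cumsum(arr, 0)[n - 1 :: n] takes the running total at the end of each block, and subtracting the previous entry recovers per-block sums before dividing by n. For example:

from numpy import array
from hestia_earth.utils.stats import grouped_avg

monthly = array([[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]])
# With n=3, rows are averaged in blocks of three: (1+2+3)/3 and (4+5+6)/3.
print(grouped_avg(monthly, n=3))  # -> [[2.], [5.]]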
@@ -582,19 +621,14 @@ def correlated_normal_2d(
     correlated_samples = dot(cholesky_decomp, independent_samples)

     # Scale by standard deviations and shift by means
-    scaled_samples = (
-        correlated_samples
-        * repeat_1d_array_as_columns(n_iterations, sds)
-        + repeat_1d_array_as_columns(n_iterations, means)
-    )
+    scaled_samples = correlated_samples * repeat_1d_array_as_columns(
+        n_iterations, sds
+    ) + repeat_1d_array_as_columns(n_iterations, means)

     return scaled_samples


-def calc_z_critical(
-    confidence_interval: float,
-    n_sided: int = 2
-) -> float64:
+def calc_z_critical(confidence_interval: float, n_sided: int = 2) -> float64:
     """
     Calculate the z-critical value from the confidence interval.

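calc_z_critical converts a confidence interval (as a percentage out of 100) into the standard-normal critical value, two-sided by default. Assuming the usual definition its docstring describes, the familiar reference values come out:

from hestia_earth.utils.stats import calc_z_critical

print(calc_z_critical(95))             # ≈ 1.960 (two-sided)
print(calc_z_critical(99))             # ≈ 2.576
print(calc_z_critical(95, n_sided=1))  # ≈ 1.645 (one-sided)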
@@ -640,9 +674,13 @@ def _normal_ppf(q: float64, tol: float64 = 1e-10) -> float64:
        return x_new if abs(x_new - x) >= tol else x

     return (
-        inf
-
-
+        inf
+        if q == 1
+        else (
+            -inf
+            if q == 0
+            else reduce(lambda x, _: step(x), range(MAX_ITER), INITIAL_GUESS)
+        )
     )

@@ -715,10 +753,7 @@ def _normal_pdf(x: float64) -> float64:
     return 1 / sqrt(2 * pi) * exp(-0.5 * x**2)


-def _calc_confidence_level(
-    z_critical: float64,
-    n_sided: int = 2
-) -> float64:
+def _calc_confidence_level(z_critical: float64, n_sided: int = 2) -> float64:
     """
     Calculate the confidence interval from the z-critical value.

@@ -739,9 +774,7 @@ def _calc_confidence_level(


 def calc_required_iterations_monte_carlo(
-    confidence_level: float,
-    precision: float,
-    sd: float
+    confidence_level: float, precision: float, sd: float
 ) -> int:
     """
     Calculate the number of iterations required for a Monte Carlo simulation to have a desired precision, subject to a
@@ -770,9 +803,7 @@ def calc_required_iterations_monte_carlo(


 def calc_confidence_level_monte_carlo(
-    n_iterations: int,
-    precision: float,
-    sd: float
+    n_iterations: int, precision: float, sd: float
 ) -> float:
     """
     Calculate the confidence level that the sample mean calculated by the Monte Carlo simulation deviates from the
@@ -794,13 +825,11 @@ def calc_confidence_level_monte_carlo(
     The confidence level, as a percentage out of 100, that the precision should be subject too (i.e., we are x%
     sure that the sample mean deviates from the true populatation mean by less than the desired precision).
     """
-    return _calc_confidence_level(precision*sqrt(n_iterations)/sd)
+    return _calc_confidence_level(precision * sqrt(n_iterations) / sd)


 def calc_precision_monte_carlo(
-    confidence_level: float,
-    n_iterations: int,
-    sd: float
+    confidence_level: float, n_iterations: int, sd: float
 ) -> float:
     """
     Calculate the +/- precision of a Monte Carlo simulation for a desired confidence level.
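The *_monte_carlo helpers are rearrangements of a single identity, visible in the reformatted return statements above and in the next hunk: precision = z_critical * sd / sqrt(n_iterations). A sketch of the round trip:

from hestia_earth.utils.stats import (
    calc_confidence_level_monte_carlo,
    calc_precision_monte_carlo,
)

sd, n = 8.0, 1000
precision = calc_precision_monte_carlo(confidence_level=95, n_iterations=n, sd=sd)
# (8.0 * 1.96) / sqrt(1000) ≈ 0.50

# Inverting the identity recovers the confidence level.
level = calc_confidence_level_monte_carlo(n_iterations=n, precision=precision, sd=sd)
# level ≈ 95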
@@ -822,7 +851,7 @@ def calc_precision_monte_carlo(
         units as the estimated mean.
     """
     z_critical = calc_z_critical(confidence_level)
-    return (sd*z_critical)/sqrt(n_iterations)
+    return (sd * z_critical) / sqrt(n_iterations)


 def truncnorm_rvs(
@@ -831,7 +860,7 @@ def truncnorm_rvs(
     loc: float,
     scale: float,
     shape: Union[int, tuple[int, ...]],
-    seed: Union[int, random.Generator, None] = None
+    seed: Union[int, random.Generator, None] = None,
 ) -> NDArray:
     """
     Generate random samples from a truncated normal distribution. Unlike the `scipy` equivalent, the `a` and `b` values
@@ -908,11 +937,7 @@ def add_normal_distributions(
     **Z = X<sub>1</sub> + X<sub>2</sub>**.
     """
     mu_sum = mu_1 + mu_2
-    sigma_sum = sqrt(
-        sigma_1 ** 2
-        + sigma_2 ** 2
-        + 2 * rho * sigma_1 * sigma_2
-    )
+    sigma_sum = sqrt(sigma_1**2 + sigma_2**2 + 2 * rho * sigma_1 * sigma_2)
     return mu_sum, sigma_sum


@@ -953,11 +978,7 @@ def subtract_normal_distributions(
     **Z = X<sub>1</sub> - X<sub>2</sub>**.
     """
     mu_sum = mu_1 - mu_2
-    sigma_sum = sqrt(
-        sigma_1 ** 2
-        + sigma_2 ** 2
-        - 2 * rho * sigma_1 * sigma_2
-    )
+    sigma_sum = sqrt(sigma_1**2 + sigma_2**2 - 2 * rho * sigma_1 * sigma_2)
     return mu_sum, sigma_sum

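The collapsed sigma_sum expressions are the standard variance rules for sums and differences of correlated normal variables: Var(X1 ± X2) = sigma_1^2 + sigma_2^2 ± 2 * rho * sigma_1 * sigma_2. A sketch, assuming the positional order mu_1, sigma_1, mu_2, sigma_2 with an optional rho defaulting to 0 (as in lerp_normal_distributions below):

from hestia_earth.utils.stats import add_normal_distributions

# Independent (rho=0): sigma = sqrt(3**2 + 4**2) = 5.0
mu, sigma = add_normal_distributions(10.0, 3.0, 20.0, 4.0)

# Perfectly correlated (rho=1): sigma = sqrt(9 + 16 + 2*3*4) = 7.0
mu, sigma = add_normal_distributions(10.0, 3.0, 20.0, 4.0, rho=1.0)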
@@ -967,7 +988,7 @@ def lerp_normal_distributions(
     mu_2: float,
     sigma_2: float,
     alpha: float,
-    rho: float = 0
+    rho: float = 0,
 ) -> tuple[float, float]:
     """
     Linearly interpolate between two normal distributions, with optional correlation.
@@ -1008,8 +1029,8 @@ def lerp_normal_distributions(
     """
     mu_Z = (1 - alpha) * mu_1 + alpha * mu_2
     var_Z = (
-        ((1 - alpha) ** 2) * sigma_1 ** 2
-        + (alpha ** 2) * sigma_2 ** 2
+        ((1 - alpha) ** 2) * sigma_1**2
+        + (alpha**2) * sigma_2**2
         + 2 * alpha * (1 - alpha) * rho * sigma_1 * sigma_2
     )
     sigma_Z = sqrt(var_Z)
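With the exponents restored, the interpolation variance reads Var(Z) = (1 - alpha)^2 * sigma_1^2 + alpha^2 * sigma_2^2 + 2 * alpha * (1 - alpha) * rho * sigma_1 * sigma_2, so alpha = 0 or alpha = 1 returns one of the input distributions unchanged. A sketch, assuming mu_1 and sigma_1 lead the signature:

from hestia_earth.utils.stats import lerp_normal_distributions

# Halfway between N(0, 2) and N(10, 4), uncorrelated:
mu, sigma = lerp_normal_distributions(0.0, 2.0, 10.0, 4.0, alpha=0.5)
# mu = 5.0; sigma = sqrt(0.25 * 4 + 0.25 * 16) ≈ 2.236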
hestia_earth/utils/storage/_azure_client.py
CHANGED
@@ -1,8 +1,8 @@
 import os

-CONN_STRING = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
-CONTAINER = os.getenv('AZURE_STORAGE_CONTAINER')
-CONTAINER_GLOSSARY = os.getenv('AZURE_STORAGE_CONTAINER_GLOSSARY')
+CONN_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
+CONTAINER = os.getenv("AZURE_STORAGE_CONTAINER")
+CONTAINER_GLOSSARY = os.getenv("AZURE_STORAGE_CONTAINER_GLOSSARY")
 _blob_service = None  # noqa: F824


@@ -10,7 +10,12 @@ _blob_service = None  # noqa: F824
 def _get_blob_service_client():
     global _blob_service
     from azure.storage.blob import BlobServiceClient
-    _blob_service = BlobServiceClient.from_connection_string(CONN_STRING) if _blob_service is None else _blob_service
+
+    _blob_service = (
+        BlobServiceClient.from_connection_string(CONN_STRING)
+        if _blob_service is None
+        else _blob_service
+    )
     return _blob_service


@@ -20,8 +25,11 @@ def _get_container(glossary: bool = False) -> str:

 def _load_from_container(container: str, key: str):
     from azure.core.exceptions import ResourceNotFoundError
+
     try:
-        blob_client = _get_blob_service_client().get_blob_client(container=container, blob=key)
+        blob_client = _get_blob_service_client().get_blob_client(
+            container=container, blob=key
+        )
         return blob_client.download_blob().readall()
     except ResourceNotFoundError:
         return None
@@ -29,8 +37,11 @@ def _load_from_container(container: str, key: str):

 def _exists_in_container(container: str, key: str):
     from azure.core.exceptions import ResourceNotFoundError
+
     try:
-        blob_client = _get_blob_service_client().get_blob_client(container=container, blob=key)
+        blob_client = _get_blob_service_client().get_blob_client(
+            container=container, blob=key
+        )
         return blob_client.exists()
     except ResourceNotFoundError:
         return False
hestia_earth/utils/storage/_local_client.py
CHANGED
@@ -2,16 +2,21 @@ import os


 def _get_folder(glossary: bool = False) -> str:
-    return os.getenv('DOWNLOAD_FOLDER_GLOSSARY') if glossary else os.getenv('DOWNLOAD_FOLDER')
+    return (
+        os.getenv("DOWNLOAD_FOLDER_GLOSSARY")
+        if glossary
+        else os.getenv("DOWNLOAD_FOLDER")
+    )


 def _load_from_folder(folder: str, key: str):
     try:
         with open(os.path.join(folder, key)) as f:
-            return f.read().encode('utf-8')
+            return f.read().encode("utf-8")
     except Exception:
         # in case the file does not exist, should simply return None
         return None


-def _exists_in_folder(folder: str, key: str): return os.path.exists(os.path.join(folder, key))
+def _exists_in_folder(folder: str, key: str):
+    return os.path.exists(os.path.join(folder, key))
hestia_earth/utils/storage/_s3_client.py
CHANGED
@@ -1,7 +1,7 @@
 import os

-BUCKET = os.getenv('AWS_BUCKET')
-BUCKET_GLOSSARY = os.getenv('AWS_BUCKET_GLOSSARY')
+BUCKET = os.getenv("AWS_BUCKET")
+BUCKET_GLOSSARY = os.getenv("AWS_BUCKET_GLOSSARY")
 _s3_client = None  # noqa: F824


@@ -9,7 +9,10 @@ _s3_client = None  # noqa: F824
 def _get_s3_client():
     global _s3_client
     import boto3
-    _s3_client = boto3.session.Session().client('s3') if _s3_client is None else _s3_client
+
+    _s3_client = (
+        boto3.session.Session().client("s3") if _s3_client is None else _s3_client
+    )
     return _s3_client
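The storage modules all spell out the same lazy-singleton idiom: the module-level client starts as None and is only constructed on first use, so boto3 (or the Azure SDK) is imported and a session created only when storage is actually touched. A minimal standalone sketch of the pattern:

_client = None


def _get_client():
    global _client
    import boto3  # imported lazily so the dependency is only paid on first use

    # Build the client once; later calls reuse the cached instance.
    _client = boto3.session.Session().client("s3") if _client is None else _client
    return _client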
@@ -19,14 +22,16 @@ def _get_bucket(glossary: bool = False) -> str:

 def _load_from_bucket(bucket: str, key: str):
     from botocore.exceptions import ClientError
+
     try:
-        return _get_s3_client().get_object(Bucket=bucket, Key=key)['Body'].read()
+        return _get_s3_client().get_object(Bucket=bucket, Key=key)["Body"].read()
     except ClientError:
         return None


 def _exists_in_bucket(bucket: str, key: str):
     from botocore.exceptions import ClientError
+
     try:
         _get_s3_client().head_object(Bucket=bucket, Key=key)
         return True
@@ -36,14 +41,18 @@ def _exists_in_bucket(bucket: str, key: str):

 def _read_size(bucket: str, key: str):
     try:
-        return _get_s3_client().head_object(Bucket=bucket, Key=key).get('ContentLength')
+        return _get_s3_client().head_object(Bucket=bucket, Key=key).get("ContentLength")
     except Exception:
         return 0


 def _read_metadata(bucket_name: str, key: str):
     try:
-        return _get_s3_client().head_object(Bucket=bucket_name, Key=key).get('Metadata', {})
+        return (
+            _get_s3_client()
+            .head_object(Bucket=bucket_name, Key=key)
+            .get("Metadata", {})
+        )
     except Exception:
         return {}

@@ -55,9 +64,9 @@ def _update_metadata(bucket: str, key: str, data: dict = {}):
         _get_s3_client().copy_object(
             Bucket=bucket,
             Key=key,
-            CopySource={'Bucket': bucket, 'Key': key},
+            CopySource={"Bucket": bucket, "Key": key},
             Metadata=metadata,
-            MetadataDirective='REPLACE'
+            MetadataDirective="REPLACE",
         )
     except Exception:
         pass
@@ -65,33 +74,32 @@ def _update_metadata(bucket: str, key: str, data: dict = {}):

 def _last_modified(bucket: str, key: str):
     try:
-        return _get_s3_client().head_object(Bucket=bucket, Key=key).get('LastModified')
+        return _get_s3_client().head_object(Bucket=bucket, Key=key).get("LastModified")
     except Exception:
         return None


 def _upload_to_bucket(bucket: str, key: str, body, content_type: str):
     from botocore.exceptions import ClientError
+
     try:
         return _get_s3_client().put_object(
-            Bucket=bucket,
-            Key=key,
-            Body=body,
-            ContentType=content_type
+            Bucket=bucket, Key=key, Body=body, ContentType=content_type
         )
     except ClientError:
         return None


-def _list_bucket_objects(bucket: str, folder: str = ''):
+def _list_bucket_objects(bucket: str, folder: str = ""):
     from botocore.exceptions import ClientError
+
     try:
-        paginator = _get_s3_client().get_paginator('list_objects_v2')
+        paginator = _get_s3_client().get_paginator("list_objects_v2")
         pages = paginator.paginate(Bucket=bucket, Prefix=folder)

         contents = []
         for page in pages:
-            contents.extend(page.get('Contents', []))
+            contents.extend(page.get("Contents", []))
         return contents
     except ClientError:
         return []
@@ -99,17 +107,14 @@ def _list_bucket_objects(bucket: str, folder: str = ''):

 def _delete_objects(bucket: str, objects: list):
     from botocore.exceptions import ClientError
+
     try:
         # delete in batch of 1000 max allowed
         batch_size = 1000
         for i in range(0, len(objects), batch_size):
-            batch_objects = objects[i:i + batch_size]
+            batch_objects = objects[i : i + batch_size]
             _get_s3_client().delete_objects(
-                Bucket=bucket,
-                Delete={
-                    'Objects': batch_objects,
-                    'Quiet': True
-                }
+                Bucket=bucket, Delete={"Objects": batch_objects, "Quiet": True}
             )
     except ClientError:
         return None
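_delete_objects works around the S3 limit of 1,000 keys per DeleteObjects request by slicing the key list into batches: 2,500 objects, say, are deleted in three calls of 1,000, 1,000 and 500 keys. The slicing on its own (the key names are hypothetical):

objects = [{"Key": f"nodes/{i}.jsonld"} for i in range(2500)]  # hypothetical keys
batch_size = 1000
batches = [objects[i : i + batch_size] for i in range(0, len(objects), batch_size)]
print([len(b) for b in batches])  # [1000, 1000, 500]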
hestia_earth/utils/storage/_sns_client.py
CHANGED
@@ -7,6 +7,11 @@ _sns_client = None  # noqa: F824
 def _get_sns_client():
     global _sns_client
     import boto3
-    region_name = os.getenv('AWS_REGION')
-    _sns_client = boto3.session.Session().client('sns', region_name=region_name) if _sns_client is None else _sns_client
+
+    region_name = os.getenv("AWS_REGION")
+    _sns_client = (
+        boto3.session.Session().client("sns", region_name=region_name)
+        if _sns_client is None
+        else _sns_client
+    )
     return _sns_client
hestia_earth/utils/term.py
CHANGED
@@ -12,7 +12,7 @@ def _load_term_file(term_type: str):
     try:
         filepath = f"glossary/{term_type}.json"
         nodes = json.loads(_load_from_storage(filepath, glossary=True))
-        return {node.get('@id'): node for node in nodes}
+        return {node.get("@id"): node for node in nodes}
     except Exception:
         return {}

@@ -21,11 +21,11 @@ def download_term(term: Union[str, dict], termType: Union[str, TermTermType] = N
     """
     Download a Term, using the glossary file if available, or default to the standard download.
     """
-    term_id = term.get('@id', term.get('id')) if isinstance(term, dict) else term
+    term_id = term.get("@id", term.get("id")) if isinstance(term, dict) else term
     term_type = (
-        termType if isinstance(termType, str) else termType.value
-
-        term.get('termType') if isinstance(term, dict) else None
+        (termType if isinstance(termType, str) else termType.value)
+        if termType
+        else (term.get("termType") if isinstance(term, dict) else None)
     )
     cached_nodes = _load_term_file(term_type) if term_type else {}
     return cached_nodes.get(term_id) or download_hestia(term_id)
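After these changes download_term accepts either a term id string or a node dict, resolves the term type from the termType argument (a string or a TermTermType enum member) or from the node itself, and only falls back to download_hestia when the cached glossary file does not contain the id. A hedged usage sketch: the term id is illustrative, and TermTermType is assumed to come from the hestia_earth.schema package as elsewhere in HESTIA tooling.

from hestia_earth.schema import TermTermType
from hestia_earth.utils.term import download_term

# Served from the cached glossary/crop.json when available,
# otherwise fetched with download_hestia. "wheatGrain" is an example id.
term = download_term("wheatGrain", TermTermType.CROP)

# A dict works too; "@id" and "termType" are read from the node itself.
term = download_term({"@id": "wheatGrain", "termType": "crop"})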