hestia-earth-utils 0.16.8__py3-none-any.whl → 0.16.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. hestia_earth/utils/api.py +78 -36
  2. hestia_earth/utils/blank_node.py +101 -60
  3. hestia_earth/utils/calculation_status.py +45 -35
  4. hestia_earth/utils/cycle.py +7 -7
  5. hestia_earth/utils/date.py +7 -2
  6. hestia_earth/utils/descriptive_stats.py +10 -6
  7. hestia_earth/utils/emission.py +26 -15
  8. hestia_earth/utils/lookup.py +62 -28
  9. hestia_earth/utils/lookup_utils.py +89 -63
  10. hestia_earth/utils/model.py +45 -40
  11. hestia_earth/utils/pipeline.py +179 -90
  12. hestia_earth/utils/pivot/_shared.py +16 -12
  13. hestia_earth/utils/pivot/pivot_csv.py +35 -18
  14. hestia_earth/utils/pivot/pivot_json.py +34 -18
  15. hestia_earth/utils/request.py +17 -6
  16. hestia_earth/utils/stats.py +89 -68
  17. hestia_earth/utils/storage/_azure_client.py +17 -6
  18. hestia_earth/utils/storage/_local_client.py +8 -3
  19. hestia_earth/utils/storage/_s3_client.py +27 -22
  20. hestia_earth/utils/storage/_sns_client.py +7 -2
  21. hestia_earth/utils/term.py +5 -5
  22. hestia_earth/utils/tools.py +50 -21
  23. hestia_earth/utils/version.py +1 -1
  24. {hestia_earth_utils-0.16.8.dist-info → hestia_earth_utils-0.16.10.dist-info}/METADATA +1 -1
  25. hestia_earth_utils-0.16.10.dist-info/RECORD +33 -0
  26. hestia_earth_utils-0.16.8.dist-info/RECORD +0 -33
  27. {hestia_earth_utils-0.16.8.data → hestia_earth_utils-0.16.10.data}/scripts/hestia-format-upload +0 -0
  28. {hestia_earth_utils-0.16.8.data → hestia_earth_utils-0.16.10.data}/scripts/hestia-pivot-csv +0 -0
  29. {hestia_earth_utils-0.16.8.dist-info → hestia_earth_utils-0.16.10.dist-info}/WHEEL +0 -0
  30. {hestia_earth_utils-0.16.8.dist-info → hestia_earth_utils-0.16.10.dist-info}/top_level.txt +0 -0
@@ -3,18 +3,29 @@ import os
3
3
  from .tools import non_empty_value
4
4
 
5
5
 
6
- def api_url() -> str: return os.getenv('API_URL', 'https://api.hestia.earth')
6
+ def api_url() -> str:
7
+ return os.getenv("API_URL", "https://api.hestia.earth")
7
8
 
8
9
 
9
- def api_access_token() -> str: return os.getenv('API_ACCESS_TOKEN')
10
+ def api_access_token() -> str:
11
+ return os.getenv("API_ACCESS_TOKEN")
10
12
 
11
13
 
12
- def web_url() -> str: return os.getenv('WEB_URL', 'https://www.hestia.earth')
14
+ def web_url() -> str:
15
+ return os.getenv("WEB_URL", "https://www.hestia.earth")
13
16
 
14
17
 
15
- def join_args(values) -> str: return '&'.join(list(filter(non_empty_value, values))).strip()
18
+ def join_args(values) -> str:
19
+ return "&".join(list(filter(non_empty_value, values))).strip()
16
20
 
17
21
 
18
22
  def request_url(base_url: str, **kwargs) -> str:
19
- args = list(map(lambda key: '='.join([key, str(kwargs.get(key))]) if kwargs.get(key) else None, kwargs.keys()))
20
- return '?'.join(list(filter(non_empty_value, [base_url, join_args(args)]))).strip()
23
+ args = list(
24
+ map(
25
+ lambda key: (
26
+ "=".join([key, str(kwargs.get(key))]) if kwargs.get(key) else None
27
+ ),
28
+ kwargs.keys(),
29
+ )
30
+ )
31
+ return "?".join(list(filter(non_empty_value, [base_url, join_args(args)]))).strip()
@@ -1,11 +1,28 @@
1
1
  """
2
2
  Based on code by Cool Farm Tool: https://gitlab.com/MethodsCFT/coolfarm-soc/-/blob/main/src/cfasoc/builders.py
3
3
  """
4
+
4
5
  import hashlib
5
6
  from functools import reduce
6
7
  from numpy import (
7
- cumsum, dot, full, linalg, hstack, random, mean, vstack, abs, array, concatenate, exp, float64, inf, pi, prod, sign,
8
- sqrt
8
+ cumsum,
9
+ dot,
10
+ full,
11
+ linalg,
12
+ hstack,
13
+ random,
14
+ mean,
15
+ vstack,
16
+ abs,
17
+ array,
18
+ concatenate,
19
+ exp,
20
+ float64,
21
+ inf,
22
+ pi,
23
+ prod,
24
+ sign,
25
+ sqrt,
9
26
  )
10
27
  from numpy.typing import NDArray, DTypeLike
11
28
  from typing import Union
@@ -90,7 +107,10 @@ def repeat_1d_array_as_columns(n_columns: int, column: NDArray) -> NDArray:
90
107
 
91
108
 
92
109
  def discrete_uniform_1d(
93
- shape: tuple, low: float, high: float, seed: Union[int, random.Generator, None] = None
110
+ shape: tuple,
111
+ low: float,
112
+ high: float,
113
+ seed: Union[int, random.Generator, None] = None,
94
114
  ) -> NDArray:
95
115
  """
96
116
  Sample from a discrete uniform distribution and produce an array of a specified shape.
@@ -116,14 +136,14 @@ def discrete_uniform_1d(
116
136
  """
117
137
  n_rows, n_columns = shape
118
138
  rng = random.default_rng(seed)
119
- return repeat_array_as_rows(
120
- n_rows,
121
- rng.uniform(low=low, high=high, size=n_columns)
122
- )
139
+ return repeat_array_as_rows(n_rows, rng.uniform(low=low, high=high, size=n_columns))
123
140
 
124
141
 
125
142
  def discrete_uniform_2d(
126
- shape: tuple, low: float, high: float, seed: Union[int, random.Generator, None] = None
143
+ shape: tuple,
144
+ low: float,
145
+ high: float,
146
+ seed: Union[int, random.Generator, None] = None,
127
147
  ) -> NDArray:
128
148
  """
129
149
  Sample from a discrete uniform distribution and produce an array of a specified shape.
@@ -151,7 +171,11 @@ def discrete_uniform_2d(
151
171
 
152
172
 
153
173
  def triangular_1d(
154
- shape: tuple, low: float, high: float, mode: float, seed: Union[int, random.Generator, None] = None
174
+ shape: tuple,
175
+ low: float,
176
+ high: float,
177
+ mode: float,
178
+ seed: Union[int, random.Generator, None] = None,
155
179
  ) -> NDArray:
156
180
  """
157
181
  Sample from a triangular distribution and produce an array of a specified shape.
@@ -180,13 +204,16 @@ def triangular_1d(
180
204
  n_rows, n_columns = shape
181
205
  rng = random.default_rng(seed)
182
206
  return repeat_array_as_rows(
183
- n_rows,
184
- rng.triangular(left=low, mode=mode, right=high, size=n_columns)
207
+ n_rows, rng.triangular(left=low, mode=mode, right=high, size=n_columns)
185
208
  )
186
209
 
187
210
 
188
211
  def triangular_2d(
189
- shape: tuple, low: float, high: float, mode: float, seed: Union[int, random.Generator, None] = None
212
+ shape: tuple,
213
+ low: float,
214
+ high: float,
215
+ mode: float,
216
+ seed: Union[int, random.Generator, None] = None,
190
217
  ) -> NDArray:
191
218
  """
192
219
  Sample from a triangular distribution and produce an array of a specified shape.
@@ -216,7 +243,10 @@ def triangular_2d(
216
243
 
217
244
 
218
245
  def normal_1d(
219
- shape: tuple, mu: float, sigma: float, seed: Union[int, random.Generator, None] = None
246
+ shape: tuple,
247
+ mu: float,
248
+ sigma: float,
249
+ seed: Union[int, random.Generator, None] = None,
220
250
  ) -> NDArray:
221
251
  """
222
252
  Sample from a normal distribution and produce an array of a specified shape.
@@ -242,14 +272,14 @@ def normal_1d(
242
272
  """
243
273
  n_rows, n_columns = shape
244
274
  rng = random.default_rng(seed)
245
- return repeat_array_as_rows(
246
- n_rows,
247
- rng.normal(loc=mu, scale=sigma, size=n_columns)
248
- )
275
+ return repeat_array_as_rows(n_rows, rng.normal(loc=mu, scale=sigma, size=n_columns))
249
276
 
250
277
 
251
278
  def normal_2d(
252
- shape: tuple, mu: float, sigma: float, seed: Union[int, random.Generator, None] = None
279
+ shape: tuple,
280
+ mu: float,
281
+ sigma: float,
282
+ seed: Union[int, random.Generator, None] = None,
253
283
  ) -> NDArray:
254
284
  """
255
285
  Sample from a normal distribution and produce an array of a specified shape.
@@ -277,7 +307,12 @@ def normal_2d(
277
307
 
278
308
 
279
309
  def truncated_normal_1d(
280
- shape: tuple, mu: float, sigma: float, low: float, high: float, seed: Union[int, random.Generator, None] = None
310
+ shape: tuple,
311
+ mu: float,
312
+ sigma: float,
313
+ low: float,
314
+ high: float,
315
+ seed: Union[int, random.Generator, None] = None,
281
316
  ) -> NDArray:
282
317
  """
283
318
  Sample from a truncated normal distribution and produce an array of a specified shape.
@@ -308,12 +343,17 @@ def truncated_normal_1d(
308
343
  n_rows, n_columns = shape
309
344
  return repeat_array_as_rows(
310
345
  n_rows,
311
- truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=n_columns, seed=seed)
346
+ truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=n_columns, seed=seed),
312
347
  )
313
348
 
314
349
 
315
350
  def truncated_normal_2d(
316
- shape: tuple, mu: float, sigma: float, low: float, high: float, seed: Union[int, random.Generator, None] = None
351
+ shape: tuple,
352
+ mu: float,
353
+ sigma: float,
354
+ low: float,
355
+ high: float,
356
+ seed: Union[int, random.Generator, None] = None,
317
357
  ) -> NDArray:
318
358
  """
319
359
  Sample from a truncated normal distribution and produce an array of a specified shape.
@@ -348,7 +388,7 @@ def plus_minus_uncertainty_to_normal_1d(
348
388
  value: float,
349
389
  uncertainty: float,
350
390
  confidence_interval: float = 95,
351
- seed: Union[int, random.Generator, None] = None
391
+ seed: Union[int, random.Generator, None] = None,
352
392
  ) -> NDArray:
353
393
  """
354
394
  Return a normally distributed sample given a value and uncertainty expressed as +/- a percentage.
@@ -390,8 +430,7 @@ def plus_minus_uncertainty_to_normal_1d(
390
430
  n_sds = calc_z_critical(confidence_interval)
391
431
  sigma = (value * (uncertainty / 100)) / n_sds
392
432
  return repeat_array_as_rows(
393
- n_rows,
394
- normal_1d(shape=(1, n_columns), mu=value, sigma=sigma, seed=seed)
433
+ n_rows, normal_1d(shape=(1, n_columns), mu=value, sigma=sigma, seed=seed)
395
434
  )
396
435
 
397
436
 
@@ -400,7 +439,7 @@ def plus_minus_uncertainty_to_normal_2d(
400
439
  value: float,
401
440
  uncertainty: float,
402
441
  confidence_interval: float = 95,
403
- seed: Union[int, random.Generator, None] = None
442
+ seed: Union[int, random.Generator, None] = None,
404
443
  ) -> NDArray:
405
444
  """
406
445
  Return a normally distributed sample given a value and uncertainty expressed as +/- a percentage.
@@ -443,7 +482,7 @@ def plus_minus_uncertainty_to_normal_2d(
443
482
 
444
483
 
445
484
  def grouped_avg(arr: NDArray, n: int = 12) -> NDArray:
446
- """ Row-wise averaging of numpy arrays. For example:
485
+ """Row-wise averaging of numpy arrays. For example:
447
486
  1 2 3
448
487
  4 5 6
449
488
  7 8 9
@@ -482,7 +521,7 @@ def grouped_avg(arr: NDArray, n: int = 12) -> NDArray:
482
521
  NDArray
483
522
  Output array
484
523
  """
485
- result = cumsum(arr, 0)[n-1::n] / float(n)
524
+ result = cumsum(arr, 0)[n - 1 :: n] / float(n)
486
525
  result[1:] = result[1:] - result[:-1]
487
526
  return result
488
527
 
@@ -582,19 +621,14 @@ def correlated_normal_2d(
582
621
  correlated_samples = dot(cholesky_decomp, independent_samples)
583
622
 
584
623
  # Scale by standard deviations and shift by means
585
- scaled_samples = (
586
- correlated_samples
587
- * repeat_1d_array_as_columns(n_iterations, sds)
588
- + repeat_1d_array_as_columns(n_iterations, means)
589
- )
624
+ scaled_samples = correlated_samples * repeat_1d_array_as_columns(
625
+ n_iterations, sds
626
+ ) + repeat_1d_array_as_columns(n_iterations, means)
590
627
 
591
628
  return scaled_samples
592
629
 
593
630
 
594
- def calc_z_critical(
595
- confidence_interval: float,
596
- n_sided: int = 2
597
- ) -> float64:
631
+ def calc_z_critical(confidence_interval: float, n_sided: int = 2) -> float64:
598
632
  """
599
633
  Calculate the z-critical value from the confidence interval.
600
634
 
@@ -640,9 +674,13 @@ def _normal_ppf(q: float64, tol: float64 = 1e-10) -> float64:
640
674
  return x_new if abs(x_new - x) >= tol else x
641
675
 
642
676
  return (
643
- inf if q == 1 else
644
- -inf if q == 0 else
645
- reduce(lambda x, _: step(x), range(MAX_ITER), INITIAL_GUESS)
677
+ inf
678
+ if q == 1
679
+ else (
680
+ -inf
681
+ if q == 0
682
+ else reduce(lambda x, _: step(x), range(MAX_ITER), INITIAL_GUESS)
683
+ )
646
684
  )
647
685
 
648
686
 
@@ -715,10 +753,7 @@ def _normal_pdf(x: float64) -> float64:
715
753
  return 1 / sqrt(2 * pi) * exp(-0.5 * x**2)
716
754
 
717
755
 
718
- def _calc_confidence_level(
719
- z_critical: float64,
720
- n_sided: int = 2
721
- ) -> float64:
756
+ def _calc_confidence_level(z_critical: float64, n_sided: int = 2) -> float64:
722
757
  """
723
758
  Calculate the confidence interval from the z-critical value.
724
759
 
@@ -739,9 +774,7 @@ def _calc_confidence_level(
739
774
 
740
775
 
741
776
  def calc_required_iterations_monte_carlo(
742
- confidence_level: float,
743
- precision: float,
744
- sd: float
777
+ confidence_level: float, precision: float, sd: float
745
778
  ) -> int:
746
779
  """
747
780
  Calculate the number of iterations required for a Monte Carlo simulation to have a desired precision, subject to a
@@ -770,9 +803,7 @@ def calc_required_iterations_monte_carlo(
770
803
 
771
804
 
772
805
  def calc_confidence_level_monte_carlo(
773
- n_iterations: int,
774
- precision: float,
775
- sd: float
806
+ n_iterations: int, precision: float, sd: float
776
807
  ) -> float:
777
808
  """
778
809
  Calculate the confidence level that the sample mean calculated by the Monte Carlo simulation deviates from the
@@ -794,13 +825,11 @@ def calc_confidence_level_monte_carlo(
794
825
  The confidence level, as a percentage out of 100, that the precision should be subject to (i.e., we are x%
795
826
  sure that the sample mean deviates from the true population mean by less than the desired precision).
796
827
  """
797
- return _calc_confidence_level(precision*sqrt(n_iterations)/sd)
828
+ return _calc_confidence_level(precision * sqrt(n_iterations) / sd)
798
829
 
799
830
 
800
831
  def calc_precision_monte_carlo(
801
- confidence_level: float,
802
- n_iterations: int,
803
- sd: float
832
+ confidence_level: float, n_iterations: int, sd: float
804
833
  ) -> float:
805
834
  """
806
835
  Calculate the +/- precision of a Monte Carlo simulation for a desired confidence level.
@@ -822,7 +851,7 @@ def calc_precision_monte_carlo(
822
851
  units as the estimated mean.
823
852
  """
824
853
  z_critical = calc_z_critical(confidence_level)
825
- return (sd*z_critical)/sqrt(n_iterations)
854
+ return (sd * z_critical) / sqrt(n_iterations)
826
855
 
827
856
 
828
857
  def truncnorm_rvs(
@@ -831,7 +860,7 @@ def truncnorm_rvs(
831
860
  loc: float,
832
861
  scale: float,
833
862
  shape: Union[int, tuple[int, ...]],
834
- seed: Union[int, random.Generator, None] = None
863
+ seed: Union[int, random.Generator, None] = None,
835
864
  ) -> NDArray:
836
865
  """
837
866
  Generate random samples from a truncated normal distribution. Unlike the `scipy` equivalent, the `a` and `b` values
@@ -908,11 +937,7 @@ def add_normal_distributions(
908
937
  **Z = X<sub>1</sub> + X<sub>2</sub>**.
909
938
  """
910
939
  mu_sum = mu_1 + mu_2
911
- sigma_sum = sqrt(
912
- sigma_1 ** 2
913
- + sigma_2 ** 2
914
- + 2 * rho * sigma_1 * sigma_2
915
- )
940
+ sigma_sum = sqrt(sigma_1**2 + sigma_2**2 + 2 * rho * sigma_1 * sigma_2)
916
941
  return mu_sum, sigma_sum
917
942
 
918
943
 
@@ -953,11 +978,7 @@ def subtract_normal_distributions(
953
978
  **Z = X<sub>1</sub> - X<sub>2</sub>**.
954
979
  """
955
980
  mu_sum = mu_1 - mu_2
956
- sigma_sum = sqrt(
957
- sigma_1 ** 2
958
- + sigma_2 ** 2
959
- - 2 * rho * sigma_1 * sigma_2
960
- )
981
+ sigma_sum = sqrt(sigma_1**2 + sigma_2**2 - 2 * rho * sigma_1 * sigma_2)
961
982
  return mu_sum, sigma_sum
962
983
 
963
984
 
@@ -967,7 +988,7 @@ def lerp_normal_distributions(
967
988
  mu_2: float,
968
989
  sigma_2: float,
969
990
  alpha: float,
970
- rho: float = 0
991
+ rho: float = 0,
971
992
  ) -> tuple[float, float]:
972
993
  """
973
994
  Linearly interpolate between two normal distributions, with optional correlation.
@@ -1008,8 +1029,8 @@ def lerp_normal_distributions(
1008
1029
  """
1009
1030
  mu_Z = (1 - alpha) * mu_1 + alpha * mu_2
1010
1031
  var_Z = (
1011
- ((1 - alpha) ** 2) * sigma_1 ** 2
1012
- + (alpha ** 2) * sigma_2 ** 2
1032
+ ((1 - alpha) ** 2) * sigma_1**2
1033
+ + (alpha**2) * sigma_2**2
1013
1034
  + 2 * alpha * (1 - alpha) * rho * sigma_1 * sigma_2
1014
1035
  )
1015
1036
  sigma_Z = sqrt(var_Z)
@@ -1,8 +1,8 @@
1
1
  import os
2
2
 
3
- CONN_STRING = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
4
- CONTAINER = os.getenv('AZURE_STORAGE_CONTAINER')
5
- CONTAINER_GLOSSARY = os.getenv('AZURE_STORAGE_CONTAINER_GLOSSARY')
3
+ CONN_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
4
+ CONTAINER = os.getenv("AZURE_STORAGE_CONTAINER")
5
+ CONTAINER_GLOSSARY = os.getenv("AZURE_STORAGE_CONTAINER_GLOSSARY")
6
6
  _blob_service = None # noqa: F824
7
7
 
8
8
 
@@ -10,7 +10,12 @@ _blob_service = None # noqa: F824
10
10
  def _get_blob_service_client():
11
11
  global _blob_service
12
12
  from azure.storage.blob import BlobServiceClient
13
- _blob_service = BlobServiceClient.from_connection_string(CONN_STRING) if _blob_service is None else _blob_service
13
+
14
+ _blob_service = (
15
+ BlobServiceClient.from_connection_string(CONN_STRING)
16
+ if _blob_service is None
17
+ else _blob_service
18
+ )
14
19
  return _blob_service
15
20
 
16
21
 
@@ -20,8 +25,11 @@ def _get_container(glossary: bool = False) -> str:
20
25
 
21
26
  def _load_from_container(container: str, key: str):
22
27
  from azure.core.exceptions import ResourceNotFoundError
28
+
23
29
  try:
24
- blob_client = _get_blob_service_client().get_blob_client(container=container, blob=key)
30
+ blob_client = _get_blob_service_client().get_blob_client(
31
+ container=container, blob=key
32
+ )
25
33
  return blob_client.download_blob().readall()
26
34
  except ResourceNotFoundError:
27
35
  return None
@@ -29,8 +37,11 @@ def _load_from_container(container: str, key: str):
29
37
 
30
38
  def _exists_in_container(container: str, key: str):
31
39
  from azure.core.exceptions import ResourceNotFoundError
40
+
32
41
  try:
33
- blob_client = _get_blob_service_client().get_blob_client(container=container, blob=key)
42
+ blob_client = _get_blob_service_client().get_blob_client(
43
+ container=container, blob=key
44
+ )
34
45
  return blob_client.exists()
35
46
  except ResourceNotFoundError:
36
47
  return False
@@ -2,16 +2,21 @@ import os
2
2
 
3
3
 
4
4
  def _get_folder(glossary: bool = False) -> str:
5
- return os.getenv('DOWNLOAD_FOLDER_GLOSSARY') if glossary else os.getenv('DOWNLOAD_FOLDER')
5
+ return (
6
+ os.getenv("DOWNLOAD_FOLDER_GLOSSARY")
7
+ if glossary
8
+ else os.getenv("DOWNLOAD_FOLDER")
9
+ )
6
10
 
7
11
 
8
12
  def _load_from_folder(folder: str, key: str):
9
13
  try:
10
14
  with open(os.path.join(folder, key)) as f:
11
- return f.read().encode('utf-8')
15
+ return f.read().encode("utf-8")
12
16
  except Exception:
13
17
  # in case the file does not exist, should simply return None
14
18
  return None
15
19
 
16
20
 
17
- def _exists_in_folder(folder: str, key: str): return os.path.exists(os.path.join(folder, key))
21
+ def _exists_in_folder(folder: str, key: str):
22
+ return os.path.exists(os.path.join(folder, key))
@@ -1,7 +1,7 @@
1
1
  import os
2
2
 
3
- BUCKET = os.getenv('AWS_BUCKET')
4
- BUCKET_GLOSSARY = os.getenv('AWS_BUCKET_GLOSSARY')
3
+ BUCKET = os.getenv("AWS_BUCKET")
4
+ BUCKET_GLOSSARY = os.getenv("AWS_BUCKET_GLOSSARY")
5
5
  _s3_client = None # noqa: F824
6
6
 
7
7
 
@@ -9,7 +9,10 @@ _s3_client = None # noqa: F824
9
9
  def _get_s3_client():
10
10
  global _s3_client
11
11
  import boto3
12
- _s3_client = boto3.session.Session().client('s3') if _s3_client is None else _s3_client
12
+
13
+ _s3_client = (
14
+ boto3.session.Session().client("s3") if _s3_client is None else _s3_client
15
+ )
13
16
  return _s3_client
14
17
 
15
18
 
@@ -19,14 +22,16 @@ def _get_bucket(glossary: bool = False) -> str:
19
22
 
20
23
  def _load_from_bucket(bucket: str, key: str):
21
24
  from botocore.exceptions import ClientError
25
+
22
26
  try:
23
- return _get_s3_client().get_object(Bucket=bucket, Key=key)['Body'].read()
27
+ return _get_s3_client().get_object(Bucket=bucket, Key=key)["Body"].read()
24
28
  except ClientError:
25
29
  return None
26
30
 
27
31
 
28
32
  def _exists_in_bucket(bucket: str, key: str):
29
33
  from botocore.exceptions import ClientError
34
+
30
35
  try:
31
36
  _get_s3_client().head_object(Bucket=bucket, Key=key)
32
37
  return True
@@ -36,14 +41,18 @@ def _exists_in_bucket(bucket: str, key: str):
36
41
 
37
42
  def _read_size(bucket: str, key: str):
38
43
  try:
39
- return _get_s3_client().head_object(Bucket=bucket, Key=key).get('ContentLength')
44
+ return _get_s3_client().head_object(Bucket=bucket, Key=key).get("ContentLength")
40
45
  except Exception:
41
46
  return 0
42
47
 
43
48
 
44
49
  def _read_metadata(bucket_name: str, key: str):
45
50
  try:
46
- return _get_s3_client().head_object(Bucket=bucket_name, Key=key).get('Metadata', {})
51
+ return (
52
+ _get_s3_client()
53
+ .head_object(Bucket=bucket_name, Key=key)
54
+ .get("Metadata", {})
55
+ )
47
56
  except Exception:
48
57
  return {}
49
58
 
@@ -55,9 +64,9 @@ def _update_metadata(bucket: str, key: str, data: dict = {}):
55
64
  _get_s3_client().copy_object(
56
65
  Bucket=bucket,
57
66
  Key=key,
58
- CopySource={'Bucket': bucket, 'Key': key},
67
+ CopySource={"Bucket": bucket, "Key": key},
59
68
  Metadata=metadata,
60
- MetadataDirective='REPLACE'
69
+ MetadataDirective="REPLACE",
61
70
  )
62
71
  except Exception:
63
72
  pass
@@ -65,33 +74,32 @@ def _update_metadata(bucket: str, key: str, data: dict = {}):
65
74
 
66
75
  def _last_modified(bucket: str, key: str):
67
76
  try:
68
- return _get_s3_client().head_object(Bucket=bucket, Key=key).get('LastModified')
77
+ return _get_s3_client().head_object(Bucket=bucket, Key=key).get("LastModified")
69
78
  except Exception:
70
79
  return None
71
80
 
72
81
 
73
82
  def _upload_to_bucket(bucket: str, key: str, body, content_type: str):
74
83
  from botocore.exceptions import ClientError
84
+
75
85
  try:
76
86
  return _get_s3_client().put_object(
77
- Bucket=bucket,
78
- Key=key,
79
- Body=body,
80
- ContentType=content_type
87
+ Bucket=bucket, Key=key, Body=body, ContentType=content_type
81
88
  )
82
89
  except ClientError:
83
90
  return None
84
91
 
85
92
 
86
- def _list_bucket_objects(bucket: str, folder: str = ''):
93
+ def _list_bucket_objects(bucket: str, folder: str = ""):
87
94
  from botocore.exceptions import ClientError
95
+
88
96
  try:
89
- paginator = _get_s3_client().get_paginator('list_objects_v2')
97
+ paginator = _get_s3_client().get_paginator("list_objects_v2")
90
98
  pages = paginator.paginate(Bucket=bucket, Prefix=folder)
91
99
 
92
100
  contents = []
93
101
  for page in pages:
94
- contents.extend(page.get('Contents', []))
102
+ contents.extend(page.get("Contents", []))
95
103
  return contents
96
104
  except ClientError:
97
105
  return []
@@ -99,17 +107,14 @@ def _list_bucket_objects(bucket: str, folder: str = ''):
99
107
 
100
108
  def _delete_objects(bucket: str, objects: list):
101
109
  from botocore.exceptions import ClientError
110
+
102
111
  try:
103
112
  # delete in batch of 1000 max allowed
104
113
  batch_size = 1000
105
114
  for i in range(0, len(objects), batch_size):
106
- batch_objects = objects[i:i + batch_size]
115
+ batch_objects = objects[i : i + batch_size]
107
116
  _get_s3_client().delete_objects(
108
- Bucket=bucket,
109
- Delete={
110
- 'Objects': batch_objects,
111
- 'Quiet': True
112
- }
117
+ Bucket=bucket, Delete={"Objects": batch_objects, "Quiet": True}
113
118
  )
114
119
  except ClientError:
115
120
  return None
@@ -7,6 +7,11 @@ _sns_client = None # noqa: F824
7
7
  def _get_sns_client():
8
8
  global _sns_client
9
9
  import boto3
10
- region_name = os.getenv('AWS_REGION')
11
- _sns_client = boto3.session.Session().client('sns', region_name=region_name) if _sns_client is None else _sns_client
10
+
11
+ region_name = os.getenv("AWS_REGION")
12
+ _sns_client = (
13
+ boto3.session.Session().client("sns", region_name=region_name)
14
+ if _sns_client is None
15
+ else _sns_client
16
+ )
12
17
  return _sns_client
@@ -12,7 +12,7 @@ def _load_term_file(term_type: str):
12
12
  try:
13
13
  filepath = f"glossary/{term_type}.json"
14
14
  nodes = json.loads(_load_from_storage(filepath, glossary=True))
15
- return {node.get('@id'): node for node in nodes}
15
+ return {node.get("@id"): node for node in nodes}
16
16
  except Exception:
17
17
  return {}
18
18
 
@@ -21,11 +21,11 @@ def download_term(term: Union[str, dict], termType: Union[str, TermTermType] = N
21
21
  """
22
22
  Download a Term, using the glossary file if available, or default to the standard download.
23
23
  """
24
- term_id = term.get('@id', term.get('id')) if isinstance(term, dict) else term
24
+ term_id = term.get("@id", term.get("id")) if isinstance(term, dict) else term
25
25
  term_type = (
26
- termType if isinstance(termType, str) else termType.value
27
- ) if termType else (
28
- term.get('termType') if isinstance(term, dict) else None
26
+ (termType if isinstance(termType, str) else termType.value)
27
+ if termType
28
+ else (term.get("termType") if isinstance(term, dict) else None)
29
29
  )
30
30
  cached_nodes = _load_term_file(term_type) if term_type else {}
31
31
  return cached_nodes.get(term_id) or download_hestia(term_id)