anemoi-utils 0.3.9__tar.gz → 0.3.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of anemoi-utils might be problematic. Click here for more details.
- {anemoi_utils-0.3.9/src/anemoi_utils.egg-info → anemoi_utils-0.3.11}/PKG-INFO +1 -1
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/installing.rst +1 -1
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/_version.py +2 -2
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/caching.py +20 -9
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/commands/config.py +2 -2
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/config.py +11 -10
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/humanize.py +109 -14
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/provenance.py +1 -1
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/s3.py +97 -29
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11/src/anemoi_utils.egg-info}/PKG-INFO +1 -1
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/.github/workflows/python-publish.yml +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/.gitignore +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/.pre-commit-config.yaml +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/.readthedocs.yaml +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/LICENSE +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/README.md +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/Makefile +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/_static/logo.png +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/_static/style.css +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/_templates/.gitkeep +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/conf.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/index.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/modules/checkpoints.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/modules/config.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/modules/dates.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/modules/grib.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/modules/humanize.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/modules/provenance.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/modules/s3.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/modules/text.rst +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/docs/requirements.txt +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/pyproject.toml +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/setup.cfg +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/__init__.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/__main__.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/checkpoints.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/cli.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/commands/__init__.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/dates.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/grib.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/hindcasts.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/mars/__init__.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/mars/mars.yaml +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/text.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi/utils/timer.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi_utils.egg-info/SOURCES.txt +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi_utils.egg-info/dependency_links.txt +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi_utils.egg-info/entry_points.txt +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi_utils.egg-info/requires.txt +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/src/anemoi_utils.egg-info/top_level.txt +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/tests/test_dates.py +0 -0
- {anemoi_utils-0.3.9 → anemoi_utils-0.3.11}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: anemoi-utils
|
|
3
|
-
Version: 0.3.9
|
|
3
|
+
Version: 0.3.11
|
|
4
4
|
Summary: A package to hold various functions to support training of ML models on ECMWF data.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License: Apache License
|
|
@@ -9,17 +9,26 @@ import hashlib
|
|
|
9
9
|
import json
|
|
10
10
|
import os
|
|
11
11
|
import time
|
|
12
|
+
from threading import Lock
|
|
13
|
+
|
|
14
|
+
LOCK = Lock()
|
|
15
|
+
CACHE = {}
|
|
12
16
|
|
|
13
17
|
|
|
14
18
|
def cache(key, proc, collection="default", expires=None):
|
|
15
|
-
path = os.path.join(os.path.expanduser("~"), ".cache", "anemoi", collection)
|
|
16
|
-
os.makedirs(path, exist_ok=True)
|
|
17
19
|
|
|
18
20
|
key = json.dumps(key, sort_keys=True)
|
|
19
21
|
m = hashlib.md5()
|
|
20
22
|
m.update(key.encode("utf-8"))
|
|
23
|
+
m = m.hexdigest()
|
|
24
|
+
|
|
25
|
+
if m in CACHE:
|
|
26
|
+
return CACHE[m]
|
|
27
|
+
|
|
28
|
+
path = os.path.join(os.path.expanduser("~"), ".cache", "anemoi", collection)
|
|
29
|
+
os.makedirs(path, exist_ok=True)
|
|
21
30
|
|
|
22
|
-
filename = os.path.join(path, m.hexdigest())
|
|
31
|
+
filename = os.path.join(path, m)
|
|
23
32
|
if os.path.exists(filename):
|
|
24
33
|
with open(filename, "r") as f:
|
|
25
34
|
data = json.load(f)
|
|
@@ -35,6 +44,7 @@ def cache(key, proc, collection="default", expires=None):
|
|
|
35
44
|
with open(filename, "w") as f:
|
|
36
45
|
json.dump(data, f)
|
|
37
46
|
|
|
47
|
+
CACHE[m] = value
|
|
38
48
|
return value
|
|
39
49
|
|
|
40
50
|
|
|
@@ -49,11 +59,12 @@ class cached:
|
|
|
49
59
|
full = f"{func.__module__}.{func.__name__}"
|
|
50
60
|
|
|
51
61
|
def wrapped(*args, **kwargs):
|
|
52
|
-
|
|
53
|
-
(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
62
|
+
with LOCK:
|
|
63
|
+
return cache(
|
|
64
|
+
(full, args, kwargs),
|
|
65
|
+
lambda: func(*args, **kwargs),
|
|
66
|
+
self.collection,
|
|
67
|
+
self.expires,
|
|
68
|
+
)
|
|
58
69
|
|
|
59
70
|
return wrapped
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
import json
|
|
13
13
|
|
|
14
|
-
from ..config import
|
|
14
|
+
from ..config import config_path
|
|
15
15
|
from ..config import load_config
|
|
16
16
|
from . import Command
|
|
17
17
|
|
|
@@ -23,7 +23,7 @@ class Config(Command):
|
|
|
23
23
|
|
|
24
24
|
def run(self, args):
|
|
25
25
|
if args.path:
|
|
26
|
-
print(
|
|
26
|
+
print(config_path())
|
|
27
27
|
else:
|
|
28
28
|
print(json.dumps(load_config(), indent=4))
|
|
29
29
|
|
|
@@ -143,7 +143,7 @@ def _set_defaults(a, b):
|
|
|
143
143
|
a.setdefault(k, v)
|
|
144
144
|
|
|
145
145
|
|
|
146
|
-
def
|
|
146
|
+
def config_path(name="settings.toml"):
|
|
147
147
|
global QUIET
|
|
148
148
|
|
|
149
149
|
if name.startswith("/") or name.startswith("."):
|
|
@@ -205,10 +205,11 @@ def load_any_dict_format(path):
|
|
|
205
205
|
|
|
206
206
|
def _load_config(name="settings.toml", secrets=None, defaults=None):
|
|
207
207
|
|
|
208
|
-
|
|
209
|
-
|
|
208
|
+
key = json.dumps((name, secrets, defaults), sort_keys=True, default=str)
|
|
209
|
+
if key in CONFIG:
|
|
210
|
+
return CONFIG[key]
|
|
210
211
|
|
|
211
|
-
path =
|
|
212
|
+
path = config_path(name)
|
|
212
213
|
if os.path.exists(path):
|
|
213
214
|
config = load_any_dict_format(path)
|
|
214
215
|
else:
|
|
@@ -238,14 +239,14 @@ def _load_config(name="settings.toml", secrets=None, defaults=None):
|
|
|
238
239
|
secret_config = _load_config(secret_name)
|
|
239
240
|
_merge_dicts(config, secret_config)
|
|
240
241
|
|
|
241
|
-
CONFIG[
|
|
242
|
-
return CONFIG[
|
|
242
|
+
CONFIG[key] = DotDict(config)
|
|
243
|
+
return CONFIG[key]
|
|
243
244
|
|
|
244
245
|
|
|
245
246
|
def _save_config(name, data):
|
|
246
247
|
CONFIG.pop(name, None)
|
|
247
248
|
|
|
248
|
-
conf =
|
|
249
|
+
conf = config_path(name)
|
|
249
250
|
|
|
250
251
|
if conf.endswith(".json"):
|
|
251
252
|
with open(conf, "w") as f:
|
|
@@ -300,7 +301,7 @@ def load_config(name="settings.toml", secrets=None, defaults=None):
|
|
|
300
301
|
|
|
301
302
|
def load_raw_config(name, default=None):
|
|
302
303
|
|
|
303
|
-
path =
|
|
304
|
+
path = config_path(name)
|
|
304
305
|
if os.path.exists(path):
|
|
305
306
|
return load_any_dict_format(path)
|
|
306
307
|
|
|
@@ -324,13 +325,13 @@ def check_config_mode(name="settings.toml", secrets_name=None, secrets=None):
|
|
|
324
325
|
if name in CHECKED:
|
|
325
326
|
return
|
|
326
327
|
|
|
327
|
-
conf =
|
|
328
|
+
conf = config_path(name)
|
|
328
329
|
if not os.path.exists(conf):
|
|
329
330
|
return
|
|
330
331
|
mode = os.stat(conf).st_mode
|
|
331
332
|
if mode & 0o777 != 0o600:
|
|
332
333
|
if secrets_name:
|
|
333
|
-
secret_path =
|
|
334
|
+
secret_path = config_path(secrets_name)
|
|
334
335
|
raise SystemError(
|
|
335
336
|
f"Configuration file {conf} should not hold entries {secrets}.\n"
|
|
336
337
|
f"Please move them to {secret_path}."
|
|
@@ -12,10 +12,11 @@
|
|
|
12
12
|
import datetime
|
|
13
13
|
import json
|
|
14
14
|
import re
|
|
15
|
+
import warnings
|
|
15
16
|
from collections import defaultdict
|
|
16
17
|
|
|
17
18
|
|
|
18
|
-
def bytes(n: float) -> str:
|
|
19
|
+
def bytes_to_human(n: float) -> str:
|
|
19
20
|
"""Convert a number of bytes to a human readable string
|
|
20
21
|
|
|
21
22
|
>>> bytes(4096)
|
|
@@ -55,7 +56,16 @@ def bytes(n: float) -> str:
|
|
|
55
56
|
return "%s%g%s" % (sign, int(n * 10 + 0.5) / 10.0, u[i])
|
|
56
57
|
|
|
57
58
|
|
|
58
|
-
def base2(n) -> str:
|
|
59
|
+
def bytes(n: float) -> str:
|
|
60
|
+
warnings.warn(
|
|
61
|
+
"Function bytes is deprecated and will be removed in a future version. Use bytes_to_human instead.",
|
|
62
|
+
category=DeprecationWarning,
|
|
63
|
+
stacklevel=2,
|
|
64
|
+
)
|
|
65
|
+
return bytes_to_human(n)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def base2_to_human(n) -> str:
|
|
59
69
|
|
|
60
70
|
u = ["", "K", "M", "G", "T", " P", "E", "Z", "Y"]
|
|
61
71
|
i = 0
|
|
@@ -65,6 +75,16 @@ def base2(n) -> str:
|
|
|
65
75
|
return "%g%s" % (int(n * 10 + 0.5) / 10.0, u[i])
|
|
66
76
|
|
|
67
77
|
|
|
78
|
+
def base2(n) -> str:
|
|
79
|
+
|
|
80
|
+
warnings.warn(
|
|
81
|
+
"Function base2 is deprecated and will be removed in a future version. Use base2_to_human instead.",
|
|
82
|
+
category=DeprecationWarning,
|
|
83
|
+
stacklevel=2,
|
|
84
|
+
)
|
|
85
|
+
return base2_to_human(n)
|
|
86
|
+
|
|
87
|
+
|
|
68
88
|
PERIODS = (
|
|
69
89
|
(7 * 24 * 60 * 60, "week"),
|
|
70
90
|
(24 * 60 * 60, "day"),
|
|
@@ -81,7 +101,7 @@ def _plural(count):
|
|
|
81
101
|
return ""
|
|
82
102
|
|
|
83
103
|
|
|
84
|
-
def seconds(seconds: float) -> str:
|
|
104
|
+
def seconds_to_human(seconds: float) -> str:
|
|
85
105
|
"""Convert a number of seconds to a human readable string
|
|
86
106
|
|
|
87
107
|
>>> seconds(4000)
|
|
@@ -140,12 +160,17 @@ def seconds(seconds: float) -> str:
|
|
|
140
160
|
return " ".join(s)
|
|
141
161
|
|
|
142
162
|
|
|
143
|
-
def
|
|
144
|
-
|
|
163
|
+
def seconds(seconds: float) -> str:
|
|
164
|
+
warnings.warn(
|
|
165
|
+
"Function seconds is deprecated and will be removed in a future version. Use seconds_to_human instead.",
|
|
166
|
+
category=DeprecationWarning,
|
|
167
|
+
stacklevel=2,
|
|
168
|
+
)
|
|
169
|
+
return seconds_to_human(seconds)
|
|
145
170
|
|
|
146
171
|
|
|
147
172
|
def plural(value, what):
|
|
148
|
-
return f"{
|
|
173
|
+
return f"{value:,} {what}{_plural(value)}"
|
|
149
174
|
|
|
150
175
|
|
|
151
176
|
DOW = [
|
|
@@ -395,7 +420,7 @@ def list_to_human(lst, conjunction="and"):
|
|
|
395
420
|
return f" {conjunction} ".join(lst)
|
|
396
421
|
|
|
397
422
|
|
|
398
|
-
def as_number(value, name, units, none_ok):
|
|
423
|
+
def human_to_number(value, name, units, none_ok):
|
|
399
424
|
if value is None and none_ok:
|
|
400
425
|
return None
|
|
401
426
|
|
|
@@ -414,17 +439,44 @@ def as_number(value, name, units, none_ok):
|
|
|
414
439
|
return value * units[unit]
|
|
415
440
|
|
|
416
441
|
|
|
417
|
-
def
|
|
442
|
+
def as_number(value, name=None, units=None, none_ok=False):
|
|
443
|
+
warnings.warn(
|
|
444
|
+
"Function as_number is deprecated and will be removed in a future version. Use human_to_number instead.",
|
|
445
|
+
category=DeprecationWarning,
|
|
446
|
+
stacklevel=2,
|
|
447
|
+
)
|
|
448
|
+
return human_to_number(value, name, units, none_ok)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def human_seconds(value, name=None, none_ok=False):
|
|
418
452
|
units = dict(s=1, m=60, h=3600, d=86400, w=86400 * 7)
|
|
419
|
-
return
|
|
453
|
+
return human_to_number(value, name, units, none_ok)
|
|
420
454
|
|
|
421
455
|
|
|
422
|
-
def
|
|
456
|
+
def as_seconds(value, name=None, none_ok=False):
|
|
457
|
+
warnings.warn(
|
|
458
|
+
"Function as_seconds is deprecated and will be removed in a future version. Use human_seconds instead.",
|
|
459
|
+
category=DeprecationWarning,
|
|
460
|
+
stacklevel=2,
|
|
461
|
+
)
|
|
462
|
+
return human_seconds(value, name, none_ok)
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def human_to_percent(value, name=None, none_ok=False):
|
|
423
466
|
units = {"%": 1}
|
|
424
|
-
return
|
|
467
|
+
return human_to_number(value, name, units, none_ok)
|
|
425
468
|
|
|
426
469
|
|
|
427
|
-
def as_bytes(value, name=None, none_ok=False):
|
|
470
|
+
def as_percent(value, name=None, none_ok=False):
|
|
471
|
+
warnings.warn(
|
|
472
|
+
"Function as_percent is deprecated and will be removed in a future version. Use human_to_percent instead.",
|
|
473
|
+
category=DeprecationWarning,
|
|
474
|
+
stacklevel=2,
|
|
475
|
+
)
|
|
476
|
+
return human_to_percent(value, name, none_ok)
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def human_to_bytes(value, name=None, none_ok=False):
|
|
428
480
|
units = {}
|
|
429
481
|
n = 1
|
|
430
482
|
for u in "KMGTP":
|
|
@@ -432,10 +484,19 @@ def as_bytes(value, name=None, none_ok=False):
|
|
|
432
484
|
units[u] = n
|
|
433
485
|
units[u.lower()] = n
|
|
434
486
|
|
|
435
|
-
return
|
|
487
|
+
return human_to_number(value, name, units, none_ok)
|
|
436
488
|
|
|
437
489
|
|
|
438
|
-
def as_timedelta(value, name=None, none_ok=False):
|
|
490
|
+
def as_bytes(value, name=None, none_ok=False):
|
|
491
|
+
warnings.warn(
|
|
492
|
+
"Function as_bytes is deprecated and will be removed in a future version. Use human_to_bytes instead.",
|
|
493
|
+
category=DeprecationWarning,
|
|
494
|
+
stacklevel=2,
|
|
495
|
+
)
|
|
496
|
+
return human_to_bytes(value, name, none_ok)
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def human_to_timedelta(value, name=None, none_ok=False):
|
|
439
500
|
if value is None and none_ok:
|
|
440
501
|
return None
|
|
441
502
|
|
|
@@ -471,6 +532,15 @@ def as_timedelta(value, name=None, none_ok=False):
|
|
|
471
532
|
)
|
|
472
533
|
|
|
473
534
|
|
|
535
|
+
def as_timedelta(value, name=None, none_ok=False):
|
|
536
|
+
warnings.warn(
|
|
537
|
+
"Function as_timedelta is deprecated and will be removed in a future version. Use human_to_timedelta instead.",
|
|
538
|
+
category=DeprecationWarning,
|
|
539
|
+
stacklevel=2,
|
|
540
|
+
)
|
|
541
|
+
return human_to_timedelta(value, name, none_ok)
|
|
542
|
+
|
|
543
|
+
|
|
474
544
|
def rounded_datetime(d):
|
|
475
545
|
if float(d.microsecond) / 1000.0 / 1000.0 >= 0.5:
|
|
476
546
|
d = d + datetime.timedelta(seconds=1)
|
|
@@ -530,3 +600,28 @@ def json_pretty_dump(obj, max_line_length=120, default=str):
|
|
|
530
600
|
return json.dumps(obj, default=default)
|
|
531
601
|
|
|
532
602
|
return _format_json(obj)
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def shorten_list(lst, max_length=5):
|
|
606
|
+
"""Shorten a list to a maximum length.
|
|
607
|
+
|
|
608
|
+
Parameters
|
|
609
|
+
----------
|
|
610
|
+
lst
|
|
611
|
+
The list to be shortened.
|
|
612
|
+
max_length
|
|
613
|
+
Maximum length of the shortened list.
|
|
614
|
+
|
|
615
|
+
Returns
|
|
616
|
+
-------
|
|
617
|
+
list
|
|
618
|
+
Shortened list.
|
|
619
|
+
"""
|
|
620
|
+
if len(lst) <= max_length:
|
|
621
|
+
return lst
|
|
622
|
+
else:
|
|
623
|
+
half = max_length // 2
|
|
624
|
+
result = list(lst[:half]) + ["..."] + list(lst[max_length - half :])
|
|
625
|
+
if isinstance(lst, tuple):
|
|
626
|
+
return tuple(result)
|
|
627
|
+
return result
|
|
@@ -16,6 +16,10 @@ to use a different S3 compatible service::
|
|
|
16
16
|
aws_access_key_id = xxxxxxxxxxxxxxxxxxxxxxxx
|
|
17
17
|
aws_secret_access_key = xxxxxxxxxxxxxxxxxxxxxxxx
|
|
18
18
|
|
|
19
|
+
Alternatively, the `endpoint_url`, and keys can be set in one of
|
|
20
|
+
the `~/.config/anemoi/settings.toml`
|
|
21
|
+
or `~/.config/anemoi/settings-secrets.toml` files.
|
|
22
|
+
|
|
19
23
|
"""
|
|
20
24
|
|
|
21
25
|
import concurrent.futures
|
|
@@ -27,7 +31,7 @@ from copy import deepcopy
|
|
|
27
31
|
import tqdm
|
|
28
32
|
|
|
29
33
|
from .config import load_config
|
|
30
|
-
from .humanize import
|
|
34
|
+
from .humanize import bytes_to_human
|
|
31
35
|
|
|
32
36
|
LOGGER = logging.getLogger(__name__)
|
|
33
37
|
|
|
@@ -37,17 +41,43 @@ LOGGER = logging.getLogger(__name__)
|
|
|
37
41
|
thread_local = threading.local()
|
|
38
42
|
|
|
39
43
|
|
|
40
|
-
def s3_client(bucket):
|
|
44
|
+
def s3_client(bucket, region=None):
|
|
41
45
|
import boto3
|
|
42
|
-
|
|
43
|
-
|
|
46
|
+
from botocore import UNSIGNED
|
|
47
|
+
from botocore.client import Config
|
|
44
48
|
|
|
45
49
|
if not hasattr(thread_local, "s3_clients"):
|
|
46
50
|
thread_local.s3_clients = {}
|
|
47
51
|
|
|
48
|
-
|
|
52
|
+
key = f"{bucket}-{region}"
|
|
53
|
+
|
|
54
|
+
boto3_config = dict(max_pool_connections=25)
|
|
55
|
+
|
|
56
|
+
if key in thread_local.s3_clients:
|
|
57
|
+
return thread_local.s3_clients[key]
|
|
58
|
+
|
|
59
|
+
boto3_config = dict(max_pool_connections=25)
|
|
60
|
+
|
|
61
|
+
if region:
|
|
62
|
+
# This is using AWS
|
|
63
|
+
|
|
64
|
+
options = {"region_name": region}
|
|
65
|
+
|
|
66
|
+
# Anonymous access
|
|
67
|
+
if not (
|
|
68
|
+
os.path.exists(os.path.expanduser("~/.aws/credentials"))
|
|
69
|
+
or ("AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ)
|
|
70
|
+
):
|
|
71
|
+
boto3_config["signature_version"] = UNSIGNED
|
|
72
|
+
|
|
73
|
+
else:
|
|
74
|
+
|
|
75
|
+
# We may be accessing a different S3 compatible service
|
|
76
|
+
# Use anemoi.config to get the configuration
|
|
49
77
|
|
|
50
78
|
options = {}
|
|
79
|
+
config = load_config(secrets=["aws_access_key_id", "aws_secret_access_key"])
|
|
80
|
+
|
|
51
81
|
cfg = config.get("object-storage", {})
|
|
52
82
|
for k, v in cfg.items():
|
|
53
83
|
if isinstance(v, (str, int, float, bool)):
|
|
@@ -62,21 +92,29 @@ def s3_client(bucket):
|
|
|
62
92
|
raise ValueError(f"Unsupported object storage type {type}")
|
|
63
93
|
|
|
64
94
|
if "config" in options:
|
|
95
|
+
boto3_config.update(options["config"])
|
|
96
|
+
del options["config"]
|
|
65
97
|
from botocore.client import Config
|
|
66
98
|
|
|
67
|
-
|
|
68
|
-
|
|
99
|
+
options["config"] = Config(**boto3_config)
|
|
100
|
+
|
|
101
|
+
thread_local.s3_clients[key] = boto3.client("s3", **options)
|
|
102
|
+
|
|
103
|
+
return thread_local.s3_clients[key]
|
|
69
104
|
|
|
70
|
-
thread_local.s3_clients[bucket] = boto3.client("s3", **options)
|
|
71
105
|
|
|
72
|
-
|
|
106
|
+
def _ignore(number_of_files, total_size, total_transferred, transfering):
|
|
107
|
+
pass
|
|
73
108
|
|
|
74
109
|
|
|
75
110
|
class Transfer:
|
|
76
111
|
|
|
77
|
-
def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1):
|
|
112
|
+
def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1, progress=None):
|
|
78
113
|
assert verbosity == 1, verbosity
|
|
79
114
|
|
|
115
|
+
if progress is None:
|
|
116
|
+
progress = _ignore
|
|
117
|
+
|
|
80
118
|
# from boto3.s3.transfer import TransferConfig
|
|
81
119
|
# config = TransferConfig(use_threads=False)
|
|
82
120
|
config = None
|
|
@@ -85,7 +123,8 @@ class Transfer:
|
|
|
85
123
|
if verbosity > 0:
|
|
86
124
|
LOGGER.info(f"{self.action} {source} to {target}")
|
|
87
125
|
|
|
88
|
-
|
|
126
|
+
total_size = 0
|
|
127
|
+
total_transferred = 0
|
|
89
128
|
|
|
90
129
|
futures = []
|
|
91
130
|
for name in self.list_source(source):
|
|
@@ -101,11 +140,14 @@ class Transfer:
|
|
|
101
140
|
config=config,
|
|
102
141
|
)
|
|
103
142
|
)
|
|
104
|
-
|
|
143
|
+
total_size += self.source_size(name)
|
|
105
144
|
|
|
106
145
|
if len(futures) % 10000 == 0:
|
|
146
|
+
|
|
147
|
+
progress(len(futures), total_size, 0, False)
|
|
148
|
+
|
|
107
149
|
if verbosity > 0:
|
|
108
|
-
LOGGER.info(f"Preparing transfer, {len(futures):,} files... ({
|
|
150
|
+
LOGGER.info(f"Preparing transfer, {len(futures):,} files... ({bytes_to_human(total_size)})")
|
|
109
151
|
done, _ = concurrent.futures.wait(
|
|
110
152
|
futures,
|
|
111
153
|
timeout=0.001,
|
|
@@ -115,14 +157,22 @@ class Transfer:
|
|
|
115
157
|
for future in done:
|
|
116
158
|
future.result()
|
|
117
159
|
|
|
160
|
+
number_of_files = len(futures)
|
|
161
|
+
progress(number_of_files, total_size, 0, True)
|
|
162
|
+
|
|
118
163
|
if verbosity > 0:
|
|
119
|
-
LOGGER.info(f"{self.action} {
|
|
120
|
-
with tqdm.tqdm(total=
|
|
121
|
-
for future in futures:
|
|
122
|
-
|
|
164
|
+
LOGGER.info(f"{self.action} {number_of_files:,} files ({bytes_to_human(total_size)})")
|
|
165
|
+
with tqdm.tqdm(total=total_size, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
|
|
166
|
+
for future in concurrent.futures.as_completed(futures):
|
|
167
|
+
size = future.result()
|
|
168
|
+
pbar.update(size)
|
|
169
|
+
total_transferred += size
|
|
170
|
+
progress(number_of_files, total_size, total_transferred, True)
|
|
123
171
|
else:
|
|
124
|
-
for future in futures:
|
|
125
|
-
future.result()
|
|
172
|
+
for future in concurrent.futures.as_completed(futures):
|
|
173
|
+
size = future.result()
|
|
174
|
+
total_transferred += size
|
|
175
|
+
progress(number_of_files, total_size, total_transferred, True)
|
|
126
176
|
|
|
127
177
|
except Exception:
|
|
128
178
|
executor.shutdown(wait=False, cancel_futures=True)
|
|
@@ -168,7 +218,7 @@ class Upload(Transfer):
|
|
|
168
218
|
size = os.path.getsize(source)
|
|
169
219
|
|
|
170
220
|
if verbosity > 0:
|
|
171
|
-
LOGGER.info(f"{self.action} {source} to {target} ({
|
|
221
|
+
LOGGER.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")
|
|
172
222
|
|
|
173
223
|
try:
|
|
174
224
|
results = s3.head_object(Bucket=bucket, Key=key)
|
|
@@ -235,7 +285,6 @@ class Download(Transfer):
|
|
|
235
285
|
try:
|
|
236
286
|
response = s3.head_object(Bucket=bucket, Key=key)
|
|
237
287
|
except s3.exceptions.ClientError as e:
|
|
238
|
-
print(e.response["Error"]["Code"], e.response["Error"]["Message"], bucket, key)
|
|
239
288
|
if e.response["Error"]["Code"] == "404":
|
|
240
289
|
raise ValueError(f"{source} does not exist ({bucket}, {key})")
|
|
241
290
|
raise
|
|
@@ -243,7 +292,7 @@ class Download(Transfer):
|
|
|
243
292
|
size = int(response["ContentLength"])
|
|
244
293
|
|
|
245
294
|
if verbosity > 0:
|
|
246
|
-
LOGGER.info(f"Downloading {source} to {target} ({
|
|
295
|
+
LOGGER.info(f"Downloading {source} to {target} ({bytes_to_human(size)})")
|
|
247
296
|
|
|
248
297
|
if overwrite:
|
|
249
298
|
resume = False
|
|
@@ -272,7 +321,7 @@ class Download(Transfer):
|
|
|
272
321
|
return size
|
|
273
322
|
|
|
274
323
|
|
|
275
|
-
def upload(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
|
|
324
|
+
def upload(source, target, *, overwrite=False, resume=False, verbosity=1, progress=None, threads=1):
|
|
276
325
|
"""Upload a file or a folder to S3.
|
|
277
326
|
|
|
278
327
|
Parameters
|
|
@@ -286,6 +335,9 @@ def upload(source, target, *, overwrite=False, resume=False, verbosity=1, thread
|
|
|
286
335
|
resume : bool, optional
|
|
287
336
|
If the data is alreay on S3 it will not be uploaded, unless the remote file
|
|
288
337
|
has a different size, by default False
|
|
338
|
+
progress: callable, optional
|
|
339
|
+
A callable that will be called with the number of files, the total size of the files, the total size
|
|
340
|
+
transferred and a boolean indicating if the transfer has started. By default None
|
|
289
341
|
threads : int, optional
|
|
290
342
|
The number of threads to use when uploading a directory, by default 1
|
|
291
343
|
"""
|
|
@@ -293,13 +345,26 @@ def upload(source, target, *, overwrite=False, resume=False, verbosity=1, thread
|
|
|
293
345
|
uploader = Upload()
|
|
294
346
|
if os.path.isdir(source):
|
|
295
347
|
uploader.transfer_folder(
|
|
296
|
-
source=source,
|
|
348
|
+
source=source,
|
|
349
|
+
target=target,
|
|
350
|
+
overwrite=overwrite,
|
|
351
|
+
resume=resume,
|
|
352
|
+
verbosity=verbosity,
|
|
353
|
+
progress=progress,
|
|
354
|
+
threads=threads,
|
|
297
355
|
)
|
|
298
356
|
else:
|
|
299
|
-
uploader.transfer_file(
|
|
357
|
+
uploader.transfer_file(
|
|
358
|
+
source=source,
|
|
359
|
+
target=target,
|
|
360
|
+
overwrite=overwrite,
|
|
361
|
+
resume=resume,
|
|
362
|
+
verbosity=verbosity,
|
|
363
|
+
progress=progress,
|
|
364
|
+
)
|
|
300
365
|
|
|
301
366
|
|
|
302
|
-
def download(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
|
|
367
|
+
def download(source, target, *, overwrite=False, resume=False, verbosity=1, progress=None, threads=1):
|
|
303
368
|
"""Download a file or a folder from S3.
|
|
304
369
|
|
|
305
370
|
Parameters
|
|
@@ -315,6 +380,9 @@ def download(source, target, *, overwrite=False, resume=False, verbosity=1, thre
|
|
|
315
380
|
resume : bool, optional
|
|
316
381
|
If the data is alreay on local it will not be downloaded, unless the remote file
|
|
317
382
|
has a different size, by default False
|
|
383
|
+
progress: callable, optional
|
|
384
|
+
A callable that will be called with the number of files, the total size of the files, the total size
|
|
385
|
+
transferred and a boolean indicating if the transfer has started. By default None
|
|
318
386
|
threads : int, optional
|
|
319
387
|
The number of threads to use when downloading a directory, by default 1
|
|
320
388
|
"""
|
|
@@ -329,6 +397,7 @@ def download(source, target, *, overwrite=False, resume=False, verbosity=1, thre
|
|
|
329
397
|
overwrite=overwrite,
|
|
330
398
|
resume=resume,
|
|
331
399
|
verbosity=verbosity,
|
|
400
|
+
progress=progress,
|
|
332
401
|
threads=threads,
|
|
333
402
|
)
|
|
334
403
|
else:
|
|
@@ -338,6 +407,7 @@ def download(source, target, *, overwrite=False, resume=False, verbosity=1, thre
|
|
|
338
407
|
overwrite=overwrite,
|
|
339
408
|
resume=resume,
|
|
340
409
|
verbosity=verbosity,
|
|
410
|
+
progress=progress,
|
|
341
411
|
)
|
|
342
412
|
|
|
343
413
|
|
|
@@ -387,7 +457,7 @@ def _delete_file(target):
|
|
|
387
457
|
return
|
|
388
458
|
|
|
389
459
|
LOGGER.info(f"Deleting {target}")
|
|
390
|
-
|
|
460
|
+
s3.delete_object(Bucket=bucket, Key=key)
|
|
391
461
|
LOGGER.info(f"{target} is deleted")
|
|
392
462
|
|
|
393
463
|
|
|
@@ -423,13 +493,11 @@ def list_folder(folder):
|
|
|
423
493
|
A list of the subfolders names in the folder.
|
|
424
494
|
"""
|
|
425
495
|
|
|
426
|
-
print(folder)
|
|
427
496
|
assert folder.startswith("s3://")
|
|
428
497
|
if not folder.endswith("/"):
|
|
429
498
|
folder += "/"
|
|
430
499
|
|
|
431
500
|
_, _, bucket, prefix = folder.split("/", 3)
|
|
432
|
-
print(bucket, prefix)
|
|
433
501
|
|
|
434
502
|
s3 = s3_client(bucket)
|
|
435
503
|
paginator = s3.get_paginator("list_objects_v2")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: anemoi-utils
|
|
3
|
-
Version: 0.3.9
|
|
3
|
+
Version: 0.3.11
|
|
4
4
|
Summary: A package to hold various functions to support training of ML models on ECMWF data.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License: Apache License
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|