anemoi-utils 0.4.23__py3-none-any.whl → 0.4.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of anemoi-utils might be problematic. Click here for more details.
- anemoi/utils/_version.py +2 -2
- anemoi/utils/remote/s3.py +107 -26
- {anemoi_utils-0.4.23.dist-info → anemoi_utils-0.4.24.dist-info}/METADATA +1 -1
- {anemoi_utils-0.4.23.dist-info → anemoi_utils-0.4.24.dist-info}/RECORD +8 -8
- {anemoi_utils-0.4.23.dist-info → anemoi_utils-0.4.24.dist-info}/WHEEL +1 -1
- {anemoi_utils-0.4.23.dist-info → anemoi_utils-0.4.24.dist-info}/entry_points.txt +0 -0
- {anemoi_utils-0.4.23.dist-info → anemoi_utils-0.4.24.dist-info}/licenses/LICENSE +0 -0
- {anemoi_utils-0.4.23.dist-info → anemoi_utils-0.4.24.dist-info}/top_level.txt +0 -0
anemoi/utils/_version.py
CHANGED
anemoi/utils/remote/s3.py
CHANGED
|
@@ -24,6 +24,7 @@ the `~/.config/anemoi/settings.toml`
|
|
|
24
24
|
or `~/.config/anemoi/settings-secrets.toml` files.
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
|
+
import fnmatch
|
|
27
28
|
import logging
|
|
28
29
|
import os
|
|
29
30
|
import threading
|
|
@@ -38,15 +39,15 @@ from ..humanize import bytes_to_human
|
|
|
38
39
|
from . import BaseDownload
|
|
39
40
|
from . import BaseUpload
|
|
40
41
|
|
|
41
|
-
|
|
42
|
-
|
|
42
|
+
LOG = logging.getLogger(__name__)
|
|
43
|
+
SECRETS = ["aws_access_key_id", "aws_secret_access_key"]
|
|
43
44
|
|
|
44
45
|
# s3_clients are not thread-safe, so we need to create a new client for each thread
|
|
45
46
|
|
|
46
47
|
thread_local = threading.local()
|
|
47
48
|
|
|
48
49
|
|
|
49
|
-
def s3_client(bucket: str, region: str = None) -> Any:
|
|
50
|
+
def s3_client(bucket: str, *, region: str = None, service: str = "s3") -> Any:
|
|
50
51
|
"""Get an S3 client for the specified bucket and region.
|
|
51
52
|
|
|
52
53
|
Parameters
|
|
@@ -55,6 +56,8 @@ def s3_client(bucket: str, region: str = None) -> Any:
|
|
|
55
56
|
The name of the S3 bucket.
|
|
56
57
|
region : str, optional
|
|
57
58
|
The AWS region of the S3 bucket.
|
|
59
|
+
service : str, optional
|
|
60
|
+
The AWS service to use, default is "s3".
|
|
58
61
|
|
|
59
62
|
Returns
|
|
60
63
|
-------
|
|
@@ -68,7 +71,7 @@ def s3_client(bucket: str, region: str = None) -> Any:
|
|
|
68
71
|
if not hasattr(thread_local, "s3_clients"):
|
|
69
72
|
thread_local.s3_clients = {}
|
|
70
73
|
|
|
71
|
-
key = f"{bucket}-{region}"
|
|
74
|
+
key = f"{bucket}-{region}-{service}"
|
|
72
75
|
|
|
73
76
|
if key in thread_local.s3_clients:
|
|
74
77
|
return thread_local.s3_clients[key]
|
|
@@ -96,17 +99,27 @@ def s3_client(bucket: str, region: str = None) -> Any:
|
|
|
96
99
|
# We may be accessing a different S3 compatible service
|
|
97
100
|
# Use anemoi.config to get the configuration
|
|
98
101
|
|
|
99
|
-
|
|
100
|
-
|
|
102
|
+
region = "unknown-region"
|
|
103
|
+
|
|
104
|
+
options = {"region_name": region}
|
|
105
|
+
config = load_config(secrets=SECRETS)
|
|
101
106
|
|
|
102
107
|
cfg = config.get("object-storage", {})
|
|
108
|
+
candidate = None
|
|
103
109
|
for k, v in cfg.items():
|
|
104
110
|
if isinstance(v, (str, int, float, bool)):
|
|
105
111
|
options[k] = v
|
|
106
112
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
113
|
+
if isinstance(v, dict):
|
|
114
|
+
if fnmatch.fnmatch(bucket, k):
|
|
115
|
+
if candidate is not None:
|
|
116
|
+
raise ValueError(f"Multiple object storage configurations match {bucket}: {candidate} and {k}")
|
|
117
|
+
candidate = k
|
|
118
|
+
|
|
119
|
+
if candidate is not None:
|
|
120
|
+
for k, v in cfg.get(candidate, {}).items():
|
|
121
|
+
if isinstance(v, (str, int, float, bool)):
|
|
122
|
+
options[k] = v
|
|
110
123
|
|
|
111
124
|
type = options.pop("type", "s3")
|
|
112
125
|
if type != "s3":
|
|
@@ -115,11 +128,27 @@ def s3_client(bucket: str, region: str = None) -> Any:
|
|
|
115
128
|
if "config" in options:
|
|
116
129
|
boto3_config.update(options["config"])
|
|
117
130
|
del options["config"]
|
|
118
|
-
from botocore.client import Config
|
|
119
131
|
|
|
120
132
|
options["config"] = Config(**boto3_config)
|
|
121
133
|
|
|
122
|
-
|
|
134
|
+
def _(options):
|
|
135
|
+
|
|
136
|
+
def __(k, v):
|
|
137
|
+
if k in SECRETS:
|
|
138
|
+
return "***"
|
|
139
|
+
return v
|
|
140
|
+
|
|
141
|
+
if isinstance(options, dict):
|
|
142
|
+
return {k: __(k, v) for k, v in options.items()}
|
|
143
|
+
|
|
144
|
+
if isinstance(options, list):
|
|
145
|
+
return [_(o) for o in options]
|
|
146
|
+
|
|
147
|
+
return options
|
|
148
|
+
|
|
149
|
+
LOG.info(f"Using S3 options: {_(options)}")
|
|
150
|
+
|
|
151
|
+
thread_local.s3_clients[key] = boto3.client(service, **options)
|
|
123
152
|
|
|
124
153
|
return thread_local.s3_clients[key]
|
|
125
154
|
|
|
@@ -215,7 +244,7 @@ class S3Upload(BaseUpload):
|
|
|
215
244
|
size = os.path.getsize(source)
|
|
216
245
|
|
|
217
246
|
if verbosity > 0:
|
|
218
|
-
|
|
247
|
+
LOG.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")
|
|
219
248
|
|
|
220
249
|
try:
|
|
221
250
|
results = s3.head_object(Bucket=bucket, Key=key)
|
|
@@ -227,7 +256,7 @@ class S3Upload(BaseUpload):
|
|
|
227
256
|
|
|
228
257
|
if remote_size is not None:
|
|
229
258
|
if remote_size != size:
|
|
230
|
-
|
|
259
|
+
LOG.warning(
|
|
231
260
|
f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})"
|
|
232
261
|
)
|
|
233
262
|
elif resume:
|
|
@@ -400,7 +429,7 @@ class S3Download(BaseDownload):
|
|
|
400
429
|
size = int(response["ContentLength"])
|
|
401
430
|
|
|
402
431
|
if verbosity > 0:
|
|
403
|
-
|
|
432
|
+
LOG.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")
|
|
404
433
|
|
|
405
434
|
if overwrite:
|
|
406
435
|
resume = False
|
|
@@ -409,7 +438,7 @@ class S3Download(BaseDownload):
|
|
|
409
438
|
if os.path.exists(target):
|
|
410
439
|
local_size = os.path.getsize(target)
|
|
411
440
|
if local_size != size:
|
|
412
|
-
|
|
441
|
+
LOG.warning(
|
|
413
442
|
f"{target} already with different size, re-downloading (remote={size}, local={local_size})"
|
|
414
443
|
)
|
|
415
444
|
else:
|
|
@@ -464,7 +493,7 @@ def _list_objects(target: str, batch: bool = False) -> Iterable:
|
|
|
464
493
|
yield from objects
|
|
465
494
|
|
|
466
495
|
|
|
467
|
-
def _delete_folder(target: str) -> None:
|
|
496
|
+
def delete_folder(target: str) -> None:
|
|
468
497
|
"""Delete a folder from S3.
|
|
469
498
|
|
|
470
499
|
Parameters
|
|
@@ -477,13 +506,13 @@ def _delete_folder(target: str) -> None:
|
|
|
477
506
|
|
|
478
507
|
total = 0
|
|
479
508
|
for batch in _list_objects(target, batch=True):
|
|
480
|
-
|
|
509
|
+
LOG.info(f"Deleting {len(batch):,} objects from {target}")
|
|
481
510
|
s3.delete_objects(Bucket=bucket, Delete={"Objects": [{"Key": o["Key"]} for o in batch]})
|
|
482
511
|
total += len(batch)
|
|
483
|
-
|
|
512
|
+
LOG.info(f"Deleted {len(batch):,} objects (total={total:,})")
|
|
484
513
|
|
|
485
514
|
|
|
486
|
-
def _delete_file(target: str) -> None:
|
|
515
|
+
def delete_file(target: str) -> None:
|
|
487
516
|
"""Delete a file from S3.
|
|
488
517
|
|
|
489
518
|
Parameters
|
|
@@ -505,12 +534,12 @@ def _delete_file(target: str) -> None:
|
|
|
505
534
|
exits = False
|
|
506
535
|
|
|
507
536
|
if not exits:
|
|
508
|
-
|
|
537
|
+
LOG.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'")
|
|
509
538
|
return
|
|
510
539
|
|
|
511
|
-
|
|
540
|
+
LOG.info(f"Deleting {target}")
|
|
512
541
|
s3.delete_object(Bucket=bucket, Key=key)
|
|
513
|
-
|
|
542
|
+
LOG.info(f"{target} is deleted")
|
|
514
543
|
|
|
515
544
|
|
|
516
545
|
def delete(target: str) -> None:
|
|
@@ -525,9 +554,9 @@ def delete(target: str) -> None:
|
|
|
525
554
|
assert target.startswith("s3://")
|
|
526
555
|
|
|
527
556
|
if target.endswith("/"):
|
|
528
|
-
|
|
557
|
+
delete_folder(target)
|
|
529
558
|
else:
|
|
530
|
-
|
|
559
|
+
delete_file(target)
|
|
531
560
|
|
|
532
561
|
|
|
533
562
|
def list_folder(folder: str) -> Iterable:
|
|
@@ -581,7 +610,33 @@ def object_info(target: str) -> dict:
|
|
|
581
610
|
return s3.head_object(Bucket=bucket, Key=key)
|
|
582
611
|
except s3.exceptions.ClientError as e:
|
|
583
612
|
if e.response["Error"]["Code"] == "404":
|
|
584
|
-
raise
|
|
613
|
+
raise FileNotFoundError(f"{target} does not exist")
|
|
614
|
+
raise
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
def object_exists(target: str) -> bool:
|
|
618
|
+
"""Check if an object exists.
|
|
619
|
+
|
|
620
|
+
Parameters
|
|
621
|
+
----------
|
|
622
|
+
target : str
|
|
623
|
+
The URL of a file or a folder on S3. The URL should start with 's3://'.
|
|
624
|
+
|
|
625
|
+
Returns
|
|
626
|
+
-------
|
|
627
|
+
bool
|
|
628
|
+
True if the object exists, False otherwise.
|
|
629
|
+
"""
|
|
630
|
+
|
|
631
|
+
_, _, bucket, key = target.split("/", 3)
|
|
632
|
+
s3 = s3_client(bucket)
|
|
633
|
+
|
|
634
|
+
try:
|
|
635
|
+
s3.head_object(Bucket=bucket, Key=key)
|
|
636
|
+
return True
|
|
637
|
+
except s3.exceptions.ClientError as e:
|
|
638
|
+
if e.response["Error"]["Code"] == "404":
|
|
639
|
+
return False
|
|
585
640
|
raise
|
|
586
641
|
|
|
587
642
|
|
|
@@ -600,7 +655,7 @@ def object_acl(target: str) -> dict:
|
|
|
600
655
|
"""
|
|
601
656
|
|
|
602
657
|
_, _, bucket, key = target.split("/", 3)
|
|
603
|
-
s3 = s3_client()
|
|
658
|
+
s3 = s3_client(bucket)
|
|
604
659
|
|
|
605
660
|
return s3.get_object_acl(Bucket=bucket, Key=key)
|
|
606
661
|
|
|
@@ -643,3 +698,29 @@ def upload(source: str, target: str, *args, **kwargs) -> None:
|
|
|
643
698
|
|
|
644
699
|
assert target.startswith("s3://"), f"target {target} should start with 's3://'"
|
|
645
700
|
return transfer(source, target, *args, **kwargs)
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
def quotas(target: str) -> dict:
|
|
704
|
+
"""Get the quotas for an S3 bucket.
|
|
705
|
+
|
|
706
|
+
Parameters
|
|
707
|
+
----------
|
|
708
|
+
target : str
|
|
709
|
+
The URL of a file or a folder on S3. The URL should start with 's3://'.
|
|
710
|
+
|
|
711
|
+
Returns
|
|
712
|
+
-------
|
|
713
|
+
dict
|
|
714
|
+
A dictionary with the quotas for the bucket.
|
|
715
|
+
"""
|
|
716
|
+
from botocore.exceptions import ClientError
|
|
717
|
+
|
|
718
|
+
_, _, bucket, _ = target.split("/", 3)
|
|
719
|
+
s3 = s3_client(bucket, service="service-quotas")
|
|
720
|
+
|
|
721
|
+
try:
|
|
722
|
+
return s3.list_service_quotas(ServiceCode="ec2")
|
|
723
|
+
except ClientError as e:
|
|
724
|
+
if e.response["Error"]["Code"] == "404":
|
|
725
|
+
raise ValueError(f"{target} does not exist")
|
|
726
|
+
raise
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: anemoi-utils
|
|
3
|
-
Version: 0.4.23
|
|
3
|
+
Version: 0.4.24
|
|
4
4
|
Summary: A package to hold various functions to support training of ML models on ECMWF data.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License: Apache License
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
anemoi/utils/__init__.py,sha256=uVhpF-VjIl_4mMywOVtgTutgsdIsqz-xdkwxeMhzuag,730
|
|
2
2
|
anemoi/utils/__main__.py,sha256=6LlE4MYrPvqqrykxXh7XMi50UZteUY59NeM8P9Zs2dU,910
|
|
3
|
-
anemoi/utils/_version.py,sha256=
|
|
3
|
+
anemoi/utils/_version.py,sha256=eqPTx1mit5QVY_CIZPeIYTsnPPxmLZEDtDv2l58DdBE,513
|
|
4
4
|
anemoi/utils/caching.py,sha256=rXbeAmpBcMbbfN4EVblaHWKicsrtx1otER84FEBtz98,6183
|
|
5
5
|
anemoi/utils/checkpoints.py,sha256=N4WpAZXa4etrpSEKhHqUUtG2-x9w3FJMHcLO-dDAXPY,9600
|
|
6
6
|
anemoi/utils/cli.py,sha256=IyZfnSw0u0yYnrjOrzvm2RuuKvDk4cVb8pf8BkaChgA,6209
|
|
@@ -29,13 +29,13 @@ anemoi/utils/mars/__init__.py,sha256=b-Lc3L1TAQd9ODs0Z1YSJzgZCO1K_M3DSgx_yd2qXvM
|
|
|
29
29
|
anemoi/utils/mars/mars.yaml,sha256=R0dujp75lLA4wCWhPeOQnzJ45WZAYLT8gpx509cBFlc,66
|
|
30
30
|
anemoi/utils/mars/requests.py,sha256=VFMHBVAAl0_2lOcMBa1lvaKHctN0lDJsI6_U4BucGew,1142
|
|
31
31
|
anemoi/utils/remote/__init__.py,sha256=swPWHQoh-B6Xq9R489tPw0FykMue7f-bJ8enneFYSYE,20776
|
|
32
|
-
anemoi/utils/remote/s3.py,sha256=
|
|
32
|
+
anemoi/utils/remote/s3.py,sha256=lhLxwBU-AKERzYURXJ-cOlaccEYmjoXqlvX3zYu5h-8,19977
|
|
33
33
|
anemoi/utils/remote/ssh.py,sha256=xNtsawh8okytCKRehkRCVExbHZj-CRUQNormEHglfuw,8088
|
|
34
34
|
anemoi/utils/schemas/__init__.py,sha256=nkinKlsPLPXEjfTYQT1mpKC4cvs-14w_zBkDRxakwxw,698
|
|
35
35
|
anemoi/utils/schemas/errors.py,sha256=lgOXzVTYzAE0qWQf3OZ42vCWixv8lilSqLLhzARBmvI,1831
|
|
36
|
-
anemoi_utils-0.4.
|
|
37
|
-
anemoi_utils-0.4.
|
|
38
|
-
anemoi_utils-0.4.
|
|
39
|
-
anemoi_utils-0.4.
|
|
40
|
-
anemoi_utils-0.4.
|
|
41
|
-
anemoi_utils-0.4.
|
|
36
|
+
anemoi_utils-0.4.24.dist-info/licenses/LICENSE,sha256=8HznKF1Vi2IvfLsKNE5A2iVyiri3pRjRPvPC9kxs6qk,11354
|
|
37
|
+
anemoi_utils-0.4.24.dist-info/METADATA,sha256=EsIP40bMdXsSEAp1v4iFkKMrzdVaE4Cw892SLNlzuWs,15439
|
|
38
|
+
anemoi_utils-0.4.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
39
|
+
anemoi_utils-0.4.24.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
|
|
40
|
+
anemoi_utils-0.4.24.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
|
|
41
|
+
anemoi_utils-0.4.24.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|