anemoi-utils 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of anemoi-utils might be problematic. Click here for more details.
- anemoi/utils/_version.py +2 -2
- anemoi/utils/cli.py +1 -1
- anemoi/utils/config.py +60 -12
- anemoi/utils/s3.py +307 -124
- {anemoi_utils-0.3.4.dist-info → anemoi_utils-0.3.6.dist-info}/METADATA +1 -1
- {anemoi_utils-0.3.4.dist-info → anemoi_utils-0.3.6.dist-info}/RECORD +10 -10
- {anemoi_utils-0.3.4.dist-info → anemoi_utils-0.3.6.dist-info}/WHEEL +1 -1
- {anemoi_utils-0.3.4.dist-info → anemoi_utils-0.3.6.dist-info}/LICENSE +0 -0
- {anemoi_utils-0.3.4.dist-info → anemoi_utils-0.3.6.dist-info}/entry_points.txt +0 -0
- {anemoi_utils-0.3.4.dist-info → anemoi_utils-0.3.6.dist-info}/top_level.txt +0 -0
anemoi/utils/_version.py
CHANGED
anemoi/utils/cli.py
CHANGED
anemoi/utils/config.py
CHANGED
|
@@ -6,8 +6,12 @@
|
|
|
6
6
|
# nor does it submit to any jurisdiction.
|
|
7
7
|
|
|
8
8
|
|
|
9
|
+
import json
|
|
9
10
|
import logging
|
|
10
11
|
import os
|
|
12
|
+
import threading
|
|
13
|
+
|
|
14
|
+
import yaml
|
|
11
15
|
|
|
12
16
|
try:
|
|
13
17
|
import tomllib # Only available since 3.11
|
|
@@ -40,10 +44,49 @@ class DotDict(dict):
|
|
|
40
44
|
|
|
41
45
|
def __init__(self, *args, **kwargs):
|
|
42
46
|
super().__init__(*args, **kwargs)
|
|
47
|
+
|
|
43
48
|
for k, v in self.items():
|
|
44
49
|
if isinstance(v, dict):
|
|
45
50
|
self[k] = DotDict(v)
|
|
46
51
|
|
|
52
|
+
if isinstance(v, list):
|
|
53
|
+
self[k] = [DotDict(i) if isinstance(i, dict) else i for i in v]
|
|
54
|
+
|
|
55
|
+
if isinstance(v, tuple):
|
|
56
|
+
self[k] = [DotDict(i) if isinstance(i, dict) else i for i in v]
|
|
57
|
+
|
|
58
|
+
@classmethod
|
|
59
|
+
def from_file(cls, path: str):
|
|
60
|
+
_, ext = os.path.splitext(path)
|
|
61
|
+
if ext == ".yaml" or ext == ".yml":
|
|
62
|
+
return cls.from_yaml_file(path)
|
|
63
|
+
elif ext == ".json":
|
|
64
|
+
return cls.from_json_file(path)
|
|
65
|
+
elif ext == ".toml":
|
|
66
|
+
return cls.from_toml_file(path)
|
|
67
|
+
else:
|
|
68
|
+
raise ValueError(f"Unknown file extension {ext}")
|
|
69
|
+
|
|
70
|
+
@classmethod
|
|
71
|
+
def from_yaml_file(cls, path: str):
|
|
72
|
+
with open(path, "r") as file:
|
|
73
|
+
data = yaml.safe_load(file)
|
|
74
|
+
|
|
75
|
+
return cls(data)
|
|
76
|
+
|
|
77
|
+
@classmethod
|
|
78
|
+
def from_json_file(cls, path: str):
|
|
79
|
+
with open(path, "r") as file:
|
|
80
|
+
data = json.load(file)
|
|
81
|
+
|
|
82
|
+
return cls(data)
|
|
83
|
+
|
|
84
|
+
@classmethod
|
|
85
|
+
def from_toml_file(cls, path: str):
|
|
86
|
+
with open(path, "r") as file:
|
|
87
|
+
data = tomllib.load(file)
|
|
88
|
+
return cls(data)
|
|
89
|
+
|
|
47
90
|
def __getattr__(self, attr):
|
|
48
91
|
try:
|
|
49
92
|
return self[attr]
|
|
@@ -60,16 +103,10 @@ class DotDict(dict):
|
|
|
60
103
|
|
|
61
104
|
|
|
62
105
|
CONFIG = None
|
|
106
|
+
CONFIG_LOCK = threading.Lock()
|
|
63
107
|
|
|
64
108
|
|
|
65
|
-
def
|
|
66
|
-
"""Load the configuration from `~/.anemoi.toml`.
|
|
67
|
-
|
|
68
|
-
Returns
|
|
69
|
-
-------
|
|
70
|
-
DotDict
|
|
71
|
-
The configuration
|
|
72
|
-
"""
|
|
109
|
+
def _load_config():
|
|
73
110
|
global CONFIG
|
|
74
111
|
if CONFIG is not None:
|
|
75
112
|
return CONFIG
|
|
@@ -86,9 +123,20 @@ def load_config():
|
|
|
86
123
|
return DotDict(CONFIG)
|
|
87
124
|
|
|
88
125
|
|
|
89
|
-
def
|
|
90
|
-
"""
|
|
126
|
+
def load_config():
|
|
127
|
+
"""Load the configuration from `~/.anemoi.toml`.
|
|
128
|
+
|
|
129
|
+
Returns
|
|
130
|
+
-------
|
|
131
|
+
DotDict
|
|
132
|
+
The configuration
|
|
133
|
+
"""
|
|
134
|
+
with CONFIG_LOCK:
|
|
135
|
+
return _load_config()
|
|
136
|
+
|
|
91
137
|
|
|
138
|
+
def check_config_mode():
|
|
92
139
|
conf = os.path.expanduser("~/.anemoi.toml")
|
|
93
|
-
|
|
94
|
-
|
|
140
|
+
mode = os.stat(conf).st_mode
|
|
141
|
+
if mode & 0o777 != 0o600:
|
|
142
|
+
raise SystemError(f"Configuration file {conf} is not secure. " "Please run `chmod 600 ~/.anemoi.toml`.")
|
anemoi/utils/s3.py
CHANGED
|
@@ -18,14 +18,18 @@ to use a different S3 compatible service::
|
|
|
18
18
|
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
+
import concurrent
|
|
21
22
|
import logging
|
|
22
23
|
import os
|
|
23
24
|
import threading
|
|
24
|
-
from
|
|
25
|
-
from contextlib import closing
|
|
25
|
+
from copy import deepcopy
|
|
26
26
|
|
|
27
27
|
import tqdm
|
|
28
28
|
|
|
29
|
+
from .config import check_config_mode
|
|
30
|
+
from .config import load_config
|
|
31
|
+
from .humanize import bytes
|
|
32
|
+
|
|
29
33
|
LOGGER = logging.getLogger(__name__)
|
|
30
34
|
|
|
31
35
|
|
|
@@ -34,70 +38,222 @@ LOGGER = logging.getLogger(__name__)
|
|
|
34
38
|
thread_local = threading.local()
|
|
35
39
|
|
|
36
40
|
|
|
37
|
-
def
|
|
41
|
+
def s3_client(bucket):
|
|
38
42
|
import boto3
|
|
39
43
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
44
|
+
config = load_config()
|
|
45
|
+
if "object-storage" in config:
|
|
46
|
+
check_config_mode()
|
|
43
47
|
|
|
48
|
+
if not hasattr(thread_local, "s3_clients"):
|
|
49
|
+
thread_local.s3_clients = {}
|
|
44
50
|
|
|
45
|
-
|
|
46
|
-
from botocore.exceptions import ClientError
|
|
51
|
+
if bucket not in thread_local.s3_clients:
|
|
47
52
|
|
|
48
|
-
|
|
53
|
+
options = {}
|
|
54
|
+
options.update(config.get("object-storage", {}))
|
|
55
|
+
options.update(config.get("object-storage", {}).get(bucket, {}))
|
|
49
56
|
|
|
50
|
-
|
|
57
|
+
type = options.pop("type", "s3")
|
|
58
|
+
if type != "s3":
|
|
59
|
+
raise ValueError(f"Unsupported object storage type {type}")
|
|
51
60
|
|
|
52
|
-
|
|
53
|
-
|
|
61
|
+
if "config" in options:
|
|
62
|
+
from botocore.client import Config
|
|
63
|
+
|
|
64
|
+
options["config"] = Config(**options["config"])
|
|
65
|
+
del options["config"]
|
|
66
|
+
|
|
67
|
+
thread_local.s3_clients[bucket] = boto3.client("s3", **options)
|
|
68
|
+
|
|
69
|
+
return thread_local.s3_clients[bucket]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class Transfer:
|
|
73
|
+
|
|
74
|
+
def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1):
|
|
75
|
+
assert verbosity == 1, verbosity
|
|
76
|
+
|
|
77
|
+
# from boto3.s3.transfer import TransferConfig
|
|
78
|
+
# config = TransferConfig(use_threads=False)
|
|
79
|
+
config = None
|
|
80
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
|
|
81
|
+
try:
|
|
82
|
+
if verbosity > 0:
|
|
83
|
+
LOGGER.info(f"{self.action} {source} to {target}")
|
|
84
|
+
|
|
85
|
+
total = 0
|
|
86
|
+
|
|
87
|
+
futures = []
|
|
88
|
+
for name in self.list_source(source):
|
|
89
|
+
|
|
90
|
+
futures.append(
|
|
91
|
+
executor.submit(
|
|
92
|
+
self.transfer_file,
|
|
93
|
+
source=self.source_path(name, source),
|
|
94
|
+
target=self.target_path(name, source, target),
|
|
95
|
+
overwrite=overwrite,
|
|
96
|
+
resume=resume,
|
|
97
|
+
verbosity=verbosity - 1,
|
|
98
|
+
config=config,
|
|
99
|
+
)
|
|
100
|
+
)
|
|
101
|
+
total += self.source_size(name)
|
|
102
|
+
|
|
103
|
+
if len(futures) % 10000 == 0:
|
|
104
|
+
if verbosity > 0:
|
|
105
|
+
LOGGER.info(f"Preparing transfer, {len(futures):,} files... ({bytes(total)})")
|
|
106
|
+
done, _ = concurrent.futures.wait(
|
|
107
|
+
futures,
|
|
108
|
+
timeout=0.001,
|
|
109
|
+
return_when=concurrent.futures.FIRST_EXCEPTION,
|
|
110
|
+
)
|
|
111
|
+
# Trigger exceptions if any
|
|
112
|
+
for future in done:
|
|
113
|
+
future.result()
|
|
114
|
+
|
|
115
|
+
if verbosity > 0:
|
|
116
|
+
LOGGER.info(f"{self.action} {len(futures):,} files ({bytes(total)})")
|
|
117
|
+
with tqdm.tqdm(total=total, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
|
|
118
|
+
for future in futures:
|
|
119
|
+
pbar.update(future.result())
|
|
120
|
+
else:
|
|
121
|
+
for future in futures:
|
|
122
|
+
future.result()
|
|
123
|
+
|
|
124
|
+
except Exception:
|
|
125
|
+
executor.shutdown(wait=False, cancel_futures=True)
|
|
126
|
+
raise
|
|
54
127
|
|
|
55
|
-
size = os.path.getsize(source)
|
|
56
|
-
try:
|
|
57
|
-
results = s3_client.head_object(Bucket=bucket, Key=key)
|
|
58
|
-
remote_size = int(results["ContentLength"])
|
|
59
|
-
except ClientError as e:
|
|
60
|
-
if e.response["Error"]["Code"] != "404":
|
|
61
|
-
raise
|
|
62
|
-
remote_size = None
|
|
63
128
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
LOGGER.warning(f"{target} already exists, but with different size, re-uploading")
|
|
67
|
-
overwrite = True
|
|
129
|
+
class Upload(Transfer):
|
|
130
|
+
action = "Uploading"
|
|
68
131
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
132
|
+
def list_source(self, source):
|
|
133
|
+
for root, _, files in os.walk(source):
|
|
134
|
+
for file in files:
|
|
135
|
+
yield os.path.join(root, file)
|
|
72
136
|
|
|
73
|
-
|
|
74
|
-
|
|
137
|
+
def source_path(self, local_path, source):
|
|
138
|
+
return local_path
|
|
75
139
|
|
|
76
|
-
|
|
77
|
-
|
|
140
|
+
def target_path(self, source_path, source, target):
|
|
141
|
+
relative_path = os.path.relpath(source_path, source)
|
|
142
|
+
s3_path = os.path.join(target, relative_path)
|
|
143
|
+
return s3_path
|
|
78
144
|
|
|
145
|
+
def source_size(self, local_path):
|
|
146
|
+
return os.path.getsize(local_path)
|
|
79
147
|
|
|
80
|
-
def
|
|
81
|
-
for root, _, files in os.walk(source):
|
|
82
|
-
for file in files:
|
|
83
|
-
yield os.path.join(root, file)
|
|
148
|
+
def transfer_file(self, source, target, overwrite, resume, verbosity, config=None):
|
|
84
149
|
|
|
150
|
+
from botocore.exceptions import ClientError
|
|
85
151
|
|
|
86
|
-
|
|
87
|
-
|
|
152
|
+
assert target.startswith("s3://")
|
|
153
|
+
|
|
154
|
+
_, _, bucket, key = target.split("/", 3)
|
|
155
|
+
s3 = s3_client(bucket)
|
|
88
156
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
157
|
+
size = os.path.getsize(source)
|
|
158
|
+
|
|
159
|
+
if verbosity > 0:
|
|
160
|
+
LOGGER.info(f"{self.action} {source} to {target} ({bytes(size)})")
|
|
161
|
+
|
|
162
|
+
try:
|
|
163
|
+
results = s3.head_object(Bucket=bucket, Key=key)
|
|
164
|
+
remote_size = int(results["ContentLength"])
|
|
165
|
+
except ClientError as e:
|
|
166
|
+
if e.response["Error"]["Code"] != "404":
|
|
167
|
+
raise
|
|
168
|
+
remote_size = None
|
|
169
|
+
|
|
170
|
+
if remote_size is not None:
|
|
171
|
+
if remote_size != size:
|
|
172
|
+
LOGGER.warning(
|
|
173
|
+
f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})"
|
|
174
|
+
)
|
|
175
|
+
elif resume:
|
|
176
|
+
# LOGGER.info(f"{target} already exists, skipping")
|
|
177
|
+
return size
|
|
178
|
+
|
|
179
|
+
if remote_size is not None and not overwrite and not resume:
|
|
180
|
+
raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
|
|
181
|
+
|
|
182
|
+
if verbosity > 0:
|
|
183
|
+
with tqdm.tqdm(total=size, unit="B", unit_scale=True, unit_divisor=1024, leave=False) as pbar:
|
|
184
|
+
s3.upload_file(source, bucket, key, Callback=lambda x: pbar.update(x), Config=config)
|
|
185
|
+
else:
|
|
186
|
+
s3.upload_file(source, bucket, key, Config=config)
|
|
187
|
+
|
|
188
|
+
return size
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class Download(Transfer):
|
|
192
|
+
action = "Downloading"
|
|
193
|
+
|
|
194
|
+
def list_source(self, source):
|
|
195
|
+
yield from _list_objects(source)
|
|
196
|
+
|
|
197
|
+
def source_path(self, s3_object, source):
|
|
198
|
+
_, _, bucket, _ = source.split("/", 3)
|
|
199
|
+
return f"s3://{bucket}/{s3_object['Key']}"
|
|
200
|
+
|
|
201
|
+
def target_path(self, s3_object, source, target):
|
|
202
|
+
_, _, _, folder = source.split("/", 3)
|
|
203
|
+
local_path = os.path.join(target, os.path.relpath(s3_object["Key"], folder))
|
|
204
|
+
os.makedirs(os.path.dirname(local_path), exist_ok=True)
|
|
205
|
+
return local_path
|
|
206
|
+
|
|
207
|
+
def source_size(self, s3_object):
|
|
208
|
+
return s3_object["Size"]
|
|
95
209
|
|
|
96
|
-
|
|
97
|
-
|
|
210
|
+
def transfer_file(self, source, target, overwrite, resume, verbosity, config=None):
|
|
211
|
+
# from boto3.s3.transfer import TransferConfig
|
|
98
212
|
|
|
213
|
+
_, _, bucket, key = source.split("/", 3)
|
|
214
|
+
s3 = s3_client(bucket)
|
|
99
215
|
|
|
100
|
-
|
|
216
|
+
try:
|
|
217
|
+
response = s3.head_object(Bucket=bucket, Key=key)
|
|
218
|
+
except s3.exceptions.ClientError as e:
|
|
219
|
+
print(e.response["Error"]["Code"], e.response["Error"]["Message"], bucket, key)
|
|
220
|
+
if e.response["Error"]["Code"] == "404":
|
|
221
|
+
raise ValueError(f"{source} does not exist ({bucket}, {key})")
|
|
222
|
+
raise
|
|
223
|
+
|
|
224
|
+
size = int(response["ContentLength"])
|
|
225
|
+
|
|
226
|
+
if verbosity > 0:
|
|
227
|
+
LOGGER.info(f"Downloading {source} to {target} ({bytes(size)})")
|
|
228
|
+
|
|
229
|
+
if overwrite:
|
|
230
|
+
resume = False
|
|
231
|
+
|
|
232
|
+
if resume:
|
|
233
|
+
if os.path.exists(target):
|
|
234
|
+
local_size = os.path.getsize(target)
|
|
235
|
+
if local_size != size:
|
|
236
|
+
LOGGER.warning(
|
|
237
|
+
f"{target} already with different size, re-downloading (remote={size}, local={size})"
|
|
238
|
+
)
|
|
239
|
+
else:
|
|
240
|
+
# if verbosity > 0:
|
|
241
|
+
# LOGGER.info(f"{target} already exists, skipping")
|
|
242
|
+
return size
|
|
243
|
+
|
|
244
|
+
if os.path.exists(target) and not overwrite:
|
|
245
|
+
raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
|
|
246
|
+
|
|
247
|
+
if verbosity > 0:
|
|
248
|
+
with tqdm.tqdm(total=size, unit="B", unit_scale=True, unit_divisor=1024, leave=False) as pbar:
|
|
249
|
+
s3.download_file(bucket, key, target, Callback=lambda x: pbar.update(x), Config=config)
|
|
250
|
+
else:
|
|
251
|
+
s3.download_file(bucket, key, target, Config=config)
|
|
252
|
+
|
|
253
|
+
return size
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def upload(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
|
|
101
257
|
"""Upload a file or a folder to S3.
|
|
102
258
|
|
|
103
259
|
Parameters
|
|
@@ -108,62 +264,23 @@ def upload(source, target, overwrite=False, ignore_existing=False, threads=1, sh
|
|
|
108
264
|
A URL to a file or a folder on S3. The url should start with 's3://'.
|
|
109
265
|
overwrite : bool, optional
|
|
110
266
|
If the data is already on S3 it will be overwritten, by default False
|
|
111
|
-
|
|
267
|
+
resume : bool, optional
|
|
112
268
|
If the data is already on S3 it will not be uploaded, unless the remote file
|
|
113
269
|
has a different size, by default False
|
|
114
270
|
threads : int, optional
|
|
115
271
|
The number of threads to use when uploading a directory, by default 1
|
|
116
272
|
"""
|
|
273
|
+
|
|
274
|
+
uploader = Upload()
|
|
117
275
|
if os.path.isdir(source):
|
|
118
|
-
|
|
276
|
+
uploader.transfer_folder(
|
|
277
|
+
source=source, target=target, overwrite=overwrite, resume=resume, verbosity=verbosity, threads=threads
|
|
278
|
+
)
|
|
119
279
|
else:
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def _download_file(source, target, overwrite=False, ignore_existing=False):
|
|
124
|
-
s3_client = _s3_client()
|
|
125
|
-
_, _, bucket, key = source.split("/", 3)
|
|
126
|
-
|
|
127
|
-
response = s3_client.head_object(Bucket=bucket, Key=key)
|
|
128
|
-
size = int(response["ContentLength"])
|
|
129
|
-
|
|
130
|
-
if os.path.exists(target):
|
|
131
|
-
|
|
132
|
-
if os.path.exists(target) and os.path.getsize(target) != size:
|
|
133
|
-
LOGGER.info(f"{target} already with different size, re-downloading")
|
|
134
|
-
overwrite = True
|
|
135
|
-
|
|
136
|
-
if not overwrite and not ignore_existing:
|
|
137
|
-
raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'ignore_existing' to skip")
|
|
138
|
-
|
|
139
|
-
if ignore_existing:
|
|
140
|
-
LOGGER.info(f"{target} already exists, skipping")
|
|
141
|
-
return
|
|
142
|
-
|
|
143
|
-
with closing(tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False)) as t:
|
|
144
|
-
s3_client.download_file(bucket, key, target, Callback=lambda x: t.update(x))
|
|
280
|
+
uploader.transfer_file(source=source, target=target, overwrite=overwrite, resume=resume, verbosity=verbosity)
|
|
145
281
|
|
|
146
282
|
|
|
147
|
-
def
|
|
148
|
-
source = source.rstrip("/")
|
|
149
|
-
_, _, bucket, folder = source.split("/", 3)
|
|
150
|
-
total = _count_objects_in_folder(source)
|
|
151
|
-
|
|
152
|
-
with ThreadPoolExecutor(max_workers=threads) as executor:
|
|
153
|
-
futures = []
|
|
154
|
-
for o in _list_folder(source):
|
|
155
|
-
name = o["Key"]
|
|
156
|
-
local_path = os.path.join(target, os.path.relpath(name, folder))
|
|
157
|
-
os.makedirs(os.path.dirname(local_path), exist_ok=True)
|
|
158
|
-
futures.append(
|
|
159
|
-
executor.submit(_download_file, f"s3://{bucket}/{name}", local_path, overwrite, ignore_existing)
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
for future in tqdm.tqdm(futures, total=total):
|
|
163
|
-
future.result()
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def download(source, target, overwrite=False, ignore_existing=False, threads=1, show_progress=True):
|
|
283
|
+
def download(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
|
|
167
284
|
"""Download a file or a folder from S3.
|
|
168
285
|
|
|
169
286
|
Parameters
|
|
@@ -176,7 +293,7 @@ def download(source, target, overwrite=False, ignore_existing=False, threads=1,
|
|
|
176
293
|
overwrite : bool, optional
|
|
177
294
|
If false, files which have already been downloaded will be skipped, unless their size
|
|
178
295
|
does not match their size on S3, by default False
|
|
179
|
-
|
|
296
|
+
resume : bool, optional
|
|
180
297
|
If the data is already on local disk it will not be downloaded, unless the remote file
|
|
181
298
|
has a different size, by default False
|
|
182
299
|
threads : int, optional
|
|
@@ -184,45 +301,74 @@ def download(source, target, overwrite=False, ignore_existing=False, threads=1,
|
|
|
184
301
|
"""
|
|
185
302
|
assert source.startswith("s3://")
|
|
186
303
|
|
|
304
|
+
downloader = Download()
|
|
305
|
+
|
|
187
306
|
if source.endswith("/"):
|
|
188
|
-
|
|
307
|
+
downloader.transfer_folder(
|
|
308
|
+
source=source,
|
|
309
|
+
target=target,
|
|
310
|
+
overwrite=overwrite,
|
|
311
|
+
resume=resume,
|
|
312
|
+
verbosity=verbosity,
|
|
313
|
+
threads=threads,
|
|
314
|
+
)
|
|
189
315
|
else:
|
|
190
|
-
|
|
316
|
+
downloader.transfer_file(
|
|
317
|
+
source=source,
|
|
318
|
+
target=target,
|
|
319
|
+
overwrite=overwrite,
|
|
320
|
+
resume=resume,
|
|
321
|
+
verbosity=verbosity,
|
|
322
|
+
)
|
|
191
323
|
|
|
192
324
|
|
|
193
|
-
def
|
|
194
|
-
s3_client = _s3_client()
|
|
325
|
+
def _list_objects(target, batch=False):
|
|
195
326
|
_, _, bucket, prefix = target.split("/", 3)
|
|
327
|
+
s3 = s3_client(bucket)
|
|
328
|
+
|
|
329
|
+
paginator = s3.get_paginator("list_objects_v2")
|
|
196
330
|
|
|
197
|
-
paginator = s3_client.get_paginator("list_objects_v2")
|
|
198
331
|
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
|
|
199
332
|
if "Contents" in page:
|
|
200
|
-
objects =
|
|
333
|
+
objects = deepcopy(page["Contents"])
|
|
201
334
|
if batch:
|
|
202
335
|
yield objects
|
|
203
336
|
else:
|
|
204
337
|
yield from objects
|
|
205
338
|
|
|
206
339
|
|
|
207
|
-
def
|
|
208
|
-
return sum(len(_) for _ in _list_folder(target, batch=True))
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
def _delete_folder(target, threads):
|
|
212
|
-
s3_client = _s3_client()
|
|
340
|
+
def _delete_folder(target):
|
|
213
341
|
_, _, bucket, _ = target.split("/", 3)
|
|
342
|
+
s3 = s3_client(bucket)
|
|
214
343
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
LOGGER.info(f"
|
|
344
|
+
total = 0
|
|
345
|
+
for batch in _list_objects(target, batch=True):
|
|
346
|
+
LOGGER.info(f"Deleting {len(batch):,} objects from {target}")
|
|
347
|
+
s3.delete_objects(Bucket=bucket, Delete={"Objects": [{"Key": o["Key"]} for o in batch]})
|
|
348
|
+
total += len(batch)
|
|
349
|
+
LOGGER.info(f"Deleted {len(batch):,} objects (total={total:,})")
|
|
218
350
|
|
|
219
351
|
|
|
220
352
|
def _delete_file(target):
|
|
221
|
-
|
|
353
|
+
from botocore.exceptions import ClientError
|
|
354
|
+
|
|
222
355
|
_, _, bucket, key = target.split("/", 3)
|
|
356
|
+
s3 = s3_client(bucket)
|
|
357
|
+
|
|
358
|
+
try:
|
|
359
|
+
s3.head_object(Bucket=bucket, Key=key)
|
|
360
|
+
exits = True
|
|
361
|
+
except ClientError as e:
|
|
362
|
+
if e.response["Error"]["Code"] != "404":
|
|
363
|
+
raise
|
|
364
|
+
exits = False
|
|
365
|
+
|
|
366
|
+
if not exits:
|
|
367
|
+
LOGGER.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'")
|
|
368
|
+
return
|
|
223
369
|
|
|
224
370
|
LOGGER.info(f"Deleting {target}")
|
|
225
|
-
|
|
371
|
+
print(s3.delete_object(Bucket=bucket, Key=key))
|
|
226
372
|
LOGGER.info(f"{target} is deleted")
|
|
227
373
|
|
|
228
374
|
|
|
@@ -244,37 +390,74 @@ def delete(target):
|
|
|
244
390
|
_delete_file(target)
|
|
245
391
|
|
|
246
392
|
|
|
247
|
-
def list_folder(
|
|
248
|
-
"""List the
|
|
393
|
+
def list_folder(folder):
|
|
394
|
+
"""List the sub folders in a folder on S3.
|
|
249
395
|
|
|
250
396
|
Parameters
|
|
251
397
|
----------
|
|
252
|
-
|
|
398
|
+
folder : str
|
|
253
399
|
The URL of a folder on S3. The url should start with 's3://'.
|
|
254
400
|
|
|
255
401
|
Returns
|
|
256
402
|
-------
|
|
257
403
|
list
|
|
258
|
-
A list of the
|
|
404
|
+
A list of the subfolder names in the folder.
|
|
259
405
|
"""
|
|
260
406
|
|
|
261
|
-
assert
|
|
262
|
-
|
|
407
|
+
assert folder.startswith("s3://")
|
|
408
|
+
if not folder.endswith("/"):
|
|
409
|
+
folder += "/"
|
|
410
|
+
|
|
411
|
+
_, _, bucket, prefix = folder.split("/", 3)
|
|
412
|
+
|
|
413
|
+
s3 = s3_client(bucket)
|
|
414
|
+
paginator = s3.get_paginator("list_objects_v2")
|
|
263
415
|
|
|
416
|
+
for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/"):
|
|
417
|
+
if "CommonPrefixes" in page:
|
|
418
|
+
yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")]
|
|
264
419
|
|
|
265
|
-
|
|
266
|
-
|
|
420
|
+
|
|
421
|
+
def object_info(target):
|
|
422
|
+
"""Get information about an object on S3.
|
|
267
423
|
|
|
268
424
|
Parameters
|
|
269
425
|
----------
|
|
270
426
|
target : str
|
|
271
|
-
The URL of a folder on S3. The url should start with 's3://'.
|
|
427
|
+
The URL of a file or a folder on S3. The url should start with 's3://'.
|
|
272
428
|
|
|
273
429
|
Returns
|
|
274
430
|
-------
|
|
275
|
-
|
|
276
|
-
|
|
431
|
+
dict
|
|
432
|
+
A dictionary with information about the object.
|
|
277
433
|
"""
|
|
278
434
|
|
|
279
|
-
|
|
280
|
-
|
|
435
|
+
_, _, bucket, key = target.split("/", 3)
|
|
436
|
+
s3 = s3_client(bucket)
|
|
437
|
+
|
|
438
|
+
try:
|
|
439
|
+
return s3.head_object(Bucket=bucket, Key=key)
|
|
440
|
+
except s3.exceptions.ClientError as e:
|
|
441
|
+
if e.response["Error"]["Code"] == "404":
|
|
442
|
+
raise ValueError(f"{target} does not exist")
|
|
443
|
+
raise
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def object_acl(target):
|
|
447
|
+
"""Get information about an object's ACL on S3.
|
|
448
|
+
|
|
449
|
+
Parameters
|
|
450
|
+
----------
|
|
451
|
+
target : str
|
|
452
|
+
The URL of a file or a folder on S3. The url should start with 's3://'.
|
|
453
|
+
|
|
454
|
+
Returns
|
|
455
|
+
-------
|
|
456
|
+
dict
|
|
457
|
+
A dictionary with information about the object's ACL.
|
|
458
|
+
"""
|
|
459
|
+
|
|
460
|
+
_, _, bucket, key = target.split("/", 3)
|
|
461
|
+
s3 = s3_client()
|
|
462
|
+
|
|
463
|
+
return s3.get_object_acl(Bucket=bucket, Key=key)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: anemoi-utils
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.6
|
|
4
4
|
Summary: A package to hold various functions to support training of ML models on ECMWF data.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License: Apache License
|
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
anemoi/utils/__init__.py,sha256=zZZpbKIoGWwdCOuo6YSruLR7C0GzvzI1Wzhyqaa0K7M,456
|
|
2
2
|
anemoi/utils/__main__.py,sha256=cLA2PidDTOUHaDGzd0_E5iioKYNe-PSTv567Y2fuwQk,723
|
|
3
|
-
anemoi/utils/_version.py,sha256=
|
|
3
|
+
anemoi/utils/_version.py,sha256=IKAQ4gPrCQ2FWMXOFRqouULC2EQI1zCb4iXHsnfbmTQ,411
|
|
4
4
|
anemoi/utils/caching.py,sha256=HrC9aFHlcCTaM2Z5u0ivGIXz7eFu35UQQhUuwwuG2pk,1743
|
|
5
5
|
anemoi/utils/checkpoints.py,sha256=1_3mg4B-ykTVfIvIUEv7IxGyREx_ZcilVbB3U-V6O6I,5165
|
|
6
|
-
anemoi/utils/cli.py,sha256=
|
|
7
|
-
anemoi/utils/config.py,sha256=
|
|
6
|
+
anemoi/utils/cli.py,sha256=w6YVYfJV-50Zm9FrO0KNrrIWDdgj5hPjxJvgAh391NY,3308
|
|
7
|
+
anemoi/utils/config.py,sha256=HBU8UbT0ZSFVSgpQGY42bXukrGIJBPbdqsqK1Btx97A,3475
|
|
8
8
|
anemoi/utils/dates.py,sha256=Ot9OTY1uFvHxW1EU4DPv3oUqmzvkXTwKuwhlfVlY788,8426
|
|
9
9
|
anemoi/utils/grib.py,sha256=gVfo4KYQv31iRyoqRDwk5tiqZDUgOIvhag_kO0qjYD0,3067
|
|
10
10
|
anemoi/utils/humanize.py,sha256=LD6dGnqChxA5j3tMhSybsAGRQzi33d_qS9pUoUHubkc,10330
|
|
11
11
|
anemoi/utils/provenance.py,sha256=v54L9jF1JgYcclOhg3iojRl1v3ajbiWz_oc289xTgO4,9574
|
|
12
|
-
anemoi/utils/s3.py,sha256=
|
|
12
|
+
anemoi/utils/s3.py,sha256=OEZGm85gzWfZ29OuCx-A2dczC00XKM_SYnkGv9vgs14,15204
|
|
13
13
|
anemoi/utils/text.py,sha256=4Zlc4r9dzRjkKL9xqp2vuQsoJY15bJ3y_Xv3YW_XsmU,8510
|
|
14
14
|
anemoi/utils/timer.py,sha256=JKOgFkpJxmVRn57DEBolmTGwr25P-ePTWASBd8CLeqM,972
|
|
15
15
|
anemoi/utils/commands/__init__.py,sha256=qAybFZPBBQs0dyx7dZ3X5JsLpE90pwrqt1vSV7cqEIw,706
|
|
16
16
|
anemoi/utils/commands/checkpoint.py,sha256=SEnAizU3WklqMXUjmIh4eNrgBVwmheKG9gEBS90zwYU,1741
|
|
17
17
|
anemoi/utils/mars/__init__.py,sha256=RAeY8gJ7ZvsPlcIvrQ4fy9xVHs3SphTAPw_XJDtNIKo,1750
|
|
18
18
|
anemoi/utils/mars/mars.yaml,sha256=R0dujp75lLA4wCWhPeOQnzJ45WZAYLT8gpx509cBFlc,66
|
|
19
|
-
anemoi_utils-0.3.
|
|
20
|
-
anemoi_utils-0.3.
|
|
21
|
-
anemoi_utils-0.3.
|
|
22
|
-
anemoi_utils-0.3.
|
|
23
|
-
anemoi_utils-0.3.
|
|
24
|
-
anemoi_utils-0.3.
|
|
19
|
+
anemoi_utils-0.3.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
20
|
+
anemoi_utils-0.3.6.dist-info/METADATA,sha256=DHKiN2X6h1APP0fFqHPHGN6TNWv7X65V85PPHX5ghac,15513
|
|
21
|
+
anemoi_utils-0.3.6.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
|
22
|
+
anemoi_utils-0.3.6.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
|
|
23
|
+
anemoi_utils-0.3.6.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
|
|
24
|
+
anemoi_utils-0.3.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|