anemoi-utils 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of anemoi-utils might be problematic.
- anemoi/utils/_version.py +2 -2
- anemoi/utils/cli.py +1 -1
- anemoi/utils/config.py +18 -12
- anemoi/utils/s3.py +438 -32
- {anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/METADATA +1 -1
- {anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/RECORD +10 -10
- {anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/WHEEL +1 -1
- {anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/LICENSE +0 -0
- {anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/entry_points.txt +0 -0
- {anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/top_level.txt +0 -0
anemoi/utils/_version.py
CHANGED
anemoi/utils/cli.py
CHANGED
anemoi/utils/config.py
CHANGED
@@ -8,6 +8,7 @@
 
 import logging
 import os
+import threading
 
 try:
     import tomllib  # Only available since 3.11
@@ -60,16 +61,10 @@ class DotDict(dict):
 
 
 CONFIG = None
+CONFIG_LOCK = threading.Lock()
 
 
-def load_config():
-    """Load the configuration from `~/.anemoi.toml`.
-
-    Returns
-    -------
-    DotDict
-        The configuration
-    """
+def _load_config():
     global CONFIG
     if CONFIG is not None:
         return CONFIG
@@ -86,9 +81,20 @@ def load_config():
     return DotDict(CONFIG)
 
 
-def
-    """
+def load_config():
+    """Load the configuration from `~/.anemoi.toml`.
+
+    Returns
+    -------
+    DotDict
+        The configuration
+    """
+    with CONFIG_LOCK:
+        return _load_config()
+
 
+def check_config_mode():
     conf = os.path.expanduser("~/.anemoi.toml")
-
-
+    mode = os.stat(conf).st_mode
+    if mode & 0o777 != 0o600:
+        raise SystemError(f"Configuration file {conf} is not secure. " "Please run `chmod 600 ~/.anemoi.toml`.")
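For orientation, a minimal usage sketch (not part of the package): the loader is now safe to call from several threads at once, because load_config() takes CONFIG_LOCK before delegating to the cached _load_config()::

    # Usage sketch only: exercises the thread-safe loader introduced above.
    import threading

    from anemoi.utils.config import load_config

    def worker():
        # load_config() parses ~/.anemoi.toml at most once, under CONFIG_LOCK,
        # and serves the cached configuration afterwards.
        config = load_config()
        print(config.get("object-storage", {}))

    threads = [threading.Thread(target=worker) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()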
anemoi/utils/s3.py
CHANGED
@@ -4,54 +4,460 @@
 # In applying this licence, ECMWF does not waive the privileges and immunities
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
+
+"""This module provides functions to upload, download, list and delete files and folders on S3.
+
+The functions of this package expect that the AWS credentials are set up in the environment
+typicaly by setting the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables or
+by creating a `~/.aws/credentials` file. It is also possible to set the `endpoint_url` in the same file
+to use a different S3 compatible service::
+
+    [default]
+    endpoint_url = https://some-storage.somewhere.world
+    aws_access_key_id = xxxxxxxxxxxxxxxxxxxxxxxx
+    aws_secret_access_key = xxxxxxxxxxxxxxxxxxxxxxxx
+
+"""
+
+import concurrent
 import logging
 import os
-
+import threading
+from copy import deepcopy
 
-import boto3
 import tqdm
 
-
+from .config import check_config_mode
+from .config import load_config
+from .humanize import bytes
 
+LOGGER = logging.getLogger(__name__)
 
-def upload(source, target, overwrite=False, ignore_existing=False):
-    # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html
-    assert target.startswith("s3://")
 
-
+# s3_clients are not thread-safe, so we need to create a new client for each thread
 
-
-    s3_client = boto3.client("s3")
+thread_local = threading.local()
 
-    if not overwrite:
-        results = s3_client.list_objects(Bucket=bucket, Prefix=key)
-        if results.get("Contents"):
-            if ignore_existing:
-                LOG.info(f"{target} already exists, skipping")
-                return
-            else:
-                raise ValueError(f"{target} already exists, use --overwrite to replace")
 
-
-
-
+def s3_client(bucket):
+    import boto3
+
+    config = load_config()
+    if "object-storage" in config:
+        check_config_mode()
+
+    if not hasattr(thread_local, "s3_clients"):
+        thread_local.s3_clients = {}
+
+    if bucket not in thread_local.s3_clients:
+
+        options = {}
+        options.update(config.get("object-storage", {}))
+        options.update(config.get("object-storage", {}).get(bucket, {}))
+
+        type = options.pop("type", "s3")
+        if type != "s3":
+            raise ValueError(f"Unsupported object storage type {type}")
+
+        if "config" in options:
+            from botocore.client import Config
+
+            options["config"] = Config(**options["config"])
+            del options["config"]
+
+        thread_local.s3_clients[bucket] = boto3.client("s3", **options)
+
+    return thread_local.s3_clients[bucket]
+
+
+class Transfer:
+
+    def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1):
+        assert verbosity == 1, verbosity
+
+        # from boto3.s3.transfer import TransferConfig
+        # config = TransferConfig(use_threads=False)
+        config = None
+        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
+            try:
+                if verbosity > 0:
+                    LOGGER.info(f"{self.action} {source} to {target}")
+
+                total = 0
+
+                futures = []
+                for name in self.list_source(source):
+
+                    futures.append(
+                        executor.submit(
+                            self.transfer_file,
+                            source=self.source_path(name, source),
+                            target=self.target_path(name, source, target),
+                            overwrite=overwrite,
+                            resume=resume,
+                            verbosity=verbosity - 1,
+                            config=config,
+                        )
+                    )
+                    total += self.source_size(name)
+
+                    if len(futures) % 10000 == 0:
+                        if verbosity > 0:
+                            LOGGER.info(f"Preparing transfer, {len(futures):,} files... ({bytes(total)})")
+                        done, _ = concurrent.futures.wait(
+                            futures,
+                            timeout=0.001,
+                            return_when=concurrent.futures.FIRST_EXCEPTION,
+                        )
+                        # Trigger exceptions if any
+                        for future in done:
+                            future.result()
+
+                if verbosity > 0:
+                    LOGGER.info(f"{self.action} {len(futures):,} files ({bytes(total)})")
+                    with tqdm.tqdm(total=total, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
+                        for future in futures:
+                            pbar.update(future.result())
+                else:
+                    for future in futures:
+                        future.result()
+
+            except Exception:
+                executor.shutdown(wait=False, cancel_futures=True)
+                raise
+
+
+class Upload(Transfer):
+    action = "Uploading"
+
+    def list_source(self, source):
+        for root, _, files in os.walk(source):
+            for file in files:
+                yield os.path.join(root, file)
+
+    def source_path(self, local_path, source):
+        return local_path
+
+    def target_path(self, source_path, source, target):
+        relative_path = os.path.relpath(source_path, source)
+        s3_path = os.path.join(target, relative_path)
+        return s3_path
+
+    def source_size(self, local_path):
+        return os.path.getsize(local_path)
+
+    def transfer_file(self, source, target, overwrite, resume, verbosity, config=None):
+
+        from botocore.exceptions import ClientError
+
+        assert target.startswith("s3://")
+
+        _, _, bucket, key = target.split("/", 3)
+        s3 = s3_client(bucket)
+
+        size = os.path.getsize(source)
+
+        if verbosity > 0:
+            LOGGER.info(f"{self.action} {source} to {target} ({bytes(size)})")
+
+        try:
+            results = s3.head_object(Bucket=bucket, Key=key)
+            remote_size = int(results["ContentLength"])
+        except ClientError as e:
+            if e.response["Error"]["Code"] != "404":
+                raise
+            remote_size = None
+
+        if remote_size is not None:
+            if remote_size != size:
+                LOGGER.warning(
+                    f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})"
+                )
+            elif resume:
+                # LOGGER.info(f"{target} already exists, skipping")
+                return size
+
+        if remote_size is not None and not overwrite and not resume:
+            raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
+
+        if verbosity > 0:
+            with tqdm.tqdm(total=size, unit="B", unit_scale=True, unit_divisor=1024, leave=False) as pbar:
+                s3.upload_file(source, bucket, key, Callback=lambda x: pbar.update(x), Config=config)
+        else:
+            s3.upload_file(source, bucket, key, Config=config)
+
+        return size
+
+
+class Download(Transfer):
+    action = "Downloading"
+
+    def list_source(self, source):
+        yield from _list_objects(source)
+
+    def source_path(self, s3_object, source):
+        _, _, bucket, _ = source.split("/", 3)
+        return f"s3://{bucket}/{s3_object['Key']}"
+
+    def target_path(self, s3_object, source, target):
+        _, _, _, folder = source.split("/", 3)
+        local_path = os.path.join(target, os.path.relpath(s3_object["Key"], folder))
+        os.makedirs(os.path.dirname(local_path), exist_ok=True)
+        return local_path
+
+    def source_size(self, s3_object):
+        return s3_object["Size"]
+
+    def transfer_file(self, source, target, overwrite, resume, verbosity, config=None):
+        # from boto3.s3.transfer import TransferConfig
+
+        _, _, bucket, key = source.split("/", 3)
+        s3 = s3_client(bucket)
+
+        try:
+            response = s3.head_object(Bucket=bucket, Key=key)
+        except s3.exceptions.ClientError as e:
+            print(e.response["Error"]["Code"], e.response["Error"]["Message"], bucket, key)
+            if e.response["Error"]["Code"] == "404":
+                raise ValueError(f"{source} does not exist ({bucket}, {key})")
+            raise
 
-
+        size = int(response["ContentLength"])
 
+        if verbosity > 0:
+            LOGGER.info(f"Downloading {source} to {target} ({bytes(size)})")
 
-
+        if overwrite:
+            resume = False
+
+        if resume:
+            if os.path.exists(target):
+                local_size = os.path.getsize(target)
+                if local_size != size:
+                    LOGGER.warning(
+                        f"{target} already with different size, re-downloading (remote={size}, local={size})"
+                    )
+                else:
+                    # if verbosity > 0:
+                    #     LOGGER.info(f"{target} already exists, skipping")
+                    return size
+
+        if os.path.exists(target) and not overwrite:
+            raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
+
+        if verbosity > 0:
+            with tqdm.tqdm(total=size, unit="B", unit_scale=True, unit_divisor=1024, leave=False) as pbar:
+                s3.download_file(bucket, key, target, Callback=lambda x: pbar.update(x), Config=config)
+        else:
+            s3.download_file(bucket, key, target, Config=config)
+
+        return size
+
+
+def upload(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
+    """Upload a file or a folder to S3.
+
+    Parameters
+    ----------
+    source : str
+        A path to a file or a folder to upload.
+    target : str
+        A URL to a file or a folder on S3. The url should start with 's3://'.
+    overwrite : bool, optional
+        If the data is alreay on S3 it will be overwritten, by default False
+    resume : bool, optional
+        If the data is alreay on S3 it will not be uploaded, unless the remote file
+        has a different size, by default False
+    threads : int, optional
+        The number of threads to use when uploading a directory, by default 1
+    """
+
+    uploader = Upload()
+    if os.path.isdir(source):
+        uploader.transfer_folder(
+            source=source, target=target, overwrite=overwrite, resume=resume, verbosity=verbosity, threads=threads
+        )
+    else:
+        uploader.transfer_file(source=source, target=target, overwrite=overwrite, resume=resume, verbosity=verbosity)
+
+
+def download(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
+    """Download a file or a folder from S3.
+
+    Parameters
+    ----------
+    source : str
+        The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is
+        assumed to be a folder, otherwise it is assumed to be a file.
+    target : str
+        The local path where the file or folder will be downloaded.
+    overwrite : bool, optional
+        If false, files which have already been download will be skipped, unless their size
+        does not match their size on S3 , by default False
+    resume : bool, optional
+        If the data is alreay on local it will not be downloaded, unless the remote file
+        has a different size, by default False
+    threads : int, optional
+        The number of threads to use when downloading a directory, by default 1
+    """
     assert source.startswith("s3://")
 
-
+    downloader = Download()
+
+    if source.endswith("/"):
+        downloader.transfer_folder(
+            source=source,
+            target=target,
+            overwrite=overwrite,
+            resume=resume,
+            verbosity=verbosity,
+            threads=threads,
+        )
+    else:
+        downloader.transfer_file(
+            source=source,
+            target=target,
+            overwrite=overwrite,
+            resume=resume,
+            verbosity=verbosity,
+        )
+
+
+def _list_objects(target, batch=False):
+    _, _, bucket, prefix = target.split("/", 3)
+    s3 = s3_client(bucket)
+
+    paginator = s3.get_paginator("list_objects_v2")
+
+    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
+        if "Contents" in page:
+            objects = deepcopy(page["Contents"])
+            if batch:
+                yield objects
+            else:
+                yield from objects
+
+
+def _delete_folder(target):
+    _, _, bucket, _ = target.split("/", 3)
+    s3 = s3_client(bucket)
+
+    total = 0
+    for batch in _list_objects(target, batch=True):
+        LOGGER.info(f"Deleting {len(batch):,} objects from {target}")
+        s3.delete_objects(Bucket=bucket, Delete={"Objects": [{"Key": o["Key"]} for o in batch]})
+        total += len(batch)
+        LOGGER.info(f"Deleted {len(batch):,} objects (total={total:,})")
+
+
+def _delete_file(target):
+    from botocore.exceptions import ClientError
+
+    _, _, bucket, key = target.split("/", 3)
+    s3 = s3_client(bucket)
+
+    try:
+        s3.head_object(Bucket=bucket, Key=key)
+        exits = True
+    except ClientError as e:
+        if e.response["Error"]["Code"] != "404":
+            raise
+        exits = False
+
+    if not exits:
+        LOGGER.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'")
+        return
 
-
-
-
+    LOGGER.info(f"Deleting {target}")
+    print(s3.delete_object(Bucket=bucket, Key=key))
+    LOGGER.info(f"{target} is deleted")
 
-    if not overwrite:
-        if os.path.exists(source) and os.path.getsize(source) == size:
-            LOG.info(f"{source} already exists, skipping")
-            return
 
-
-
+def delete(target):
+    """Delete a file or a folder from S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is
+        assumed to be a folder, otherwise it is assumed to be a file.
+    """
+
+    assert target.startswith("s3://")
+
+    if target.endswith("/"):
+        _delete_folder(target)
+    else:
+        _delete_file(target)
+
+
+def list_folder(folder):
+    """List the sub folders in a folder on S3.
+
+    Parameters
+    ----------
+    folder : str
+        The URL of a folder on S3. The url should start with 's3://'.
+
+    Returns
+    -------
+    list
+        A list of the subfolders names in the folder.
+    """
+
+    assert folder.startswith("s3://")
+    if not folder.endswith("/"):
+        folder += "/"
+
+    _, _, bucket, prefix = folder.split("/", 3)
+
+    s3 = s3_client(bucket)
+    paginator = s3.get_paginator("list_objects_v2")
+
+    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/"):
+        if "CommonPrefixes" in page:
+            yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")]
+
+
+def object_info(target):
+    """Get information about an object on S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The url should start with 's3://'.
+
+    Returns
+    -------
+    dict
+        A dictionary with information about the object.
+    """
+
+    _, _, bucket, key = target.split("/", 3)
+    s3 = s3_client(bucket)
+
+    try:
+        return s3.head_object(Bucket=bucket, Key=key)
+    except s3.exceptions.ClientError as e:
+        if e.response["Error"]["Code"] == "404":
+            raise ValueError(f"{target} does not exist")
+        raise
+
+
+def object_acl(target):
+    """Get information about an object's ACL on S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The url should start with 's3://'.
+
+    Returns
+    -------
+    dict
+        A dictionary with information about the object's ACL.
+    """
+
+    _, _, bucket, key = target.split("/", 3)
+    s3 = s3_client()
+
+    return s3.get_object_acl(Bucket=bucket, Key=key)
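For orientation, a short usage sketch of the new public API (not part of the package; the bucket name, paths and settings below are made-up examples). s3_client() merges a global [object-storage] table from ~/.anemoi.toml with a per-bucket sub-table and passes the result to boto3.client("s3", ...), so a setting such as endpoint_url can point a single bucket at a different S3-compatible service::

    # Usage sketch only -- bucket, paths and config values are hypothetical.
    #
    # ~/.anemoi.toml (must be chmod 600, see check_config_mode above):
    #
    #     [object-storage]
    #     endpoint_url = "https://some-storage.somewhere.world"
    #
    #     [object-storage.my-bucket]
    #     endpoint_url = "https://other-storage.somewhere.world"

    from anemoi.utils.s3 import delete, download, list_folder, upload

    # Upload a folder with 4 worker threads; resume=True skips files already
    # on S3 with a matching size, overwrite=True replaces them unconditionally.
    upload("/tmp/dataset", "s3://my-bucket/datasets/dataset", resume=True, threads=4)

    # A trailing '/' marks the source as a folder, otherwise it is treated as a file.
    download("s3://my-bucket/datasets/dataset/", "/tmp/copy", resume=True, threads=4)

    # list_folder() yields the sub-folder URLs found under the given prefix.
    for subfolder in list_folder("s3://my-bucket/datasets"):
        print(subfolder)

    delete("s3://my-bucket/datasets/dataset/")  # trailing '/' deletes the whole folder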
{anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: anemoi-utils
-Version: 0.3.3
+Version: 0.3.5
 Summary: A package to hold various functions to support training of ML models on ECMWF data.
 Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
 License: Apache License
{anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/RECORD
CHANGED
@@ -1,24 +1,24 @@
 anemoi/utils/__init__.py,sha256=zZZpbKIoGWwdCOuo6YSruLR7C0GzvzI1Wzhyqaa0K7M,456
 anemoi/utils/__main__.py,sha256=cLA2PidDTOUHaDGzd0_E5iioKYNe-PSTv567Y2fuwQk,723
-anemoi/utils/_version.py,sha256=
+anemoi/utils/_version.py,sha256=3eLsZPTwWh0zKBhR6n3c4iAL9geCCdJGMojIL6dF0IA,411
 anemoi/utils/caching.py,sha256=HrC9aFHlcCTaM2Z5u0ivGIXz7eFu35UQQhUuwwuG2pk,1743
 anemoi/utils/checkpoints.py,sha256=1_3mg4B-ykTVfIvIUEv7IxGyREx_ZcilVbB3U-V6O6I,5165
-anemoi/utils/cli.py,sha256=
-anemoi/utils/config.py,sha256=
+anemoi/utils/cli.py,sha256=w6YVYfJV-50Zm9FrO0KNrrIWDdgj5hPjxJvgAh391NY,3308
+anemoi/utils/config.py,sha256=WD3EZ2Ylt-HLvJnOPXKEGMepR2VVNYbJKC10fPJu45w,2333
 anemoi/utils/dates.py,sha256=Ot9OTY1uFvHxW1EU4DPv3oUqmzvkXTwKuwhlfVlY788,8426
 anemoi/utils/grib.py,sha256=gVfo4KYQv31iRyoqRDwk5tiqZDUgOIvhag_kO0qjYD0,3067
 anemoi/utils/humanize.py,sha256=LD6dGnqChxA5j3tMhSybsAGRQzi33d_qS9pUoUHubkc,10330
 anemoi/utils/provenance.py,sha256=v54L9jF1JgYcclOhg3iojRl1v3ajbiWz_oc289xTgO4,9574
-anemoi/utils/s3.py,sha256=
+anemoi/utils/s3.py,sha256=OEZGm85gzWfZ29OuCx-A2dczC00XKM_SYnkGv9vgs14,15204
 anemoi/utils/text.py,sha256=4Zlc4r9dzRjkKL9xqp2vuQsoJY15bJ3y_Xv3YW_XsmU,8510
 anemoi/utils/timer.py,sha256=JKOgFkpJxmVRn57DEBolmTGwr25P-ePTWASBd8CLeqM,972
 anemoi/utils/commands/__init__.py,sha256=qAybFZPBBQs0dyx7dZ3X5JsLpE90pwrqt1vSV7cqEIw,706
 anemoi/utils/commands/checkpoint.py,sha256=SEnAizU3WklqMXUjmIh4eNrgBVwmheKG9gEBS90zwYU,1741
 anemoi/utils/mars/__init__.py,sha256=RAeY8gJ7ZvsPlcIvrQ4fy9xVHs3SphTAPw_XJDtNIKo,1750
 anemoi/utils/mars/mars.yaml,sha256=R0dujp75lLA4wCWhPeOQnzJ45WZAYLT8gpx509cBFlc,66
-anemoi_utils-0.3.3.dist-info/LICENSE,sha256=
-anemoi_utils-0.3.3.dist-info/METADATA,sha256=
-anemoi_utils-0.3.3.dist-info/WHEEL,sha256=
-anemoi_utils-0.3.3.dist-info/entry_points.txt,sha256=
-anemoi_utils-0.3.3.dist-info/top_level.txt,sha256=
-anemoi_utils-0.3.3.dist-info/RECORD,,
+anemoi_utils-0.3.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+anemoi_utils-0.3.5.dist-info/METADATA,sha256=N9sPN7e-Zb6y0sxhw6N124iHNOoWWh7Gc2RWjIAD0UY,15513
+anemoi_utils-0.3.5.dist-info/WHEEL,sha256=cpQTJ5IWu9CdaPViMhC9YzF8gZuS5-vlfoFihTBC86A,91
+anemoi_utils-0.3.5.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
+anemoi_utils-0.3.5.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
+anemoi_utils-0.3.5.dist-info/RECORD,,
{anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/LICENSE
File without changes
{anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/entry_points.txt
File without changes
{anemoi_utils-0.3.3.dist-info → anemoi_utils-0.3.5.dist-info}/top_level.txt
File without changes