anemoi-utils 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of anemoi-utils might be problematic.
- anemoi/utils/_version.py +2 -2
- anemoi/utils/s3.py +250 -27
- anemoi/utils/text.py +6 -7
- {anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/METADATA +1 -1
- {anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/RECORD +9 -9
- {anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/LICENSE +0 -0
- {anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/WHEEL +0 -0
- {anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/entry_points.txt +0 -0
- {anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/top_level.txt +0 -0
anemoi/utils/_version.py
CHANGED
anemoi/utils/s3.py
CHANGED
@@ -4,54 +4,277 @@
 # In applying this licence, ECMWF does not waive the privileges and immunities
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
+
+"""This module provides functions to upload, download, list and delete files and folders on S3.
+
+The functions of this package expect that the AWS credentials are set up in the environment,
+typically by setting the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables or
+by creating a `~/.aws/credentials` file. It is also possible to set the `endpoint_url` in the same file
+to use a different S3-compatible service::
+
+    [default]
+    endpoint_url = https://some-storage.somewhere.world
+    aws_access_key_id = xxxxxxxxxxxxxxxxxxxxxxxx
+    aws_secret_access_key = xxxxxxxxxxxxxxxxxxxxxxxx
+
+"""
+
 import logging
 import os
+import threading
+from concurrent.futures import ThreadPoolExecutor
 from contextlib import closing

-import boto3
 import tqdm

-
+LOGGER = logging.getLogger(__name__)
+
+
+# S3 clients are not thread-safe, so we need to create a new client for each thread
+
+thread_local = threading.local()
+
+
+def _s3_client():
+    import boto3

+    if not hasattr(thread_local, "s3_client"):
+        thread_local.s3_client = boto3.client("s3")
+    return thread_local.s3_client
+
+
+def _upload_file(source, target, overwrite=False, ignore_existing=False):
+    from botocore.exceptions import ClientError

-def upload(source, target, overwrite=False, ignore_existing=False):
-    # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html
     assert target.startswith("s3://")

     _, _, bucket, key = target.split("/", 3)

-
-    s3_client = boto3.client("s3")
-
-    if not overwrite:
-        results = s3_client.list_objects(Bucket=bucket, Prefix=key)
-        if results.get("Contents"):
-            if ignore_existing:
-                LOG.info(f"{target} already exists, skipping")
-                return
-            else:
-                raise ValueError(f"{target} already exists, use --overwrite to replace")
+    LOGGER.info(f"Uploading {source} to {target}")
+    s3_client = _s3_client()

     size = os.path.getsize(source)
-
+    try:
+        results = s3_client.head_object(Bucket=bucket, Key=key)
+        remote_size = int(results["ContentLength"])
+    except ClientError as e:
+        if e.response["Error"]["Code"] != "404":
+            raise
+        remote_size = None
+
+    if remote_size is not None:
+        if remote_size != size:
+            LOGGER.warning(f"{target} already exists, but with different size, re-uploading")
+            overwrite = True
+
+        if ignore_existing:
+            LOGGER.info(f"{target} already exists, skipping")
+            return
+
+    if remote_size is not None and not overwrite:
+        raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'ignore_existing' to skip")
+
+    with closing(tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False)) as t:
         s3_client.upload_file(source, bucket, key, Callback=lambda x: t.update(x))

-    LOG.info(f"{target} is ready")

+def _local_file_list(source):
+    for root, _, files in os.walk(source):
+        for file in files:
+            yield os.path.join(root, file)
+
+
+def _upload_folder(source, target, overwrite=False, ignore_existing=False, threads=1):
+    total = sum(1 for _ in _local_file_list(source))
+
+    with ThreadPoolExecutor(max_workers=threads) as executor:
+        futures = []
+        for local_path in _local_file_list(source):
+            relative_path = os.path.relpath(local_path, source)
+            s3_path = os.path.join(target, relative_path)
+            futures.append(executor.submit(_upload_file, local_path, s3_path, overwrite, ignore_existing))
+
+        for future in tqdm.tqdm(futures, total=total):
+            future.result()

-def download(source, target, overwrite=False):
-    assert source.startswith("s3://")

+def upload(source, target, overwrite=False, ignore_existing=False, threads=1, show_progress=True):
+    """Upload a file or a folder to S3.
+
+    Parameters
+    ----------
+    source : str
+        A path to a file or a folder to upload.
+    target : str
+        A URL to a file or a folder on S3. The URL should start with 's3://'.
+    overwrite : bool, optional
+        If the data is already on S3 it will be overwritten, by default False
+    ignore_existing : bool, optional
+        If the data is already on S3 it will not be uploaded, unless the remote file
+        has a different size, by default False
+    threads : int, optional
+        The number of threads to use when uploading a directory, by default 1
+    """
+    if os.path.isdir(source):
+        _upload_folder(source, target, overwrite, ignore_existing, threads)
+    else:
+        _upload_file(source, target, overwrite, ignore_existing)
+
+
+def _download_file(source, target, overwrite=False, ignore_existing=False):
+    s3_client = _s3_client()
     _, _, bucket, key = source.split("/", 3)

-
-
-
+    response = s3_client.head_object(Bucket=bucket, Key=key)
+    size = int(response["ContentLength"])
+
+    if os.path.exists(target):

-
-
-
+        if os.path.exists(target) and os.path.getsize(target) != size:
+            LOGGER.info(f"{target} already exists with a different size, re-downloading")
+            overwrite = True
+
+        if not overwrite and not ignore_existing:
+            raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'ignore_existing' to skip")
+
+        if ignore_existing:
+            LOGGER.info(f"{target} already exists, skipping")
             return

-    with closing(tqdm.tqdm(total=size, unit="B", unit_scale=True)) as t:
-
+    with closing(tqdm.tqdm(total=size, unit="B", unit_scale=True, leave=False)) as t:
+        s3_client.download_file(bucket, key, target, Callback=lambda x: t.update(x))
+
+
+def _download_folder(source, target, overwrite=False, ignore_existing=False, threads=1):
+    source = source.rstrip("/")
+    _, _, bucket, folder = source.split("/", 3)
+    total = _count_objects_in_folder(source)
+
+    with ThreadPoolExecutor(max_workers=threads) as executor:
+        futures = []
+        for o in _list_folder(source):
+            name = o["Key"]
+            local_path = os.path.join(target, os.path.relpath(name, folder))
+            os.makedirs(os.path.dirname(local_path), exist_ok=True)
+            futures.append(
+                executor.submit(_download_file, f"s3://{bucket}/{name}", local_path, overwrite, ignore_existing)
+            )
+
+        for future in tqdm.tqdm(futures, total=total):
+            future.result()
+
+
+def download(source, target, overwrite=False, ignore_existing=False, threads=1, show_progress=True):
+    """Download a file or a folder from S3.
+
+    Parameters
+    ----------
+    source : str
+        The URL of a file or a folder on S3. The URL should start with 's3://'. If the URL ends with a '/' it is
+        assumed to be a folder, otherwise it is assumed to be a file.
+    target : str
+        The local path where the file or folder will be downloaded.
+    overwrite : bool, optional
+        If false, files which have already been downloaded will be skipped, unless their size
+        does not match their size on S3, by default False
+    ignore_existing : bool, optional
+        If the data is already local it will not be downloaded, unless the remote file
+        has a different size, by default False
+    threads : int, optional
+        The number of threads to use when downloading a directory, by default 1
+    """
+    assert source.startswith("s3://")
+
+    if source.endswith("/"):
+        _download_folder(source, target, overwrite, ignore_existing, threads)
+    else:
+        _download_file(source, target, overwrite, ignore_existing)
+
+
+def _list_folder(target, batch=False):
+    s3_client = _s3_client()
+    _, _, bucket, prefix = target.split("/", 3)
+
+    paginator = s3_client.get_paginator("list_objects_v2")
+    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
+        if "Contents" in page:
+            objects = [{"Key": obj["Key"]} for obj in page["Contents"]]
+            if batch:
+                yield objects
+            else:
+                yield from objects
+
+
+def _count_objects_in_folder(target):
+    return sum(len(_) for _ in _list_folder(target, batch=True))
+
+
+def _delete_folder(target, threads):
+    s3_client = _s3_client()
+    _, _, bucket, _ = target.split("/", 3)
+
+    for batch in _list_folder(target, batch=True):
+        s3_client.delete_objects(Bucket=bucket, Delete={"Objects": batch})
+        LOGGER.info(f"Deleted {len(batch)} objects")
+
+
+def _delete_file(target):
+    s3_client = _s3_client()
+    _, _, bucket, key = target.split("/", 3)
+
+    LOGGER.info(f"Deleting {target}")
+    s3_client.delete_object(Bucket=bucket, Key=key)
+    LOGGER.info(f"{target} is deleted")
+
+
+def delete(target):
+    """Delete a file or a folder from S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The URL should start with 's3://'. If the URL ends with a '/' it is
+        assumed to be a folder, otherwise it is assumed to be a file.
+    """
+
+    assert target.startswith("s3://")
+
+    if target.endswith("/"):
+        _delete_folder(target)
+    else:
+        _delete_file(target)
+
+
+def list_folder(target):
+    """List the objects in a folder on S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a folder on S3. The URL should start with 's3://'.
+
+    Returns
+    -------
+    list
+        A list of the object names in the folder.
+    """
+
+    assert target.startswith("s3://")
+    return [o["Key"] for o in _list_folder(target)]
+
+
+def count_objects_in_folder(target):
+    """Count the objects in a folder on S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a folder on S3. The URL should start with 's3://'.
+
+    Returns
+    -------
+    int
+        The number of objects in the folder.
+    """
+
+    assert target.startswith("s3://")
+    return _count_objects_in_folder(target)
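
The new public interface is upload(), download(), delete(), list_folder() and count_objects_in_folder(). A minimal usage sketch, inferred from the docstrings above (the bucket name and paths are hypothetical, and AWS credentials are assumed to be configured as described in the module docstring):

    from anemoi.utils.s3 import upload, download, delete, list_folder, count_objects_in_folder

    # Upload a single file; raises ValueError if the target already exists,
    # unless overwrite=True replaces it or ignore_existing=True skips it
    upload("model.ckpt", "s3://my-bucket/models/model.ckpt", overwrite=True)

    # Upload a folder recursively, with four worker threads
    upload("data/", "s3://my-bucket/data/", ignore_existing=True, threads=4)

    # A trailing '/' marks the source as a folder to download recursively
    download("s3://my-bucket/data/", "local-copy/", threads=4)

    # Inspect the remote folder
    print(count_objects_in_folder("s3://my-bucket/data/"))
    for key in list_folder("s3://my-bucket/data/"):
        print(key)

    # Delete a single object (no trailing '/')
    delete("s3://my-bucket/models/model.ckpt")
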
anemoi/utils/text.py
CHANGED
@@ -7,7 +7,6 @@

 """Text utilities"""

-import sys
 from collections import defaultdict

 # https://en.wikipedia.org/wiki/Box-drawing_character
@@ -143,13 +142,13 @@ class Tree:
         self._kids.append(node)
         return node

-    def print(self
+    def print(self):
         padding = []

         while self._factorise():
             pass

-        self._print(padding
+        self._print(padding)

     def _leaves(self, result):
         if self.is_leaf:
@@ -206,21 +205,21 @@ class Tree:

         return result

-    def _print(self, padding
+    def _print(self, padding):
         for i, p in enumerate(padding[:-1]):
             if p == " └":
                 padding[i] = "  "
             if p == " ├":
                 padding[i] = " │"
         if padding:
-            print(f"{''.join(padding)}─{self._text}"
+            print(f"{''.join(padding)}─{self._text}")
         else:
-            print(self._text
+            print(self._text)
         padding.append("  ")
         for i, k in enumerate(self._kids):
             sep = " ├" if i < len(self._kids) - 1 else " └"
             padding[-1] = sep
-            k._print(padding
+            k._print(padding)

         padding.pop()

{anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: anemoi-utils
-Version: 0.3.2
+Version: 0.3.4
 Summary: A package to hold various functions to support training of ML models on ECMWF data.
 Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
 License: Apache License
{anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 anemoi/utils/__init__.py,sha256=zZZpbKIoGWwdCOuo6YSruLR7C0GzvzI1Wzhyqaa0K7M,456
 anemoi/utils/__main__.py,sha256=cLA2PidDTOUHaDGzd0_E5iioKYNe-PSTv567Y2fuwQk,723
-anemoi/utils/_version.py,sha256=
+anemoi/utils/_version.py,sha256=gK2CDe_mbvAwKw5ZjOIg75LuB0kCZ4LyDYjtXPapvJw,411
 anemoi/utils/caching.py,sha256=HrC9aFHlcCTaM2Z5u0ivGIXz7eFu35UQQhUuwwuG2pk,1743
 anemoi/utils/checkpoints.py,sha256=1_3mg4B-ykTVfIvIUEv7IxGyREx_ZcilVbB3U-V6O6I,5165
 anemoi/utils/cli.py,sha256=d3TT9WSm6TDkbaJ9fL74wWG91Y27a9Uh17fPM4SomTs,3300
@@ -9,16 +9,16 @@ anemoi/utils/dates.py,sha256=Ot9OTY1uFvHxW1EU4DPv3oUqmzvkXTwKuwhlfVlY788,8426
 anemoi/utils/grib.py,sha256=gVfo4KYQv31iRyoqRDwk5tiqZDUgOIvhag_kO0qjYD0,3067
 anemoi/utils/humanize.py,sha256=LD6dGnqChxA5j3tMhSybsAGRQzi33d_qS9pUoUHubkc,10330
 anemoi/utils/provenance.py,sha256=v54L9jF1JgYcclOhg3iojRl1v3ajbiWz_oc289xTgO4,9574
-anemoi/utils/s3.py,sha256=
-anemoi/utils/text.py,sha256=
+anemoi/utils/s3.py,sha256=nCgblljfe5bLYfHphXO3yi-bJdIYXk8KWBl7o-NB6Ng,9429
+anemoi/utils/text.py,sha256=4Zlc4r9dzRjkKL9xqp2vuQsoJY15bJ3y_Xv3YW_XsmU,8510
 anemoi/utils/timer.py,sha256=JKOgFkpJxmVRn57DEBolmTGwr25P-ePTWASBd8CLeqM,972
 anemoi/utils/commands/__init__.py,sha256=qAybFZPBBQs0dyx7dZ3X5JsLpE90pwrqt1vSV7cqEIw,706
 anemoi/utils/commands/checkpoint.py,sha256=SEnAizU3WklqMXUjmIh4eNrgBVwmheKG9gEBS90zwYU,1741
 anemoi/utils/mars/__init__.py,sha256=RAeY8gJ7ZvsPlcIvrQ4fy9xVHs3SphTAPw_XJDtNIKo,1750
 anemoi/utils/mars/mars.yaml,sha256=R0dujp75lLA4wCWhPeOQnzJ45WZAYLT8gpx509cBFlc,66
-anemoi_utils-0.3.2.dist-info/LICENSE,sha256=
-anemoi_utils-0.3.2.dist-info/METADATA,sha256=
-anemoi_utils-0.3.2.dist-info/WHEEL,sha256=
-anemoi_utils-0.3.2.dist-info/entry_points.txt,sha256=
-anemoi_utils-0.3.2.dist-info/top_level.txt,sha256=
-anemoi_utils-0.3.2.dist-info/RECORD,,
+anemoi_utils-0.3.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+anemoi_utils-0.3.4.dist-info/METADATA,sha256=ZNwG-WDJJsQdKJcqddkLGE69hZIz1TD8rpK6wwu9qz8,15513
+anemoi_utils-0.3.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+anemoi_utils-0.3.4.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
+anemoi_utils-0.3.4.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
+anemoi_utils-0.3.4.dist-info/RECORD,,
{anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/LICENSE
File without changes

{anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/WHEEL
File without changes

{anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/entry_points.txt
File without changes

{anemoi_utils-0.3.2.dist-info → anemoi_utils-0.3.4.dist-info}/top_level.txt
File without changes