anemoi-utils 0.3.3.tar.gz → 0.3.5.tar.gz

This diff shows the changes between these publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.
Files changed (52)
  1. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/.pre-commit-config.yaml +1 -1
  2. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/PKG-INFO +1 -1
  3. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/conf.py +8 -4
  4. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/index.rst +2 -0
  5. anemoi_utils-0.3.5/docs/modules/s3.rst +8 -0
  6. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/_version.py +2 -2
  7. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/cli.py +1 -1
  8. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/config.py +18 -12
  9. anemoi_utils-0.3.5/src/anemoi/utils/s3.py +463 -0
  10. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi_utils.egg-info/PKG-INFO +1 -1
  11. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi_utils.egg-info/SOURCES.txt +1 -0
  12. anemoi_utils-0.3.3/src/anemoi/utils/s3.py +0 -57
  13. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/.github/workflows/python-publish.yml +0 -0
  14. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/.gitignore +0 -0
  15. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/.readthedocs.yaml +0 -0
  16. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/LICENSE +0 -0
  17. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/README.md +0 -0
  18. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/Makefile +0 -0
  19. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/_static/logo.png +0 -0
  20. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/_static/style.css +0 -0
  21. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/_templates/.gitkeep +0 -0
  22. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/installing.rst +0 -0
  23. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/modules/checkpoints.rst +0 -0
  24. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/modules/config.rst +0 -0
  25. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/modules/dates.rst +0 -0
  26. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/modules/grib.rst +0 -0
  27. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/modules/humanize.rst +0 -0
  28. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/modules/provenance.rst +0 -0
  29. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/modules/text.rst +0 -0
  30. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/docs/requirements.txt +0 -0
  31. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/pyproject.toml +0 -0
  32. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/setup.cfg +0 -0
  33. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/__init__.py +0 -0
  34. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/__main__.py +0 -0
  35. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/caching.py +0 -0
  36. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/checkpoints.py +0 -0
  37. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/commands/__init__.py +0 -0
  38. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/commands/checkpoint.py +0 -0
  39. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/dates.py +0 -0
  40. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/grib.py +0 -0
  41. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/humanize.py +0 -0
  42. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/mars/__init__.py +0 -0
  43. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/mars/mars.yaml +0 -0
  44. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/provenance.py +0 -0
  45. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/text.py +0 -0
  46. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi/utils/timer.py +0 -0
  47. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi_utils.egg-info/dependency_links.txt +0 -0
  48. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi_utils.egg-info/entry_points.txt +0 -0
  49. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi_utils.egg-info/requires.txt +0 -0
  50. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/src/anemoi_utils.egg-info/top_level.txt +0 -0
  51. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/tests/test_dates.py +0 -0
  52. {anemoi_utils-0.3.3 → anemoi_utils-0.3.5}/tests/test_utils.py +0 -0
.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
   hooks:
   - id: ruff
     # Next line if for documenation cod snippets
-    exclude: '^(dev/.*|[A-Za-z].*_)\.py$'
+    exclude: '^[^_].*_\.py$'
     args:
     - --line-length=120
     - --fix
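
The new `exclude` pattern is simpler than the old alternation, and its effect is easy to sanity-check. A minimal sketch, assuming pre-commit's usual regex matching against file paths; the file names below are hypothetical:

    import re

    # New ruff exclude pattern: skip files whose name ends in an
    # underscore before ".py" (documentation code snippets).
    pattern = re.compile(r"^[^_].*_\.py$")

    assert pattern.match("docs/snippet_.py")            # excluded from linting
    assert not pattern.match("src/anemoi/utils/s3.py")  # still linted
    assert not pattern.match("_internal_.py")           # a leading "_" never matches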
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: anemoi-utils
-Version: 0.3.3
+Version: 0.3.5
 Summary: A package to hold various functions to support training of ML models on ECMWF data.
 Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
 License: Apache License
docs/conf.py
@@ -93,12 +93,16 @@ intersphinx_mapping = {
         "https://anemoi-inference.readthedocs.io/en/latest/",
         ("../../anemoi-inference/docs/_build/html/objects.inv", None),
     ),
+    "anemoi-graphs": (
+        "https://anemoi-graphs.readthedocs.io/en/latest/",
+        ("../../anemoi-graphs/docs/_build/html/objects.inv", None),
+    ),
+    "anemoi-registry": (
+        "https://anemoi-registry.readthedocs.io/en/latest/",
+        ("../../anemoi-registry/docs/_build/html/objects.inv", None),
+    ),
 }
 
-
-# https://www.notion.so/Deepnote-Launch-Buttons-63c642a5e875463495ed2341e83a4b2a
-
-
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages. See the documentation for
docs/index.rst
@@ -47,8 +47,10 @@ of the *Anemoi* packages.
 - :ref:`anemoi-utils <anemoi-utils:index-page>`
 - :ref:`anemoi-datasets <anemoi-datasets:index-page>`
 - :ref:`anemoi-models <anemoi-models:index-page>`
+- :ref:`anemoi-graphs <anemoi-graphs:index-page>`
 - :ref:`anemoi-training <anemoi-training:index-page>`
 - :ref:`anemoi-inference <anemoi-inference:index-page>`
+- :ref:`anemoi-registry <anemoi-registry:index-page>`
 
 *********
 License
docs/modules/s3.rst (new file)
@@ -0,0 +1,8 @@
+####
+ s3
+####
+
+.. automodule:: anemoi.utils.s3
+   :members:
+   :no-undoc-members:
+   :show-inheritance:
src/anemoi/utils/_version.py
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.3.3'
-__version_tuple__ = version_tuple = (0, 3, 3)
+__version__ = version = '0.3.5'
+__version_tuple__ = version_tuple = (0, 3, 5)
src/anemoi/utils/cli.py
@@ -64,7 +64,7 @@ def register_commands(here, package, select, fail=None):
     result = {}
     not_available = {}
 
-    for p in os.listdir(here):
+    for p in sorted(os.listdir(here)):
         full = os.path.join(here, p)
         if p.startswith("_"):
             continue
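
The only change here is wrapping `os.listdir` in `sorted()`. `os.listdir` returns entries in arbitrary, filesystem-dependent order, so command discovery could differ between machines; sorting makes registration deterministic. A minimal illustration with a hypothetical helper:

    import os

    def discover_commands(here):  # hypothetical, for illustration only
        # Deterministic, platform-independent iteration order
        return [p for p in sorted(os.listdir(here)) if not p.startswith("_")]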
src/anemoi/utils/config.py
@@ -8,6 +8,7 @@
 
 import logging
 import os
+import threading
 
 try:
     import tomllib  # Only available since 3.11
@@ -60,16 +61,10 @@ class DotDict(dict):
 
 
 CONFIG = None
+CONFIG_LOCK = threading.Lock()
 
 
-def load_config():
-    """Load the configuration from `~/.anemoi.toml`.
-
-    Returns
-    -------
-    DotDict
-        The configuration
-    """
+def _load_config():
     global CONFIG
     if CONFIG is not None:
         return CONFIG
@@ -86,9 +81,20 @@ def load_config():
     return DotDict(CONFIG)
 
 
-def save_config():
-    """Save the configuration to `~/.anemoi.toml`."""
+def load_config():
+    """Load the configuration from `~/.anemoi.toml`.
+
+    Returns
+    -------
+    DotDict
+        The configuration
+    """
+    with CONFIG_LOCK:
+        return _load_config()
+
 
+def check_config_mode():
     conf = os.path.expanduser("~/.anemoi.toml")
-    with open(conf, "w") as f:
-        tomllib.dump(CONFIG, f)
+    mode = os.stat(conf).st_mode
+    if mode & 0o777 != 0o600:
+        raise SystemError(f"Configuration file {conf} is not secure. " "Please run `chmod 600 ~/.anemoi.toml`.")
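
Taken together, these hunks split loading into a lock-protected public `load_config()` plus a private `_load_config()`, drop `save_config()` (which relied on `tomllib.dump`, a function the read-only `tomllib` module does not provide), and add `check_config_mode()`, which refuses to proceed unless `~/.anemoi.toml` has mode 600. A minimal sketch of the resulting behaviour, assuming a hypothetical config file:

    import os
    import stat

    from anemoi.utils.config import check_config_mode, load_config

    # Hypothetical ~/.anemoi.toml contents:
    #
    #   [object-storage]
    #   endpoint_url = "https://some-storage.somewhere.world"

    conf = os.path.expanduser("~/.anemoi.toml")
    os.chmod(conf, stat.S_IRUSR | stat.S_IWUSR)  # 0o600, the required mode

    config = load_config()  # serialised by CONFIG_LOCK, cached in CONFIG
    check_config_mode()     # raises SystemError unless the file is mode 600
    print(config.get("object-storage", {}))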
src/anemoi/utils/s3.py (new file)
@@ -0,0 +1,463 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+"""This module provides functions to upload, download, list and delete files and folders on S3.
+The functions of this package expect that the AWS credentials are set up in the environment
+typicaly by setting the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables or
+by creating a `~/.aws/credentials` file. It is also possible to set the `endpoint_url` in the same file
+to use a different S3 compatible service::
+
+    [default]
+    endpoint_url = https://some-storage.somewhere.world
+    aws_access_key_id = xxxxxxxxxxxxxxxxxxxxxxxx
+    aws_secret_access_key = xxxxxxxxxxxxxxxxxxxxxxxx
+
+"""
+
+import concurrent
+import logging
+import os
+import threading
+from copy import deepcopy
+
+import tqdm
+
+from .config import check_config_mode
+from .config import load_config
+from .humanize import bytes
+
+LOGGER = logging.getLogger(__name__)
+
+
+# s3_clients are not thread-safe, so we need to create a new client for each thread
+
+thread_local = threading.local()
+
+
+def s3_client(bucket):
+    import boto3
+
+    config = load_config()
+    if "object-storage" in config:
+        check_config_mode()
+
+    if not hasattr(thread_local, "s3_clients"):
+        thread_local.s3_clients = {}
+
+    if bucket not in thread_local.s3_clients:
+
+        options = {}
+        options.update(config.get("object-storage", {}))
+        options.update(config.get("object-storage", {}).get(bucket, {}))
+
+        type = options.pop("type", "s3")
+        if type != "s3":
+            raise ValueError(f"Unsupported object storage type {type}")
+
+        if "config" in options:
+            from botocore.client import Config
+
+            options["config"] = Config(**options["config"])
+            del options["config"]
+
+        thread_local.s3_clients[bucket] = boto3.client("s3", **options)
+
+    return thread_local.s3_clients[bucket]
+
+
+class Transfer:
+
+    def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1):
+        assert verbosity == 1, verbosity
+
+        # from boto3.s3.transfer import TransferConfig
+        # config = TransferConfig(use_threads=False)
+        config = None
+        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
+            try:
+                if verbosity > 0:
+                    LOGGER.info(f"{self.action} {source} to {target}")
+
+                total = 0
+
+                futures = []
+                for name in self.list_source(source):
+
+                    futures.append(
+                        executor.submit(
+                            self.transfer_file,
+                            source=self.source_path(name, source),
+                            target=self.target_path(name, source, target),
+                            overwrite=overwrite,
+                            resume=resume,
+                            verbosity=verbosity - 1,
+                            config=config,
+                        )
+                    )
+                    total += self.source_size(name)
+
+                    if len(futures) % 10000 == 0:
+                        if verbosity > 0:
+                            LOGGER.info(f"Preparing transfer, {len(futures):,} files... ({bytes(total)})")
+                        done, _ = concurrent.futures.wait(
+                            futures,
+                            timeout=0.001,
+                            return_when=concurrent.futures.FIRST_EXCEPTION,
+                        )
+                        # Trigger exceptions if any
+                        for future in done:
+                            future.result()
+
+                if verbosity > 0:
+                    LOGGER.info(f"{self.action} {len(futures):,} files ({bytes(total)})")
+                    with tqdm.tqdm(total=total, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
+                        for future in futures:
+                            pbar.update(future.result())
+                else:
+                    for future in futures:
+                        future.result()
+
+            except Exception:
+                executor.shutdown(wait=False, cancel_futures=True)
+                raise
+
+
+class Upload(Transfer):
+    action = "Uploading"
+
+    def list_source(self, source):
+        for root, _, files in os.walk(source):
+            for file in files:
+                yield os.path.join(root, file)
+
+    def source_path(self, local_path, source):
+        return local_path
+
+    def target_path(self, source_path, source, target):
+        relative_path = os.path.relpath(source_path, source)
+        s3_path = os.path.join(target, relative_path)
+        return s3_path
+
+    def source_size(self, local_path):
+        return os.path.getsize(local_path)
+
+    def transfer_file(self, source, target, overwrite, resume, verbosity, config=None):
+
+        from botocore.exceptions import ClientError
+
+        assert target.startswith("s3://")
+
+        _, _, bucket, key = target.split("/", 3)
+        s3 = s3_client(bucket)
+
+        size = os.path.getsize(source)
+
+        if verbosity > 0:
+            LOGGER.info(f"{self.action} {source} to {target} ({bytes(size)})")
+
+        try:
+            results = s3.head_object(Bucket=bucket, Key=key)
+            remote_size = int(results["ContentLength"])
+        except ClientError as e:
+            if e.response["Error"]["Code"] != "404":
+                raise
+            remote_size = None
+
+        if remote_size is not None:
+            if remote_size != size:
+                LOGGER.warning(
+                    f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})"
+                )
+            elif resume:
+                # LOGGER.info(f"{target} already exists, skipping")
+                return size
+
+        if remote_size is not None and not overwrite and not resume:
+            raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
+
+        if verbosity > 0:
+            with tqdm.tqdm(total=size, unit="B", unit_scale=True, unit_divisor=1024, leave=False) as pbar:
+                s3.upload_file(source, bucket, key, Callback=lambda x: pbar.update(x), Config=config)
+        else:
+            s3.upload_file(source, bucket, key, Config=config)
+
+        return size
+
+
+class Download(Transfer):
+    action = "Downloading"
+
+    def list_source(self, source):
+        yield from _list_objects(source)
+
+    def source_path(self, s3_object, source):
+        _, _, bucket, _ = source.split("/", 3)
+        return f"s3://{bucket}/{s3_object['Key']}"
+
+    def target_path(self, s3_object, source, target):
+        _, _, _, folder = source.split("/", 3)
+        local_path = os.path.join(target, os.path.relpath(s3_object["Key"], folder))
+        os.makedirs(os.path.dirname(local_path), exist_ok=True)
+        return local_path
+
+    def source_size(self, s3_object):
+        return s3_object["Size"]
+
+    def transfer_file(self, source, target, overwrite, resume, verbosity, config=None):
+        # from boto3.s3.transfer import TransferConfig
+
+        _, _, bucket, key = source.split("/", 3)
+        s3 = s3_client(bucket)
+
+        try:
+            response = s3.head_object(Bucket=bucket, Key=key)
+        except s3.exceptions.ClientError as e:
+            print(e.response["Error"]["Code"], e.response["Error"]["Message"], bucket, key)
+            if e.response["Error"]["Code"] == "404":
+                raise ValueError(f"{source} does not exist ({bucket}, {key})")
+            raise
+
+        size = int(response["ContentLength"])
+
+        if verbosity > 0:
+            LOGGER.info(f"Downloading {source} to {target} ({bytes(size)})")
+
+        if overwrite:
+            resume = False
+
+        if resume:
+            if os.path.exists(target):
+                local_size = os.path.getsize(target)
+                if local_size != size:
+                    LOGGER.warning(
+                        f"{target} already with different size, re-downloading (remote={size}, local={size})"
+                    )
+                else:
+                    # if verbosity > 0:
+                    #     LOGGER.info(f"{target} already exists, skipping")
+                    return size
+
+        if os.path.exists(target) and not overwrite:
+            raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")
+
+        if verbosity > 0:
+            with tqdm.tqdm(total=size, unit="B", unit_scale=True, unit_divisor=1024, leave=False) as pbar:
+                s3.download_file(bucket, key, target, Callback=lambda x: pbar.update(x), Config=config)
+        else:
+            s3.download_file(bucket, key, target, Config=config)
+
+        return size
+
+
+def upload(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
+    """Upload a file or a folder to S3.
+
+    Parameters
+    ----------
+    source : str
+        A path to a file or a folder to upload.
+    target : str
+        A URL to a file or a folder on S3. The url should start with 's3://'.
+    overwrite : bool, optional
+        If the data is alreay on S3 it will be overwritten, by default False
+    resume : bool, optional
+        If the data is alreay on S3 it will not be uploaded, unless the remote file
+        has a different size, by default False
+    threads : int, optional
+        The number of threads to use when uploading a directory, by default 1
+    """
+
+    uploader = Upload()
+    if os.path.isdir(source):
+        uploader.transfer_folder(
+            source=source, target=target, overwrite=overwrite, resume=resume, verbosity=verbosity, threads=threads
+        )
+    else:
+        uploader.transfer_file(source=source, target=target, overwrite=overwrite, resume=resume, verbosity=verbosity)
+
+
+def download(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
+    """Download a file or a folder from S3.
+
+    Parameters
+    ----------
+    source : str
+        The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is
+        assumed to be a folder, otherwise it is assumed to be a file.
+    target : str
+        The local path where the file or folder will be downloaded.
+    overwrite : bool, optional
+        If false, files which have already been download will be skipped, unless their size
+        does not match their size on S3 , by default False
+    resume : bool, optional
+        If the data is alreay on local it will not be downloaded, unless the remote file
+        has a different size, by default False
+    threads : int, optional
+        The number of threads to use when downloading a directory, by default 1
+    """
+    assert source.startswith("s3://")
+
+    downloader = Download()
+
+    if source.endswith("/"):
+        downloader.transfer_folder(
+            source=source,
+            target=target,
+            overwrite=overwrite,
+            resume=resume,
+            verbosity=verbosity,
+            threads=threads,
+        )
+    else:
+        downloader.transfer_file(
+            source=source,
+            target=target,
+            overwrite=overwrite,
+            resume=resume,
+            verbosity=verbosity,
+        )
+
+
+def _list_objects(target, batch=False):
+    _, _, bucket, prefix = target.split("/", 3)
+    s3 = s3_client(bucket)
+
+    paginator = s3.get_paginator("list_objects_v2")
+
+    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
+        if "Contents" in page:
+            objects = deepcopy(page["Contents"])
+            if batch:
+                yield objects
+            else:
+                yield from objects
+
+
+def _delete_folder(target):
+    _, _, bucket, _ = target.split("/", 3)
+    s3 = s3_client(bucket)
+
+    total = 0
+    for batch in _list_objects(target, batch=True):
+        LOGGER.info(f"Deleting {len(batch):,} objects from {target}")
+        s3.delete_objects(Bucket=bucket, Delete={"Objects": [{"Key": o["Key"]} for o in batch]})
+        total += len(batch)
+        LOGGER.info(f"Deleted {len(batch):,} objects (total={total:,})")
+
+
+def _delete_file(target):
+    from botocore.exceptions import ClientError
+
+    _, _, bucket, key = target.split("/", 3)
+    s3 = s3_client(bucket)
+
+    try:
+        s3.head_object(Bucket=bucket, Key=key)
+        exits = True
+    except ClientError as e:
+        if e.response["Error"]["Code"] != "404":
+            raise
+        exits = False
+
+    if not exits:
+        LOGGER.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'")
+        return
+
+    LOGGER.info(f"Deleting {target}")
+    print(s3.delete_object(Bucket=bucket, Key=key))
+    LOGGER.info(f"{target} is deleted")
+
+
+def delete(target):
+    """Delete a file or a folder from S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The url should start with 's3://'. If the URL ends with a '/' it is
+        assumed to be a folder, otherwise it is assumed to be a file.
+    """
+
+    assert target.startswith("s3://")
+
+    if target.endswith("/"):
+        _delete_folder(target)
+    else:
+        _delete_file(target)
+
+
+def list_folder(folder):
+    """List the sub folders in a folder on S3.
+
+    Parameters
+    ----------
+    folder : str
+        The URL of a folder on S3. The url should start with 's3://'.
+
+    Returns
+    -------
+    list
+        A list of the subfolders names in the folder.
+    """
+
+    assert folder.startswith("s3://")
+    if not folder.endswith("/"):
+        folder += "/"
+
+    _, _, bucket, prefix = folder.split("/", 3)
+
+    s3 = s3_client(bucket)
+    paginator = s3.get_paginator("list_objects_v2")
+
+    for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter="/"):
+        if "CommonPrefixes" in page:
+            yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")]
+
+
+def object_info(target):
+    """Get information about an object on S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The url should start with 's3://'.
+
+    Returns
+    -------
+    dict
+        A dictionary with information about the object.
+    """
+
+    _, _, bucket, key = target.split("/", 3)
+    s3 = s3_client(bucket)
+
+    try:
+        return s3.head_object(Bucket=bucket, Key=key)
+    except s3.exceptions.ClientError as e:
+        if e.response["Error"]["Code"] == "404":
+            raise ValueError(f"{target} does not exist")
+        raise
+
+
+def object_acl(target):
+    """Get information about an object's ACL on S3.
+
+    Parameters
+    ----------
+    target : str
+        The URL of a file or a folder on S3. The url should start with 's3://'.
+
+    Returns
+    -------
+    dict
+        A dictionary with information about the object's ACL.
+    """
+
+    _, _, bucket, key = target.split("/", 3)
+    s3 = s3_client()
+
+    return s3.get_object_acl(Bucket=bucket, Key=key)
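
The module's public surface is `upload`, `download`, `delete`, `list_folder`, `object_info` and `object_acl`. A hedged usage sketch: the bucket and paths below are placeholders, and credentials are assumed to be configured as the module docstring describes:

    from anemoi.utils.s3 import delete, download, list_folder, object_info, upload

    # Upload a local folder; resume=True skips files already on S3 with the same size
    upload("/tmp/dataset", "s3://my-bucket/datasets/example", resume=True, threads=4)

    # A trailing "/" marks the source as a folder
    download("s3://my-bucket/datasets/example/", "/tmp/copy", overwrite=True)

    for sub in list_folder("s3://my-bucket/datasets/"):  # iterate sub-folders
        print(sub)

    print(object_info("s3://my-bucket/datasets/example/file.bin")["ContentLength"])

    delete("s3://my-bucket/datasets/example/")  # trailing "/" deletes the whole folder

Note that as released, `object_acl` calls `s3_client()` without the `bucket` argument the function requires, so it raises a `TypeError`; it is left out of the sketch.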
src/anemoi_utils.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: anemoi-utils
-Version: 0.3.3
+Version: 0.3.5
 Summary: A package to hold various functions to support training of ML models on ECMWF data.
 Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
 License: Apache License
src/anemoi_utils.egg-info/SOURCES.txt
@@ -19,6 +19,7 @@ docs/modules/dates.rst
 docs/modules/grib.rst
 docs/modules/humanize.rst
 docs/modules/provenance.rst
+docs/modules/s3.rst
 docs/modules/text.rst
 src/anemoi/utils/__init__.py
 src/anemoi/utils/__main__.py
anemoi_utils-0.3.3/src/anemoi/utils/s3.py (deleted)
@@ -1,57 +0,0 @@
-# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
-# This software is licensed under the terms of the Apache Licence Version 2.0
-# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
-# In applying this licence, ECMWF does not waive the privileges and immunities
-# granted to it by virtue of its status as an intergovernmental organisation
-# nor does it submit to any jurisdiction.
-import logging
-import os
-from contextlib import closing
-
-import boto3
-import tqdm
-
-LOG = logging.getLogger(__name__)
-
-
-def upload(source, target, overwrite=False, ignore_existing=False):
-    # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html
-    assert target.startswith("s3://")
-
-    _, _, bucket, key = target.split("/", 3)
-
-    LOG.info(f"Uploading {source} to {target}")
-    s3_client = boto3.client("s3")
-
-    if not overwrite:
-        results = s3_client.list_objects(Bucket=bucket, Prefix=key)
-        if results.get("Contents"):
-            if ignore_existing:
-                LOG.info(f"{target} already exists, skipping")
-                return
-            else:
-                raise ValueError(f"{target} already exists, use --overwrite to replace")
-
-    size = os.path.getsize(source)
-    with closing(tqdm.tqdm(total=size, unit="B", unit_scale=True)) as t:
-        s3_client.upload_file(source, bucket, key, Callback=lambda x: t.update(x))
-
-    LOG.info(f"{target} is ready")
-
-
-def download(source, target, overwrite=False):
-    assert source.startswith("s3://")
-
-    _, _, bucket, key = source.split("/", 3)
-
-    s3 = boto3.client("s3")
-    response = s3.head_object(Bucket=bucket, Key=key)
-    size = response["ContentLength"]
-
-    if not overwrite:
-        if os.path.exists(source) and os.path.getsize(source) == size:
-            LOG.info(f"{source} already exists, skipping")
-            return
-
-    with closing(tqdm.tqdm(total=size, unit="B", unit_scale=True)) as t:
-        s3.download_file(bucket, key, target, Callback=lambda x: t.update(x))
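
For callers of the 0.3.3 helpers, the keyword surface changed: `ignore_existing` is gone, `resume` takes its place, and `overwrite` is now keyword-only. A rough migration sketch with placeholder paths:

    from anemoi.utils.s3 import upload

    # 0.3.3: upload("model.ckpt", "s3://bucket/model.ckpt", ignore_existing=True)
    # 0.3.5:
    upload("model.ckpt", "s3://bucket/model.ckpt", resume=True)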