anemoi-utils 0.4.23__py3-none-any.whl → 0.4.24__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anemoi-utils might be problematic. Click here for more details.

anemoi/utils/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.4.23'
21
- __version_tuple__ = version_tuple = (0, 4, 23)
20
+ __version__ = version = '0.4.24'
21
+ __version_tuple__ = version_tuple = (0, 4, 24)
anemoi/utils/remote/s3.py CHANGED
@@ -24,6 +24,7 @@ the `~/.config/anemoi/settings.toml`
24
24
  or `~/.config/anemoi/settings-secrets.toml` files.
25
25
  """
26
26
 
27
+ import fnmatch
27
28
  import logging
28
29
  import os
29
30
  import threading
@@ -38,15 +39,15 @@ from ..humanize import bytes_to_human
38
39
  from . import BaseDownload
39
40
  from . import BaseUpload
40
41
 
41
- LOGGER = logging.getLogger(__name__)
42
-
42
+ LOG = logging.getLogger(__name__)
43
+ SECRETS = ["aws_access_key_id", "aws_secret_access_key"]
43
44
 
44
45
  # s3_clients are not thread-safe, so we need to create a new client for each thread
45
46
 
46
47
  thread_local = threading.local()
47
48
 
48
49
 
49
- def s3_client(bucket: str, region: str = None) -> Any:
50
+ def s3_client(bucket: str, *, region: str = None, service: str = "s3") -> Any:
50
51
  """Get an S3 client for the specified bucket and region.
51
52
 
52
53
  Parameters
@@ -55,6 +56,8 @@ def s3_client(bucket: str, region: str = None) -> Any:
55
56
  The name of the S3 bucket.
56
57
  region : str, optional
57
58
  The AWS region of the S3 bucket.
59
+ service : str, optional
60
+ The AWS service to use, default is "s3".
58
61
 
59
62
  Returns
60
63
  -------
@@ -68,7 +71,7 @@ def s3_client(bucket: str, region: str = None) -> Any:
68
71
  if not hasattr(thread_local, "s3_clients"):
69
72
  thread_local.s3_clients = {}
70
73
 
71
- key = f"{bucket}-{region}"
74
+ key = f"{bucket}-{region}-{service}"
72
75
 
73
76
  if key in thread_local.s3_clients:
74
77
  return thread_local.s3_clients[key]
@@ -96,17 +99,27 @@ def s3_client(bucket: str, region: str = None) -> Any:
96
99
  # We may be accessing a different S3 compatible service
97
100
  # Use anemoi.config to get the configuration
98
101
 
99
- options = {}
100
- config = load_config(secrets=["aws_access_key_id", "aws_secret_access_key"])
102
+ region = "unknown-region"
103
+
104
+ options = {"region_name": region}
105
+ config = load_config(secrets=SECRETS)
101
106
 
102
107
  cfg = config.get("object-storage", {})
108
+ candidate = None
103
109
  for k, v in cfg.items():
104
110
  if isinstance(v, (str, int, float, bool)):
105
111
  options[k] = v
106
112
 
107
- for k, v in cfg.get(bucket, {}).items():
108
- if isinstance(v, (str, int, float, bool)):
109
- options[k] = v
113
+ if isinstance(v, dict):
114
+ if fnmatch.fnmatch(bucket, k):
115
+ if candidate is not None:
116
+ raise ValueError(f"Multiple object storage configurations match {bucket}: {candidate} and {k}")
117
+ candidate = k
118
+
119
+ if candidate is not None:
120
+ for k, v in cfg.get(candidate, {}).items():
121
+ if isinstance(v, (str, int, float, bool)):
122
+ options[k] = v
110
123
 
111
124
  type = options.pop("type", "s3")
112
125
  if type != "s3":
@@ -115,11 +128,27 @@ def s3_client(bucket: str, region: str = None) -> Any:
115
128
  if "config" in options:
116
129
  boto3_config.update(options["config"])
117
130
  del options["config"]
118
- from botocore.client import Config
119
131
 
120
132
  options["config"] = Config(**boto3_config)
121
133
 
122
- thread_local.s3_clients[key] = boto3.client("s3", **options)
134
+ def _(options):
135
+
136
+ def __(k, v):
137
+ if k in SECRETS:
138
+ return "***"
139
+ return v
140
+
141
+ if isinstance(options, dict):
142
+ return {k: __(k, v) for k, v in options.items()}
143
+
144
+ if isinstance(options, list):
145
+ return [_(o) for o in options]
146
+
147
+ return options
148
+
149
+ LOG.info(f"Using S3 options: {_(options)}")
150
+
151
+ thread_local.s3_clients[key] = boto3.client(service, **options)
123
152
 
124
153
  return thread_local.s3_clients[key]
125
154
 
@@ -215,7 +244,7 @@ class S3Upload(BaseUpload):
215
244
  size = os.path.getsize(source)
216
245
 
217
246
  if verbosity > 0:
218
- LOGGER.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")
247
+ LOG.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")
219
248
 
220
249
  try:
221
250
  results = s3.head_object(Bucket=bucket, Key=key)
@@ -227,7 +256,7 @@ class S3Upload(BaseUpload):
227
256
 
228
257
  if remote_size is not None:
229
258
  if remote_size != size:
230
- LOGGER.warning(
259
+ LOG.warning(
231
260
  f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})"
232
261
  )
233
262
  elif resume:
@@ -400,7 +429,7 @@ class S3Download(BaseDownload):
400
429
  size = int(response["ContentLength"])
401
430
 
402
431
  if verbosity > 0:
403
- LOGGER.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")
432
+ LOG.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")
404
433
 
405
434
  if overwrite:
406
435
  resume = False
@@ -409,7 +438,7 @@ class S3Download(BaseDownload):
409
438
  if os.path.exists(target):
410
439
  local_size = os.path.getsize(target)
411
440
  if local_size != size:
412
- LOGGER.warning(
441
+ LOG.warning(
413
442
  f"{target} already with different size, re-downloading (remote={size}, local={local_size})"
414
443
  )
415
444
  else:
@@ -464,7 +493,7 @@ def _list_objects(target: str, batch: bool = False) -> Iterable:
464
493
  yield from objects
465
494
 
466
495
 
467
- def _delete_folder(target: str) -> None:
496
+ def delete_folder(target: str) -> None:
468
497
  """Delete a folder from S3.
469
498
 
470
499
  Parameters
@@ -477,13 +506,13 @@ def _delete_folder(target: str) -> None:
477
506
 
478
507
  total = 0
479
508
  for batch in _list_objects(target, batch=True):
480
- LOGGER.info(f"Deleting {len(batch):,} objects from {target}")
509
+ LOG.info(f"Deleting {len(batch):,} objects from {target}")
481
510
  s3.delete_objects(Bucket=bucket, Delete={"Objects": [{"Key": o["Key"]} for o in batch]})
482
511
  total += len(batch)
483
- LOGGER.info(f"Deleted {len(batch):,} objects (total={total:,})")
512
+ LOG.info(f"Deleted {len(batch):,} objects (total={total:,})")
484
513
 
485
514
 
486
- def _delete_file(target: str) -> None:
515
+ def delete_file(target: str) -> None:
487
516
  """Delete a file from S3.
488
517
 
489
518
  Parameters
@@ -505,12 +534,12 @@ def _delete_file(target: str) -> None:
505
534
  exits = False
506
535
 
507
536
  if not exits:
508
- LOGGER.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'")
537
+ LOG.warning(f"{target} does not exist. Did you mean to delete a folder? Then add a trailing '/'")
509
538
  return
510
539
 
511
- LOGGER.info(f"Deleting {target}")
540
+ LOG.info(f"Deleting {target}")
512
541
  s3.delete_object(Bucket=bucket, Key=key)
513
- LOGGER.info(f"{target} is deleted")
542
+ LOG.info(f"{target} is deleted")
514
543
 
515
544
 
516
545
  def delete(target: str) -> None:
@@ -525,9 +554,9 @@ def delete(target: str) -> None:
525
554
  assert target.startswith("s3://")
526
555
 
527
556
  if target.endswith("/"):
528
- _delete_folder(target)
557
+ delete_folder(target)
529
558
  else:
530
- _delete_file(target)
559
+ delete_file(target)
531
560
 
532
561
 
533
562
  def list_folder(folder: str) -> Iterable:
@@ -581,7 +610,33 @@ def object_info(target: str) -> dict:
581
610
  return s3.head_object(Bucket=bucket, Key=key)
582
611
  except s3.exceptions.ClientError as e:
583
612
  if e.response["Error"]["Code"] == "404":
584
- raise ValueError(f"{target} does not exist")
613
+ raise FileNotFoundError(f"{target} does not exist")
614
+ raise
615
+
616
+
617
+ def object_exists(target: str) -> bool:
618
+ """Check if an object exists.
619
+
620
+ Parameters
621
+ ----------
622
+ target : str
623
+ The URL of a file or a folder on S3. The URL should start with 's3://'.
624
+
625
+ Returns
626
+ -------
627
+ bool
628
+ True if the object exists, False otherwise.
629
+ """
630
+
631
+ _, _, bucket, key = target.split("/", 3)
632
+ s3 = s3_client(bucket)
633
+
634
+ try:
635
+ s3.head_object(Bucket=bucket, Key=key)
636
+ return True
637
+ except s3.exceptions.ClientError as e:
638
+ if e.response["Error"]["Code"] == "404":
639
+ return False
585
640
  raise
586
641
 
587
642
 
@@ -600,7 +655,7 @@ def object_acl(target: str) -> dict:
600
655
  """
601
656
 
602
657
  _, _, bucket, key = target.split("/", 3)
603
- s3 = s3_client()
658
+ s3 = s3_client(bucket)
604
659
 
605
660
  return s3.get_object_acl(Bucket=bucket, Key=key)
606
661
 
@@ -643,3 +698,29 @@ def upload(source: str, target: str, *args, **kwargs) -> None:
643
698
 
644
699
  assert target.startswith("s3://"), f"target {target} should start with 's3://'"
645
700
  return transfer(source, target, *args, **kwargs)
701
+
702
+
703
+ def quotas(target: str) -> dict:
704
+ """Get the quotas for an S3 bucket.
705
+
706
+ Parameters
707
+ ----------
708
+ target : str
709
+ The URL of a file or a folder on S3. The URL should start with 's3://'.
710
+
711
+ Returns
712
+ -------
713
+ dict
714
+ A dictionary with the quotas for the bucket.
715
+ """
716
+ from botocore.exceptions import ClientError
717
+
718
+ _, _, bucket, _ = target.split("/", 3)
719
+ s3 = s3_client(bucket, service="service-quotas")
720
+
721
+ try:
722
+ return s3.list_service_quotas(ServiceCode="ec2")
723
+ except ClientError as e:
724
+ if e.response["Error"]["Code"] == "404":
725
+ raise ValueError(f"{target} does not exist")
726
+ raise
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anemoi-utils
3
- Version: 0.4.23
3
+ Version: 0.4.24
4
4
  Summary: A package to hold various functions to support training of ML models on ECMWF data.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License: Apache License
@@ -1,6 +1,6 @@
1
1
  anemoi/utils/__init__.py,sha256=uVhpF-VjIl_4mMywOVtgTutgsdIsqz-xdkwxeMhzuag,730
2
2
  anemoi/utils/__main__.py,sha256=6LlE4MYrPvqqrykxXh7XMi50UZteUY59NeM8P9Zs2dU,910
3
- anemoi/utils/_version.py,sha256=AnB7uXIf9bJtYv1LZYemC9XOrzt9AeZnOcUDxAsfacA,513
3
+ anemoi/utils/_version.py,sha256=eqPTx1mit5QVY_CIZPeIYTsnPPxmLZEDtDv2l58DdBE,513
4
4
  anemoi/utils/caching.py,sha256=rXbeAmpBcMbbfN4EVblaHWKicsrtx1otER84FEBtz98,6183
5
5
  anemoi/utils/checkpoints.py,sha256=N4WpAZXa4etrpSEKhHqUUtG2-x9w3FJMHcLO-dDAXPY,9600
6
6
  anemoi/utils/cli.py,sha256=IyZfnSw0u0yYnrjOrzvm2RuuKvDk4cVb8pf8BkaChgA,6209
@@ -29,13 +29,13 @@ anemoi/utils/mars/__init__.py,sha256=b-Lc3L1TAQd9ODs0Z1YSJzgZCO1K_M3DSgx_yd2qXvM
29
29
  anemoi/utils/mars/mars.yaml,sha256=R0dujp75lLA4wCWhPeOQnzJ45WZAYLT8gpx509cBFlc,66
30
30
  anemoi/utils/mars/requests.py,sha256=VFMHBVAAl0_2lOcMBa1lvaKHctN0lDJsI6_U4BucGew,1142
31
31
  anemoi/utils/remote/__init__.py,sha256=swPWHQoh-B6Xq9R489tPw0FykMue7f-bJ8enneFYSYE,20776
32
- anemoi/utils/remote/s3.py,sha256=dcXcgNddlgxwJ_OpgqOff8EWk-LT2mz20m7FcHHqz7w,17869
32
+ anemoi/utils/remote/s3.py,sha256=lhLxwBU-AKERzYURXJ-cOlaccEYmjoXqlvX3zYu5h-8,19977
33
33
  anemoi/utils/remote/ssh.py,sha256=xNtsawh8okytCKRehkRCVExbHZj-CRUQNormEHglfuw,8088
34
34
  anemoi/utils/schemas/__init__.py,sha256=nkinKlsPLPXEjfTYQT1mpKC4cvs-14w_zBkDRxakwxw,698
35
35
  anemoi/utils/schemas/errors.py,sha256=lgOXzVTYzAE0qWQf3OZ42vCWixv8lilSqLLhzARBmvI,1831
36
- anemoi_utils-0.4.23.dist-info/licenses/LICENSE,sha256=8HznKF1Vi2IvfLsKNE5A2iVyiri3pRjRPvPC9kxs6qk,11354
37
- anemoi_utils-0.4.23.dist-info/METADATA,sha256=jxTYBIvx6wzO0g8ftu2MoSPfeViRSTUrE38Zy3HUvlw,15439
38
- anemoi_utils-0.4.23.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
39
- anemoi_utils-0.4.23.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
40
- anemoi_utils-0.4.23.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
41
- anemoi_utils-0.4.23.dist-info/RECORD,,
36
+ anemoi_utils-0.4.24.dist-info/licenses/LICENSE,sha256=8HznKF1Vi2IvfLsKNE5A2iVyiri3pRjRPvPC9kxs6qk,11354
37
+ anemoi_utils-0.4.24.dist-info/METADATA,sha256=EsIP40bMdXsSEAp1v4iFkKMrzdVaE4Cw892SLNlzuWs,15439
38
+ anemoi_utils-0.4.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
+ anemoi_utils-0.4.24.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
40
+ anemoi_utils-0.4.24.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
41
+ anemoi_utils-0.4.24.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5