anemoi-utils 0.3.10__tar.gz → 0.3.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anemoi-utils might be problematic. Click here for more details.

Files changed (52) hide show
  1. {anemoi_utils-0.3.10/src/anemoi_utils.egg-info → anemoi_utils-0.3.12}/PKG-INFO +1 -1
  2. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/installing.rst +1 -1
  3. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/_version.py +2 -2
  4. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/caching.py +20 -9
  5. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/config.py +5 -4
  6. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/humanize.py +109 -14
  7. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/provenance.py +1 -1
  8. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/s3.py +99 -31
  9. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12/src/anemoi_utils.egg-info}/PKG-INFO +1 -1
  10. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/.github/workflows/python-publish.yml +0 -0
  11. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/.gitignore +0 -0
  12. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/.pre-commit-config.yaml +0 -0
  13. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/.readthedocs.yaml +0 -0
  14. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/LICENSE +0 -0
  15. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/README.md +0 -0
  16. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/Makefile +0 -0
  17. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/_static/logo.png +0 -0
  18. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/_static/style.css +0 -0
  19. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/_templates/.gitkeep +0 -0
  20. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/conf.py +0 -0
  21. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/index.rst +0 -0
  22. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/modules/checkpoints.rst +0 -0
  23. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/modules/config.rst +0 -0
  24. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/modules/dates.rst +0 -0
  25. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/modules/grib.rst +0 -0
  26. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/modules/humanize.rst +0 -0
  27. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/modules/provenance.rst +0 -0
  28. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/modules/s3.rst +0 -0
  29. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/modules/text.rst +0 -0
  30. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/docs/requirements.txt +0 -0
  31. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/pyproject.toml +0 -0
  32. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/setup.cfg +0 -0
  33. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/__init__.py +0 -0
  34. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/__main__.py +0 -0
  35. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/checkpoints.py +0 -0
  36. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/cli.py +0 -0
  37. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/commands/__init__.py +0 -0
  38. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/commands/config.py +0 -0
  39. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/dates.py +0 -0
  40. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/grib.py +0 -0
  41. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/hindcasts.py +0 -0
  42. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/mars/__init__.py +0 -0
  43. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/mars/mars.yaml +0 -0
  44. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/text.py +0 -0
  45. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi/utils/timer.py +0 -0
  46. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi_utils.egg-info/SOURCES.txt +0 -0
  47. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi_utils.egg-info/dependency_links.txt +0 -0
  48. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi_utils.egg-info/entry_points.txt +0 -0
  49. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi_utils.egg-info/requires.txt +0 -0
  50. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/src/anemoi_utils.egg-info/top_level.txt +0 -0
  51. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/tests/test_dates.py +0 -0
  52. {anemoi_utils-0.3.10 → anemoi_utils-0.3.12}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: anemoi-utils
3
- Version: 0.3.10
3
+ Version: 0.3.12
4
4
  Summary: A package to hold various functions to support training of ML models on ECMWF data.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License: Apache License
@@ -22,7 +22,7 @@ The options are:
22
22
 
23
23
  .. code:: bash
24
24
 
25
- git clone ...
25
+ git clone git@github.com:ecmwf/anemoi-utils.git
26
26
  cd anemoi-utils
27
27
  pip install .[dev]
28
28
  pip install -r docs/requirements.txt
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.3.10'
16
- __version_tuple__ = version_tuple = (0, 3, 10)
15
+ __version__ = version = '0.3.12'
16
+ __version_tuple__ = version_tuple = (0, 3, 12)
@@ -9,17 +9,26 @@ import hashlib
9
9
  import json
10
10
  import os
11
11
  import time
12
+ from threading import Lock
13
+
14
+ LOCK = Lock()
15
+ CACHE = {}
12
16
 
13
17
 
14
18
  def cache(key, proc, collection="default", expires=None):
15
- path = os.path.join(os.path.expanduser("~"), ".cache", "anemoi", collection)
16
- os.makedirs(path, exist_ok=True)
17
19
 
18
20
  key = json.dumps(key, sort_keys=True)
19
21
  m = hashlib.md5()
20
22
  m.update(key.encode("utf-8"))
23
+ m = m.hexdigest()
24
+
25
+ if m in CACHE:
26
+ return CACHE[m]
27
+
28
+ path = os.path.join(os.path.expanduser("~"), ".cache", "anemoi", collection)
29
+ os.makedirs(path, exist_ok=True)
21
30
 
22
- filename = os.path.join(path, m.hexdigest())
31
+ filename = os.path.join(path, m)
23
32
  if os.path.exists(filename):
24
33
  with open(filename, "r") as f:
25
34
  data = json.load(f)
@@ -35,6 +44,7 @@ def cache(key, proc, collection="default", expires=None):
35
44
  with open(filename, "w") as f:
36
45
  json.dump(data, f)
37
46
 
47
+ CACHE[m] = value
38
48
  return value
39
49
 
40
50
 
@@ -49,11 +59,12 @@ class cached:
49
59
  full = f"{func.__module__}.{func.__name__}"
50
60
 
51
61
  def wrapped(*args, **kwargs):
52
- return cache(
53
- (full, args, kwargs),
54
- lambda: func(*args, **kwargs),
55
- self.collection,
56
- self.expires,
57
- )
62
+ with LOCK:
63
+ return cache(
64
+ (full, args, kwargs),
65
+ lambda: func(*args, **kwargs),
66
+ self.collection,
67
+ self.expires,
68
+ )
58
69
 
59
70
  return wrapped
@@ -205,8 +205,9 @@ def load_any_dict_format(path):
205
205
 
206
206
  def _load_config(name="settings.toml", secrets=None, defaults=None):
207
207
 
208
- if name in CONFIG:
209
- return CONFIG[name]
208
+ key = json.dumps((name, secrets, defaults), sort_keys=True, default=str)
209
+ if key in CONFIG:
210
+ return CONFIG[key]
210
211
 
211
212
  path = config_path(name)
212
213
  if os.path.exists(path):
@@ -238,8 +239,8 @@ def _load_config(name="settings.toml", secrets=None, defaults=None):
238
239
  secret_config = _load_config(secret_name)
239
240
  _merge_dicts(config, secret_config)
240
241
 
241
- CONFIG[name] = DotDict(config)
242
- return CONFIG[name]
242
+ CONFIG[key] = DotDict(config)
243
+ return CONFIG[key]
243
244
 
244
245
 
245
246
  def _save_config(name, data):
@@ -12,10 +12,11 @@
12
12
  import datetime
13
13
  import json
14
14
  import re
15
+ import warnings
15
16
  from collections import defaultdict
16
17
 
17
18
 
18
- def bytes(n: float) -> str:
19
+ def bytes_to_human(n: float) -> str:
19
20
  """Convert a number of bytes to a human readable string
20
21
 
21
22
  >>> bytes(4096)
@@ -55,7 +56,16 @@ def bytes(n: float) -> str:
55
56
  return "%s%g%s" % (sign, int(n * 10 + 0.5) / 10.0, u[i])
56
57
 
57
58
 
58
- def base2(n) -> str:
59
+ def bytes(n: float) -> str:
60
+ warnings.warn(
61
+ "Function bytes is deprecated and will be removed in a future version. Use bytes_to_human instead.",
62
+ category=DeprecationWarning,
63
+ stacklevel=2,
64
+ )
65
+ return bytes_to_human(n)
66
+
67
+
68
+ def base2_to_human(n) -> str:
59
69
 
60
70
  u = ["", "K", "M", "G", "T", " P", "E", "Z", "Y"]
61
71
  i = 0
@@ -65,6 +75,16 @@ def base2(n) -> str:
65
75
  return "%g%s" % (int(n * 10 + 0.5) / 10.0, u[i])
66
76
 
67
77
 
78
+ def base2(n) -> str:
79
+
80
+ warnings.warn(
81
+ "Function base2 is deprecated and will be removed in a future version. Use base2_to_human instead.",
82
+ category=DeprecationWarning,
83
+ stacklevel=2,
84
+ )
85
+ return base2_to_human(n)
86
+
87
+
68
88
  PERIODS = (
69
89
  (7 * 24 * 60 * 60, "week"),
70
90
  (24 * 60 * 60, "day"),
@@ -81,7 +101,7 @@ def _plural(count):
81
101
  return ""
82
102
 
83
103
 
84
- def seconds(seconds: float) -> str:
104
+ def seconds_to_human(seconds: float) -> str:
85
105
  """Convert a number of seconds to a human readable string
86
106
 
87
107
  >>> seconds(4000)
@@ -140,12 +160,17 @@ def seconds(seconds: float) -> str:
140
160
  return " ".join(s)
141
161
 
142
162
 
143
- def number(value):
144
- return f"{value:,}"
163
+ def seconds(seconds: float) -> str:
164
+ warnings.warn(
165
+ "Function seconds is deprecated and will be removed in a future version. Use seconds_to_human instead.",
166
+ category=DeprecationWarning,
167
+ stacklevel=2,
168
+ )
169
+ return seconds_to_human(seconds)
145
170
 
146
171
 
147
172
  def plural(value, what):
148
- return f"{number(value)} {what}{_plural(value)}"
173
+ return f"{value:,} {what}{_plural(value)}"
149
174
 
150
175
 
151
176
  DOW = [
@@ -395,7 +420,7 @@ def list_to_human(lst, conjunction="and"):
395
420
  return f" {conjunction} ".join(lst)
396
421
 
397
422
 
398
- def as_number(value, name, units, none_ok):
423
+ def human_to_number(value, name, units, none_ok):
399
424
  if value is None and none_ok:
400
425
  return None
401
426
 
@@ -414,17 +439,44 @@ def as_number(value, name, units, none_ok):
414
439
  return value * units[unit]
415
440
 
416
441
 
417
- def as_seconds(value, name=None, none_ok=False):
442
+ def as_number(value, name=None, units=None, none_ok=False):
443
+ warnings.warn(
444
+ "Function as_number is deprecated and will be removed in a future version. Use human_to_number instead.",
445
+ category=DeprecationWarning,
446
+ stacklevel=2,
447
+ )
448
+ return human_to_number(value, name, units, none_ok)
449
+
450
+
451
+ def human_seconds(value, name=None, none_ok=False):
418
452
  units = dict(s=1, m=60, h=3600, d=86400, w=86400 * 7)
419
- return as_number(value, name, units, none_ok)
453
+ return human_to_number(value, name, units, none_ok)
420
454
 
421
455
 
422
- def as_percent(value, name=None, none_ok=False):
456
+ def as_seconds(value, name=None, none_ok=False):
457
+ warnings.warn(
458
+ "Function as_seconds is deprecated and will be removed in a future version. Use human_seconds instead.",
459
+ category=DeprecationWarning,
460
+ stacklevel=2,
461
+ )
462
+ return human_seconds(value, name, none_ok)
463
+
464
+
465
+ def human_to_percent(value, name=None, none_ok=False):
423
466
  units = {"%": 1}
424
- return as_number(value, name, units, none_ok)
467
+ return human_to_number(value, name, units, none_ok)
425
468
 
426
469
 
427
- def as_bytes(value, name=None, none_ok=False):
470
+ def as_percent(value, name=None, none_ok=False):
471
+ warnings.warn(
472
+ "Function as_percent is deprecated and will be removed in a future version. Use human_to_percent instead.",
473
+ category=DeprecationWarning,
474
+ stacklevel=2,
475
+ )
476
+ return human_to_percent(value, name, none_ok)
477
+
478
+
479
+ def human_to_bytes(value, name=None, none_ok=False):
428
480
  units = {}
429
481
  n = 1
430
482
  for u in "KMGTP":
@@ -432,10 +484,19 @@ def as_bytes(value, name=None, none_ok=False):
432
484
  units[u] = n
433
485
  units[u.lower()] = n
434
486
 
435
- return as_number(value, name, units, none_ok)
487
+ return human_to_number(value, name, units, none_ok)
436
488
 
437
489
 
438
- def as_timedelta(value, name=None, none_ok=False):
490
+ def as_bytes(value, name=None, none_ok=False):
491
+ warnings.warn(
492
+ "Function as_bytes is deprecated and will be removed in a future version. Use human_to_bytes instead.",
493
+ category=DeprecationWarning,
494
+ stacklevel=2,
495
+ )
496
+ return human_to_bytes(value, name, none_ok)
497
+
498
+
499
+ def human_to_timedelta(value, name=None, none_ok=False):
439
500
  if value is None and none_ok:
440
501
  return None
441
502
 
@@ -471,6 +532,15 @@ def as_timedelta(value, name=None, none_ok=False):
471
532
  )
472
533
 
473
534
 
535
+ def as_timedelta(value, name=None, none_ok=False):
536
+ warnings.warn(
537
+ "Function as_timedelta is deprecated and will be removed in a future version. Use human_to_timedelta instead.",
538
+ category=DeprecationWarning,
539
+ stacklevel=2,
540
+ )
541
+ return human_to_timedelta(value, name, none_ok)
542
+
543
+
474
544
  def rounded_datetime(d):
475
545
  if float(d.microsecond) / 1000.0 / 1000.0 >= 0.5:
476
546
  d = d + datetime.timedelta(seconds=1)
@@ -530,3 +600,28 @@ def json_pretty_dump(obj, max_line_length=120, default=str):
530
600
  return json.dumps(obj, default=default)
531
601
 
532
602
  return _format_json(obj)
603
+
604
+
605
+ def shorten_list(lst, max_length=5):
606
+ """Shorten a list to a maximum length.
607
+
608
+ Parameters
609
+ ----------
610
+ lst
611
+ The list to be shortened.
612
+ max_length
613
+ Maximum length of the shortened list.
614
+
615
+ Returns
616
+ -------
617
+ list
618
+ Shortened list.
619
+ """
620
+ if len(lst) <= max_length:
621
+ return lst
622
+ else:
623
+ half = max_length // 2
624
+ result = list(lst[:half]) + ["..."] + list(lst[max_length - half :])
625
+ if isinstance(lst, tuple):
626
+ return tuple(result)
627
+ return result
@@ -86,7 +86,7 @@ def version(versions, name, module, roots, namespaces, paths, full):
86
86
  paths.add((name, path))
87
87
 
88
88
  try:
89
- versions[name] = module.__version__
89
+ versions[name] = str(module.__version__)
90
90
  return
91
91
  except AttributeError:
92
92
  pass
@@ -16,6 +16,10 @@ to use a different S3 compatible service::
16
16
  aws_access_key_id = xxxxxxxxxxxxxxxxxxxxxxxx
17
17
  aws_secret_access_key = xxxxxxxxxxxxxxxxxxxxxxxx
18
18
 
19
+ Alternatively, the `endpoint_url` and keys can be set in one of
20
+ the `~/.config/anemoi/settings.toml`
21
+ or `~/.config/anemoi/settings-secrets.toml` files.
22
+
19
23
  """
20
24
 
21
25
  import concurrent.futures
@@ -27,7 +31,7 @@ from copy import deepcopy
27
31
  import tqdm
28
32
 
29
33
  from .config import load_config
30
- from .humanize import bytes
34
+ from .humanize import bytes_to_human
31
35
 
32
36
  LOGGER = logging.getLogger(__name__)
33
37
 
@@ -37,17 +41,43 @@ LOGGER = logging.getLogger(__name__)
37
41
  thread_local = threading.local()
38
42
 
39
43
 
40
- def s3_client(bucket):
44
+ def s3_client(bucket, region=None):
41
45
  import boto3
42
-
43
- config = load_config(secrets=["aws_access_key_id", "aws_secret_access_key"])
46
+ from botocore import UNSIGNED
47
+ from botocore.client import Config
44
48
 
45
49
  if not hasattr(thread_local, "s3_clients"):
46
50
  thread_local.s3_clients = {}
47
51
 
48
- if bucket not in thread_local.s3_clients:
52
+ key = f"{bucket}-{region}"
53
+
54
+ boto3_config = dict(max_pool_connections=25)
55
+
56
+ if key in thread_local.s3_clients:
57
+ return thread_local.s3_clients[key]
58
+
59
+ boto3_config = dict(max_pool_connections=25)
60
+
61
+ if region:
62
+ # This is using AWS
63
+
64
+ options = {"region_name": region}
65
+
66
+ # Anonymous access
67
+ if not (
68
+ os.path.exists(os.path.expanduser("~/.aws/credentials"))
69
+ or ("AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ)
70
+ ):
71
+ boto3_config["signature_version"] = UNSIGNED
72
+
73
+ else:
74
+
75
+ # We may be accessing a different S3 compatible service
76
+ # Use anemoi.config to get the configuration
49
77
 
50
78
  options = {}
79
+ config = load_config(secrets=["aws_access_key_id", "aws_secret_access_key"])
80
+
51
81
  cfg = config.get("object-storage", {})
52
82
  for k, v in cfg.items():
53
83
  if isinstance(v, (str, int, float, bool)):
@@ -62,21 +92,29 @@ def s3_client(bucket):
62
92
  raise ValueError(f"Unsupported object storage type {type}")
63
93
 
64
94
  if "config" in options:
95
+ boto3_config.update(options["config"])
96
+ del options["config"]
65
97
  from botocore.client import Config
66
98
 
67
- options["config"] = Config(**options["config"])
68
- del options["config"]
99
+ options["config"] = Config(**boto3_config)
100
+
101
+ thread_local.s3_clients[key] = boto3.client("s3", **options)
102
+
103
+ return thread_local.s3_clients[key]
69
104
 
70
- thread_local.s3_clients[bucket] = boto3.client("s3", **options)
71
105
 
72
- return thread_local.s3_clients[bucket]
106
+ def _ignore(number_of_files, total_size, total_transferred, transfering):
107
+ pass
73
108
 
74
109
 
75
110
  class Transfer:
76
111
 
77
- def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1):
112
+ def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1, progress=None):
78
113
  assert verbosity == 1, verbosity
79
114
 
115
+ if progress is None:
116
+ progress = _ignore
117
+
80
118
  # from boto3.s3.transfer import TransferConfig
81
119
  # config = TransferConfig(use_threads=False)
82
120
  config = None
@@ -85,7 +123,8 @@ class Transfer:
85
123
  if verbosity > 0:
86
124
  LOGGER.info(f"{self.action} {source} to {target}")
87
125
 
88
- total = 0
126
+ total_size = 0
127
+ total_transferred = 0
89
128
 
90
129
  futures = []
91
130
  for name in self.list_source(source):
@@ -101,11 +140,14 @@ class Transfer:
101
140
  config=config,
102
141
  )
103
142
  )
104
- total += self.source_size(name)
143
+ total_size += self.source_size(name)
105
144
 
106
145
  if len(futures) % 10000 == 0:
146
+
147
+ progress(len(futures), total_size, 0, False)
148
+
107
149
  if verbosity > 0:
108
- LOGGER.info(f"Preparing transfer, {len(futures):,} files... ({bytes(total)})")
150
+ LOGGER.info(f"Preparing transfer, {len(futures):,} files... ({bytes_to_human(total_size)})")
109
151
  done, _ = concurrent.futures.wait(
110
152
  futures,
111
153
  timeout=0.001,
@@ -115,14 +157,22 @@ class Transfer:
115
157
  for future in done:
116
158
  future.result()
117
159
 
160
+ number_of_files = len(futures)
161
+ progress(number_of_files, total_size, 0, True)
162
+
118
163
  if verbosity > 0:
119
- LOGGER.info(f"{self.action} {len(futures):,} files ({bytes(total)})")
120
- with tqdm.tqdm(total=total, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
121
- for future in futures:
122
- pbar.update(future.result())
164
+ LOGGER.info(f"{self.action} {number_of_files:,} files ({bytes_to_human(total_size)})")
165
+ with tqdm.tqdm(total=total_size, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
166
+ for future in concurrent.futures.as_completed(futures):
167
+ size = future.result()
168
+ pbar.update(size)
169
+ total_transferred += size
170
+ progress(number_of_files, total_size, total_transferred, True)
123
171
  else:
124
- for future in futures:
125
- future.result()
172
+ for future in concurrent.futures.as_completed(futures):
173
+ size = future.result()
174
+ total_transferred += size
175
+ progress(number_of_files, total_size, total_transferred, True)
126
176
 
127
177
  except Exception:
128
178
  executor.shutdown(wait=False, cancel_futures=True)
@@ -148,7 +198,7 @@ class Upload(Transfer):
148
198
  def source_size(self, local_path):
149
199
  return os.path.getsize(local_path)
150
200
 
151
- def transfer_file(self, source, target, overwrite, resume, verbosity, config=None):
201
+ def transfer_file(self, source, target, overwrite, resume, verbosity, progress=None, config=None):
152
202
  try:
153
203
  return self._transfer_file(source, target, overwrite, resume, verbosity, config=config)
154
204
  except Exception as e:
@@ -168,7 +218,7 @@ class Upload(Transfer):
168
218
  size = os.path.getsize(source)
169
219
 
170
220
  if verbosity > 0:
171
- LOGGER.info(f"{self.action} {source} to {target} ({bytes(size)})")
221
+ LOGGER.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")
172
222
 
173
223
  try:
174
224
  results = s3.head_object(Bucket=bucket, Key=key)
@@ -218,7 +268,7 @@ class Download(Transfer):
218
268
  def source_size(self, s3_object):
219
269
  return s3_object["Size"]
220
270
 
221
- def transfer_file(self, source, target, overwrite, resume, verbosity, config=None):
271
+ def transfer_file(self, source, target, overwrite, resume, verbosity, progress, config=None):
222
272
  try:
223
273
  return self._transfer_file(source, target, overwrite, resume, verbosity, config=config)
224
274
  except Exception as e:
@@ -235,7 +285,6 @@ class Download(Transfer):
235
285
  try:
236
286
  response = s3.head_object(Bucket=bucket, Key=key)
237
287
  except s3.exceptions.ClientError as e:
238
- print(e.response["Error"]["Code"], e.response["Error"]["Message"], bucket, key)
239
288
  if e.response["Error"]["Code"] == "404":
240
289
  raise ValueError(f"{source} does not exist ({bucket}, {key})")
241
290
  raise
@@ -243,7 +292,7 @@ class Download(Transfer):
243
292
  size = int(response["ContentLength"])
244
293
 
245
294
  if verbosity > 0:
246
- LOGGER.info(f"Downloading {source} to {target} ({bytes(size)})")
295
+ LOGGER.info(f"Downloading {source} to {target} ({bytes_to_human(size)})")
247
296
 
248
297
  if overwrite:
249
298
  resume = False
@@ -272,7 +321,7 @@ class Download(Transfer):
272
321
  return size
273
322
 
274
323
 
275
- def upload(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
324
+ def upload(source, target, *, overwrite=False, resume=False, verbosity=1, progress=None, threads=1):
276
325
  """Upload a file or a folder to S3.
277
326
 
278
327
  Parameters
@@ -286,6 +335,9 @@ def upload(source, target, *, overwrite=False, resume=False, verbosity=1, thread
286
335
  resume : bool, optional
287
336
  If the data is already on S3 it will not be uploaded, unless the remote file
288
337
  has a different size, by default False
338
+ progress: callable, optional
339
+ A callable that will be called with the number of files, the total size of the files, the total size
340
+ transferred and a boolean indicating if the transfer has started. By default None
289
341
  threads : int, optional
290
342
  The number of threads to use when uploading a directory, by default 1
291
343
  """
@@ -293,13 +345,26 @@ def upload(source, target, *, overwrite=False, resume=False, verbosity=1, thread
293
345
  uploader = Upload()
294
346
  if os.path.isdir(source):
295
347
  uploader.transfer_folder(
296
- source=source, target=target, overwrite=overwrite, resume=resume, verbosity=verbosity, threads=threads
348
+ source=source,
349
+ target=target,
350
+ overwrite=overwrite,
351
+ resume=resume,
352
+ verbosity=verbosity,
353
+ progress=progress,
354
+ threads=threads,
297
355
  )
298
356
  else:
299
- uploader.transfer_file(source=source, target=target, overwrite=overwrite, resume=resume, verbosity=verbosity)
357
+ uploader.transfer_file(
358
+ source=source,
359
+ target=target,
360
+ overwrite=overwrite,
361
+ resume=resume,
362
+ verbosity=verbosity,
363
+ progress=progress,
364
+ )
300
365
 
301
366
 
302
- def download(source, target, *, overwrite=False, resume=False, verbosity=1, threads=1):
367
+ def download(source, target, *, overwrite=False, resume=False, verbosity=1, progress=None, threads=1):
303
368
  """Download a file or a folder from S3.
304
369
 
305
370
  Parameters
@@ -315,6 +380,9 @@ def download(source, target, *, overwrite=False, resume=False, verbosity=1, thre
315
380
  resume : bool, optional
316
381
  If the data is already local it will not be downloaded, unless the remote file
317
382
  has a different size, by default False
383
+ progress: callable, optional
384
+ A callable that will be called with the number of files, the total size of the files, the total size
385
+ transferred and a boolean indicating if the transfer has started. By default None
318
386
  threads : int, optional
319
387
  The number of threads to use when downloading a directory, by default 1
320
388
  """
@@ -329,6 +397,7 @@ def download(source, target, *, overwrite=False, resume=False, verbosity=1, thre
329
397
  overwrite=overwrite,
330
398
  resume=resume,
331
399
  verbosity=verbosity,
400
+ progress=progress,
332
401
  threads=threads,
333
402
  )
334
403
  else:
@@ -338,6 +407,7 @@ def download(source, target, *, overwrite=False, resume=False, verbosity=1, thre
338
407
  overwrite=overwrite,
339
408
  resume=resume,
340
409
  verbosity=verbosity,
410
+ progress=progress,
341
411
  )
342
412
 
343
413
 
@@ -387,7 +457,7 @@ def _delete_file(target):
387
457
  return
388
458
 
389
459
  LOGGER.info(f"Deleting {target}")
390
- print(s3.delete_object(Bucket=bucket, Key=key))
460
+ s3.delete_object(Bucket=bucket, Key=key)
391
461
  LOGGER.info(f"{target} is deleted")
392
462
 
393
463
 
@@ -423,13 +493,11 @@ def list_folder(folder):
423
493
  A list of the subfolders names in the folder.
424
494
  """
425
495
 
426
- print(folder)
427
496
  assert folder.startswith("s3://")
428
497
  if not folder.endswith("/"):
429
498
  folder += "/"
430
499
 
431
500
  _, _, bucket, prefix = folder.split("/", 3)
432
- print(bucket, prefix)
433
501
 
434
502
  s3 = s3_client(bucket)
435
503
  paginator = s3.get_paginator("list_objects_v2")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: anemoi-utils
3
- Version: 0.3.10
3
+ Version: 0.3.12
4
4
  Summary: A package to hold various functions to support training of ML models on ECMWF data.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License: Apache License
File without changes
File without changes
File without changes
File without changes