anemoi-utils 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anemoi-utils might be problematic. Click here for more details.

anemoi/utils/__main__.py CHANGED
@@ -1,12 +1,11 @@
1
- #!/usr/bin/env python
2
- # (C) Copyright 2024 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
3
2
  #
4
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
5
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
- #
10
9
 
11
10
  from anemoi.utils.cli import cli_main
12
11
  from anemoi.utils.cli import make_parser
anemoi/utils/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.4.4'
16
- __version_tuple__ = version_tuple = (0, 4, 4)
15
+ __version__ = version = '0.4.5'
16
+ __version_tuple__ = version_tuple = (0, 4, 5)
@@ -94,8 +94,8 @@ def load_metadata(path: str, *, supporting_arrays=False, name: str = DEFAULT_NAM
94
94
  with zipfile.ZipFile(path, "r") as f:
95
95
  metadata = json.load(f.open(metadata, "r"))
96
96
  if supporting_arrays:
97
- metadata["supporting_arrays"] = load_supporting_arrays(f, metadata.get("supporting_arrays", {}))
98
- return metadata, supporting_arrays
97
+ arrays = load_supporting_arrays(f, metadata.get("supporting_arrays_paths", {}))
98
+ return metadata, arrays
99
99
 
100
100
  return metadata
101
101
  else:
@@ -1,12 +1,11 @@
1
- #!/usr/bin/env python
2
- # (C) Copyright 2024 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
3
2
  #
4
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
5
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
- #
10
9
 
11
10
  import os
12
11
 
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env python3
2
1
  # (C) Copyright 2024 Anemoi contributors.
3
2
  #
4
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
@@ -0,0 +1,76 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+ from __future__ import annotations
11
+
12
+ import functools
13
+ from typing import Any
14
+ from typing import Callable
15
+
16
+
17
def aliases(
    aliases: dict[str, str | list[str]] | None = None, **kwargs: str | list[str]
) -> Callable[[Callable], Callable]:
    """Alias keyword arguments in a function call.

    Allows for dynamically renaming keyword arguments in a function call.

    Parameters
    ----------
    aliases : dict[str, str | list[str]] | None, optional
        Key, value pair of aliases, with keys being the true name, and value being a str or list of aliases,
        by default None. The mapping is read only; it is never modified.
    **kwargs : str | list[str]
        Kwargs form of aliases

    Returns
    -------
    Callable
        Decorator function that renames keyword arguments in a function call.

    Raises
    ------
    ValueError
        If the aliasing would result in duplicate keys. Raised when the decorated
        function is called, not when it is decorated.

    Examples
    --------
    ```python
    @aliases(a="b", c=["d", "e"])
    def func(a, c):
        return a, c

    func(a=1, c=2)  # (1, 2)
    func(b=1, d=2)  # (1, 2)
    ```

    """

    # Merge into a fresh dict: updating the caller's mapping in place would leak the
    # keyword-form aliases into every other use of that mapping.
    merged = dict(aliases) if aliases is not None else {}
    merged.update(kwargs)

    # Invert to "alias -> true name" so a call-time lookup is a single dict access.
    alias_to_name = {
        alias: name
        for name, names in merged.items()
        for alias in (names if isinstance(names, list) else [names])
    }

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # Snapshot the matching aliases first, because kwargs is modified in the loop.
            for alias in set(kwargs).intersection(alias_to_name):
                name = alias_to_name[alias]
                if name in kwargs:
                    raise ValueError(
                        f"When aliasing {alias} with {name} duplicate keys were present. Cannot include both."
                    )
                kwargs[name] = kwargs.pop(alias)

            return func(*args, **kwargs)

        return wrapper

    return decorator
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
anemoi/utils/registry.py CHANGED
@@ -33,11 +33,12 @@ class Wrapper:
33
33
  class Registry:
34
34
  """A registry of factories"""
35
35
 
36
- def __init__(self, package):
36
+ def __init__(self, package, key="_type"):
37
37
 
38
38
  self.package = package
39
39
  self.registered = {}
40
40
  self.kind = package.split(".")[-1]
41
+ self.key = key
41
42
 
42
43
  def register(self, name: str, factory: callable = None):
43
44
 
@@ -86,6 +87,8 @@ class Registry:
86
87
  self.registered[name] = entry_point.load()
87
88
 
88
89
  if name not in self.registered:
90
+ for e in self.registered:
91
+ LOG.info(f"Registered: {e}")
89
92
  raise ValueError(f"Cannot load '{name}' from {self.package}")
90
93
 
91
94
  return self.registered[name]
@@ -96,3 +99,31 @@ class Registry:
96
99
 
97
100
  def __call__(self, name: str, *args, **kwargs):
98
101
  return self.create(name, *args, **kwargs)
102
+
103
def from_config(self, config, *args, **kwargs):
    """Create an object from a configuration entry.

    Three forms of ``config`` are accepted:

    * a string: the name to create, with no extra keyword arguments;
    * a dictionary containing ``self.key``: that entry is the name and the
      remaining entries are passed as keyword arguments;
    * a dictionary with a single entry ``{name: value}``: a dictionary value
      is passed as keyword arguments, a list value as extra positional
      arguments, and any other value as one extra positional argument.

    Parameters
    ----------
    config : str or dict
        The configuration entry.
    *args
        Positional arguments forwarded to ``self.create`` before any taken from ``config``.
    **kwargs
        Keyword arguments forwarded to ``self.create`` in addition to those taken from ``config``.

    Returns
    -------
    object
        Whatever ``self.create`` returns.

    Raises
    ------
    ValueError
        If ``config`` is neither a string nor a dictionary, or is a dictionary
        that has several entries and no ``self.key`` entry.
    """
    if isinstance(config, str):
        config = {config: {}}

    if not isinstance(config, dict):
        raise ValueError(f"Invalid config: {config}")

    if self.key in config:
        # Work on a copy so the caller's configuration keeps its type key.
        config = config.copy()
        key = config.pop(self.key)
        return self.create(key, *args, **config, **kwargs)

    if len(config) == 1:
        key, value = next(iter(config.items()))

        if isinstance(value, dict):
            return self.create(key, *args, **value, **kwargs)

        if isinstance(value, list):
            return self.create(key, *args, *value, **kwargs)

        return self.create(key, *args, value, **kwargs)

    raise ValueError(
        f"Entry '{config}' must either be a string, a dictionary with a single entry, or a dictionary with a '{self.key}' key"
    )
@@ -0,0 +1,328 @@
1
+ # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
2
+ # This software is licensed under the terms of the Apache Licence Version 2.0
3
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
4
+ # In applying this licence, ECMWF does not waive the privileges and immunities
5
+ # granted to it by virtue of its status as an intergovernmental organisation
6
+ # nor does it submit to any jurisdiction.
7
+
8
+ import concurrent.futures
9
+ import logging
10
+ import os
11
+ import shutil
12
+ from abc import abstractmethod
13
+
14
+ import tqdm
15
+
16
+ from ..humanize import bytes_to_human
17
+
18
+ LOGGER = logging.getLogger(__name__)
19
+
20
+
21
+ def _ignore(number_of_files, total_size, total_transferred, transfering):
22
+ pass
23
+
24
+
25
class Loader:
    """Common driver for transferring a single file or a whole folder.

    Subclasses supply the primitives declared below, an ``action`` label used
    in log messages, and a ``_transfer_file`` method that performs the actual
    copy of one file.
    """

    def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1, progress=None):
        """Transfer every file listed under ``source`` to ``target`` using a thread pool.

        ``progress`` is called as
        ``progress(number_of_files, total_size, total_transferred, transferring)``.
        Any exception raised by a file transfer cancels the pending transfers
        and is re-raised.
        """
        assert verbosity == 1, verbosity

        report = _ignore if progress is None else progress
        verbose = verbosity > 0

        # from boto3.s3.transfer import TransferConfig
        # config = TransferConfig(use_threads=False)
        config = None
        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
            try:
                if verbose:
                    LOGGER.info(f"{self.action} {source} to {target}")

                total_size = 0
                total_transferred = 0
                futures = []

                for name in self.list_source(source):
                    job = executor.submit(
                        self.transfer_file,
                        source=self.source_path(name, source),
                        target=self.target_path(name, source, target),
                        overwrite=overwrite,
                        resume=resume,
                        verbosity=verbosity - 1,
                        config=config,
                    )
                    futures.append(job)
                    total_size += self.source_size(name)

                    # Only every 10000 submissions: report and look for early failures.
                    if len(futures) % 10000 != 0:
                        continue

                    report(len(futures), total_size, 0, False)

                    if verbose:
                        LOGGER.info(f"Preparing transfer, {len(futures):,} files... ({bytes_to_human(total_size)})")
                    finished, _ = concurrent.futures.wait(
                        futures,
                        timeout=0.001,
                        return_when=concurrent.futures.FIRST_EXCEPTION,
                    )
                    # Trigger exceptions if any
                    for job in finished:
                        job.result()

                number_of_files = len(futures)
                report(number_of_files, total_size, 0, True)

                if verbose:
                    LOGGER.info(f"{self.action} {number_of_files:,} files ({bytes_to_human(total_size)})")
                    with tqdm.tqdm(total=total_size, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
                        for job in concurrent.futures.as_completed(futures):
                            size = job.result()
                            pbar.update(size)
                            total_transferred += size
                            report(number_of_files, total_size, total_transferred, True)
                else:
                    for job in concurrent.futures.as_completed(futures):
                        size = job.result()
                        total_transferred += size
                        report(number_of_files, total_size, total_transferred, True)

            except Exception:
                executor.shutdown(wait=False, cancel_futures=True)
                raise

    def transfer_file(self, source, target, overwrite, resume, verbosity, threads=1, progress=None, config=None):
        """Transfer one file via ``self._transfer_file``, logging any failure before re-raising it.

        ``progress`` is accepted but not forwarded.
        """
        try:
            result = self._transfer_file(source, target, overwrite, resume, verbosity, threads=threads, config=config)
        except Exception as e:
            LOGGER.exception(f"Error transferring {source} to {target}")
            LOGGER.error(e)
            raise
        return result

    @abstractmethod
    def list_source(self, source):
        """Iterate over the names of the files to transfer from ``source``."""
        raise NotImplementedError

    @abstractmethod
    def source_path(self, local_path, source):
        """Return the path handed to ``transfer_file`` as its source."""
        raise NotImplementedError

    @abstractmethod
    def target_path(self, source_path, source, target):
        """Return the path handed to ``transfer_file`` as its target."""
        raise NotImplementedError

    @abstractmethod
    def source_size(self, local_path):
        """Return the size of one listed file; sizes are summed into the total."""
        raise NotImplementedError

    @abstractmethod
    def copy(self, source, target, **kwargs):
        """Copy ``source`` to ``target``."""
        raise NotImplementedError

    @abstractmethod
    def get_temporary_target(self, target, pattern):
        """Return the target to copy into, derived from ``target`` and ``pattern``."""
        raise NotImplementedError

    @abstractmethod
    def rename_target(self, target, temporary_target):
        """Rename one target into the other."""
        raise NotImplementedError
131
+
132
+
133
class BaseDownload(Loader):
    """Loader whose targets are handled with local filesystem calls (``os`` / ``shutil``)."""

    action = "Downloading"

    @abstractmethod
    def copy(self, source, target, **kwargs):
        """Copy ``source`` to ``target``; must be provided by subclasses."""
        raise NotImplementedError

    def get_temporary_target(self, target, pattern):
        """Build the path to download into from ``target`` and a format ``pattern``.

        ``pattern`` may use the ``{dirname}`` and ``{basename}`` fields, which
        are the two halves of ``os.path.split(target)``.
        """
        dirname, basename = os.path.split(target)
        if not dirname and pattern.startswith("{dirname}/"):
            # A bare name such as "data.zarr" has an empty dirname, so
            # "{dirname}/{basename}" would expand to "/data.zarr" at the
            # filesystem root. Dropping the separator keeps the path relative.
            pattern = pattern[len("{dirname}/") :]
        return pattern.format(dirname=dirname, basename=basename)

    def rename_target(self, target, new_target):
        """Rename ``target`` to ``new_target`` with ``os.rename``."""
        os.rename(target, new_target)

    def delete_target(self, target):
        """Delete ``target`` if it exists, whether it is a directory tree or a single file."""
        if not os.path.exists(target):
            return
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target)
        else:
            # shutil.rmtree rejects regular files and symbolic links.
            os.remove(target)
150
+
151
+
152
class BaseUpload(Loader):
    """Loader whose source is read from the local filesystem."""

    action = "Uploading"

    def copy(self, source, target, **kwargs):
        """Transfer ``source`` as a folder when it is a directory, otherwise as a single file."""
        send = self.transfer_folder if os.path.isdir(source) else self.transfer_file
        send(source=source, target=target, **kwargs)

    def list_source(self, source):
        """Yield the path of every file found by walking ``source``."""
        for dirpath, _dirnames, filenames in os.walk(source):
            yield from (os.path.join(dirpath, filename) for filename in filenames)

    def source_path(self, local_path, source):
        """Return ``local_path`` unchanged."""
        return local_path

    def target_path(self, source_path, source, target):
        """Return ``target`` joined with the path of ``source_path`` relative to ``source``."""
        return os.path.join(target, os.path.relpath(source_path, source))

    def source_size(self, local_path):
        """Return the size of ``local_path`` as reported by ``os.path.getsize``."""
        return os.path.getsize(local_path)
176
+
177
+
178
class TransferMethodNotImplementedError(NotImplementedError):
    """Raised when no transfer implementation exists for a source/target combination."""
180
+
181
+
182
class Transfer:
    """This is the internal API and should not be used directly. Use the transfer function instead.

    Holds the options of one transfer and delegates the work to the loader
    class selected by ``_find_transfer_class`` for the given source and target.
    """

    TransferMethodNotImplementedError = TransferMethodNotImplementedError

    def __init__(
        self,
        source,
        target,
        overwrite=False,
        resume=False,
        verbosity=1,
        threads=1,
        progress=None,
        temporary_target=False,
    ):
        """Store the options and instantiate the loader.

        Parameters
        ----------
        source : str
            Where to copy from.
        target : str
            Where to copy to; ``"."`` is replaced by the base name of ``source``.
        overwrite, resume, verbosity, threads, progress
            Forwarded unchanged to the loader's ``copy`` method.
        temporary_target : bool or str, optional
            ``False``, ``True``, one of the shortcuts ``"-tmp/*"``, ``"*-tmp"``,
            ``"tmp-*"``, or a format string using ``{dirname}`` and ``{basename}``.
        """
        if target == ".":
            target = os.path.basename(source)

        # Known shortcuts expand to a format pattern; any other value is
        # taken to be a pattern already.
        temporary_target = {
            False: "{dirname}/{basename}",
            True: "{dirname}-downloading/{basename}",
            "-tmp/*": "{dirname}-tmp/{basename}",
            "*-tmp": "{dirname}/{basename}-tmp",
            "tmp-*": "{dirname}/tmp-{basename}",
        }.get(temporary_target, temporary_target)
        assert isinstance(temporary_target, str), (type(temporary_target), temporary_target)

        self.source = source
        self.target = target
        self.overwrite = overwrite
        self.resume = resume
        self.verbosity = verbosity
        self.threads = threads
        self.progress = progress
        self.temporary_target = temporary_target

        cls = _find_transfer_class(self.source, self.target)
        self.loader = cls()

    def run(self):
        """Copy the source to the (possibly temporary) target, then rename it into place.

        Returns
        -------
        Transfer
            This object.
        """

        target = self.loader.get_temporary_target(self.target, self.temporary_target)
        if target != self.target:
            LOGGER.info(f"Using temporary target {target} to copy to {self.target}")

        if self.overwrite:
            # Delete the final target if it exists. This must be self.target:
            # the temporary target is handled separately just below.
            LOGGER.info(f"Deleting {self.target}")
            self.delete_target(self.target)

            # carefully delete the temporary target if it exists
            head, tail = os.path.split(self.target)
            head_, tail_ = os.path.split(target)
            if not head_.startswith(head) or tail not in tail_:
                LOGGER.info(f"{target} is too different from {self.target} to delete it automatically.")
            else:
                self.delete_target(target)

        self.loader.copy(
            self.source,
            target,
            overwrite=self.overwrite,
            resume=self.resume,
            verbosity=self.verbosity,
            threads=self.threads,
            progress=self.progress,
        )

        self.rename_target(target, self.target)

        return self

    def rename_target(self, target, new_target):
        """Ask the loader to rename ``target`` to ``new_target``; does nothing when they are equal."""
        if target != new_target:
            LOGGER.info(f"Renaming temporary target {target} into {self.target}")
            return self.loader.rename_target(target, new_target)

    def delete_target(self, target):
        """Ask the loader to delete ``target``."""
        return self.loader.delete_target(target)
262
+
263
+
264
def _find_transfer_class(source, target):
    """Return the loader class for copying ``source`` to ``target``.

    A location is SSH when it starts with ``ssh://``, S3 when it starts with
    ``s3://``, and local otherwise. Raises ``TransferMethodNotImplementedError``
    for any combination other than local to SSH, S3 to local, or local to S3.
    """
    from_ssh, from_s3 = (source.startswith(prefix) for prefix in ("ssh://", "s3://"))
    into_ssh, into_s3 = (target.startswith(prefix) for prefix in ("ssh://", "s3://"))

    from_local = not (from_ssh or from_s3)
    into_local = not (into_ssh or into_s3)

    # check that exactly one source type and one target type is specified
    assert sum([into_ssh, into_local, into_s3]) == 1, (into_ssh, into_local, into_s3)
    assert sum([from_ssh, from_local, from_s3]) == 1, (from_ssh, from_local, from_s3)

    # The implementations are imported only when they are selected.
    if from_local:
        if into_ssh:  # local -> ssh
            from .ssh import RsyncUpload

            return RsyncUpload

        if into_s3:  # local -> S3
            from .s3 import S3Upload

            return S3Upload

    elif from_s3 and into_local:  # local <- S3
        from .s3 import S3Download

        return S3Download

    raise TransferMethodNotImplementedError(f"Transfer from {source} to {target} is not implemented")
294
+
295
+
296
+ # this is the public API
297
def transfer(*args, **kwargs) -> "Transfer":
    """Transfer a file or a folder between a local path, S3 and an SSH remote.

    All arguments are passed to ``Transfer``, which is then run.

    Parameters
    ----------
    source : str
        A path to a local file or folder or a URL to a file or a folder on S3.
        The url should start with 's3://'.
    target : str
        A path to a local file or folder or a URL to a file or a folder on S3 or a remote folder.
        The url should start with 's3://' or 'ssh://'.
    overwrite : bool, optional
        If the data is already in the target location it will be overwritten.
        By default False
    resume : bool, optional
        If the data is already on S3 it will not be uploaded, unless the remote file has a different size.
        Ignored if the target is an SSH remote folder (ssh://).
        By default False
    verbosity : int, optional
        The level of verbosity, by default 1
    progress: callable, optional
        A callable that will be called with the number of files, the total size of the files, the total size
        transferred and a boolean indicating if the transfer has started. By default None
    threads : int, optional
        The number of threads to use when uploading a directory, by default 1
    temporary_target : bool, optional
        Experimental feature
        If True and if the target location supports it, the data will be uploaded to a temporary location
        then renamed to the final location. Supported by SSH and local targets, not supported by S3.
        By default False

    Returns
    -------
    Transfer
        The transfer object, after it has been run.
    """
    copier = Transfer(*args, **kwargs)
    copier.run()
    return copier