anemoi-utils 0.4.4__tar.gz → 0.4.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi_utils-0.4.5/.github/workflows/changelog-pr-update.yml +18 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.github/workflows/changelog-release-update.yml +1 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.pre-commit-config.yaml +3 -3
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/CHANGELOG.md +4 -3
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/PKG-INFO +2 -1
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/pyproject.toml +3 -4
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/__main__.py +2 -3
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/_version.py +2 -2
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/checkpoints.py +2 -2
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/commands/__init__.py +2 -3
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/commands/config.py +0 -1
- anemoi_utils-0.4.5/src/anemoi/utils/compatibility.py +76 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/mars/__init__.py +3 -1
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/registry.py +32 -1
- anemoi_utils-0.4.5/src/anemoi/utils/remote/__init__.py +328 -0
- {anemoi_utils-0.4.4/src/anemoi/utils → anemoi_utils-0.4.5/src/anemoi/utils/remote}/s3.py +42 -216
- anemoi_utils-0.4.5/src/anemoi/utils/remote/ssh.py +133 -0
- anemoi_utils-0.4.5/src/anemoi/utils/s3.py +63 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi_utils.egg-info/PKG-INFO +2 -1
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi_utils.egg-info/SOURCES.txt +12 -1
- anemoi_utils-0.4.5/tests/test-transfer-data/directory/b/c/x +1 -0
- anemoi_utils-0.4.5/tests/test-transfer-data/directory/b/y +1 -0
- anemoi_utils-0.4.5/tests/test-transfer-data/directory/exotic filename ;^/"'[=.,#]()/303/252/303/274/303/247/303/262/342/234/205.txt" +1 -0
- anemoi_utils-0.4.5/tests/test-transfer-data/directory/z +1 -0
- anemoi_utils-0.4.5/tests/test-transfer-data/file +1 -0
- anemoi_utils-0.4.5/tests/test_compatibility.py +32 -0
- anemoi_utils-0.4.5/tests/test_remote.py +175 -0
- anemoi_utils-0.4.4/.github/workflows/changelog-pr-update.yml +0 -18
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.gitattributes +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.github/CODEOWNERS +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.github/ci-hpc-config.yml +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.github/workflows/ci.yml +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.github/workflows/label-public-pr.yml +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.github/workflows/python-publish.yml +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.github/workflows/python-pull-request.yml +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.github/workflows/readthedocs-pr-update.yml +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.gitignore +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/.readthedocs.yaml +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/CONTRIBUTORS.md +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/LICENSE +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/README.md +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/Makefile +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/_static/logo.png +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/_static/style.css +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/_templates/.gitkeep +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/conf.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/index.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/installing.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/modules/checkpoints.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/modules/config.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/modules/dates.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/modules/grib.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/modules/humanize.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/modules/provenance.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/modules/s3.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/docs/modules/text.rst +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/setup.cfg +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/__init__.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/caching.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/cli.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/config.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/dates.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/grib.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/hindcasts.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/humanize.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/mars/mars.yaml +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/provenance.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/sanitise.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/sanitize.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/text.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi/utils/timer.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi_utils.egg-info/dependency_links.txt +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi_utils.egg-info/entry_points.txt +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi_utils.egg-info/requires.txt +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/src/anemoi_utils.egg-info/top_level.txt +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/tests/test_dates.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/tests/test_frequency.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/tests/test_provenance.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/tests/test_sanetise.py +0 -0
- {anemoi_utils-0.4.4 → anemoi_utils-0.4.5}/tests/test_utils.py +0 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# name: Check Changelog Update on PR
|
|
2
|
+
# on:
|
|
3
|
+
# pull_request:
|
|
4
|
+
# types: [assigned, opened, synchronize, reopened, labeled, unlabeled]
|
|
5
|
+
# branches:
|
|
6
|
+
# - main
|
|
7
|
+
# - develop
|
|
8
|
+
# paths-ignore:
|
|
9
|
+
# - .pre-commit-config.yaml
|
|
10
|
+
# - .readthedocs.yaml
|
|
11
|
+
# jobs:
|
|
12
|
+
# Check-Changelog:
|
|
13
|
+
# name: Check Changelog Action
|
|
14
|
+
# runs-on: ubuntu-20.04
|
|
15
|
+
# steps:
|
|
16
|
+
# - uses: tarides/changelog-check-action@v2
|
|
17
|
+
# with:
|
|
18
|
+
# changelog: CHANGELOG.md
|
|
@@ -27,7 +27,7 @@ repos:
|
|
|
27
27
|
- id: python-check-blanket-noqa # Check for # noqa: all
|
|
28
28
|
- id: python-no-log-warn # Check for log.warn
|
|
29
29
|
- repo: https://github.com/psf/black-pre-commit-mirror
|
|
30
|
-
rev: 24.
|
|
30
|
+
rev: 24.10.0
|
|
31
31
|
hooks:
|
|
32
32
|
- id: black
|
|
33
33
|
args: [--line-length=120]
|
|
@@ -40,7 +40,7 @@ repos:
|
|
|
40
40
|
- --force-single-line-imports
|
|
41
41
|
- --profile black
|
|
42
42
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
43
|
-
rev: v0.
|
|
43
|
+
rev: v0.7.2
|
|
44
44
|
hooks:
|
|
45
45
|
- id: ruff
|
|
46
46
|
args:
|
|
@@ -65,7 +65,7 @@ repos:
|
|
|
65
65
|
- id: docconvert
|
|
66
66
|
args: ["numpy"]
|
|
67
67
|
- repo: https://github.com/tox-dev/pyproject-fmt
|
|
68
|
-
rev: "
|
|
68
|
+
rev: "v2.5.0"
|
|
69
69
|
hooks:
|
|
70
70
|
- id: pyproject-fmt
|
|
71
71
|
- repo: https://github.com/jshwi/docsig # Check docstrings against function sig
|
|
@@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
8
8
|
Please add your functional changes to the appropriate section in the PR.
|
|
9
9
|
Keep it human-readable, your future self will thank you!
|
|
10
10
|
|
|
11
|
-
## [
|
|
11
|
+
## [0.4.4](https://github.com/ecmwf/anemoi-utils/compare/0.4.3...0.4.4) - 2024-11-01
|
|
12
12
|
|
|
13
13
|
## [0.4.3](https://github.com/ecmwf/anemoi-utils/compare/0.4.1...0.4.3) - 2024-10-26
|
|
14
14
|
|
|
@@ -20,7 +20,6 @@ Keep it human-readable, your future self will thank you!
|
|
|
20
20
|
- Optional renaming of subcommands via `command` attribute [#34](https://github.com/ecmwf/anemoi-utils/pull/34)
|
|
21
21
|
- `skip_on_hpc` pytest marker for tests that should not be run on HPC [36](https://github.com/ecmwf/anemoi-utils/pull/36)
|
|
22
22
|
|
|
23
|
-
|
|
24
23
|
## [0.4.1](https://github.com/ecmwf/anemoi-utils/compare/0.4.0...0.4.1) - 2024-10-23
|
|
25
24
|
|
|
26
25
|
## Fixed
|
|
@@ -51,7 +50,9 @@ Keep it human-readable, your future self will thank you!
|
|
|
51
50
|
- Changelog merge strategy- Codeowners file
|
|
52
51
|
- Create dependency on wcwidth. MIT licence.
|
|
53
52
|
- Add distribution name dictionary to provenance [#15](https://github.com/ecmwf/anemoi-utils/pull/15) & [#19](https://github.com/ecmwf/anemoi-utils/pull/19)
|
|
54
|
-
- Add
|
|
53
|
+
- Add anonymize() function.
|
|
54
|
+
- Add transfer to ssh:// target (experimental)
|
|
55
|
+
- Deprecated 'anemoi.utils.s3'
|
|
55
56
|
|
|
56
57
|
### Changed
|
|
57
58
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: anemoi-utils
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.5
|
|
4
4
|
Summary: A package to hold various functions to support training of ML models on ECMWF data.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License: Apache License
|
|
@@ -219,6 +219,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
219
219
|
Classifier: Programming Language :: Python :: 3.10
|
|
220
220
|
Classifier: Programming Language :: Python :: 3.11
|
|
221
221
|
Classifier: Programming Language :: Python :: 3.12
|
|
222
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
222
223
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
223
224
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
224
225
|
Requires-Python: >=3.9
|
|
@@ -1,14 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
# (C) Copyright 2024 ECMWF.
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
3
2
|
#
|
|
4
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
5
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
|
-
# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
|
|
11
|
-
|
|
12
10
|
[build-system]
|
|
13
11
|
requires = [ "setuptools>=60", "setuptools-scm>=8" ]
|
|
14
12
|
|
|
@@ -35,6 +33,7 @@ classifiers = [
|
|
|
35
33
|
"Programming Language :: Python :: 3.10",
|
|
36
34
|
"Programming Language :: Python :: 3.11",
|
|
37
35
|
"Programming Language :: Python :: 3.12",
|
|
36
|
+
"Programming Language :: Python :: 3.13",
|
|
38
37
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
39
38
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
40
39
|
]
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
# (C) Copyright 2024 ECMWF.
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
3
2
|
#
|
|
4
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
5
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
|
-
#
|
|
10
9
|
|
|
11
10
|
from anemoi.utils.cli import cli_main
|
|
12
11
|
from anemoi.utils.cli import make_parser
|
|
@@ -94,8 +94,8 @@ def load_metadata(path: str, *, supporting_arrays=False, name: str = DEFAULT_NAM
|
|
|
94
94
|
with zipfile.ZipFile(path, "r") as f:
|
|
95
95
|
metadata = json.load(f.open(metadata, "r"))
|
|
96
96
|
if supporting_arrays:
|
|
97
|
-
|
|
98
|
-
return metadata,
|
|
97
|
+
arrays = load_supporting_arrays(f, metadata.get("supporting_arrays_paths", {}))
|
|
98
|
+
return metadata, arrays
|
|
99
99
|
|
|
100
100
|
return metadata
|
|
101
101
|
else:
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
# (C) Copyright 2024 ECMWF.
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
3
2
|
#
|
|
4
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
5
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
|
-
#
|
|
10
9
|
|
|
11
10
|
import os
|
|
12
11
|
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import functools
|
|
13
|
+
from typing import Any
|
|
14
|
+
from typing import Callable
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def aliases(
    aliases: dict[str, str | list[str]] | None = None, **kwargs: str | list[str]
) -> Callable[[Callable], Callable]:
    """Alias keyword arguments in a function call.

    Allows for dynamically renaming keyword arguments in a function call.

    Parameters
    ----------
    aliases : dict[str, str | list[str]] | None, optional
        Key, value pair of aliases, with keys being the true name, and value being a str or list of aliases,
        by default None. The mapping is read only, it is never modified.
    **kwargs : str | list[str]
        Kwargs form of aliases. Entries given here take precedence over
        entries of ``aliases`` with the same true name.

    Returns
    -------
    Callable
        Decorator function that renames keyword arguments in a function call.

    Raises
    ------
    ValueError
        If the aliasing would result in duplicate keys. Raised when the
        decorated function is called, not when it is decorated.

    Examples
    --------
    ```python
    @aliases(a="b", c=["d", "e"])
    def func(a, c):
        return a, c

    func(a=1, c=2)  # (1, 2)
    func(b=1, d=2)  # (1, 2)
    ```

    """

    # Merge into a fresh dict: updating the mapping received from the caller
    # in place would silently leak the keyword aliases into the caller's object.
    merged = {**({} if aliases is None else aliases), **kwargs}

    # Invert {true name: alias(es)} into {alias: true name} for direct lookup.
    alias_to_name = {
        alias: name
        for name, names in merged.items()
        for alias in (names if isinstance(names, list) else [names])
    }

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # Snapshot the aliased keys first, `kwargs` is modified in the loop.
            for alias in [key for key in kwargs if key in alias_to_name]:
                name = alias_to_name[alias]
                # Also catches two aliases of the same name used together:
                # the first one has already been renamed at this point.
                if name in kwargs:
                    raise ValueError(
                        f"When aliasing {alias} with {name} duplicate keys were present. Cannot include both."
                    )
                kwargs[name] = kwargs.pop(alias)

            return func(*args, **kwargs)

        return wrapper

    return decorator
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# (C) Copyright 2024
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
2
3
|
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
4
|
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
4
6
|
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
8
|
# nor does it submit to any jurisdiction.
|
|
@@ -33,11 +33,12 @@ class Wrapper:
|
|
|
33
33
|
class Registry:
|
|
34
34
|
"""A registry of factories"""
|
|
35
35
|
|
|
36
|
-
def __init__(self, package):
|
|
36
|
+
def __init__(self, package, key="_type"):
|
|
37
37
|
|
|
38
38
|
self.package = package
|
|
39
39
|
self.registered = {}
|
|
40
40
|
self.kind = package.split(".")[-1]
|
|
41
|
+
self.key = key
|
|
41
42
|
|
|
42
43
|
def register(self, name: str, factory: callable = None):
|
|
43
44
|
|
|
@@ -86,6 +87,8 @@ class Registry:
|
|
|
86
87
|
self.registered[name] = entry_point.load()
|
|
87
88
|
|
|
88
89
|
if name not in self.registered:
|
|
90
|
+
for e in self.registered:
|
|
91
|
+
LOG.info(f"Registered: {e}")
|
|
89
92
|
raise ValueError(f"Cannot load '{name}' from {self.package}")
|
|
90
93
|
|
|
91
94
|
return self.registered[name]
|
|
@@ -96,3 +99,31 @@ class Registry:
|
|
|
96
99
|
|
|
97
100
|
def __call__(self, name: str, *args, **kwargs):
|
|
98
101
|
return self.create(name, *args, **kwargs)
|
|
102
|
+
|
|
103
|
+
def from_config(self, config, *args, **kwargs):
    """Create an object from a configuration entry.

    Three forms of ``config`` are accepted:

    - a string: the name of the factory, called without extra arguments;
    - a dictionary containing the ``self.key`` entry: that entry is the name
      of the factory, the other entries are passed as keyword arguments;
    - a dictionary with a single entry: the key is the name of the factory,
      the value holds its arguments (a dictionary is expanded as keyword
      arguments, a list as positional arguments, anything else is passed as
      a single positional argument).

    Parameters
    ----------
    config : str or dict
        The configuration entry. It is not modified.
    *args
        Positional arguments passed to ``self.create`` before the ones
        extracted from ``config``.
    **kwargs
        Keyword arguments passed to ``self.create`` in addition to the ones
        extracted from ``config``.

    Returns
    -------
    Any
        Whatever ``self.create`` returns.

    Raises
    ------
    ValueError
        If ``config`` is neither a string nor a dictionary, or if it is a
        dictionary that has no ``self.key`` entry and more than one entry
        (or none).
    """
    if isinstance(config, str):
        config = {config: {}}

    if not isinstance(config, dict):
        raise ValueError(f"Invalid config: {config}")

    if self.key in config:
        # Work on a copy so the caller's dictionary keeps its type entry.
        config = config.copy()
        key = config.pop(self.key)
        return self.create(key, *args, **config, **kwargs)

    if len(config) == 1:
        key, value = next(iter(config.items()))

        if isinstance(value, dict):
            return self.create(key, *args, **value, **kwargs)

        if isinstance(value, list):
            return self.create(key, *args, *value, **kwargs)

        return self.create(key, *args, value, **kwargs)

    raise ValueError(
        f"Entry '{config}' must either be a string, a dictionary with a single entry, or a dictionary with a '{self.key}' key"
    )
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
|
|
2
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
4
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
|
+
# nor does it submit to any jurisdiction.
|
|
7
|
+
|
|
8
|
+
import concurrent.futures
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import shutil
|
|
12
|
+
from abc import abstractmethod
|
|
13
|
+
|
|
14
|
+
import tqdm
|
|
15
|
+
|
|
16
|
+
from ..humanize import bytes_to_human
|
|
17
|
+
|
|
18
|
+
LOGGER = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _ignore(number_of_files, total_size, total_transferred, transfering):
|
|
22
|
+
pass
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Loader:
    """Common machinery for copying a file or a folder from a source to a target.

    Subclasses provide the per-file copy (``_transfer_file``) and the hooks
    declared below. The hooks are decorated with ``abstractmethod`` but, as
    this class does not use ``ABCMeta``, the decorator is not enforced: a
    missing hook only fails, with ``NotImplementedError``, when it is called.
    """

    def transfer_folder(self, *, source, target, overwrite=False, resume=False, verbosity=1, threads=1, progress=None):
        """Copy every file listed by ``list_source(source)`` using a thread pool.

        Parameters
        ----------
        source, target
            Source and target folders, passed to the ``source_path`` and
            ``target_path`` hooks to build the per-file locations.
        overwrite, resume
            Passed unchanged to ``transfer_file``.
        verbosity : int, optional
            Must be 1 (checked with an assertion); each file is transferred
            with ``verbosity - 1``.
        threads : int, optional
            Number of worker threads.
        progress : callable, optional
            Called as ``progress(number_of_files, total_size, total_transferred, started)``.
            Ignored if None.
        """
        assert verbosity == 1, verbosity

        report = _ignore if progress is None else progress
        log_enabled = verbosity > 0

        # No per-file transfer configuration is passed on at the moment.
        config = None

        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as pool:
            try:
                if log_enabled:
                    LOGGER.info(f"{self.action} {source} to {target}")

                total_size = 0
                total_transferred = 0
                jobs = []

                for name in self.list_source(source):
                    job = pool.submit(
                        self.transfer_file,
                        source=self.source_path(name, source),
                        target=self.target_path(name, source, target),
                        overwrite=overwrite,
                        resume=resume,
                        verbosity=verbosity - 1,
                        config=config,
                    )
                    jobs.append(job)
                    total_size += self.source_size(name)

                    if len(jobs) % 10000 != 0:
                        continue

                    # Every 10,000 files: report, then briefly poll the jobs
                    # so that a failure is raised before everything is queued.
                    report(len(jobs), total_size, 0, False)
                    if log_enabled:
                        LOGGER.info(f"Preparing transfer, {len(jobs):,} files... ({bytes_to_human(total_size)})")

                    finished, _ = concurrent.futures.wait(
                        jobs,
                        timeout=0.001,
                        return_when=concurrent.futures.FIRST_EXCEPTION,
                    )
                    for job in finished:
                        job.result()  # re-raises the exception of a failed job

                number_of_files = len(jobs)
                report(number_of_files, total_size, 0, True)

                if log_enabled:
                    LOGGER.info(f"{self.action} {number_of_files:,} files ({bytes_to_human(total_size)})")
                    with tqdm.tqdm(total=total_size, unit="B", unit_scale=True, unit_divisor=1024) as pbar:
                        for job in concurrent.futures.as_completed(jobs):
                            size = job.result()
                            pbar.update(size)
                            total_transferred += size
                            report(number_of_files, total_size, total_transferred, True)
                else:
                    for job in concurrent.futures.as_completed(jobs):
                        size = job.result()
                        total_transferred += size
                        report(number_of_files, total_size, total_transferred, True)

            except Exception:
                # Drop the jobs that have not started yet before propagating.
                pool.shutdown(wait=False, cancel_futures=True)
                raise

    def transfer_file(self, source, target, overwrite, resume, verbosity, threads=1, progress=None, config=None):
        """Copy one file with ``_transfer_file``, logging any failure before re-raising it.

        ``progress`` is accepted but not forwarded to ``_transfer_file``.
        Returns whatever ``_transfer_file`` returns; ``transfer_folder`` sums
        these values as sizes.
        """
        try:
            return self._transfer_file(source, target, overwrite, resume, verbosity, threads=threads, config=config)
        except Exception as error:
            LOGGER.exception(f"Error transferring {source} to {target}")
            LOGGER.error(error)
            raise

    @abstractmethod
    def list_source(self, source):
        """Iterate over the names of the files found in ``source``."""
        raise NotImplementedError

    @abstractmethod
    def source_path(self, local_path, source):
        """Return the source location of a file yielded by ``list_source``."""
        raise NotImplementedError

    @abstractmethod
    def target_path(self, source_path, source, target):
        """Return the target location of a file yielded by ``list_source``."""
        raise NotImplementedError

    @abstractmethod
    def source_size(self, local_path):
        """Return the size of a file yielded by ``list_source``."""
        raise NotImplementedError

    @abstractmethod
    def copy(self, source, target, **kwargs):
        """Copy ``source`` (file or folder) to ``target``."""
        raise NotImplementedError

    @abstractmethod
    def get_temporary_target(self, target, pattern):
        """Return the location to copy to before the final rename to ``target``."""
        raise NotImplementedError

    @abstractmethod
    def rename_target(self, target, temporary_target):
        """Rename one target location into the other."""
        raise NotImplementedError
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class BaseDownload(Loader):
    """Base class of the loaders that write to the local file system."""

    action = "Downloading"

    @abstractmethod
    def copy(self, source, target, **kwargs):
        """Copy ``source`` to the local ``target``; provided by subclasses."""
        raise NotImplementedError

    def get_temporary_target(self, target, pattern):
        """Return the path to download to before the final rename.

        Parameters
        ----------
        target : str
            The final local path.
        pattern : str
            A format string using the ``{dirname}`` and ``{basename}`` of ``target``.

        Returns
        -------
        str
            The formatted path. It is equal to ``target`` when the pattern is
            ``"{dirname}/{basename}"``.
        """
        dirname, basename = os.path.split(target)
        if not dirname:
            # A bare file name has no directory part: "{dirname}/..." would
            # expand to "/...", a path at the root of the file system.
            pattern = pattern.removeprefix("{dirname}/")
        return pattern.format(dirname=dirname, basename=basename)

    def rename_target(self, target, new_target):
        """Rename the local path ``target`` into ``new_target``."""
        os.rename(target, new_target)

    def delete_target(self, target):
        """Delete the local ``target``, file or folder, if it exists."""
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target)
        elif os.path.lexists(target):
            # shutil.rmtree() only accepts real directories: plain files and
            # symbolic links have to be removed with os.remove().
            os.remove(target)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class BaseUpload(Loader):
    """Base class of the loaders that read from the local file system."""

    action = "Uploading"

    def copy(self, source, target, **kwargs):
        """Upload ``source``: a directory goes through ``transfer_folder``, anything else through ``transfer_file``."""
        handler = self.transfer_folder if os.path.isdir(source) else self.transfer_file
        handler(source=source, target=target, **kwargs)

    def list_source(self, source):
        """Yield the path of every file found under ``source``, walking it recursively."""
        for directory, _, filenames in os.walk(source):
            yield from (os.path.join(directory, filename) for filename in filenames)

    def source_path(self, local_path, source):
        """Return ``local_path`` unchanged: listed files are already local paths."""
        return local_path

    def target_path(self, source_path, source, target):
        """Return ``target`` joined with the path of ``source_path`` relative to ``source``."""
        return os.path.join(target, os.path.relpath(source_path, source))

    def source_size(self, local_path):
        """Return the size in bytes of the local file."""
        return os.stat(local_path).st_size
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class TransferMethodNotImplementedError(NotImplementedError):
    """Raised when no transfer is implemented for a given source and target."""
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class Transfer:
    """This is the internal API and should not be used directly. Use the transfer function instead."""

    # Exposed on the class so callers can catch it without a separate import.
    TransferMethodNotImplementedError = TransferMethodNotImplementedError

    def __init__(
        self,
        source,
        target,
        overwrite=False,
        resume=False,
        verbosity=1,
        threads=1,
        progress=None,
        temporary_target=False,
    ):
        """Prepare a transfer; nothing is copied until ``run()`` is called.

        Parameters
        ----------
        source : str
            Where to copy from.
        target : str
            Where to copy to. ``"."`` is replaced by the base name of ``source``.
        overwrite, resume, verbosity, threads, progress
            Stored and passed to the loader when the transfer is run.
        temporary_target : bool or str, optional
            One of ``False``, ``True``, ``"-tmp/*"``, ``"*-tmp"``, ``"tmp-*"``,
            or a format string using ``{dirname}`` and ``{basename}``.

        Raises
        ------
        TransferMethodNotImplementedError
            If no loader handles this combination of source and target.
        """
        if target == ".":
            target = os.path.basename(source)

        # Translate the shortcuts into format strings; any other value is
        # expected to be a format string already.
        temporary_target = {
            False: "{dirname}/{basename}",
            True: "{dirname}-downloading/{basename}",
            "-tmp/*": "{dirname}-tmp/{basename}",
            "*-tmp": "{dirname}/{basename}-tmp",
            "tmp-*": "{dirname}/tmp-{basename}",
        }.get(temporary_target, temporary_target)
        assert isinstance(temporary_target, str), (type(temporary_target), temporary_target)

        self.source = source
        self.target = target
        self.overwrite = overwrite
        self.resume = resume
        self.verbosity = verbosity
        self.threads = threads
        self.progress = progress
        self.temporary_target = temporary_target

        cls = _find_transfer_class(self.source, self.target)
        self.loader = cls()

    def run(self):
        """Copy the source to the (possibly temporary) target, then rename it to the final target.

        Returns
        -------
        Transfer
            This object.
        """

        target = self.loader.get_temporary_target(self.target, self.temporary_target)
        if target != self.target:
            LOGGER.info(f"Using temporary target {target} to copy to {self.target}")

        if self.overwrite:
            # delete the target if it exists
            # (the final one, as logged: deleting only the temporary target
            # would leave the existing data in the way of the final rename)
            LOGGER.info(f"Deleting {self.target}")
            self.delete_target(self.target)

            if target != self.target:
                # carefully delete the temporary target if it exists
                head, tail = os.path.split(self.target)
                head_, tail_ = os.path.split(target)
                if not head_.startswith(head) or tail not in tail_:
                    LOGGER.info(f"{target} is too different from {self.target} to delete it automatically.")
                else:
                    self.delete_target(target)

        self.loader.copy(
            self.source,
            target,
            overwrite=self.overwrite,
            resume=self.resume,
            verbosity=self.verbosity,
            threads=self.threads,
            progress=self.progress,
        )

        self.rename_target(target, self.target)

        return self

    def rename_target(self, target, new_target):
        """Ask the loader to rename ``target`` into ``new_target``.

        The loader is called even when both are identical; only the log
        message is skipped in that case.
        """
        if target != new_target:
            LOGGER.info(f"Renaming temporary target {target} into {self.target}")
        return self.loader.rename_target(target, new_target)

    def delete_target(self, target):
        """Ask the loader to delete ``target``."""
        return self.loader.delete_target(target)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _find_transfer_class(source, target):
    """Return the loader class able to copy ``source`` to ``target``.

    Locations starting with ``ssh://`` or ``s3://`` are remote, all others
    are local. The implementation modules are imported only when needed.

    Raises
    ------
    TransferMethodNotImplementedError
        If the combination is not one of local to SSH, local to S3 or S3 to local.
    """
    from_ssh = source.startswith("ssh://")
    from_s3 = source.startswith("s3://")
    from_local = not from_ssh and not from_s3

    into_ssh = target.startswith("ssh://")
    into_s3 = target.startswith("s3://")
    into_local = not into_ssh and not into_s3

    # check that exactly one source type and one target type is specified
    assert sum([into_ssh, into_local, into_s3]) == 1, (into_ssh, into_local, into_s3)
    assert sum([from_ssh, from_local, from_s3]) == 1, (from_ssh, from_local, from_s3)

    if from_local:
        if into_ssh:  # local -> ssh
            from .ssh import RsyncUpload

            return RsyncUpload

        if into_s3:  # local -> S3
            from .s3 import S3Upload

            return S3Upload

    elif from_s3 and into_local:  # local <- S3
        from .s3 import S3Download

        return S3Download

    raise TransferMethodNotImplementedError(f"Transfer from {source} to {target} is not implemented")
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# this is the public API
|
|
297
|
+
def transfer(*args, **kwargs) -> "Transfer":
    """Copy a file or a folder between the local file system and S3 or an SSH remote.

    All the arguments are passed to ``Transfer``, which is then run.

    Parameters
    ----------
    source : str
        A path to a local file or folder or a URL to a file or a folder on S3.
        The url should start with 's3://'.
    target : str
        A path to a local file or folder or a URL to a file or a folder on S3 or a remote folder.
        The url should start with 's3://' or 'ssh://'.
    overwrite : bool, optional
        If the data is already in the target location it will be overwritten.
        By default False
    resume : bool, optional
        If the data is already on S3 it will not be uploaded, unless the remote file has a different size.
        Ignored if the target is an SSH remote folder (ssh://).
        By default False
    verbosity : int, optional
        The level of verbosity, by default 1
    progress : callable, optional
        A callable that will be called with the number of files, the total size of the files, the total size
        transferred and a boolean indicating if the transfer has started. By default None
    threads : int, optional
        The number of threads to use when uploading a directory, by default 1
    temporary_target : bool or str, optional
        Experimental feature
        If True and if the target location supports it, the data will be uploaded to a temporary location
        then renamed to the final location. Supported by SSH and local targets, not supported by S3.
        One of the strings '-tmp/*', '*-tmp' or 'tmp-*', or a format string using '{dirname}' and
        '{basename}', can be given instead of True to choose the temporary location.
        By default False

    Returns
    -------
    Transfer
        The transfer object, once the transfer has been run.
    """
    copier = Transfer(*args, **kwargs)
    copier.run()
    return copier
|