anemoi-utils 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of anemoi-utils might be problematic. Click here for more details.
- anemoi/utils/__init__.py +3 -1
- anemoi/utils/__main__.py +2 -3
- anemoi/utils/_version.py +2 -2
- anemoi/utils/checkpoints.py +74 -9
- anemoi/utils/cli.py +14 -2
- anemoi/utils/commands/__init__.py +2 -3
- anemoi/utils/commands/config.py +0 -1
- anemoi/utils/compatibility.py +76 -0
- anemoi/utils/config.py +3 -2
- anemoi/utils/dates.py +7 -2
- anemoi/utils/mars/__init__.py +3 -1
- anemoi/utils/registry.py +129 -0
- anemoi/utils/remote/__init__.py +328 -0
- anemoi/utils/remote/s3.py +386 -0
- anemoi/utils/remote/ssh.py +133 -0
- anemoi/utils/s3.py +47 -544
- {anemoi_utils-0.4.3.dist-info → anemoi_utils-0.4.5.dist-info}/METADATA +2 -1
- anemoi_utils-0.4.5.dist-info/RECORD +32 -0
- {anemoi_utils-0.4.3.dist-info → anemoi_utils-0.4.5.dist-info}/WHEEL +1 -1
- anemoi_utils-0.4.3.dist-info/RECORD +0 -27
- {anemoi_utils-0.4.3.dist-info → anemoi_utils-0.4.5.dist-info}/LICENSE +0 -0
- {anemoi_utils-0.4.3.dist-info → anemoi_utils-0.4.5.dist-info}/entry_points.txt +0 -0
- {anemoi_utils-0.4.3.dist-info → anemoi_utils-0.4.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
|
|
2
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
3
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
4
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
5
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
6
|
+
# nor does it submit to any jurisdiction.
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import random
|
|
11
|
+
import shlex
|
|
12
|
+
import subprocess
|
|
13
|
+
|
|
14
|
+
from ..humanize import bytes_to_human
|
|
15
|
+
from . import BaseUpload
|
|
16
|
+
|
|
17
|
+
LOGGER = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def call_process(*args):
    """Run an external command and return its standard output.

    Parameters
    ----------
    *args
        The program name followed by its arguments. They are handed to
        ``subprocess.Popen`` as a sequence, so no local shell is involved.

    Returns
    -------
    str
        The command's standard output, decoded as UTF-8 and stripped of
        leading and trailing whitespace.

    Raises
    ------
    RuntimeError
        If the command exits with a non-zero return code. The message
        contains the command line and the decoded standard error.
    """
    proc = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        # Decode the captured streams so the diagnostics are readable text
        # rather than a ``b'...'`` bytes repr; undecodable bytes must not hide
        # the original failure, hence errors="replace".
        captured = stdout.decode("utf-8", errors="replace").strip()
        if captured:
            print(captured)
        # str() keeps the message buildable when an argument is not a string.
        command = " ".join(str(arg) for arg in args)
        details = stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(f"{command} failed: {details}")

    return stdout.decode("utf-8").strip()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SshBaseUpload(BaseUpload):
    """Common helpers for uploaders whose targets look like ``ssh://hostname:path``."""

    def _parse_target(self, target):
        """Split an ``ssh://hostname:path`` target into ``(hostname, path)``.

        The hostname part may list several hosts joined with ``+``; in that
        case one of them is picked at random on every call.

        Parameters
        ----------
        target : str
            The target URL, which must start with ``ssh://``.

        Returns
        -------
        tuple
            The selected hostname and the remote path.

        Raises
        ------
        ValueError
            If the target does not start with ``ssh://`` or has no ``:``
            separating the hostname from the path.
        """
        prefix = "ssh://"
        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if not target.startswith(prefix):
            raise ValueError(f"Invalid ssh target, expected '{prefix}hostname:path': {target}")

        # Split on the first colon only, so that remote paths which themselves
        # contain ':' (e.g. timestamps) are accepted.
        hostname, separator, path = target[len(prefix) :].partition(":")
        if not separator:
            raise ValueError(f"Invalid ssh target, expected '{prefix}hostname:path': {target}")

        if "+" in hostname:
            hostname = random.choice(hostname.split("+"))

        return hostname, path

    def get_temporary_target(self, target, pattern):
        """Build a temporary ``ssh://`` target next to ``target``.

        ``pattern`` is formatted with the ``dirname`` and ``basename`` of the
        remote path. The returned URL carries the single hostname selected
        while parsing ``target``.
        """
        hostname, path = self._parse_target(target)
        dirname, basename = os.path.split(path)
        path = pattern.format(dirname=dirname, basename=basename)
        return f"ssh://{hostname}:{path}"

    def rename_target(self, target, new_target):
        """Move the remote file at ``target`` to the path given by ``new_target``.

        The parent directory of the new path is created first. Both commands
        run on the host parsed from ``target``, i.e. the host that holds the
        file; only the path component of ``new_target`` is used, so a random
        pick among ``+``-separated hosts in ``new_target`` cannot send the
        ``mv`` to a different machine.
        """
        hostname, path = self._parse_target(target)
        _, new_path = self._parse_target(new_target)
        # Paths are quoted because ssh hands the command line to a remote shell.
        call_process("ssh", hostname, "mkdir", "-p", shlex.quote(os.path.dirname(new_path)))
        call_process("ssh", hostname, "mv", shlex.quote(path), shlex.quote(new_path))

    def delete_target(self, target):
        """Do nothing: remote deletion is currently disabled."""
        # NOTE(review): the implementation below is kept for reference; the
        # reason it was disabled is not visible in this file.
        # hostname, path = self._parse_target(target)
        # LOGGER.info(f"Deleting {target}")
        # call_process("ssh", hostname, "rm", "-rf", shlex.quote(path))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class RsyncUpload(SshBaseUpload):
    """Uploader that copies a single file to an ssh target using ``rsync``."""

    def _transfer_file(self, source, target, overwrite, resume, verbosity, threads, config=None):
        """Copy ``source`` to ``target`` with ``rsync -av --partial``.

        The remote parent directory is created over ssh before the copy.
        ``overwrite``, ``resume``, ``threads`` and ``config`` are accepted
        but not used by this method.

        Returns
        -------
        int
            The size of ``source`` in bytes.
        """
        hostname, path = self._parse_target(target)
        size = os.path.getsize(source)

        if verbosity > 0:
            LOGGER.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")

        # It would be nice to avoid two ssh calls, but folding the mkdir into
        # rsync is not possible: it requires a shell command and would not be safe.
        # # f"--rsync-path='mkdir -p {os.path.dirname(path)} && rsync'",
        remote_dir = os.path.dirname(path)
        call_process("ssh", hostname, "mkdir", "-p", shlex.quote(remote_dir))

        rsync_command = ["rsync", "-av", "--partial", source, f"{hostname}:{path}"]
        call_process(*rsync_command)

        return size
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ScpUpload(SshBaseUpload):
    """Uploader that copies a single file to an ssh target using ``scp``."""

    def _transfer_file(self, source, target, overwrite, resume, verbosity, threads, config=None):
        """Copy ``source`` to ``target`` with ``scp``.

        The size of any existing remote file is queried first with
        ``stat -c %s`` over ssh:

        - if the remote file exists and neither ``overwrite`` nor ``resume``
          is set, the transfer is refused;
        - if it exists with the same size and ``resume`` is set, the copy is
          skipped;
        - otherwise the file is (re-)uploaded, with a warning when the sizes
          differ.

        ``threads`` and ``config`` are accepted but not used by this method.

        Returns
        -------
        int
            The size of ``source`` in bytes.

        Raises
        ------
        ValueError
            If the remote file already exists and neither ``overwrite`` nor
            ``resume`` is set.
        """
        hostname, path = self._parse_target(target)

        size = os.path.getsize(source)

        if verbosity > 0:
            LOGGER.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")

        try:
            out = call_process("ssh", hostname, "stat", "-c", "%s", shlex.quote(path))
            remote_size = int(out)
        except RuntimeError:
            # A failing remote ``stat`` is taken to mean there is no remote file.
            remote_size = None

        if remote_size is not None:
            # Refuse before logging anything, so the "re-uploading" warning is
            # only emitted when an upload will actually be attempted.
            if not overwrite and not resume:
                raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")

            if remote_size != size:
                LOGGER.warning(
                    f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})"
                )
            elif resume:
                # Same size on both sides: consider the file already transferred.
                return size

        call_process("ssh", hostname, "mkdir", "-p", shlex.quote(os.path.dirname(path)))
        # NOTE(review): shlex.quote() wraps the whole "host:path" argument, so a
        # path with shell-special characters puts literal quotes around the
        # hostname as well. The right fix depends on whether scp runs in legacy
        # or SFTP mode - confirm before changing.
        call_process("scp", source, shlex.quote(f"{hostname}:{path}"))

        return size
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def upload(source, target, **kwargs) -> None:
    """Upload a local file or directory to an ssh target using rsync.

    Parameters
    ----------
    source : str
        Local path. A directory is sent with ``transfer_folder``; anything
        else is sent with ``transfer_file``.
    target : str
        The destination, passed through to the uploader unchanged.
    **kwargs
        Extra keyword arguments forwarded to the selected transfer method.
    """
    uploader = RsyncUpload()

    # Choose the transfer method from the kind of source, then make one call.
    transfer = uploader.transfer_folder if os.path.isdir(source) else uploader.transfer_file
    transfer(source=source, target=target, **kwargs)
|