anemoi-utils 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anemoi-utils might be problematic. Click here for more details.

@@ -0,0 +1,133 @@
1
+ # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
2
+ # This software is licensed under the terms of the Apache Licence Version 2.0
3
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
4
+ # In applying this licence, ECMWF does not waive the privileges and immunities
5
+ # granted to it by virtue of its status as an intergovernmental organisation
6
+ # nor does it submit to any jurisdiction.
7
+
8
+ import logging
9
+ import os
10
+ import random
11
+ import shlex
12
+ import subprocess
13
+
14
+ from ..humanize import bytes_to_human
15
+ from . import BaseUpload
16
+
17
+ LOGGER = logging.getLogger(__name__)
18
+
19
+
20
def call_process(*args):
    """Run an external command and return its standard output as text.

    Parameters
    ----------
    *args : str
        The program followed by its arguments. They are passed to the
        operating system as a list; no local shell is involved.

    Returns
    -------
    str
        The command's standard output, decoded as UTF-8 and stripped of
        leading/trailing whitespace.

    Raises
    ------
    RuntimeError
        If the command exits with a non-zero status. The message holds the
        command line and the decoded standard error output.
    """
    proc = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    if proc.returncode != 0:
        # Decode before reporting: interpolating raw bytes would render as
        # "b'...'" with escaped newlines, which makes remote errors hard to read.
        # errors="replace" keeps error reporting from failing on odd bytes.
        print(proc.stdout.decode("utf-8", errors="replace"))
        stderr = proc.stderr.decode("utf-8", errors="replace").strip()
        msg = f"{' '.join(args)} failed: {stderr}"
        raise RuntimeError(msg)

    return proc.stdout.decode("utf-8").strip()
33
+
34
+
35
class SshBaseUpload(BaseUpload):
    """Common target handling for uploaders that address ``ssh://host:path``."""

    def _parse_target(self, target):
        """Split an ``ssh://host:path`` target into ``(hostname, path)``.

        The host part may list several hosts separated by ``+``; one of them
        is then picked at random on every call.

        Raises
        ------
        AssertionError
            If ``target`` does not start with ``ssh://``.
        ValueError
            If there is no ``:`` separating the host from the path.
        """
        assert target.startswith("ssh://"), target

        # Split on the first colon only, so that a remote path which itself
        # contains ':' is kept intact instead of failing to unpack.
        hostname, path = target[len("ssh://"):].split(":", 1)

        if "+" in hostname:
            hostname = random.choice(hostname.split("+"))

        return hostname, path

    def get_temporary_target(self, target, pattern):
        """Return an ``ssh://`` target whose path is built from ``pattern``.

        ``pattern`` is formatted with the ``dirname`` and ``basename`` of the
        path found in ``target``.
        """
        hostname, path = self._parse_target(target)
        dirname, basename = os.path.split(path)
        path = pattern.format(dirname=dirname, basename=basename)
        return f"ssh://{hostname}:{path}"

    def rename_target(self, target, new_target):
        """Move the remote file at ``target`` to ``new_target`` over ssh.

        The parent directory of the new path is created first. Both remote
        commands run on the host parsed from ``new_target``.
        """
        _, path = self._parse_target(target)
        hostname, new_path = self._parse_target(new_target)
        # ssh hands the joined arguments to a remote shell, hence the quoting.
        call_process("ssh", hostname, "mkdir", "-p", shlex.quote(os.path.dirname(new_path)))
        call_process("ssh", hostname, "mv", shlex.quote(path), shlex.quote(new_path))

    def delete_target(self, target):
        """Do nothing; the remote removal below is currently disabled."""
        pass
        # hostname, path = self._parse_target(target)
        # LOGGER.info(f"Deleting {target}")
        # call_process("ssh", hostname, "rm", "-rf", shlex.quote(path))
66
+
67
+
68
class RsyncUpload(SshBaseUpload):
    """Uploader that copies a single file to an ssh target using ``rsync``."""

    def _transfer_file(self, source, target, overwrite, resume, verbosity, threads, config=None):
        """Copy ``source`` to the ``ssh://host:path`` ``target`` and return its size.

        The remote parent directory is created with a separate ssh call, then
        the file is sent with ``rsync -av --partial``. ``overwrite``,
        ``resume``, ``threads`` and ``config`` are not used by this method.

        Returns
        -------
        int
            Size of the local file in bytes.
        """
        host, remote_path = self._parse_target(target)
        nbytes = os.path.getsize(source)

        if verbosity > 0:
            LOGGER.info(f"{self.action} {source} to {target} ({bytes_to_human(nbytes)})")

        remote_dir = shlex.quote(os.path.dirname(remote_path))
        call_process("ssh", host, "mkdir", "-p", remote_dir)

        # It would be nice to avoid two ssh calls, but folding the mkdir into
        # rsync via --rsync-path='mkdir -p <dir> && rsync' requires a shell
        # command and would not be safe.
        rsync_command = ("rsync", "-av", "--partial", source, f"{host}:{remote_path}")
        call_process(*rsync_command)

        return nbytes
90
+
91
+
92
class ScpUpload(SshBaseUpload):
    """Uploader that copies a single file to an ssh target using ``scp``."""

    def _transfer_file(self, source, target, overwrite, resume, verbosity, threads, config=None):
        """Copy ``source`` to the ``ssh://host:path`` ``target`` and return its size.

        The remote file size is queried first with ``stat`` over ssh:

        * same size and ``resume`` set: nothing is transferred;
        * remote file exists and neither ``overwrite`` nor ``resume`` is set:
          ``ValueError`` is raised;
        * otherwise the file is (re-)uploaded.

        ``threads`` and ``config`` are not used by this method.

        Returns
        -------
        int
            Size of the local file in bytes.

        Raises
        ------
        ValueError
            If the target already exists and neither ``overwrite`` nor
            ``resume`` is set.
        """
        hostname, path = self._parse_target(target)

        size = os.path.getsize(source)

        if verbosity > 0:
            LOGGER.info(f"{self.action} {source} to {target} ({bytes_to_human(size)})")

        # A failing remote ``stat`` (RuntimeError from call_process) is taken
        # to mean that the remote file is not there.
        remote_size = None
        try:
            out = call_process("ssh", hostname, "stat", "-c", "%s", shlex.quote(path))
            remote_size = int(out)
        except RuntimeError:
            remote_size = None

        if remote_size is not None:
            if remote_size != size:
                LOGGER.warning(
                    f"{target} already exists, but with different size, re-uploading (remote={remote_size}, local={size})"
                )
            elif resume:
                # LOGGER.info(f"{target} already exists, skipping")
                return size

        if remote_size is not None and not overwrite and not resume:
            raise ValueError(f"{target} already exists, use 'overwrite' to replace or 'resume' to skip")

        call_process("ssh", hostname, "mkdir", "-p", shlex.quote(os.path.dirname(path)))

        # Quote only the path component. No local shell is involved, so
        # quoting the whole "host:path" string would put literal quote
        # characters into the hostname that scp parses.
        # NOTE(review): this protects the path when scp passes it through a
        # remote shell (legacy protocol); confirm behaviour with SFTP-mode scp.
        call_process("scp", source, f"{hostname}:{shlex.quote(path)}")

        return size
125
+
126
+
127
def upload(source, target, **kwargs) -> None:
    """Upload ``source`` to ``target`` using an ``RsyncUpload`` instance.

    A directory is sent with ``transfer_folder``; anything else is sent with
    ``transfer_file``. Extra keyword arguments are forwarded unchanged.
    """
    uploader = RsyncUpload()
    transfer = uploader.transfer_folder if os.path.isdir(source) else uploader.transfer_file
    transfer(source=source, target=target, **kwargs)