anemoi-utils 0.4.27__py3-none-any.whl → 0.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/utils/_version.py +2 -2
- anemoi/utils/dates.py +11 -0
- anemoi/utils/mlflow/__init__.py +8 -0
- anemoi/utils/mlflow/auth.py +255 -0
- anemoi/utils/mlflow/client.py +76 -0
- anemoi/utils/mlflow/utils.py +44 -0
- anemoi/utils/sanitise.py +35 -45
- {anemoi_utils-0.4.27.dist-info → anemoi_utils-0.4.29.dist-info}/METADATA +7 -2
- {anemoi_utils-0.4.27.dist-info → anemoi_utils-0.4.29.dist-info}/RECORD +13 -9
- {anemoi_utils-0.4.27.dist-info → anemoi_utils-0.4.29.dist-info}/WHEEL +0 -0
- {anemoi_utils-0.4.27.dist-info → anemoi_utils-0.4.29.dist-info}/entry_points.txt +0 -0
- {anemoi_utils-0.4.27.dist-info → anemoi_utils-0.4.29.dist-info}/licenses/LICENSE +0 -0
- {anemoi_utils-0.4.27.dist-info → anemoi_utils-0.4.29.dist-info}/top_level.txt +0 -0
anemoi/utils/_version.py
CHANGED
anemoi/utils/dates.py
CHANGED
|
@@ -199,6 +199,15 @@ def as_timedelta(frequency: Union[int, str, datetime.timedelta]) -> datetime.tim
|
|
|
199
199
|
except ValueError:
|
|
200
200
|
pass
|
|
201
201
|
|
|
202
|
+
if frequency.startswith(" ") or frequency.startswith(" "):
|
|
203
|
+
frequency = frequency.strip()
|
|
204
|
+
|
|
205
|
+
if frequency.startswith("-"):
|
|
206
|
+
return -as_timedelta(frequency[1:])
|
|
207
|
+
|
|
208
|
+
if frequency.startswith("+"):
|
|
209
|
+
return as_timedelta(frequency[1:])
|
|
210
|
+
|
|
202
211
|
if re.match(r"^\d+[hdms]$", frequency, re.IGNORECASE):
|
|
203
212
|
unit = frequency[-1].lower()
|
|
204
213
|
v = int(frequency[:-1])
|
|
@@ -261,6 +270,8 @@ def frequency_to_string(frequency: datetime.timedelta) -> str:
|
|
|
261
270
|
frequency = frequency_to_timedelta(frequency)
|
|
262
271
|
|
|
263
272
|
total_seconds = frequency.total_seconds()
|
|
273
|
+
if total_seconds < 0:
|
|
274
|
+
return f"-{frequency_to_string(-frequency)}"
|
|
264
275
|
assert int(total_seconds) == total_seconds, total_seconds
|
|
265
276
|
total_seconds = int(total_seconds)
|
|
266
277
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import os
|
|
15
|
+
import time
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from datetime import timezone
|
|
18
|
+
from functools import wraps
|
|
19
|
+
from getpass import getpass
|
|
20
|
+
from typing import TYPE_CHECKING
|
|
21
|
+
|
|
22
|
+
import requests
|
|
23
|
+
from requests.exceptions import HTTPError
|
|
24
|
+
|
|
25
|
+
from ..config import config_path
|
|
26
|
+
from ..config import load_config
|
|
27
|
+
from ..config import save_config
|
|
28
|
+
from ..remote import robust
|
|
29
|
+
from ..timer import Timer
|
|
30
|
+
|
|
31
|
+
REFRESH_EXPIRE_DAYS = 29
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from collections.abc import Callable
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TokenAuth:
    """Manage authentication with a keycloak token server.

    A long-lived refresh token is kept in a private config file
    (``mlflow-token.json``) and exchanged for short-lived access tokens.
    The current access token is held in memory and exported through an
    environment variable (``MLFLOW_TRACKING_TOKEN`` by default).
    """

    # Name of the config file that stores the refresh token and its expiry.
    config_file = "mlflow-token.json"

    def __init__(
        self,
        url: str,
        enabled: bool = True,
        target_env_var: str = "MLFLOW_TRACKING_TOKEN",
    ) -> None:
        """Initialise the token authentication object.

        Parameters
        ----------
        url : str
            URL of the authentication server.
        enabled : bool, optional
            Set this to False to turn off authentication, by default True
        target_env_var : str, optional
            The environment variable to store the access token in after authenticating,
            by default `MLFLOW_TRACKING_TOKEN`

        """
        self.url = url
        self.target_env_var = target_env_var
        self._enabled = enabled

        config = self.load_config()

        # Assign the private attribute directly: going through the property
        # setter would reset ``refresh_expires`` to "now + REFRESH_EXPIRE_DAYS".
        self._refresh_token = config.get("refresh_token")
        self.refresh_expires = config.get("refresh_expires", 0)
        self.access_token = None
        self.access_expires = 0

        # the command line tool adds a default handler to the root logger on runtime,
        # so we init our logger here (on runtime, not on import) to avoid duplicate handlers
        self.log = logging.getLogger(__name__)

    def __call__(self) -> None:
        """Shorthand for :meth:`authenticate`."""
        self.authenticate()

    @property
    def refresh_token(self) -> str | None:
        """The current refresh token, or None when no token is available."""
        return self._refresh_token

    @refresh_token.setter
    def refresh_token(self, value: str | None) -> None:
        """Store a new refresh token and restart its expiry countdown."""
        self._refresh_token = value
        self.refresh_expires = time.time() + (REFRESH_EXPIRE_DAYS * 86400)  # 86400 seconds in a day

    @staticmethod
    def load_config() -> dict:
        """Load the token config, creating it and restricting its permissions if needed.

        Returns
        -------
        dict
            The content of the token config file.

        """
        path = config_path(TokenAuth.config_file)

        # Make sure a (possibly empty) config file exists before loading it.
        if not os.path.exists(path):
            save_config(TokenAuth.config_file, {})

        # The file holds a credential: restrict it to owner read/write only.
        if os.path.exists(path) and os.stat(path).st_mode & 0o777 != 0o600:
            os.chmod(path, 0o600)

        return load_config(TokenAuth.config_file)

    def enabled(fn: Callable) -> Callable:  # noqa: N805
        """Decorator to call or ignore a function based on the `enabled` flag."""

        @wraps(fn)
        def _wrapper(self: TokenAuth, *args, **kwargs) -> object:
            # When authentication is turned off the decorated method is a no-op.
            if self._enabled:
                return fn(self, *args, **kwargs)
            return None

        return _wrapper

    @enabled
    def login(self, force_credentials: bool = False, **kwargs: object) -> None:
        """Acquire a new refresh token and save it to disk.

        If an existing valid refresh token is already on disk it will be used.
        If not, or the token has expired, the user will be asked to obtain one from the API.

        Refresh token expiry time is set in the `REFRESH_EXPIRE_DAYS` constant (default 29 days).

        This function should be called once, interactively, right before starting a training run.

        Parameters
        ----------
        force_credentials : bool, optional
            Force a credential login even if a refresh token is available, by default False.
        kwargs : dict
            Additional keyword arguments (accepted and ignored).

        Raises
        ------
        RuntimeError
            A new refresh token could not be acquired.

        """
        del kwargs  # unused
        self.log.info("🌐 Logging in to %s", self.url)
        new_refresh_token = None

        # First try to silently renew the stored refresh token; any failure
        # here simply falls through to the interactive prompt below.
        if not force_credentials and self.refresh_token and self.refresh_expires > time.time():
            new_refresh_token = self._token_request(ignore_exc=True).get("refresh_token")

        if not new_refresh_token:
            self.log.info("📝 Please obtain a seed refresh token from %s/seed", self.url)
            self.log.info("📝 and paste it here (you will not see the output, just press enter after pasting):")
            self.refresh_token = getpass("Refresh Token: ")

            # perform a new refresh token request to check if the seed refresh token is valid
            new_refresh_token = self._token_request().get("refresh_token")

        if not new_refresh_token:
            msg = "❌ Failed to log in. Please try again."
            raise RuntimeError(msg)

        self.refresh_token = new_refresh_token
        self.save()

        self.log.info("✅ Successfully logged in to MLflow. Happy logging!")

    @enabled
    def authenticate(self, **kwargs: object) -> None:
        """Check the access token and refresh it if necessary. A new refresh token will also be acquired upon refresh.

        This requires a valid refresh token to be available, obtained from the `login` method.

        The access token is stored in memory and in an environment variable.
        If the access token is still valid, this function does nothing.

        This function should be called before every MLflow API request.

        Raises
        ------
        RuntimeError
            No refresh token is available, the token request failed, or the
            server response does not contain an access token and its lifetime.

        """
        del kwargs  # unused
        if self.access_expires > time.time():
            return

        if not self.refresh_token or self.refresh_expires < time.time():
            msg = "You are not logged in to MLflow. Please log in first."
            raise RuntimeError(msg)

        with Timer("Access token refreshed", self.log):
            response = self._token_request()

        # Validate before touching any state: a missing field would otherwise
        # surface as an opaque TypeError (None * 0.7, or None stored in os.environ).
        access_token = response.get("access_token")
        expires_in = response.get("expires_in")
        if not access_token or expires_in is None:
            msg = "❌ The token server response is missing the access token or its expiry time."
            raise RuntimeError(msg)

        self.access_token = access_token
        self.access_expires = time.time() + (expires_in * 0.7)  # bit of buffer
        self.refresh_token = response.get("refresh_token")

        os.environ[self.target_env_var] = self.access_token

    @enabled
    def save(self, **kwargs: object) -> None:
        """Save the latest refresh token to disk."""
        del kwargs  # unused
        if not self.refresh_token:
            self.log.warning("No refresh token to save.")
            return

        config = {
            "url": self.url,
            "refresh_token": self.refresh_token,
            "refresh_expires": self.refresh_expires,
        }
        save_config(self.config_file, config)

        expire_date = datetime.fromtimestamp(self.refresh_expires, tz=timezone.utc)
        self.log.info(
            "Your MLflow login token is valid until %s UTC",
            expire_date.strftime("%Y-%m-%d %H:%M:%S"),
        )

    def _token_request(
        self,
        ignore_exc: bool = False,
    ) -> dict:
        """Exchange the current refresh token at the ``refreshtoken`` endpoint.

        Parameters
        ----------
        ignore_exc : bool, optional
            Return an empty dict instead of raising when the request fails,
            by default False.

        Returns
        -------
        dict
            The ``response`` payload returned by the server, or ``{}`` when
            the request failed and `ignore_exc` is True.

        """
        path = "refreshtoken"
        payload = {"refresh_token": self.refresh_token}

        try:
            response = self._request(path, payload)
        except Exception:
            # Deliberately broad: with ``ignore_exc`` any failure means "no token".
            if ignore_exc:
                return {}
            raise

        return response

    def _request(self, path: str, payload: dict) -> dict:
        """POST `payload` to ``{url}/{path}`` and return the ``response`` field of the reply.

        Raises
        ------
        requests.exceptions.HTTPError
            The server answered with an HTTP error status (logged, then re-raised).
        RuntimeError
            The reply's ``status`` field is not ``"OK"``.

        """
        # NOTE(review): the body is passed via ``json=`` while the header declares
        # form encoding — confirm this is what the token server expects.
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
        }

        try:
            response = robust(requests.post)(
                f"{self.url}/{path}",
                headers=headers,
                json=payload,
                timeout=60,
            )
            response.raise_for_status()
            response_json = response.json()

            if response_json.get("status", "") != "OK":
                # On failure the "response" field carries the error description.
                error_description = response_json.get("response", "Error acquiring token.")
                msg = f"❌ {error_description}"
                raise RuntimeError(msg)

            return response_json["response"]
        except HTTPError:
            self.log.exception("HTTP error occurred")
            raise
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from mlflow import MlflowClient
|
|
17
|
+
except ImportError:
|
|
18
|
+
raise ImportError(
|
|
19
|
+
"The `mlflow` package is required to use AnemoiMLflowclient. Please install it with `pip install mlflow`."
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from .auth import TokenAuth
|
|
23
|
+
from .utils import health_check
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AnemoiMlflowClient(MlflowClient):
    """Anemoi extension of the MLflow client with token authentication support."""

    def __init__(
        self,
        tracking_uri: str,
        *args,
        authentication: bool = False,
        check_health: bool = True,
        **kwargs,
    ) -> None:
        """Behaves like a normal `mlflow.MlflowClient` but with token authentication injected on every call.

        Parameters
        ----------
        tracking_uri : str
            The URI of the MLflow tracking server.
        authentication : bool, optional
            Enable token authentication, by default False
        check_health : bool, optional
            Check the health of the MLflow server on init, by default True
        *args : Any
            Additional arguments to pass to the MLflow client.
        **kwargs : Any
            Additional keyword arguments to pass to the MLflow client.

        """
        # Set up authentication first: ``__getattribute__`` below needs
        # ``anemoi_auth`` to exist as soon as any callable attribute is accessed.
        self.anemoi_auth = TokenAuth(tracking_uri, enabled=authentication)
        if check_health:
            # Authenticate before the health check so the access token is
            # already exported to the environment when the request is sent.
            super().__getattribute__("anemoi_auth").authenticate()
            health_check(tracking_uri)
        super().__init__(tracking_uri, *args, **kwargs)

    def __getattribute__(self, name: str) -> Any:
        """Intercept attribute access and inject authentication.

        Every callable attribute triggers `TokenAuth.authenticate` before it is
        returned, except for:

        - ``anemoi_auth``: the auth object itself is callable, and
          authenticating on access would be circular;
        - ``login``: it must stay reachable when no valid refresh token exists,
          otherwise `authenticate` would raise before a login could happen.
        """
        attr = super().__getattribute__(name)
        if callable(attr) and name not in ("anemoi_auth", "login"):
            super().__getattribute__("anemoi_auth").authenticate()
        return attr

    def login(self, force_credentials: bool = False, **kwargs) -> None:
        """Explicitly log in to the MLflow server by acquiring or refreshing the token.

        Parameters
        ----------
        force_credentials : bool, optional
            Force a credential login even if a refresh token is available, by default False.
        kwargs : dict
            Additional keyword arguments passed to the underlying TokenAuth.login.
        """
        self.anemoi_auth.login(force_credentials=force_credentials, **kwargs)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# (C) Copyright 2024 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
|
|
13
|
+
import requests
|
|
14
|
+
|
|
15
|
+
from ..remote import robust
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def health_check(tracking_uri: str) -> None:
    """Query the health endpoint of an MLflow server.

    If the server is not reachable, raise an error and remind the user that authentication may be required.

    Parameters
    ----------
    tracking_uri : str
        Base URI of the MLflow tracking server; ``/health`` is appended to it.

    Raises
    ------
    ConnectionError
        If the health endpoint does not answer with the body ``OK``.

    """
    token = os.getenv("MLFLOW_TRACKING_TOKEN")

    # Only send credentials when a token is set; formatting a missing token
    # would put the literal header "Bearer None" on the request.
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = robust(requests.get, retry_after=30, maximum_tries=10)(
        f"{tracking_uri}/health",
        headers=headers,
        timeout=60,
    )

    if response.text == "OK":
        return

    error_msg = f"Could not connect to MLflow server at {tracking_uri}. "
    if not token:
        # Without a token the most likely cause is that authentication was left off.
        error_msg += "The server may require authentication, did you forget to turn it on?"
    raise ConnectionError(error_msg)
|
anemoi/utils/sanitise.py
CHANGED
|
@@ -17,19 +17,24 @@ from urllib.parse import urlencode
|
|
|
17
17
|
from urllib.parse import urlparse
|
|
18
18
|
from urllib.parse import urlunparse
|
|
19
19
|
|
|
20
|
-
# Patterns used
|
|
20
|
+
# Patterns used by earthkit-data for url-patterns and path-patterns
|
|
21
21
|
|
|
22
|
-
RE1 = re.compile(r"{([^}]*)}")
|
|
23
|
-
RE2 = re.compile(r"\(([^}]*)\)")
|
|
22
|
+
RE1 = re.compile(r"{([^}]*)}") # {*}
|
|
23
|
+
RE2 = re.compile(r"\(([^}]*)\)") # (*)
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
def sanitise(obj: Any) -> Any:
|
|
27
|
-
"""Sanitise an object by replacing all full paths with shortened versions and URL
|
|
26
|
+
def sanitise(obj: Any, level=1) -> Any:
|
|
27
|
+
"""Sanitise an object by replacing all full paths with shortened versions and URL credentials with '***'.
|
|
28
28
|
|
|
29
29
|
Parameters
|
|
30
30
|
----------
|
|
31
31
|
obj : Any
|
|
32
32
|
The object to sanitise.
|
|
33
|
+
level : int, optional
|
|
34
|
+
The level of sanitation. The higher levels will also apply the levels below it.
|
|
35
|
+
- 1: Shorten file paths to file name and hide credentials in URLs (default).
|
|
36
|
+
- 2: Hide hostnames in URLs.
|
|
37
|
+
- 3: Hide full file paths and URLs.
|
|
33
38
|
|
|
34
39
|
Returns
|
|
35
40
|
-------
|
|
@@ -37,6 +42,8 @@ def sanitise(obj: Any) -> Any:
|
|
|
37
42
|
The sanitised object.
|
|
38
43
|
"""
|
|
39
44
|
|
|
45
|
+
assert level in (1, 2, 3), "level must be 1, 2 or 3"
|
|
46
|
+
|
|
40
47
|
if isinstance(obj, dict):
|
|
41
48
|
return {sanitise(k): sanitise(v) for k, v in obj.items()}
|
|
42
49
|
|
|
@@ -47,29 +54,21 @@ def sanitise(obj: Any) -> Any:
|
|
|
47
54
|
return tuple(sanitise(v) for v in obj)
|
|
48
55
|
|
|
49
56
|
if isinstance(obj, str):
|
|
50
|
-
return _sanitise_string(obj)
|
|
57
|
+
return _sanitise_string(obj, level)
|
|
51
58
|
|
|
52
59
|
return obj
|
|
53
60
|
|
|
54
61
|
|
|
55
|
-
def _sanitise_string(obj: str) -> str:
|
|
56
|
-
"""Sanitise a string by replacing full paths and URL passwords.
|
|
57
|
-
|
|
58
|
-
Parameters
|
|
59
|
-
----------
|
|
60
|
-
obj : str
|
|
61
|
-
The string to sanitise.
|
|
62
|
-
|
|
63
|
-
Returns
|
|
64
|
-
-------
|
|
65
|
-
str
|
|
66
|
-
The sanitised string.
|
|
67
|
-
"""
|
|
62
|
+
def _sanitise_string(obj: str, level=1) -> str:
|
|
63
|
+
"""Sanitise a string by replacing full paths and URL passwords."""
|
|
68
64
|
|
|
69
65
|
parsed = urlparse(obj, allow_fragments=True)
|
|
70
66
|
|
|
71
67
|
if parsed.scheme and parsed.scheme[0].isalpha():
|
|
72
|
-
return _sanitise_url(parsed)
|
|
68
|
+
return _sanitise_url(parsed, level)
|
|
69
|
+
|
|
70
|
+
if level > 2:
|
|
71
|
+
return "hidden"
|
|
73
72
|
|
|
74
73
|
if obj.startswith("/") or obj.startswith("~"):
|
|
75
74
|
return _sanitise_path(obj)
|
|
@@ -77,19 +76,8 @@ def _sanitise_string(obj: str) -> str:
|
|
|
77
76
|
return obj
|
|
78
77
|
|
|
79
78
|
|
|
80
|
-
def _sanitise_url(parsed: Any) -> str:
|
|
81
|
-
"""Sanitise a URL by replacing passwords with '***'.
|
|
82
|
-
|
|
83
|
-
Parameters
|
|
84
|
-
----------
|
|
85
|
-
parsed : Any
|
|
86
|
-
The parsed URL.
|
|
87
|
-
|
|
88
|
-
Returns
|
|
89
|
-
-------
|
|
90
|
-
str
|
|
91
|
-
The sanitised URL.
|
|
92
|
-
"""
|
|
79
|
+
def _sanitise_url(parsed: Any, level=1) -> str:
|
|
80
|
+
"""Sanitise a URL by replacing passwords with '***'."""
|
|
93
81
|
|
|
94
82
|
LIST = [
|
|
95
83
|
"pass",
|
|
@@ -107,6 +95,9 @@ def _sanitise_url(parsed: Any) -> str:
|
|
|
107
95
|
"_api_key",
|
|
108
96
|
"username",
|
|
109
97
|
"login",
|
|
98
|
+
"auth",
|
|
99
|
+
"auth_token",
|
|
100
|
+
"auth_key",
|
|
110
101
|
]
|
|
111
102
|
|
|
112
103
|
scheme, netloc, path, params, query, fragment = parsed
|
|
@@ -130,26 +121,25 @@ def _sanitise_url(parsed: Any) -> str:
|
|
|
130
121
|
qs[k] = "hidden"
|
|
131
122
|
params = urlencode(qs, doseq=True)
|
|
132
123
|
|
|
133
|
-
|
|
124
|
+
if level > 1:
|
|
125
|
+
if (bits := netloc.split("@")) and len(bits) > 1:
|
|
126
|
+
netloc = f"{bits[0]}@hidden"
|
|
127
|
+
else:
|
|
128
|
+
netloc = "hidden"
|
|
134
129
|
|
|
130
|
+
if level > 2:
|
|
131
|
+
return urlunparse([scheme, netloc, "", "", "", ""])
|
|
135
132
|
|
|
136
|
-
|
|
137
|
-
"""Sanitise a file path by shortening it.
|
|
133
|
+
return urlunparse([scheme, netloc, path, params, query, fragment])
|
|
138
134
|
|
|
139
|
-
Parameters
|
|
140
|
-
----------
|
|
141
|
-
path : str
|
|
142
|
-
The file path to sanitise.
|
|
143
135
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
str
|
|
147
|
-
The sanitised file path.
|
|
148
|
-
"""
|
|
136
|
+
def _sanitise_path(path: str) -> str:
|
|
137
|
+
"""Sanitise a file path by shortening it."""
|
|
149
138
|
bits = list(reversed(Path(path).parts))
|
|
150
139
|
result = [bits.pop(0)]
|
|
151
140
|
for bit in bits:
|
|
152
141
|
if RE1.match(bit) or RE2.match(bit):
|
|
142
|
+
# keep earthkit-data folder patterns
|
|
153
143
|
result.append(bit)
|
|
154
144
|
continue
|
|
155
145
|
if result[-1] == "...":
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: anemoi-utils
|
|
3
|
-
Version: 0.4.27
|
|
3
|
+
Version: 0.4.29
|
|
4
4
|
Summary: A package to hold various functions to support training of ML models on ECMWF data.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License: Apache License
|
|
@@ -236,7 +236,7 @@ Requires-Dist: rich
|
|
|
236
236
|
Requires-Dist: tomli; python_version < "3.11"
|
|
237
237
|
Requires-Dist: tqdm
|
|
238
238
|
Provides-Extra: all
|
|
239
|
-
Requires-Dist: anemoi-utils[grib,provenance,s3,text]; extra == "all"
|
|
239
|
+
Requires-Dist: anemoi-utils[grib,mlflow,provenance,s3,text]; extra == "all"
|
|
240
240
|
Provides-Extra: dev
|
|
241
241
|
Requires-Dist: anemoi-utils[all,docs,tests]; extra == "dev"
|
|
242
242
|
Provides-Extra: docs
|
|
@@ -250,13 +250,18 @@ Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
|
250
250
|
Requires-Dist: termcolor; extra == "docs"
|
|
251
251
|
Provides-Extra: grib
|
|
252
252
|
Requires-Dist: requests; extra == "grib"
|
|
253
|
+
Provides-Extra: mlflow
|
|
254
|
+
Requires-Dist: mlflow-skinny>=2.11.1; extra == "mlflow"
|
|
255
|
+
Requires-Dist: requests; extra == "mlflow"
|
|
253
256
|
Provides-Extra: provenance
|
|
254
257
|
Requires-Dist: gitpython; extra == "provenance"
|
|
255
258
|
Requires-Dist: nvsmi; extra == "provenance"
|
|
256
259
|
Provides-Extra: s3
|
|
257
260
|
Requires-Dist: boto3>1.36; extra == "s3"
|
|
258
261
|
Provides-Extra: tests
|
|
262
|
+
Requires-Dist: anemoi-utils[mlflow]; extra == "tests"
|
|
259
263
|
Requires-Dist: pytest; extra == "tests"
|
|
264
|
+
Requires-Dist: pytest-mock>=3; extra == "tests"
|
|
260
265
|
Provides-Extra: text
|
|
261
266
|
Requires-Dist: termcolor; extra == "text"
|
|
262
267
|
Requires-Dist: wcwidth; extra == "text"
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
anemoi/utils/__init__.py,sha256=uVhpF-VjIl_4mMywOVtgTutgsdIsqz-xdkwxeMhzuag,730
|
|
2
2
|
anemoi/utils/__main__.py,sha256=6LlE4MYrPvqqrykxXh7XMi50UZteUY59NeM8P9Zs2dU,910
|
|
3
|
-
anemoi/utils/_version.py,sha256=
|
|
3
|
+
anemoi/utils/_version.py,sha256=M33mpOYpqw35qLoEcg5GxMG4YQ25eohx8wEQkM6FTT0,513
|
|
4
4
|
anemoi/utils/caching.py,sha256=rXbeAmpBcMbbfN4EVblaHWKicsrtx1otER84FEBtz98,6183
|
|
5
5
|
anemoi/utils/checkpoints.py,sha256=PydBqA-wI8_05zU-3yT-ZP53GnvuqB7vPXl-w6-9XX8,9541
|
|
6
6
|
anemoi/utils/cli.py,sha256=Cd3ESQkNWecbGnJjkR_SKHdFPETJWFrHqg5ovtANDKs,6522
|
|
7
7
|
anemoi/utils/compatibility.py,sha256=wRBRMmxQP88rNcWiP5gqXliwYQbBv1iCAsDjcCRi5UY,2234
|
|
8
8
|
anemoi/utils/config.py,sha256=EEfcSxW2CD6fFOzDtqz_uYlMKuYq4X5QinJW_8GBYj4,17325
|
|
9
|
-
anemoi/utils/dates.py,sha256=
|
|
9
|
+
anemoi/utils/dates.py,sha256=Ew11G-Upl0VM9R9Sgg3Be6KIkXfpMCD69kgiao7Xv48,17534
|
|
10
10
|
anemoi/utils/devtools.py,sha256=W3OBu96MkXRIl7Qh1SE5Zd6aB1R0QlnmlrlpBYM0fVY,3527
|
|
11
11
|
anemoi/utils/grib.py,sha256=201WcxjjAl92Y2HX2kZ2S8Qr5dN-oG7nV-vQLaybzP4,3610
|
|
12
12
|
anemoi/utils/grids.py,sha256=VlhuN8MZDBu-G50QFI1XHV9IXLway2LpvI9X8sDjb5s,6047
|
|
@@ -17,7 +17,7 @@ anemoi/utils/provenance.py,sha256=iTsn4r-VPq2D8tSHPSuAIqG077_opkqMT42G03DRWJg,14
|
|
|
17
17
|
anemoi/utils/registry.py,sha256=e3nOIRyMYQ-mpEvaHAv5tuvMYNbkJ5yz94ns7BnvkjM,9717
|
|
18
18
|
anemoi/utils/rules.py,sha256=VspUoPmw7tijrs6l_wl4vDjr_zVQsFjx9ITiBSvxgc8,6972
|
|
19
19
|
anemoi/utils/s3.py,sha256=xMT48kbcelcjjqsaU567WI3oZ5eqo88Rlgyx5ECszAU,4074
|
|
20
|
-
anemoi/utils/sanitise.py,sha256=
|
|
20
|
+
anemoi/utils/sanitise.py,sha256=XkQzibDbu-VFJkJC4WcB9ovkcTkVAynXtkn1Tlc2CC4,4019
|
|
21
21
|
anemoi/utils/sanitize.py,sha256=43ZKDcfVpeXSsJ9TFEc9aZnD6oe2cUh151XnDspM98M,462
|
|
22
22
|
anemoi/utils/testing.py,sha256=DeTAkmg-RCMPXBBRUy_Gti5UJriUPRKT6ruE3JL2GVc,10372
|
|
23
23
|
anemoi/utils/text.py,sha256=HkzIvi24obDceFLpJEwBJ9PmPrJUkQN2TrElJ-A87gU,14441
|
|
@@ -30,14 +30,18 @@ anemoi/utils/commands/transfer.py,sha256=29o1RQ46dCJ6kTD1sbxp8XFgB-Qx3RceMccypBM
|
|
|
30
30
|
anemoi/utils/mars/__init__.py,sha256=b-Lc3L1TAQd9ODs0Z1YSJzgZCO1K_M3DSgx_yd2qXvM,2724
|
|
31
31
|
anemoi/utils/mars/mars.yaml,sha256=R0dujp75lLA4wCWhPeOQnzJ45WZAYLT8gpx509cBFlc,66
|
|
32
32
|
anemoi/utils/mars/requests.py,sha256=VFMHBVAAl0_2lOcMBa1lvaKHctN0lDJsI6_U4BucGew,1142
|
|
33
|
+
anemoi/utils/mlflow/__init__.py,sha256=hCW0QcLHJmE-C1r38P27_ZOvCLNewex5iQEtZqx2ckI,393
|
|
34
|
+
anemoi/utils/mlflow/auth.py,sha256=Zgn2Ru7GE99_ZTBeskl5M1WnrlEGNU9XxsRVgVoUwmM,8376
|
|
35
|
+
anemoi/utils/mlflow/client.py,sha256=Y34ceLcp1-H0XTt8h8-IhHKX9bApc_QJcgVrzZKtabY,2752
|
|
36
|
+
anemoi/utils/mlflow/utils.py,sha256=0d-dp7YmDLJ59ikJWUcZooqBIdrNDxPShDQubcX-a3k,1310
|
|
33
37
|
anemoi/utils/remote/__init__.py,sha256=7nHu-LRspYW2Fx9GNLjsxpytAUIvhIbOjb0Xmxb-33s,20988
|
|
34
38
|
anemoi/utils/remote/s3.py,sha256=DxO_TjmetX_r3ZvGaHjpz40oqvcQYP3Vd_A4ojMGlSA,21379
|
|
35
39
|
anemoi/utils/remote/ssh.py,sha256=xNtsawh8okytCKRehkRCVExbHZj-CRUQNormEHglfuw,8088
|
|
36
40
|
anemoi/utils/schemas/__init__.py,sha256=nkinKlsPLPXEjfTYQT1mpKC4cvs-14w_zBkDRxakwxw,698
|
|
37
41
|
anemoi/utils/schemas/errors.py,sha256=lgOXzVTYzAE0qWQf3OZ42vCWixv8lilSqLLhzARBmvI,1831
|
|
38
|
-
anemoi_utils-0.4.
|
|
39
|
-
anemoi_utils-0.4.
|
|
40
|
-
anemoi_utils-0.4.
|
|
41
|
-
anemoi_utils-0.4.
|
|
42
|
-
anemoi_utils-0.4.
|
|
43
|
-
anemoi_utils-0.4.
|
|
42
|
+
anemoi_utils-0.4.29.dist-info/licenses/LICENSE,sha256=8HznKF1Vi2IvfLsKNE5A2iVyiri3pRjRPvPC9kxs6qk,11354
|
|
43
|
+
anemoi_utils-0.4.29.dist-info/METADATA,sha256=v5sHgt4rZA9mZ2YW1pMO0S76mlvZ09hoB3Q-G7xp6-Y,15717
|
|
44
|
+
anemoi_utils-0.4.29.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
45
|
+
anemoi_utils-0.4.29.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
|
|
46
|
+
anemoi_utils-0.4.29.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
|
|
47
|
+
anemoi_utils-0.4.29.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|