anemoi-utils 0.4.28__tar.gz → 0.4.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anemoi-utils might be problematic. Click here for more details.

Files changed (110)
  1. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.pre-commit-config.yaml +2 -2
  2. anemoi_utils-0.4.29/.release-please-manifest.json +3 -0
  3. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/CHANGELOG.md +16 -0
  4. {anemoi_utils-0.4.28/src/anemoi_utils.egg-info → anemoi_utils-0.4.29}/PKG-INFO +3 -2
  5. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/pyproject.toml +2 -2
  6. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/_version.py +2 -2
  7. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/dates.py +11 -0
  8. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/mlflow/auth.py +9 -0
  9. anemoi_utils-0.4.29/src/anemoi/utils/mlflow/utils.py +44 -0
  10. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/sanitise.py +35 -45
  11. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29/src/anemoi_utils.egg-info}/PKG-INFO +3 -2
  12. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi_utils.egg-info/SOURCES.txt +1 -2
  13. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi_utils.egg-info/requires.txt +2 -1
  14. anemoi_utils-0.4.28/tests/test_sanetise.py → anemoi_utils-0.4.29/tests/test_sanitise.py +11 -0
  15. anemoi_utils-0.4.28/.release-please-manifest.json +0 -3
  16. anemoi_utils-0.4.28/src/anemoi/utils/mlflow/utils.py +0 -159
  17. anemoi_utils-0.4.28/tests/test_mlflow_utils.py +0 -76
  18. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.gitattributes +0 -0
  19. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/CODEOWNERS +0 -0
  20. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/ci-hpc-config.yml +0 -0
  21. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/dependabot.yml +0 -0
  22. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/labeler.yml +0 -0
  23. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/pull_request_template.md +0 -0
  24. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/downstream-ci-hpc.yml +0 -0
  25. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/pr-conventional-commit.yml +0 -0
  26. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/pr-label-conventional-commits.yml +0 -0
  27. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/pr-label-file-based.yml +0 -0
  28. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/pr-label-public.yml +0 -0
  29. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/python-publish.yml +0 -0
  30. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/python-pull-request.yml +0 -0
  31. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/readthedocs-pr-update.yml +0 -0
  32. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.github/workflows/release-please.yml +0 -0
  33. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.gitignore +0 -0
  34. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.readthedocs.yaml +0 -0
  35. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/.release-please-config.json +0 -0
  36. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/CONTRIBUTORS.md +0 -0
  37. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/LICENSE +0 -0
  38. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/README.md +0 -0
  39. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/Makefile +0 -0
  40. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/_static/logo.png +0 -0
  41. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/_static/style.css +0 -0
  42. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/_templates/.gitkeep +0 -0
  43. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/_templates/apidoc/package.rst.jinja +0 -0
  44. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/conf.py +0 -0
  45. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/index.rst +0 -0
  46. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/installing.rst +0 -0
  47. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/checkpoints.rst +0 -0
  48. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/config.rst +0 -0
  49. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/dates.rst +0 -0
  50. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/grib.rst +0 -0
  51. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/humanize.rst +0 -0
  52. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/provenance.rst +0 -0
  53. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/s3.rst +0 -0
  54. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/testing.rst +0 -0
  55. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/modules/text.rst +0 -0
  56. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/docs/scripts/api_build.sh +0 -0
  57. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/setup.cfg +0 -0
  58. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/__init__.py +0 -0
  59. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/__main__.py +0 -0
  60. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/caching.py +0 -0
  61. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/checkpoints.py +0 -0
  62. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/cli.py +0 -0
  63. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/commands/__init__.py +0 -0
  64. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/commands/config.py +0 -0
  65. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/commands/metadata.py +0 -0
  66. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/commands/requests.py +0 -0
  67. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/commands/transfer.py +0 -0
  68. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/compatibility.py +0 -0
  69. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/config.py +0 -0
  70. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/devtools.py +0 -0
  71. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/grib.py +0 -0
  72. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/grids.py +0 -0
  73. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/hindcasts.py +0 -0
  74. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/humanize.py +0 -0
  75. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/logs.py +0 -0
  76. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/mars/__init__.py +0 -0
  77. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/mars/mars.yaml +0 -0
  78. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/mars/requests.py +0 -0
  79. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/mlflow/__init__.py +0 -0
  80. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/mlflow/client.py +0 -0
  81. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/provenance.py +0 -0
  82. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/registry.py +0 -0
  83. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/remote/__init__.py +0 -0
  84. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/remote/s3.py +0 -0
  85. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/remote/ssh.py +0 -0
  86. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/rules.py +0 -0
  87. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/s3.py +0 -0
  88. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/sanitize.py +0 -0
  89. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/schemas/__init__.py +0 -0
  90. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/schemas/errors.py +0 -0
  91. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/testing.py +0 -0
  92. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/text.py +0 -0
  93. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi/utils/timer.py +0 -0
  94. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi_utils.egg-info/dependency_links.txt +0 -0
  95. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi_utils.egg-info/entry_points.txt +0 -0
  96. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/src/anemoi_utils.egg-info/top_level.txt +0 -0
  97. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test-transfer-data/directory/b/c/x +0 -0
  98. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test-transfer-data/directory/b/y +0 -0
  99. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test-transfer-data/directory/exotic filename ;^/"'[=.,#]()/303/252/303/274/303/247/303/262/342/234/205.txt" +0 -0
  100. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test-transfer-data/directory/z +0 -0
  101. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test-transfer-data/file +0 -0
  102. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_caching.py +0 -0
  103. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_compatibility.py +0 -0
  104. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_dates.py +0 -0
  105. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_frequency.py +0 -0
  106. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_mlflow_auth.py +0 -0
  107. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_mlflow_client.py +0 -0
  108. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_provenance.py +0 -0
  109. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_remote.py +0 -0
  110. {anemoi_utils-0.4.28 → anemoi_utils-0.4.29}/tests/test_utils.py +0 -0
@@ -41,7 +41,7 @@ repos:
41
41
  - --profile black
42
42
  - --project anemoi
43
43
  - repo: https://github.com/astral-sh/ruff-pre-commit
44
- rev: v0.11.12
44
+ rev: v0.12.2
45
45
  hooks:
46
46
  - id: ruff
47
47
  args:
@@ -69,7 +69,7 @@ repos:
69
69
  hooks:
70
70
  - id: pyproject-fmt
71
71
  - repo: https://github.com/jshwi/docsig # Check docstrings against function sig
72
- rev: v0.69.3
72
+ rev: v0.70.0
73
73
  hooks:
74
74
  - id: docsig
75
75
  args:
@@ -0,0 +1,3 @@
1
+ {
2
+ ".": "0.4.29"
3
+ }
@@ -8,6 +8,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
8
8
  Please add your functional changes to the appropriate section in the PR.
9
9
  Keep it human-readable, your future self will thank you!
10
10
 
11
+ ## [0.4.29](https://github.com/ecmwf/anemoi-utils/compare/0.4.28...0.4.29) (2025-07-22)
12
+
13
+
14
+ ### Features
15
+
16
+ * Better support for negative timedeltas ([#180](https://github.com/ecmwf/anemoi-utils/issues/180)) ([3f8041a](https://github.com/ecmwf/anemoi-utils/commit/3f8041a46b525b6fcbe6171cd8a8a40ec30b2c1f))
17
+ * **deps:** Use mlflow-skinny instead of mlflow ([#184](https://github.com/ecmwf/anemoi-utils/issues/184)) ([82e5c30](https://github.com/ecmwf/anemoi-utils/commit/82e5c3053962cd8e1e8f6a1ea9e8f92492e497b4))
18
+ * Protect mlflow token file ([#183](https://github.com/ecmwf/anemoi-utils/issues/183)) ([fdf0fc8](https://github.com/ecmwf/anemoi-utils/commit/fdf0fc84ee3e8076928f6c888374cd3aa008023b))
19
+ * **sanitise:** Sanitation level ([#175](https://github.com/ecmwf/anemoi-utils/issues/175)) ([8d85d8f](https://github.com/ecmwf/anemoi-utils/commit/8d85d8fd889bf72b8066cc021d4d7b329a360848))
20
+ * Support negative timedelta ([#178](https://github.com/ecmwf/anemoi-utils/issues/178)) ([546f6ec](https://github.com/ecmwf/anemoi-utils/commit/546f6ec76534cd39094957ce3b57b34f14f7a000))
21
+
22
+
23
+ ### Bug Fixes
24
+
25
+ * Clean utils ([#185](https://github.com/ecmwf/anemoi-utils/issues/185)) ([de3c7a4](https://github.com/ecmwf/anemoi-utils/commit/de3c7a47f14c258997942564717c480caa124ee6))
26
+
11
27
  ## [0.4.28](https://github.com/ecmwf/anemoi-utils/compare/0.4.27...0.4.28) (2025-07-03)
12
28
 
13
29
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anemoi-utils
3
- Version: 0.4.28
3
+ Version: 0.4.29
4
4
  Summary: A package to hold various functions to support training of ML models on ECMWF data.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License: Apache License
@@ -251,7 +251,7 @@ Requires-Dist: termcolor; extra == "docs"
251
251
  Provides-Extra: grib
252
252
  Requires-Dist: requests; extra == "grib"
253
253
  Provides-Extra: mlflow
254
- Requires-Dist: mlflow>=2.11.1; extra == "mlflow"
254
+ Requires-Dist: mlflow-skinny>=2.11.1; extra == "mlflow"
255
255
  Requires-Dist: requests; extra == "mlflow"
256
256
  Provides-Extra: provenance
257
257
  Requires-Dist: gitpython; extra == "provenance"
@@ -259,6 +259,7 @@ Requires-Dist: nvsmi; extra == "provenance"
259
259
  Provides-Extra: s3
260
260
  Requires-Dist: boto3>1.36; extra == "s3"
261
261
  Provides-Extra: tests
262
+ Requires-Dist: anemoi-utils[mlflow]; extra == "tests"
262
263
  Requires-Dist: pytest; extra == "tests"
263
264
  Requires-Dist: pytest-mock>=3; extra == "tests"
264
265
  Provides-Extra: text
@@ -69,7 +69,7 @@ optional-dependencies.docs = [
69
69
 
70
70
  optional-dependencies.grib = [ "requests" ]
71
71
 
72
- optional-dependencies.mlflow = [ "mlflow>=2.11.1", "requests" ]
72
+ optional-dependencies.mlflow = [ "mlflow-skinny>=2.11.1", "requests" ]
73
73
 
74
74
  optional-dependencies.provenance = [ "gitpython", "nvsmi" ]
75
75
 
@@ -77,7 +77,7 @@ optional-dependencies.s3 = [
77
77
  "boto3>1.36",
78
78
  ]
79
79
 
80
- optional-dependencies.tests = [ "pytest", "pytest-mock>=3" ]
80
+ optional-dependencies.tests = [ "anemoi-utils[mlflow]", "pytest", "pytest-mock>=3" ]
81
81
 
82
82
  optional-dependencies.text = [ "termcolor", "wcwidth" ]
83
83
 
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.4.28'
21
- __version_tuple__ = version_tuple = (0, 4, 28)
20
+ __version__ = version = '0.4.29'
21
+ __version_tuple__ = version_tuple = (0, 4, 29)
@@ -199,6 +199,15 @@ def as_timedelta(frequency: Union[int, str, datetime.timedelta]) -> datetime.tim
199
199
  except ValueError:
200
200
  pass
201
201
 
202
+ if frequency.startswith(" ") or frequency.startswith(" "):
203
+ frequency = frequency.strip()
204
+
205
+ if frequency.startswith("-"):
206
+ return -as_timedelta(frequency[1:])
207
+
208
+ if frequency.startswith("+"):
209
+ return as_timedelta(frequency[1:])
210
+
202
211
  if re.match(r"^\d+[hdms]$", frequency, re.IGNORECASE):
203
212
  unit = frequency[-1].lower()
204
213
  v = int(frequency[:-1])
@@ -261,6 +270,8 @@ def frequency_to_string(frequency: datetime.timedelta) -> str:
261
270
  frequency = frequency_to_timedelta(frequency)
262
271
 
263
272
  total_seconds = frequency.total_seconds()
273
+ if total_seconds < 0:
274
+ return f"-{frequency_to_string(-frequency)}"
264
275
  assert int(total_seconds) == total_seconds, total_seconds
265
276
  total_seconds = int(total_seconds)
266
277
 
@@ -22,6 +22,7 @@ from typing import TYPE_CHECKING
22
22
  import requests
23
23
  from requests.exceptions import HTTPError
24
24
 
25
+ from ..config import config_path
25
26
  from ..config import load_config
26
27
  from ..config import save_config
27
28
  from ..remote import robust
@@ -87,6 +88,14 @@ class TokenAuth:
87
88
 
88
89
  @staticmethod
89
90
  def load_config() -> dict:
91
+ path = config_path(TokenAuth.config_file)
92
+
93
+ if not os.path.exists(path):
94
+ save_config(TokenAuth.config_file, {})
95
+
96
+ if os.path.exists(path) and os.stat(path).st_mode & 0o777 != 0o600:
97
+ os.chmod(path, 0o600)
98
+
90
99
  return load_config(TokenAuth.config_file)
91
100
 
92
101
  def enabled(fn: Callable) -> Callable: # noqa: N805
@@ -0,0 +1,44 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+ from __future__ import annotations
10
+
11
+ import os
12
+
13
+ import requests
14
+
15
+ from ..remote import robust
16
+
17
+
18
+ def health_check(tracking_uri: str) -> None:
19
+ """Query the health endpoint of an MLflow server.
20
+
21
+ If the server is not reachable, raise an error and remind the user that authentication may be required.
22
+
23
+ Raises
24
+ ------
25
+ ConnectionError
26
+ If the server is not reachable.
27
+
28
+ """
29
+ token = os.getenv("MLFLOW_TRACKING_TOKEN")
30
+
31
+ headers = {"Authorization": f"Bearer {token}"}
32
+ response = robust(requests.get, retry_after=30, maximum_tries=10)(
33
+ f"{tracking_uri}/health",
34
+ headers=headers,
35
+ timeout=60,
36
+ )
37
+
38
+ if response.text == "OK":
39
+ return
40
+
41
+ error_msg = f"Could not connect to MLflow server at {tracking_uri}. "
42
+ if not token:
43
+ error_msg += "The server may require authentication, did you forget to turn it on?"
44
+ raise ConnectionError(error_msg)
@@ -17,19 +17,24 @@ from urllib.parse import urlencode
17
17
  from urllib.parse import urlparse
18
18
  from urllib.parse import urlunparse
19
19
 
20
- # Patterns used but earthkit-data for url-patterns and path-patterns
20
+ # Patterns used by earthkit-data for url-patterns and path-patterns
21
21
 
22
- RE1 = re.compile(r"{([^}]*)}")
23
- RE2 = re.compile(r"\(([^}]*)\)")
22
+ RE1 = re.compile(r"{([^}]*)}") # {*}
23
+ RE2 = re.compile(r"\(([^}]*)\)") # (*)
24
24
 
25
25
 
26
- def sanitise(obj: Any) -> Any:
27
- """Sanitise an object by replacing all full paths with shortened versions and URL passwords with '***'.
26
+ def sanitise(obj: Any, level=1) -> Any:
27
+ """Sanitise an object by replacing all full paths with shortened versions and URL credentials with '***'.
28
28
 
29
29
  Parameters
30
30
  ----------
31
31
  obj : Any
32
32
  The object to sanitise.
33
+ level : int, optional
34
+ The level of sanitation. The higher levels will also apply the levels below it.
35
+ - 1: Shorten file paths to file name and hide credentials in URLs (default).
36
+ - 2: Hide hostnames in URLs.
37
+ - 3: Hide full file paths and URLs.
33
38
 
34
39
  Returns
35
40
  -------
@@ -37,6 +42,8 @@ def sanitise(obj: Any) -> Any:
37
42
  The sanitised object.
38
43
  """
39
44
 
45
+ assert level in (1, 2, 3), "level must be 1, 2 or 3"
46
+
40
47
  if isinstance(obj, dict):
41
48
  return {sanitise(k): sanitise(v) for k, v in obj.items()}
42
49
 
@@ -47,29 +54,21 @@ def sanitise(obj: Any) -> Any:
47
54
  return tuple(sanitise(v) for v in obj)
48
55
 
49
56
  if isinstance(obj, str):
50
- return _sanitise_string(obj)
57
+ return _sanitise_string(obj, level)
51
58
 
52
59
  return obj
53
60
 
54
61
 
55
- def _sanitise_string(obj: str) -> str:
56
- """Sanitise a string by replacing full paths and URL passwords.
57
-
58
- Parameters
59
- ----------
60
- obj : str
61
- The string to sanitise.
62
-
63
- Returns
64
- -------
65
- str
66
- The sanitised string.
67
- """
62
+ def _sanitise_string(obj: str, level=1) -> str:
63
+ """Sanitise a string by replacing full paths and URL passwords."""
68
64
 
69
65
  parsed = urlparse(obj, allow_fragments=True)
70
66
 
71
67
  if parsed.scheme and parsed.scheme[0].isalpha():
72
- return _sanitise_url(parsed)
68
+ return _sanitise_url(parsed, level)
69
+
70
+ if level > 2:
71
+ return "hidden"
73
72
 
74
73
  if obj.startswith("/") or obj.startswith("~"):
75
74
  return _sanitise_path(obj)
@@ -77,19 +76,8 @@ def _sanitise_string(obj: str) -> str:
77
76
  return obj
78
77
 
79
78
 
80
- def _sanitise_url(parsed: Any) -> str:
81
- """Sanitise a URL by replacing passwords with '***'.
82
-
83
- Parameters
84
- ----------
85
- parsed : Any
86
- The parsed URL.
87
-
88
- Returns
89
- -------
90
- str
91
- The sanitised URL.
92
- """
79
+ def _sanitise_url(parsed: Any, level=1) -> str:
80
+ """Sanitise a URL by replacing passwords with '***'."""
93
81
 
94
82
  LIST = [
95
83
  "pass",
@@ -107,6 +95,9 @@ def _sanitise_url(parsed: Any) -> str:
107
95
  "_api_key",
108
96
  "username",
109
97
  "login",
98
+ "auth",
99
+ "auth_token",
100
+ "auth_key",
110
101
  ]
111
102
 
112
103
  scheme, netloc, path, params, query, fragment = parsed
@@ -130,26 +121,25 @@ def _sanitise_url(parsed: Any) -> str:
130
121
  qs[k] = "hidden"
131
122
  params = urlencode(qs, doseq=True)
132
123
 
133
- return urlunparse([scheme, netloc, path, params, query, fragment])
124
+ if level > 1:
125
+ if (bits := netloc.split("@")) and len(bits) > 1:
126
+ netloc = f"{bits[0]}@hidden"
127
+ else:
128
+ netloc = "hidden"
134
129
 
130
+ if level > 2:
131
+ return urlunparse([scheme, netloc, "", "", "", ""])
135
132
 
136
- def _sanitise_path(path: str) -> str:
137
- """Sanitise a file path by shortening it.
133
+ return urlunparse([scheme, netloc, path, params, query, fragment])
138
134
 
139
- Parameters
140
- ----------
141
- path : str
142
- The file path to sanitise.
143
135
 
144
- Returns
145
- -------
146
- str
147
- The sanitised file path.
148
- """
136
+ def _sanitise_path(path: str) -> str:
137
+ """Sanitise a file path by shortening it."""
149
138
  bits = list(reversed(Path(path).parts))
150
139
  result = [bits.pop(0)]
151
140
  for bit in bits:
152
141
  if RE1.match(bit) or RE2.match(bit):
142
+ # keep earthkit-data folder patterns
153
143
  result.append(bit)
154
144
  continue
155
145
  if result[-1] == "...":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: anemoi-utils
3
- Version: 0.4.28
3
+ Version: 0.4.29
4
4
  Summary: A package to hold various functions to support training of ML models on ECMWF data.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License: Apache License
@@ -251,7 +251,7 @@ Requires-Dist: termcolor; extra == "docs"
251
251
  Provides-Extra: grib
252
252
  Requires-Dist: requests; extra == "grib"
253
253
  Provides-Extra: mlflow
254
- Requires-Dist: mlflow>=2.11.1; extra == "mlflow"
254
+ Requires-Dist: mlflow-skinny>=2.11.1; extra == "mlflow"
255
255
  Requires-Dist: requests; extra == "mlflow"
256
256
  Provides-Extra: provenance
257
257
  Requires-Dist: gitpython; extra == "provenance"
@@ -259,6 +259,7 @@ Requires-Dist: nvsmi; extra == "provenance"
259
259
  Provides-Extra: s3
260
260
  Requires-Dist: boto3>1.36; extra == "s3"
261
261
  Provides-Extra: tests
262
+ Requires-Dist: anemoi-utils[mlflow]; extra == "tests"
262
263
  Requires-Dist: pytest; extra == "tests"
263
264
  Requires-Dist: pytest-mock>=3; extra == "tests"
264
265
  Provides-Extra: text
@@ -94,10 +94,9 @@ tests/test_dates.py
94
94
  tests/test_frequency.py
95
95
  tests/test_mlflow_auth.py
96
96
  tests/test_mlflow_client.py
97
- tests/test_mlflow_utils.py
98
97
  tests/test_provenance.py
99
98
  tests/test_remote.py
100
- tests/test_sanetise.py
99
+ tests/test_sanitise.py
101
100
  tests/test_utils.py
102
101
  tests/test-transfer-data/file
103
102
  tests/test-transfer-data/directory/exotic filename ;^"'[=.,#]()êüçò✅.txt
@@ -34,7 +34,7 @@ termcolor
34
34
  requests
35
35
 
36
36
  [mlflow]
37
- mlflow>=2.11.1
37
+ mlflow-skinny>=2.11.1
38
38
  requests
39
39
 
40
40
  [provenance]
@@ -45,6 +45,7 @@ nvsmi
45
45
  boto3>1.36
46
46
 
47
47
  [tests]
48
+ anemoi-utils[mlflow]
48
49
  pytest
49
50
  pytest-mock>=3
50
51
 
@@ -47,6 +47,12 @@ def test_sanitise_urls() -> None:
47
47
  assert sanitise("http://www.example.com/path;username=secret") == "http://www.example.com/path;username=hidden"
48
48
  assert sanitise("http://www.example.com/path;login=secret") == "http://www.example.com/path;login=hidden"
49
49
 
50
+ assert sanitise("http://www.example.com/path;_api_token=secret", level=2) == "http://hidden/path;_api_token=hidden"
51
+ assert sanitise("http://johndoe:password@host:port/path", level=2) == "http://user:***@hidden/path"
52
+ assert sanitise("http://host:port/path", level=2) == "http://hidden/path"
53
+
54
+ assert sanitise("http://www.example.com/path;_api_token=secret", level=3) == "http://hidden"
55
+
50
56
 
51
57
  def test_sanitise_paths() -> None:
52
58
  """Test the sanitise function for sanitizing file paths."""
@@ -65,6 +71,11 @@ def test_sanitise_paths() -> None:
65
71
  assert sanitise("sub/folder/test.grib") == "sub/folder/test.grib"
66
72
  assert sanitise("./folder/test.grib") == "./folder/test.grib"
67
73
 
74
+ assert sanitise("./folder/test.grib", level=2) == "./folder/test.grib"
75
+
76
+ assert sanitise("./folder/test.grib", level=3) == "hidden"
77
+ assert sanitise("/home/johndoe/.ssh/id_rsa", level=3) == "hidden"
78
+
68
79
 
69
80
  if __name__ == "__main__":
70
81
  for name, obj in list(globals().items()):
@@ -1,3 +0,0 @@
1
- {
2
- ".": "0.4.28"
3
- }
@@ -1,159 +0,0 @@
1
- # (C) Copyright 2024 Anemoi contributors.
2
- #
3
- # This software is licensed under the terms of the Apache Licence Version 2.0
4
- # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
- #
6
- # In applying this licence, ECMWF does not waive the privileges and immunities
7
- # granted to it by virtue of its status as an intergovernmental organisation
8
- # nor does it submit to any jurisdiction.
9
- from __future__ import annotations
10
-
11
- import functools
12
- import os
13
- from typing import Any
14
-
15
- import requests
16
-
17
- from ..remote import robust
18
-
19
-
20
- def health_check(tracking_uri: str) -> None:
21
- """Query the health endpoint of an MLflow server.
22
-
23
- If the server is not reachable, raise an error and remind the user that authentication may be required.
24
-
25
- Raises
26
- ------
27
- ConnectionError
28
- If the server is not reachable.
29
-
30
- """
31
- token = os.getenv("MLFLOW_TRACKING_TOKEN")
32
-
33
- headers = {"Authorization": f"Bearer {token}"}
34
- response = robust(requests.get, retry_after=30, maximum_tries=10)(
35
- f"{tracking_uri}/health",
36
- headers=headers,
37
- timeout=60,
38
- )
39
-
40
- if response.text == "OK":
41
- return
42
-
43
- error_msg = f"Could not connect to MLflow server at {tracking_uri}. "
44
- if not token:
45
- error_msg += "The server may require authentication, did you forget to turn it on?"
46
- raise ConnectionError(error_msg)
47
-
48
-
49
- def expand_iterables(
50
- params: dict[str, Any],
51
- *,
52
- size_threshold: int | None = None,
53
- recursive: bool = True,
54
- delimiter: str = ".",
55
- ) -> dict[str, Any]:
56
- """Expand any iterable values to the form {key.i: value_i}.
57
-
58
- If expanded will also add {key.all: [value_0, value_1, ...], key.length: len([value_0, value_1, ...])}.
59
-
60
- If `size_threshold` is not None, expand the iterable only if the length of str(value) is
61
- greater than `size_threshold`.
62
-
63
- Parameters
64
- ----------
65
- params : dict[str, Any]
66
- Parameters to be expanded.
67
- size_threshold : int | None, optional
68
- Threshold of str(value) to expand iterable at.
69
- Default is None.
70
- recursive : bool, optional
71
- Expand nested dictionaries.
72
- Default is True.
73
- delimiter: str, optional
74
- Delimiter to use for keys.
75
- Default is ".".
76
-
77
- Returns
78
- -------
79
- dict[str, Any]
80
- Dictionary with all iterable values expanded.
81
-
82
- Examples
83
- --------
84
- >>> expand_iterables({'a': ['a', 'b', 'c']})
85
- {'a.0': 'a', 'a.1': 'b', 'a.2': 'c', 'a.all': ['a', 'b', 'c'], 'a.length': 3}
86
- >>> expand_iterables({'a': {'b': ['a', 'b', 'c']}})
87
- {'a': {'b.0': 'a', 'b.1': 'b', 'b.2': 'c', 'b.all': ['a', 'b', 'c'], 'b.length': 3}}
88
- >>> expand_iterables({'a': ['a', 'b', 'c']}, size_threshold=100)
89
- {'a': ['a', 'b', 'c']}
90
- >>> expand_iterables({'a': [[0,1,2], 'b', 'c']})
91
- {'a.0': {0: 0, 1: 1, 2: 2}, 'a.1': 'b', 'a.2': 'c', 'a.all': [[0, 1, 2], 'b', 'c'], 'a.length': 3}
92
- """
93
-
94
- def should_be_expanded(x: Any) -> bool:
95
- return size_threshold is None or len(str(x)) > size_threshold
96
-
97
- nested_func = functools.partial(expand_iterables, size_threshold=size_threshold, recursive=recursive)
98
-
99
- def expand(val: dict | list) -> dict[str, Any]:
100
- if not recursive:
101
- return val
102
- if isinstance(val, dict):
103
- return nested_func(val)
104
- if isinstance(val, list):
105
- return nested_func(dict(enumerate(val)))
106
- return val
107
-
108
- expanded_params = {}
109
-
110
- for key, value in params.items():
111
- if isinstance(value, (list, tuple)):
112
- if should_be_expanded(value):
113
- for i, v in enumerate(value):
114
- expanded_params[f"{key}{delimiter}{i}"] = expand(v)
115
-
116
- expanded_params[f"{key}{delimiter}all"] = value
117
- expanded_params[f"{key}{delimiter}length"] = len(value)
118
- else:
119
- expanded_params[key] = value
120
- else:
121
- expanded_params[key] = expand(value)
122
- return expanded_params
123
-
124
-
125
- def clean_config_params(params: dict[str, Any]) -> dict[str, Any]:
126
- """Clean up params to avoid issues with mlflow.
127
-
128
- Too many logged params will make the server take longer to render the
129
- experiment.
130
-
131
- Parameters
132
- ----------
133
- params : dict[str, Any]
134
- Parameters to clean up.
135
-
136
- Returns
137
- -------
138
- dict[str, Any]
139
- Cleaned up params ready for MlFlow.
140
- """
141
- prefixes_to_remove = [
142
- "hardware",
143
- "data",
144
- "dataloader",
145
- "model",
146
- "training",
147
- "diagnostics",
148
- "graph",
149
- "metadata.config",
150
- "config.dataset.sourcesmetadata.dataset.variables_metadata",
151
- "metadata.dataset.sources",
152
- "metadata.dataset.specific",
153
- "metadata.dataset.variables_metadata",
154
- ]
155
-
156
- keys_to_remove = [key for key in params if any(key.startswith(prefix) for prefix in prefixes_to_remove)]
157
- for key in keys_to_remove:
158
- del params[key]
159
- return params
@@ -1,76 +0,0 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
2
- # This software is licensed under the terms of the Apache Licence Version 2.0
3
- # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
4
- # In applying this licence, ECMWF does not waive the privileges and immunities
5
- # granted to it by virtue of its status as an intergovernmental organisation
6
- # nor does it submit to any jurisdiction.
7
-
8
- from anemoi.utils.mlflow.utils import clean_config_params
9
- from anemoi.utils.mlflow.utils import expand_iterables
10
-
11
-
12
- def test_clean_config_params() -> None:
13
- params = {
14
- "config.dataset.format": None,
15
- "config.model.num_channels": None,
16
- "model.num_channels": None,
17
- "data.frequency": None,
18
- "diagnostics.plot": None,
19
- "hardware.num_gpus": None,
20
- "metadata.config.dataset": None,
21
- "metadata.dataset.sources/1.specific.forward.forward.attrs.variables_metadata.z_500.mars.expver": None,
22
- "metadata.dataset.specific.forward.forward.attrs.variables_metadata.z_500.mars.expver": None,
23
- "config.data.normalizer.default": None,
24
- "config.data.normalizer.std": None,
25
- "config.data.normalizer.min-max": None,
26
- "config.data.normalizer.max": None,
27
- }
28
-
29
- cleaned = clean_config_params(params)
30
- result = {
31
- "config.dataset.format": None,
32
- "config.model.num_channels": None,
33
- "config.data.normalizer.default": None,
34
- "config.data.normalizer.std": None,
35
- "config.data.normalizer.min-max": None,
36
- "config.data.normalizer.max": None,
37
- }
38
- assert cleaned == result
39
-
40
-
41
- def test_expand_iterables_single_iterable() -> None:
42
- # Test case with a single iterable
43
- dictionary = {"a": ["a", "b", "c"]}
44
- expanded = expand_iterables(dictionary)
45
- assert expanded == {"a.0": "a", "a.1": "b", "a.2": "c", "a.all": ["a", "b", "c"], "a.length": 3}
46
-
47
-
48
- def test_expand_iterables_size_threshold() -> None:
49
- # Test case with a single iterable
50
- dictionary = {"a": ["a", "b", "c"]}
51
- expanded = expand_iterables(dictionary, size_threshold=100)
52
- assert expanded == dictionary
53
-
54
-
55
- def test_expand_iterables_with_nested_dict() -> None:
56
- dictionary = {"a": {"b": ["a", "b", "c"]}}
57
- expanded = expand_iterables(dictionary)
58
- assert expanded == {"a": {"b.0": "a", "b.1": "b", "b.2": "c", "b.all": ["a", "b", "c"], "b.length": 3}}
59
-
60
-
61
- def test_expand_iterables_with_nested_dict_thresholded() -> None:
62
- dictionary = {"a": {"b": ["a", "b", "c"]}, "c": ["d"]}
63
- expanded = expand_iterables(dictionary, size_threshold=5)
64
- assert expanded == {"a": {"b.0": "a", "b.1": "b", "b.2": "c", "b.all": ["a", "b", "c"], "b.length": 3}, "c": ["d"]}
65
-
66
-
67
- def test_expand_iterables_with_nested_list() -> None:
68
- dictionary = {"a": [[0, 1, 2], "b", "c"]}
69
- expanded = expand_iterables(dictionary)
70
- assert expanded == {
71
- "a.0": {0: 0, 1: 1, 2: 2},
72
- "a.1": "b",
73
- "a.2": "c",
74
- "a.all": [[0, 1, 2], "b", "c"],
75
- "a.length": 3,
76
- }
File without changes
File without changes
File without changes
File without changes