anemoi-utils: anemoi_utils-0.3.14-py3-none-any.whl → anemoi_utils-0.3.17-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anemoi-utils might be problematic. Click here for more details.

anemoi/utils/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.3.14'
16
- __version_tuple__ = version_tuple = (0, 3, 14)
15
+ __version__ = version = '0.3.17'
16
+ __version_tuple__ = version_tuple = (0, 3, 17)
anemoi/utils/caching.py CHANGED
@@ -49,6 +49,7 @@ def cache(key, proc, collection="default", expires=None):
49
49
 
50
50
 
51
51
  class cached:
52
+ """Decorator to cache the result of a function."""
52
53
 
53
54
  def __init__(self, collection="default", expires=None):
54
55
  self.collection = collection
anemoi/utils/checkpoints.py CHANGED
@@ -47,7 +47,7 @@ def has_metadata(path: str, name: str = DEFAULT_NAME) -> bool:
47
47
  return False
48
48
 
49
49
 
50
- def load_metadata(path: str, name: str = DEFAULT_NAME):
50
+ def load_metadata(path: str, name: str = DEFAULT_NAME) -> dict:
51
51
  """Load metadata from a checkpoint file
52
52
 
53
53
  Parameters
@@ -59,8 +59,8 @@ def load_metadata(path: str, name: str = DEFAULT_NAME):
59
59
 
60
60
  Returns
61
61
  -------
62
- JSON
63
- The content of the metadata file
62
+ dict
63
+ The content of the metadata file from JSON
64
64
 
65
65
  Raises
66
66
  ------
@@ -82,7 +82,7 @@ def load_metadata(path: str, name: str = DEFAULT_NAME):
82
82
  raise ValueError(f"Could not find '{name}' in {path}.")
83
83
 
84
84
 
85
- def save_metadata(path, metadata, name=DEFAULT_NAME, folder=DEFAULT_FOLDER):
85
+ def save_metadata(path, metadata, name=DEFAULT_NAME, folder=DEFAULT_FOLDER) -> None:
86
86
  """Save metadata to a checkpoint file
87
87
 
88
88
  Parameters
@@ -93,6 +93,8 @@ def save_metadata(path, metadata, name=DEFAULT_NAME, folder=DEFAULT_FOLDER):
93
93
  A JSON serializable object
94
94
  name : str, optional
95
95
  The name of the metadata file in the zip archive
96
+ folder : str, optional
97
+ The folder where the metadata file will be saved
96
98
  """
97
99
  with zipfile.ZipFile(path, "a") as zipf:
98
100
 
anemoi/utils/cli.py CHANGED
@@ -55,6 +55,8 @@ def make_parser(description, commands):
55
55
 
56
56
 
57
57
  class Failed(Command):
58
+ """Command not available."""
59
+
58
60
  def __init__(self, name, error):
59
61
  self.name = name
60
62
  self.error = error
anemoi/utils/config.py CHANGED
@@ -5,6 +5,7 @@
5
5
  # granted to it by virtue of its status as an intergovernmental organisation
6
6
  # nor does it submit to any jurisdiction.
7
7
 
8
+ from __future__ import annotations
8
9
 
9
10
  import json
10
11
  import logging
@@ -175,9 +176,14 @@ def config_path(name="settings.toml"):
175
176
  return full
176
177
 
177
178
 
178
- def load_any_dict_format(path):
179
+ def load_any_dict_format(path) -> dict:
179
180
  """Load a configuration file in any supported format: JSON, YAML and TOML.
180
181
 
182
+ Parameters
183
+ ----------
184
+ path : str
185
+ The path to the configuration file.
186
+
181
187
  Returns
182
188
  -------
183
189
  dict
@@ -243,7 +249,7 @@ def _load_config(name="settings.toml", secrets=None, defaults=None):
243
249
  return CONFIG[key]
244
250
 
245
251
 
246
- def _save_config(name, data):
252
+ def _save_config(name, data) -> None:
247
253
  CONFIG.pop(name, None)
248
254
 
249
255
  conf = config_path(name)
@@ -265,7 +271,7 @@ def _save_config(name, data):
265
271
  f.write(data)
266
272
 
267
273
 
268
- def save_config(name, data):
274
+ def save_config(name, data) -> None:
269
275
  """Save a configuration file.
270
276
 
271
277
  Parameters
@@ -281,13 +287,17 @@ def save_config(name, data):
281
287
  _save_config(name, data)
282
288
 
283
289
 
284
- def load_config(name="settings.toml", secrets=None, defaults=None):
290
+ def load_config(name="settings.toml", secrets=None, defaults=None) -> DotDict | str:
285
291
  """Read a configuration file.
286
292
 
287
293
  Parameters
288
294
  ----------
289
295
  name : str, optional
290
296
  The name of the config file to read, by default "settings.toml"
297
+ secrets : str or list, optional
298
+ The name of the secrets file, by default None
299
+ defaults : str or dict, optional
300
+ The name of the defaults file, by default None
291
301
 
292
302
  Returns
293
303
  -------
@@ -299,7 +309,7 @@ def load_config(name="settings.toml", secrets=None, defaults=None):
299
309
  return _load_config(name, secrets, defaults)
300
310
 
301
311
 
302
- def load_raw_config(name, default=None):
312
+ def load_raw_config(name, default=None) -> DotDict | str:
303
313
 
304
314
  path = config_path(name)
305
315
  if os.path.exists(path):
@@ -308,13 +318,17 @@ def load_raw_config(name, default=None):
308
318
  return default
309
319
 
310
320
 
311
- def check_config_mode(name="settings.toml", secrets_name=None, secrets=None):
321
+ def check_config_mode(name="settings.toml", secrets_name=None, secrets=None) -> None:
312
322
  """Check that a configuration file is secure.
313
323
 
314
324
  Parameters
315
325
  ----------
316
326
  name : str, optional
317
327
  The name of the configuration file, by default "settings.toml"
328
+ secrets_name : str, optional
329
+ The name of the secrets file, by default None
330
+ secrets : list, optional
331
+ The list of secrets to check, by default None
318
332
 
319
333
  Raises
320
334
  ------
anemoi/utils/dates.py CHANGED
@@ -10,9 +10,7 @@ import calendar
10
10
  import datetime
11
11
  import re
12
12
 
13
- import isodate
14
-
15
- from .hindcasts import HindcastDatesTimes
13
+ import aniso8601
16
14
 
17
15
 
18
16
  def normalise_frequency(frequency):
@@ -25,7 +23,7 @@ def normalise_frequency(frequency):
25
23
  return {"h": v, "d": v * 24}[unit]
26
24
 
27
25
 
28
- def no_time_zone(date):
26
+ def _no_time_zone(date) -> datetime.datetime:
29
27
  """Remove time zone information from a date.
30
28
 
31
29
  Parameters
@@ -43,13 +41,15 @@ def no_time_zone(date):
43
41
 
44
42
 
45
43
  # this function is use in anemoi-datasets
46
- def as_datetime(date):
44
+ def as_datetime(date, keep_time_zone=False) -> datetime.datetime:
47
45
  """Convert a date to a datetime object, removing any time zone information.
48
46
 
49
47
  Parameters
50
48
  ----------
51
49
  date : datetime.date or datetime.datetime or str
52
50
  The date to convert.
51
+ keep_time_zone : bool, optional
52
+ If True, the time zone information is kept, by default False.
53
53
 
54
54
  Returns
55
55
  -------
@@ -57,92 +57,82 @@ def as_datetime(date):
57
57
  The datetime object.
58
58
  """
59
59
 
60
+ tidy = _no_time_zone if not keep_time_zone else lambda x: x
61
+
60
62
  if isinstance(date, datetime.datetime):
61
- return no_time_zone(date)
63
+ return tidy(date)
62
64
 
63
65
  if isinstance(date, datetime.date):
64
- return no_time_zone(datetime.datetime(date.year, date.month, date.day))
66
+ return tidy(datetime.datetime(date.year, date.month, date.day))
65
67
 
66
68
  if isinstance(date, str):
67
- return no_time_zone(datetime.datetime.fromisoformat(date))
69
+ return tidy(datetime.datetime.fromisoformat(date))
68
70
 
69
71
  raise ValueError(f"Invalid date type: {type(date)}")
70
72
 
71
73
 
72
- def _compress_dates(dates):
73
- dates = sorted(dates)
74
- if len(dates) < 3:
75
- yield dates
76
- return
77
-
78
- prev = first = dates.pop(0)
79
- curr = dates.pop(0)
80
- delta = curr - prev
81
- while curr - prev == delta:
82
- prev = curr
83
- if not dates:
84
- break
85
- curr = dates.pop(0)
86
-
87
- yield (first, prev, delta)
88
- if dates:
89
- yield from _compress_dates([curr] + dates)
90
-
91
-
92
- def compress_dates(dates):
93
- dates = [as_datetime(_) for _ in dates]
94
- result = []
74
+ def _as_datetime_list(date, default_increment):
75
+ if isinstance(date, (list, tuple)):
76
+ for d in date:
77
+ yield from _as_datetime_list(d, default_increment)
95
78
 
96
- for n in _compress_dates(dates):
97
- if isinstance(n, list):
98
- result.extend([str(_) for _ in n])
99
- else:
100
- result.append(" ".join([str(n[0]), "to", str(n[1]), "by", str(n[2])]))
101
-
102
- return result
79
+ if isinstance(date, str):
80
+ # Check for ISO format
81
+ try:
82
+ start, end = aniso8601.parse_interval(date)
83
+ while start <= end:
84
+ yield as_datetime(start)
85
+ start += default_increment
103
86
 
87
+ return
104
88
 
105
- def print_dates(dates):
106
- print(compress_dates(dates))
89
+ except aniso8601.exceptions.ISOFormatError:
90
+ pass
107
91
 
92
+ try:
93
+ intervals = aniso8601.parse_repeating_interval(date)
94
+ for date in intervals:
95
+ yield as_datetime(date)
96
+ return
97
+ except aniso8601.exceptions.ISOFormatError:
98
+ pass
108
99
 
109
- def frequency_to_string(frequency):
110
- # TODO: use iso8601
111
- frequency = frequency_to_timedelta(frequency)
100
+ yield as_datetime(date)
112
101
 
113
- total_seconds = frequency.total_seconds()
114
- assert int(total_seconds) == total_seconds, total_seconds
115
- total_seconds = int(total_seconds)
116
102
 
117
- seconds = total_seconds
118
-
119
- days = seconds // (24 * 3600)
120
- seconds %= 24 * 3600
121
- hours = seconds // 3600
122
- seconds %= 3600
123
- minutes = seconds // 60
124
- seconds %= 60
125
-
126
- if days > 0 and hours == 0 and minutes == 0 and seconds == 0:
127
- return f"{days}d"
103
+ def as_datetime_list(date, default_increment=1):
104
+ default_increment = frequency_to_timedelta(default_increment)
105
+ return list(_as_datetime_list(date, default_increment))
128
106
 
129
- if days == 0 and hours > 0 and minutes == 0 and seconds == 0:
130
- return f"{hours}h"
131
107
 
132
- if days == 0 and hours == 0 and minutes > 0 and seconds == 0:
133
- return f"{minutes}m"
108
+ def frequency_to_timedelta(frequency) -> datetime.timedelta:
109
+ """Convert a frequency to a timedelta object.
134
110
 
135
- if days == 0 and hours == 0 and minutes == 0 and seconds > 0:
136
- return f"{seconds}s"
111
+ Parameters
112
+ ----------
113
+ frequency : int or str or datetime.timedelta
114
+ The frequency to convert. If an integer, it is assumed to be in hours. If a string, it can be in the format:
137
115
 
138
- if days > 0:
139
- return f"{total_seconds}s"
116
+ - "1h" for 1 hour
117
+ - "1d" for 1 day
118
+ - "1m" for 1 minute
119
+ - "1s" for 1 second
120
+ - "1:30" for 1 hour and 30 minutes
121
+ - "1:30:10" for 1 hour, 30 minutes and 10 seconds
122
+ - "PT10M" for 10 minutes (ISO8601)
140
123
 
141
- return str(frequency)
124
+ If a timedelta object is provided, it is returned as is.
142
125
 
126
+ Returns
127
+ -------
128
+ datetime.timedelta
129
+ The timedelta object.
143
130
 
144
- def frequency_to_timedelta(frequency):
145
- # TODO: use iso8601 or check pytimeparse
131
+ Raises
132
+ ------
133
+ ValueError
134
+ Exception raised if the frequency cannot be converted to a timedelta.
135
+ """
146
136
 
147
137
  if isinstance(frequency, datetime.timedelta):
148
138
  return frequency
@@ -172,17 +162,77 @@ def frequency_to_timedelta(frequency):
172
162
 
173
163
  # ISO8601
174
164
  try:
175
- return isodate.parse_duration(frequency)
176
- except isodate.isoerror.ISO8601Error:
165
+ return aniso8601.parse_duration(frequency)
166
+ except aniso8601.exceptions.ISOFormatError:
177
167
  pass
178
168
 
179
169
  raise ValueError(f"Cannot convert frequency {frequency} to timedelta")
180
170
 
181
171
 
182
- def normalize_date(x):
183
- if isinstance(x, str):
184
- return no_time_zone(datetime.datetime.fromisoformat(x))
185
- return x
172
+ def frequency_to_string(frequency) -> str:
173
+ """Convert a frequency (i.e. a datetime.timedelta) to a string.
174
+
175
+ Parameters
176
+ ----------
177
+ frequency : datetime.timedelta
178
+ The frequency to convert.
179
+
180
+ Returns
181
+ -------
182
+ str
183
+ A string representation of the frequency.
184
+ """
185
+
186
+ frequency = frequency_to_timedelta(frequency)
187
+
188
+ total_seconds = frequency.total_seconds()
189
+ assert int(total_seconds) == total_seconds, total_seconds
190
+ total_seconds = int(total_seconds)
191
+
192
+ seconds = total_seconds
193
+
194
+ days = seconds // (24 * 3600)
195
+ seconds %= 24 * 3600
196
+ hours = seconds // 3600
197
+ seconds %= 3600
198
+ minutes = seconds // 60
199
+ seconds %= 60
200
+
201
+ if days > 0 and hours == 0 and minutes == 0 and seconds == 0:
202
+ return f"{days}d"
203
+
204
+ if days == 0 and hours > 0 and minutes == 0 and seconds == 0:
205
+ return f"{hours}h"
206
+
207
+ if days == 0 and hours == 0 and minutes > 0 and seconds == 0:
208
+ return f"{minutes}m"
209
+
210
+ if days == 0 and hours == 0 and minutes == 0 and seconds > 0:
211
+ return f"{seconds}s"
212
+
213
+ if days > 0:
214
+ return f"{total_seconds}s"
215
+
216
+ return str(frequency)
217
+
218
+
219
+ def frequency_to_seconds(frequency) -> int:
220
+ """Convert a frequency to seconds.
221
+
222
+ Parameters
223
+ ----------
224
+ frequency : _type_
225
+ _description_
226
+
227
+ Returns
228
+ -------
229
+ int
230
+ Number of seconds.
231
+ """
232
+
233
+ result = frequency_to_timedelta(frequency).total_seconds()
234
+ assert int(result) == result, result
235
+ return int(result)
186
236
 
187
237
 
188
238
  DOW = {
@@ -261,7 +311,7 @@ class DateTimes:
261
311
  """
262
312
  self.start = as_datetime(start)
263
313
  self.end = as_datetime(end)
264
- self.increment = datetime.timedelta(hours=increment)
314
+ self.increment = frequency_to_timedelta(increment)
265
315
  self.day_of_month = _make_day(day_of_month)
266
316
  self.day_of_week = _make_week(day_of_week)
267
317
  self.calendar_months = _make_months(calendar_months)
@@ -354,6 +404,8 @@ class Autumn(DateTimes):
354
404
 
355
405
 
356
406
  class ConcatDateTimes:
407
+ """ConcatDateTimes is an iterator that generates datetime objects from a list of dates."""
408
+
357
409
  def __init__(self, *dates):
358
410
  if len(dates) == 1 and isinstance(dates[0], list):
359
411
  dates = dates[0]
@@ -366,6 +418,8 @@ class ConcatDateTimes:
366
418
 
367
419
 
368
420
  class EnumDateTimes:
421
+ """EnumDateTimes is an iterator that generates datetime objects from a list of dates."""
422
+
369
423
  def __init__(self, dates):
370
424
  self.dates = dates
371
425
 
@@ -385,6 +439,8 @@ def datetimes_factory(*args, **kwargs):
385
439
  name = kwargs.get("name")
386
440
 
387
441
  if name == "hindcast":
442
+ from .hindcasts import HindcastDatesTimes
443
+
388
444
  reference_dates = kwargs["reference_dates"]
389
445
  reference_dates = datetimes_factory(reference_dates)
390
446
  years = kwargs["years"]
@@ -392,8 +448,7 @@ def datetimes_factory(*args, **kwargs):
392
448
 
393
449
  kwargs = kwargs.copy()
394
450
  if "frequency" in kwargs:
395
- freq = kwargs.pop("frequency")
396
- kwargs["increment"] = normalise_frequency(freq)
451
+ kwargs["increment"] = kwargs.pop("frequency")
397
452
  return DateTimes(**kwargs)
398
453
 
399
454
  if not any((isinstance(x, dict) or isinstance(x, list)) for x in args):
@@ -409,3 +464,8 @@ def datetimes_factory(*args, **kwargs):
409
464
  return datetimes_factory(*a)
410
465
 
411
466
  return ConcatDateTimes(*[datetimes_factory(a) for a in args])
467
+
468
+
469
+ if __name__ == "__main__":
470
+ print(as_datetime_list("R10/2023-01-01T00:00:00Z/P1D"))
471
+ print(as_datetime_list("2007-03-01T13:00:00/2008-05-11T15:30:00", "200h"))
anemoi/utils/grib.py CHANGED
@@ -95,8 +95,8 @@ def units(param) -> str:
95
95
 
96
96
  Parameters
97
97
  ----------
98
- paramid : int or str
99
- Parameter id ir name.
98
+ param : int or str
99
+ Parameter id or name.
100
100
 
101
101
  Returns
102
102
  -------
@@ -112,7 +112,7 @@ def units(param) -> str:
112
112
  return _units()[unit_id]
113
113
 
114
114
 
115
- def must_be_positive(param):
115
+ def must_be_positive(param) -> bool:
116
116
  """Check if a parameter must be positive.
117
117
 
118
118
  Parameters
anemoi/utils/humanize.py CHANGED
@@ -15,6 +15,8 @@ import re
15
15
  import warnings
16
16
  from collections import defaultdict
17
17
 
18
+ from anemoi.utils.dates import as_datetime
19
+
18
20
 
19
21
  def bytes_to_human(n: float) -> str:
20
22
  """Convert a number of bytes to a human readable string
@@ -215,7 +217,7 @@ def __(n):
215
217
  return "th"
216
218
 
217
219
 
218
- def when(then, now=None, short=True, use_utc=False):
220
+ def when(then, now=None, short=True, use_utc=False) -> str:
219
221
  """Generate a human readable string for a date, relative to now
220
222
 
221
223
  >>> when(datetime.datetime.now() - datetime.timedelta(hours=2))
@@ -241,6 +243,8 @@ def when(then, now=None, short=True, use_utc=False):
241
243
  The reference date, by default NOW
242
244
  short : bool, optional
243
245
  Genererate shorter strings, by default True
246
+ use_utc : bool, optional
247
+ Use UTC time, by default False
244
248
 
245
249
  Returns
246
250
  -------
@@ -364,7 +368,7 @@ def string_distance(s, t):
364
368
  return d[m, n]
365
369
 
366
370
 
367
- def did_you_mean(word, vocabulary):
371
+ def did_you_mean(word, vocabulary) -> str:
368
372
  """Pick the closest word in a vocabulary
369
373
 
370
374
  >>> did_you_mean("aple", ["banana", "lemon", "apple", "orange"])
@@ -393,7 +397,7 @@ def dict_to_human(query):
393
397
  return list_to_human(lst)
394
398
 
395
399
 
396
- def list_to_human(lst, conjunction="and"):
400
+ def list_to_human(lst, conjunction="and") -> str:
397
401
  """Convert a list of strings to a human readable string
398
402
 
399
403
  >>> list_to_human(["banana", "lemon", "apple", "orange"])
@@ -408,8 +412,8 @@ def list_to_human(lst, conjunction="and"):
408
412
 
409
413
  Returns
410
414
  -------
411
- _type_
412
- _description_
415
+ str
416
+ Human readable string of list
413
417
  """
414
418
  if not lst:
415
419
  return "??"
@@ -548,19 +552,21 @@ def rounded_datetime(d):
548
552
  return d
549
553
 
550
554
 
551
- def json_pretty_dump(obj, max_line_length=120, default=str):
555
+ def json_pretty_dump(obj, max_line_length=120, default=str) -> str:
552
556
  """Custom JSON dump function that keeps dicts and lists on one line if they are short enough.
553
557
 
554
558
  Parameters
555
559
  ----------
556
- obj
560
+ obj : Any
557
561
  The object to be dumped as JSON.
558
- max_line_length
559
- Maximum allowed line length for pretty-printing.
562
+ max_line_length : int, optional
563
+ Maximum allowed line length for pretty-printing. Default is 120.
564
+ default : function, optional
565
+ Default function to convert non-serializable objects. Default is str.
560
566
 
561
567
  Returns
562
568
  -------
563
- unknown
569
+ str
564
570
  JSON string.
565
571
  """
566
572
 
@@ -569,14 +575,14 @@ def json_pretty_dump(obj, max_line_length=120, default=str):
569
575
 
570
576
  Parameters
571
577
  ----------
572
- obj
578
+ obj : Any
573
579
  The object to format.
574
- indent_level
575
- Current indentation level.
580
+ indent_level : int, optional
581
+ Current indentation level. Default is 0.
576
582
 
577
583
  Returns
578
584
  -------
579
- unknown
585
+ str
580
586
  Formatted JSON string.
581
587
  """
582
588
  indent = " " * 4 * indent_level
@@ -602,15 +608,15 @@ def json_pretty_dump(obj, max_line_length=120, default=str):
602
608
  return _format_json(obj)
603
609
 
604
610
 
605
- def shorten_list(lst, max_length=5):
611
+ def shorten_list(lst, max_length=5) -> list:
606
612
  """Shorten a list to a maximum length.
607
613
 
608
614
  Parameters
609
615
  ----------
610
- lst
616
+ lst : list
611
617
  The list to be shortened.
612
- max_length
613
- Maximum length of the shortened list.
618
+ max_length : int, optional
619
+ Maximum length of the shortened list. Default is 5.
614
620
 
615
621
  Returns
616
622
  -------
@@ -625,3 +631,60 @@ def shorten_list(lst, max_length=5):
625
631
  if isinstance(lst, tuple):
626
632
  return tuple(result)
627
633
  return result
634
+
635
+
636
+ def _compress_dates(dates):
637
+ dates = sorted(dates)
638
+ if len(dates) < 3:
639
+ yield dates
640
+ return
641
+
642
+ prev = first = dates.pop(0)
643
+ curr = dates.pop(0)
644
+ delta = curr - prev
645
+ while curr - prev == delta:
646
+ prev = curr
647
+ if not dates:
648
+ break
649
+ curr = dates.pop(0)
650
+
651
+ yield (first, prev, delta)
652
+ if dates:
653
+ yield from _compress_dates([curr] + dates)
654
+
655
+
656
+ def compress_dates(dates) -> str:
657
+ """Compress a list of dates into a human-readable format.
658
+
659
+ Parameters
660
+ ----------
661
+ dates : list
662
+ A list of dates, as datetime objects or strings.
663
+
664
+ Returns
665
+ -------
666
+ str
667
+ A human-readable string representing the compressed dates.
668
+ """
669
+
670
+ dates = [as_datetime(_) for _ in dates]
671
+ result = []
672
+
673
+ for n in _compress_dates(dates):
674
+ if isinstance(n, list):
675
+ result.extend([str(_) for _ in n])
676
+ else:
677
+ result.append(" ".join([str(n[0]), "to", str(n[1]), "by", str(n[2])]))
678
+
679
+ return result
680
+
681
+
682
+ def print_dates(dates) -> None:
683
+ """Print a list of dates in a human-readable format.
684
+
685
+ Parameters
686
+ ----------
687
+ dates : list
688
+ A list of dates, as datetime objects or strings.
689
+ """
690
+ print(compress_dates(dates))
anemoi/utils/provenance.py CHANGED
@@ -199,7 +199,7 @@ def _paths(path_or_object):
199
199
  return paths
200
200
 
201
201
 
202
- def git_check(*args):
202
+ def git_check(*args) -> dict:
203
203
  """Return the git information for the given arguments.
204
204
 
205
205
  Arguments can be:
@@ -209,6 +209,11 @@ def git_check(*args):
209
209
  - an object or a class
210
210
  - a path to a directory
211
211
 
212
+ Parameters
213
+ ----------
214
+ args : list
215
+ The list of arguments to check
216
+
212
217
  Returns
213
218
  -------
214
219
  dict
anemoi/utils/s3.py CHANGED
@@ -321,7 +321,7 @@ class Download(Transfer):
321
321
  return size
322
322
 
323
323
 
324
- def upload(source, target, *, overwrite=False, resume=False, verbosity=1, progress=None, threads=1):
324
+ def upload(source, target, *, overwrite=False, resume=False, verbosity=1, progress=None, threads=1) -> None:
325
325
  """Upload a file or a folder to S3.
326
326
 
327
327
  Parameters
@@ -335,6 +335,8 @@ def upload(source, target, *, overwrite=False, resume=False, verbosity=1, progre
335
335
  resume : bool, optional
336
336
  If the data is alreay on S3 it will not be uploaded, unless the remote file
337
337
  has a different size, by default False
338
+ verbosity : int, optional
339
+ The level of verbosity, by default 1
338
340
  progress: callable, optional
339
341
  A callable that will be called with the number of files, the total size of the files, the total size
340
342
  transferred and a boolean indicating if the transfer has started. By default None
@@ -365,7 +367,7 @@ def upload(source, target, *, overwrite=False, resume=False, verbosity=1, progre
365
367
  )
366
368
 
367
369
 
368
- def download(source, target, *, overwrite=False, resume=False, verbosity=1, progress=None, threads=1):
370
+ def download(source, target, *, overwrite=False, resume=False, verbosity=1, progress=None, threads=1) -> None:
369
371
  """Download a file or a folder from S3.
370
372
 
371
373
  Parameters
@@ -381,6 +383,8 @@ def download(source, target, *, overwrite=False, resume=False, verbosity=1, prog
381
383
  resume : bool, optional
382
384
  If the data is alreay on local it will not be downloaded, unless the remote file
383
385
  has a different size, by default False
386
+ verbosity : int, optional
387
+ The level of verbosity, by default 1
384
388
  progress: callable, optional
385
389
  A callable that will be called with the number of files, the total size of the files, the total size
386
390
  transferred and a boolean indicating if the transfer has started. By default None
@@ -427,7 +431,7 @@ def _list_objects(target, batch=False):
427
431
  yield from objects
428
432
 
429
433
 
430
- def _delete_folder(target):
434
+ def _delete_folder(target) -> None:
431
435
  _, _, bucket, _ = target.split("/", 3)
432
436
  s3 = s3_client(bucket)
433
437
 
@@ -439,7 +443,7 @@ def _delete_folder(target):
439
443
  LOGGER.info(f"Deleted {len(batch):,} objects (total={total:,})")
440
444
 
441
445
 
442
- def _delete_file(target):
446
+ def _delete_file(target) -> None:
443
447
  from botocore.exceptions import ClientError
444
448
 
445
449
  _, _, bucket, key = target.split("/", 3)
@@ -462,7 +466,7 @@ def _delete_file(target):
462
466
  LOGGER.info(f"{target} is deleted")
463
467
 
464
468
 
465
- def delete(target):
469
+ def delete(target) -> None:
466
470
  """Delete a file or a folder from S3.
467
471
 
468
472
  Parameters
@@ -480,7 +484,7 @@ def delete(target):
480
484
  _delete_file(target)
481
485
 
482
486
 
483
- def list_folder(folder):
487
+ def list_folder(folder) -> list:
484
488
  """List the sub folders in a folder on S3.
485
489
 
486
490
  Parameters
@@ -508,7 +512,7 @@ def list_folder(folder):
508
512
  yield from [folder + _["Prefix"] for _ in page.get("CommonPrefixes")]
509
513
 
510
514
 
511
- def object_info(target):
515
+ def object_info(target) -> dict:
512
516
  """Get information about an object on S3.
513
517
 
514
518
  Parameters
@@ -533,7 +537,7 @@ def object_info(target):
533
537
  raise
534
538
 
535
539
 
536
- def object_acl(target):
540
+ def object_acl(target) -> dict:
537
541
  """Get information about an object's ACL on S3.
538
542
 
539
543
  Parameters
anemoi/utils/sanetize.py ADDED
@@ -0,0 +1,10 @@
1
+ # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
2
+ # This software is licensed under the terms of the Apache Licence Version 2.0
3
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
4
+ # In applying this licence, ECMWF does not waive the privileges and immunities
5
+ # granted to it by virtue of its status as an intergovernmental organisation
6
+ # nor does it submit to any jurisdiction.
7
+
8
+ from .sanetise import sanetise as sanetize
9
+
10
+ __all__ = ["sanetize"]
anemoi/utils/sanitise.py ADDED
@@ -0,0 +1,115 @@
1
+ # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
2
+ # This software is licensed under the terms of the Apache Licence Version 2.0
3
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
4
+ # In applying this licence, ECMWF does not waive the privileges and immunities
5
+ # granted to it by virtue of its status as an intergovernmental organisation
6
+ # nor does it submit to any jurisdiction.
7
+
8
+
9
+ import os
10
+ import re
11
+ from pathlib import Path
12
+ from urllib.parse import parse_qs
13
+ from urllib.parse import urlencode
14
+ from urllib.parse import urlparse
15
+ from urllib.parse import urlunparse
16
+
17
+ # Patterns used but earthkit-data for url-patterns and path-patterns
18
+
19
+ RE1 = re.compile(r"{([^}]*)}")
20
+ RE2 = re.compile(r"\(([^}]*)\)")
21
+
22
+
23
+ def sanitise(obj):
24
+ """sanitise an object:
25
+ - by replacing all full paths with shortened versions.
26
+ - by replacing URL passwords with '***'.
27
+ """
28
+
29
+ if isinstance(obj, dict):
30
+ return {sanitise(k): sanitise(v) for k, v in obj.items()}
31
+
32
+ if isinstance(obj, list):
33
+ return [sanitise(v) for v in obj]
34
+
35
+ if isinstance(obj, tuple):
36
+ return tuple(sanitise(v) for v in obj)
37
+
38
+ if isinstance(obj, str):
39
+ return _sanitise_string(obj)
40
+
41
+ return obj
42
+
43
+
44
+ def _sanitise_string(obj):
45
+
46
+ parsed = urlparse(obj, allow_fragments=True)
47
+
48
+ if parsed.scheme:
49
+ return _sanitise_url(parsed)
50
+
51
+ if obj.startswith("/") or obj.startswith("~"):
52
+ return _sanitise_path(obj)
53
+
54
+ return obj
55
+
56
+
57
+ def _sanitise_url(parsed):
58
+
59
+ LIST = [
60
+ "pass",
61
+ "password",
62
+ "token",
63
+ "user",
64
+ "key",
65
+ "pwd",
66
+ "_key",
67
+ "_token",
68
+ "apikey",
69
+ "api_key",
70
+ "api_token",
71
+ "_api_token",
72
+ "_api_key",
73
+ "username",
74
+ "login",
75
+ ]
76
+
77
+ scheme, netloc, path, params, query, fragment = parsed
78
+
79
+ if parsed.password or parsed.username:
80
+ _, host = netloc.split("@")
81
+ user = "user:***" if parsed.password else "user"
82
+ netloc = f"{user}@{host}"
83
+
84
+ if query:
85
+ qs = parse_qs(query)
86
+ for k in LIST:
87
+ if k in qs:
88
+ qs[k] = "hidden"
89
+ query = urlencode(qs, doseq=True)
90
+
91
+ if params:
92
+ qs = parse_qs(params)
93
+ for k in LIST:
94
+ if k in qs:
95
+ qs[k] = "hidden"
96
+ params = urlencode(qs, doseq=True)
97
+
98
+ return urlunparse([scheme, netloc, path, params, query, fragment])
99
+
100
+
101
+ def _sanitise_path(path):
102
+ bits = list(reversed(Path(path).parts))
103
+ result = [bits.pop(0)]
104
+ for bit in bits:
105
+ if RE1.match(bit) or RE2.match(bit):
106
+ result.append(bit)
107
+ continue
108
+ if result[-1] == "...":
109
+ continue
110
+ result.append("...")
111
+ result = os.path.join(*reversed(result))
112
+ if bits[-1] == "/":
113
+ result = os.path.join("/", result)
114
+
115
+ return result
anemoi/utils/text.py CHANGED
@@ -174,6 +174,8 @@ def green(text):
174
174
 
175
175
 
176
176
  class Tree:
177
+ """Tree data structure."""
178
+
177
179
  def __init__(self, actor, parent=None):
178
180
  self._actor = actor
179
181
  self._kids = []
@@ -308,7 +310,7 @@ class Tree:
308
310
  }
309
311
 
310
312
 
311
- def table(rows, header, align, margin=0):
313
+ def table(rows, header, align, margin=0) -> str:
312
314
  """Format a table
313
315
 
314
316
  >>> table([['Aa', 12, 5],
anemoi_utils-0.3.17.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: anemoi-utils
3
- Version: 0.3.14
3
+ Version: 0.3.17
4
4
  Summary: A package to hold various functions to support training of ML models on ECMWF data.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License: Apache License
@@ -223,26 +223,14 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
223
223
  Classifier: Programming Language :: Python :: Implementation :: PyPy
224
224
  Requires-Python: >=3.9
225
225
  License-File: LICENSE
226
- Requires-Dist: isodate
226
+ Requires-Dist: aniso8601
227
227
  Requires-Dist: pyyaml
228
228
  Requires-Dist: tomli
229
229
  Requires-Dist: tqdm
230
230
  Provides-Extra: all
231
- Requires-Dist: gitpython ; extra == 'all'
232
- Requires-Dist: nvsmi ; extra == 'all'
233
- Requires-Dist: requests ; extra == 'all'
234
- Requires-Dist: termcolor ; extra == 'all'
231
+ Requires-Dist: anemoi-utils[grib,provenance,text] ; extra == 'all'
235
232
  Provides-Extra: dev
236
- Requires-Dist: gitpython ; extra == 'dev'
237
- Requires-Dist: nbsphinx ; extra == 'dev'
238
- Requires-Dist: nvsmi ; extra == 'dev'
239
- Requires-Dist: pandoc ; extra == 'dev'
240
- Requires-Dist: pytest ; extra == 'dev'
241
- Requires-Dist: requests ; extra == 'dev'
242
- Requires-Dist: sphinx ; extra == 'dev'
243
- Requires-Dist: sphinx-argparse <0.5 ; extra == 'dev'
244
- Requires-Dist: sphinx-rtd-theme ; extra == 'dev'
245
- Requires-Dist: termcolor ; extra == 'dev'
233
+ Requires-Dist: anemoi-utils[all,docs,tests] ; extra == 'dev'
246
234
  Provides-Extra: docs
247
235
  Requires-Dist: nbsphinx ; extra == 'docs'
248
236
  Requires-Dist: pandoc ; extra == 'docs'
@@ -260,4 +248,5 @@ Provides-Extra: tests
260
248
  Requires-Dist: pytest ; extra == 'tests'
261
249
  Provides-Extra: text
262
250
  Requires-Dist: termcolor ; extra == 'text'
251
+ Requires-Dist: wcwidth ; extra == 'text'
263
252
 
@@ -0,0 +1,27 @@
1
+ anemoi/utils/__init__.py,sha256=zZZpbKIoGWwdCOuo6YSruLR7C0GzvzI1Wzhyqaa0K7M,456
2
+ anemoi/utils/__main__.py,sha256=cLA2PidDTOUHaDGzd0_E5iioKYNe-PSTv567Y2fuwQk,723
3
+ anemoi/utils/_version.py,sha256=EanR9QKHDmsyYNdsvPdG4re1cWDyd1A_td5gwFF-ouQ,413
4
+ anemoi/utils/caching.py,sha256=bCOjP1jcDbwi7BID3XaR9BbkD1k3UipkP74NbgJuEFA,1974
5
+ anemoi/utils/checkpoints.py,sha256=qWtqkQvMDlPIcfqz_GmOyUf0mmHZ6QEInpvgSJYIQjY,5275
6
+ anemoi/utils/cli.py,sha256=9TrBXkDjBsos2d0z6wdFDRZIjft3HWGutSgAFi0zCK4,3712
7
+ anemoi/utils/config.py,sha256=fHM6kENZhdD350WaEDekSdH_Fs0diovj-nPuv_a7bko,9408
8
+ anemoi/utils/dates.py,sha256=dgGbTqpGOpYDGgWfXL_69HutXTCHDFI2DhvG-9I9WQI,12341
9
+ anemoi/utils/grib.py,sha256=mrk1drJm2jaPYERQX45QfX2MP4eUqRv0J-Y8IRSRTRE,3073
10
+ anemoi/utils/hindcasts.py,sha256=X8k-81ltmkTDHdviY0SJgvMg7XDu07xoc5ALlUxyPoo,1453
11
+ anemoi/utils/humanize.py,sha256=-xQraQWMLwNaLQAWfPi4K05qieQLgkiyYmV6bfhr10U,16611
12
+ anemoi/utils/provenance.py,sha256=EtlQrubTpEGkTG2UpoRlNGGjKMDcJ-9H3j96vedGyaI,9673
13
+ anemoi/utils/s3.py,sha256=MuY-PrHpt6iKM2RK7v74YoCdqvVJ8UjBDJh0wxUR9Co,18720
14
+ anemoi/utils/sanetize.py,sha256=cQ4r52bQuegMqsEzCN2flXYk_0y_amwpEhMx2-G78j4,488
15
+ anemoi/utils/sanitise.py,sha256=VKIUiwm0EHPdkFUR6FkAxe94933yQx2obQtN6YROH5M,2862
16
+ anemoi/utils/text.py,sha256=WvxkRlPpmK7HLOpWZmyqPQG29GMF8IFaCJp7frfIWNI,10436
17
+ anemoi/utils/timer.py,sha256=EQcucuwUaGeSpt2S1APJlwSOu6kC47MK9f4h-r8c_AY,990
18
+ anemoi/utils/commands/__init__.py,sha256=qAybFZPBBQs0dyx7dZ3X5JsLpE90pwrqt1vSV7cqEIw,706
19
+ anemoi/utils/commands/config.py,sha256=KEffXZh0ZQfn8t6LXresfd94kDY0gEyulx9Wto5ttW0,824
20
+ anemoi/utils/mars/__init__.py,sha256=RAeY8gJ7ZvsPlcIvrQ4fy9xVHs3SphTAPw_XJDtNIKo,1750
21
+ anemoi/utils/mars/mars.yaml,sha256=R0dujp75lLA4wCWhPeOQnzJ45WZAYLT8gpx509cBFlc,66
22
+ anemoi_utils-0.3.17.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
+ anemoi_utils-0.3.17.dist-info/METADATA,sha256=saT-_cFT11QQw7MU-ogdaBT1U_VKl_00zHqo5QRPVF4,15055
24
+ anemoi_utils-0.3.17.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
25
+ anemoi_utils-0.3.17.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
26
+ anemoi_utils-0.3.17.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
27
+ anemoi_utils-0.3.17.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (73.0.1)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,25 +0,0 @@
1
- anemoi/utils/__init__.py,sha256=zZZpbKIoGWwdCOuo6YSruLR7C0GzvzI1Wzhyqaa0K7M,456
2
- anemoi/utils/__main__.py,sha256=cLA2PidDTOUHaDGzd0_E5iioKYNe-PSTv567Y2fuwQk,723
3
- anemoi/utils/_version.py,sha256=uXvaQV0x-j_stHaI7VjSOLUNVrd36A2OO0ftk9b0Dmw,413
4
- anemoi/utils/caching.py,sha256=EZ4bRG72aTvTxzrbYCgjFpdIn8OtA1rzoRmGg8caWsI,1919
5
- anemoi/utils/checkpoints.py,sha256=1_3mg4B-ykTVfIvIUEv7IxGyREx_ZcilVbB3U-V6O6I,5165
6
- anemoi/utils/cli.py,sha256=SWb5_itARlDCq6yEf-VvagTioSW2phKXXFMW2ihXu18,3678
7
- anemoi/utils/config.py,sha256=s8eqlHsuak058_NJXGMOoT2HenwiZJKcZ9plUWvO7tw,8865
8
- anemoi/utils/dates.py,sha256=yDIV8hYakNaC9iyYD9WRTNClFO6ZKyM8JKCRUiFJEv8,10413
9
- anemoi/utils/grib.py,sha256=gVfo4KYQv31iRyoqRDwk5tiqZDUgOIvhag_kO0qjYD0,3067
10
- anemoi/utils/hindcasts.py,sha256=X8k-81ltmkTDHdviY0SJgvMg7XDu07xoc5ALlUxyPoo,1453
11
- anemoi/utils/humanize.py,sha256=-MRGgGG_STTWzIagL4RQnOAooqAIigCcuN-RZ6CK75M,14982
12
- anemoi/utils/provenance.py,sha256=NL36lM_aCw3fG6VIAouZCRBAJv8a6M3x9cScrFxCMcA,9579
13
- anemoi/utils/s3.py,sha256=kDzbs4nVD2lQuppSe88NVSNpy0wSZpuzkmcAgN2irkU,18506
14
- anemoi/utils/text.py,sha256=5HBqNwhifus4d3OUnod5q1VgCBdEpzE7o0IR0S85knw,10397
15
- anemoi/utils/timer.py,sha256=EQcucuwUaGeSpt2S1APJlwSOu6kC47MK9f4h-r8c_AY,990
16
- anemoi/utils/commands/__init__.py,sha256=qAybFZPBBQs0dyx7dZ3X5JsLpE90pwrqt1vSV7cqEIw,706
17
- anemoi/utils/commands/config.py,sha256=KEffXZh0ZQfn8t6LXresfd94kDY0gEyulx9Wto5ttW0,824
18
- anemoi/utils/mars/__init__.py,sha256=RAeY8gJ7ZvsPlcIvrQ4fy9xVHs3SphTAPw_XJDtNIKo,1750
19
- anemoi/utils/mars/mars.yaml,sha256=R0dujp75lLA4wCWhPeOQnzJ45WZAYLT8gpx509cBFlc,66
20
- anemoi_utils-0.3.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
21
- anemoi_utils-0.3.14.dist-info/METADATA,sha256=CRVUi4_pMo2st6kwOUgQ0jyEFwl67ZcIHpDo7a4y2NE,15470
22
- anemoi_utils-0.3.14.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
23
- anemoi_utils-0.3.14.dist-info/entry_points.txt,sha256=LENOkn88xzFQo-V59AKoA_F_cfYQTJYtrNTtf37YgHY,60
24
- anemoi_utils-0.3.14.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
25
- anemoi_utils-0.3.14.dist-info/RECORD,,