anemoi-utils 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of anemoi-utils might be problematic. Click here for more details.
- anemoi/utils/__init__.py +9 -0
- anemoi/utils/_version.py +16 -0
- anemoi/utils/checkpoints.py +76 -0
- anemoi/utils/config.py +94 -0
- anemoi/utils/dates.py +248 -0
- anemoi/utils/grib.py +73 -0
- anemoi/utils/humanize.py +474 -0
- anemoi/utils/provenance.py +353 -0
- anemoi/utils/text.py +345 -0
- anemoi_utils-0.1.6.dist-info/LICENSE +201 -0
- anemoi_utils-0.1.6.dist-info/METADATA +253 -0
- anemoi_utils-0.1.6.dist-info/RECORD +14 -0
- anemoi_utils-0.1.6.dist-info/WHEEL +5 -0
- anemoi_utils-0.1.6.dist-info/top_level.txt +1 -0
anemoi/utils/humanize.py
ADDED
|
@@ -0,0 +1,474 @@
|
|
|
1
|
+
# (C) Copyright 2020 ECMWF.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
6
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
7
|
+
# nor does it submit to any jurisdiction.
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
"""Generate human readable strings"""
|
|
11
|
+
|
|
12
|
+
import datetime
|
|
13
|
+
import re
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def bytes(n: float) -> str:
    """Convert a number of bytes to a human readable string

    >>> bytes(4096)
    '4 KiB'

    >>> bytes(4000)
    '3.9 KiB'

    >>> bytes(-4096)
    '-4 KiB'

    Parameters
    ----------
    n : float
        the number of bytes

    Returns
    -------
    str
        a human readable string
    """

    if n < 0:
        sign = "-"
        # Work on the magnitude; the sign is re-attached when formatting.
        n = -n
    else:
        sign = ""

    u = ["", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB", " YiB"]
    i = 0
    # Stop at the largest known unit rather than indexing past the table.
    while n >= 1024 and i < len(u) - 1:
        n /= 1024.0
        i += 1
    # Round half up to one decimal place; %g drops a trailing ".0".
    return "%s%g%s" % (sign, int(n * 10 + 0.5) / 10.0, u[i])
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def base2(n) -> str:
    """Convert a number to a short string using powers of 1024

    >>> base2(1536)
    '1.5K'

    Parameters
    ----------
    n : float
        the number to convert

    Returns
    -------
    str
        the number scaled by powers of 1024, rounded to one decimal place,
        followed by a one-letter suffix (no suffix below 1024)
    """

    u = ["", "K", "M", "G", "T", "P", "E", "Z", "Y"]
    i = 0
    # Stop at the largest known suffix rather than indexing past the table.
    while n >= 1024 and i < len(u) - 1:
        n /= 1024.0
        i += 1
    # Round half up to one decimal place; %g drops a trailing ".0".
    return "%g%s" % (int(n * 10 + 0.5) / 10.0, u[i])
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# (length in seconds, singular unit name) pairs, ordered from longest to shortest.
PERIODS = (
    (604800, "week"),
    (86400, "day"),
    (3600, "hour"),
    (60, "minute"),
    (1, "second"),
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _plural(count):
    """Return "s" when ``count`` is greater than one, otherwise an empty string."""
    return "s" if count > 1 else ""
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def seconds(seconds: float) -> str:
    """Convert a number of seconds to a human readable string

    >>> seconds(4000)
    '1 hour 6 minutes 40 seconds'

    >>> seconds(-90)
    '-1 minute 30 seconds'

    Parameters
    ----------
    seconds : float
        The number of seconds. A ``datetime.timedelta`` is also accepted and
        converted with ``total_seconds()``.

    Returns
    -------
    str
        A human readable string. Zero gives ``"instantaneous"``; negative
        durations are formatted from their magnitude and prefixed with ``-``.

    """
    if isinstance(seconds, datetime.timedelta):
        seconds = seconds.total_seconds()

    if seconds == 0:
        return "instantaneous"

    # Format the magnitude only. Without this, a negative value would enter
    # the sub-second branch below and be scaled all the way down the table.
    if seconds < 0:
        sign = "-"
        seconds = -seconds
    else:
        sign = ""

    if seconds < 0.1:
        units = [
            None,
            "milli",
            "micro",
            "nano",
            "pico",
            "femto",
            "atto",
            "zepto",
            "yocto",
        ]
        i = 0
        # Scale up by 1000 until the value reaches 1 or the prefixes run out.
        while seconds < 1.0 and i < len(units) - 1:
            seconds *= 1000
            i += 1
        # Prefer e.g. "0.5 milli" over "500 micro".
        if seconds > 100 and i > 0:
            seconds /= 1000
            i -= 1
        seconds = round(seconds * 10) / 10
        return f"{sign}{seconds:g} {units[i]}second{_plural(seconds)}"

    n = seconds
    s = []
    # Peel off whole weeks, days, hours, minutes and seconds in turn.
    for p in PERIODS:
        m = int(n / p[0])
        if m:
            s.append("%d %s%s" % (m, p[1], _plural(m)))
        n %= p[0]

    if not s:
        # Less than one whole second: show tenths of a second.
        seconds = round(seconds * 10) / 10
        s.append("%g second%s" % (seconds, _plural(seconds)))
    return sign + " ".join(s)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def number(value):
    """Format a number with a comma as thousands separator

    >>> number(1234567)
    '1,234,567'
    """
    return format(value, ",")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def plural(value, what):
    """Return ``value`` formatted by ``number`` followed by ``what``, with the suffix from ``_plural``."""
    return "{} {}{}".format(number(value), what, _plural(value))
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# English day names, starting with Monday at index 0.
DOW = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
|
|
159
|
+
|
|
160
|
+
# English month names, starting with January at index 0.
MONTH = [
    "January", "February", "March", "April",
    "May", "June", "July", "August",
    "September", "October", "November", "December",
]
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def __(n):
    """Return the English ordinal suffix ("st", "nd", "rd" or "th") for an integer."""
    # Numbers ending in 11, 12 or 13 are irregular and always take "th".
    if n % 100 in (11, 12, 13):
        return "th"

    # Otherwise the last digit decides.
    return {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def when(then, now=None, short=True):
    """Generate a human readable string for a date, relative to now

    >>> when(datetime.datetime.now() - datetime.timedelta(hours=2))
    '2 hours ago'

    >>> when(datetime.datetime.now() - datetime.timedelta(days=1))
    'yesterday at 08:46'

    >>> when(datetime.datetime.now() - datetime.timedelta(days=5))
    'last Sunday'

    >>> when(datetime.datetime.now() - datetime.timedelta(days=365))
    'last year'

    >>> when(datetime.datetime.now() + datetime.timedelta(days=365))
    'next year'

    Parameters
    ----------
    then : datetime.datetime
        A datetime
    now : datetime.datetime, optional
        The reference date, by default NOW
    short : bool, optional
        Generate shorter strings, by default True

    Returns
    -------
    str
        A human readable string

    """
    last = "last"

    if now is None:
        now = datetime.datetime.now()

    diff = (now - then).total_seconds()

    # A negative difference means ``then`` is in the future.
    if diff < 0:
        last = "next"
        diff = -diff

    diff = int(diff)

    if diff == 0:
        return "right now"

    def _(x):
        # "<x> ago" for the past, "in <x>" for the future.
        if last == "last":
            return "%s ago" % (x,)
        else:
            return "in %s" % (x,)

    if diff < 60:
        diff = int(diff + 0.5)
        return _("%s second%s" % (diff, _plural(diff)))

    if diff < 60 * 60:
        diff /= 60
        diff = int(diff + 0.5)
        return _("%s minute%s" % (diff, _plural(diff)))

    if diff < 60 * 60 * 6:
        diff /= 60 * 60
        diff = int(diff + 0.5)
        return _("%s hour%s" % (diff, _plural(diff)))

    # From six hours on, compare calendar days rather than elapsed time.
    jnow = now.toordinal()
    jthen = then.toordinal()

    if jnow == jthen:
        return "today at %02d:%02d" % (then.hour, then.minute)

    if jnow == jthen + 1:
        return "yesterday at %02d:%02d" % (then.hour, then.minute)

    if jnow == jthen - 1:
        return "tomorrow at %02d:%02d" % (then.hour, then.minute)

    if abs(jnow - jthen) <= 7:
        if last == "next":
            last = "this"
        return "%s %s" % (
            last,
            DOW[then.weekday()],
        )

    if abs(jnow - jthen) < 32 and now.month == then.month:
        return "the %d%s of this month" % (then.day, __(then.day))

    if abs(jnow - jthen) < 64 and now.month == then.month + 1:
        return "the %d%s of %s month" % (then.day, __(then.day), last)

    if short:
        years = int(abs(jnow - jthen) / 365.25 + 0.5)
        if years == 1:
            return "%s year" % last

        if years > 1:
            return _("%d years" % (years,))

        # Calendar months between the two dates. Including the year
        # difference with its sign keeps this correct when ``then`` falls in
        # a later year than ``now`` as well as in an earlier one.
        d = abs((now.year - then.year) * 12 + now.month - then.month)
        if d >= 12:
            return _("a year")
        else:
            return _("%d month%s" % (d, _plural(d)))

    return "on %s %d %s %d" % (
        DOW[then.weekday()],
        then.day,
        # datetime months are 1-based while MONTH is indexed from 0.
        MONTH[then.month - 1],
        then.year,
    )
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def string_distance(s, t):
    """Compute the Levenshtein (edit) distance between two sequences

    >>> string_distance("kitten", "sitting")
    3

    Parameters
    ----------
    s : str
        The first sequence
    t : str
        The second sequence

    Returns
    -------
    int
        The minimum number of single-item insertions, deletions and
        substitutions needed to turn ``s`` into ``t``
    """
    # Only the previous row of the dynamic-programming table is needed, so
    # plain lists are enough.
    previous = list(range(len(t) + 1))

    for i, a in enumerate(s, start=1):
        current = [i]
        for j, b in enumerate(t, start=1):
            cost = 0 if a == b else 1
            current.append(
                min(
                    previous[j] + 1,  # deletion
                    current[j - 1] + 1,  # insertion
                    previous[j - 1] + cost,  # substitution (free on a match)
                )
            )
        previous = current

    return previous[-1]
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def did_you_mean(word, vocabulary):
    """Pick the closest word in a vocabulary

    >>> did_you_mean("aple", ["banana", "lemon", "apple", "orange"])
    'apple'

    Parameters
    ----------
    word : str
        The word to look for
    vocabulary : list of strings
        The list of known words

    Returns
    -------
    str
        The word of the vocabulary with the smallest ``string_distance`` to
        ``word``; on equal distance, the word that compares lowest wins
    """
    return min(
        vocabulary,
        key=lambda candidate: (string_distance(word, candidate), candidate),
    )
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def dict_to_human(query):
    """Render a dictionary as ``key=value`` items, sorted, and joined by ``list_to_human``."""
    pairs = sorted(query.items())
    return list_to_human(["{}={}".format(key, val) for key, val in pairs])
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def list_to_human(lst, conjunction="and"):
    """Convert a list of strings to a human readable string

    >>> list_to_human(["banana", "lemon", "apple", "orange"])
    'banana, lemon, apple and orange'

    Parameters
    ----------
    lst : list of str
        The list of strings to concatenate
    conjunction : str, optional
        The word placed before the last item (like "or" or "and"), by default "and"

    Returns
    -------
    str
        The items separated by commas, with the conjunction before the last
        one; ``"??"`` when the list is empty
    """
    if not lst:
        return "??"

    glue = f" {conjunction} "
    if len(lst) <= 2:
        return glue.join(lst)

    # Comma-separate everything but the last item, then attach the last one.
    leading = ", ".join(lst[:-1])
    return glue.join((leading, lst[-1]))
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def as_number(value, name, units, none_ok):
    """Parse an integer with an optional unit suffix

    Parameters
    ----------
    value : object
        The value to parse; it is converted with ``str`` first
    name : str
        A label used as prefix of error messages
    units : dict
        Maps a one-character unit to its multiplier
    none_ok : bool
        When true, a ``None`` value is returned unchanged

    Returns
    -------
    int or None
        The integer, multiplied by the unit's factor when a unit is given

    Raises
    ------
    ValueError
        If the text is not an integer with an optional unit, or if the unit
        is not a key of ``units``
    """
    if none_ok and value is None:
        return None

    text = str(value)
    # TODO: support floats
    match = re.match(r"^\s*(\d+)\s*([%\w]+)?\s*$", text)
    if match is None:
        raise ValueError(f"{name}: invalid number/unit {text}")

    digits, suffix = match.groups()
    amount = int(digits)
    if suffix is None:
        return amount

    # Only the first character of the suffix selects the unit.
    unit = suffix[0]
    if unit in units:
        return amount * units[unit]

    valid = ", ".join(units.keys())
    raise ValueError(f"{name}: invalid unit '{unit}', valid values are {valid}")
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def as_seconds(value, name=None, none_ok=False):
    """Parse an integer with an optional ``s``, ``m``, ``h``, ``d`` or ``w`` unit into seconds, using ``as_number``."""
    multipliers = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 86400 * 7}
    return as_number(value, name, multipliers, none_ok)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def as_percent(value, name=None, none_ok=False):
    """Parse an integer with an optional ``%`` suffix, using ``as_number``."""
    return as_number(value, name, {"%": 1}, none_ok)
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def as_bytes(value, name=None, none_ok=False):
    """Parse an integer with an optional K, M, G, T or P unit (either case) into bytes, using ``as_number``."""
    units = {}
    # Each letter is worth 1024 times the previous one; both cases are accepted.
    for power, letter in enumerate("KMGTP", start=1):
        multiplier = 1024 ** power
        units[letter] = multiplier
        units[letter.lower()] = multiplier

    return as_number(value, name, units, none_ok)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def as_timedelta(value, name=None, none_ok=False):
    """Parse a period such as ``"1h30m"`` or ``"2 days"`` into a timedelta

    >>> as_timedelta("1h30m")
    datetime.timedelta(seconds=5400)

    The text is lower-cased, everything but ASCII letters and digits is
    dropped, and each run of letters is reduced to its first letter, which
    must be one of ``w``, ``d``, ``h``, ``m`` or ``s``. Note that ``m`` always
    means minutes, so a word like "month" is read as minutes.

    Parameters
    ----------
    value : str
        The period to parse
    name : str, optional
        A label used as prefix of error messages, by default None
    none_ok : bool, optional
        When true, a ``None`` value is returned unchanged, by default False

    Returns
    -------
    datetime.timedelta or None
        The parsed period

    Raises
    ------
    ValueError
        If the text is not a sequence of integer/unit pairs
    """
    if value is None and none_ok:
        return None

    save = value

    def invalid():
        # One message for every malformed input, quoting the original text.
        if name:
            return ValueError(f"{name}: invalid period '{save}'")
        return ValueError(f"Invalid period '{save}'")

    value = re.sub(r"[^a-zA-Z0-9]", "", value.lower())
    value = re.sub(r"([a-zA-Z])[a-zA-Z]*", r"\1", value)
    # Splitting on a capturing group keeps the unit letters in the result.
    bits = [b for b in re.split(r"([dmhsw])", value) if b != ""]

    times = defaultdict(int)

    val = None

    # Items are expected to alternate: number, unit, number, unit, ...
    for i, n in enumerate(bits):
        if i % 2 == 0:
            try:
                val = int(n)
            except ValueError:
                raise invalid() from None
        else:
            # Raise instead of assert: asserts are skipped under ``python -O``.
            if n not in ("d", "m", "h", "s", "w"):
                raise invalid()
            times[n] = val
            val = None

    # A trailing number without a unit.
    if val is not None:
        raise invalid()

    return datetime.timedelta(
        weeks=times["w"],
        days=times["d"],
        hours=times["h"],
        minutes=times["m"],
        seconds=times["s"],
    )
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def rounded_datetime(d):
    """Round a datetime to the nearest whole second, half a second rounding up."""
    if d.microsecond >= 500000:
        d += datetime.timedelta(seconds=1)
    return d.replace(microsecond=0)
|