hestia-earth-utils 0.16.12__py3-none-any.whl → 0.16.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/utils/date.py +439 -0
- hestia_earth/utils/tools.py +69 -3
- hestia_earth/utils/version.py +1 -1
- {hestia_earth_utils-0.16.12.dist-info → hestia_earth_utils-0.16.13.dist-info}/METADATA +1 -1
- {hestia_earth_utils-0.16.12.dist-info → hestia_earth_utils-0.16.13.dist-info}/RECORD +9 -9
- {hestia_earth_utils-0.16.12.data → hestia_earth_utils-0.16.13.data}/scripts/hestia-format-upload +0 -0
- {hestia_earth_utils-0.16.12.data → hestia_earth_utils-0.16.13.data}/scripts/hestia-pivot-csv +0 -0
- {hestia_earth_utils-0.16.12.dist-info → hestia_earth_utils-0.16.13.dist-info}/WHEEL +0 -0
- {hestia_earth_utils-0.16.12.dist-info → hestia_earth_utils-0.16.13.dist-info}/top_level.txt +0 -0
hestia_earth/utils/date.py
CHANGED
|
@@ -1,6 +1,15 @@
|
|
|
1
|
+
from calendar import monthrange
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from dateutil.relativedelta import relativedelta
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from math import floor
|
|
6
|
+
from typing import Any, Callable, Literal, Optional, Union
|
|
7
|
+
|
|
1
8
|
from dateutil.parser import parse
|
|
2
9
|
import re
|
|
3
10
|
|
|
11
|
+
from .tools import is_list_like, safe_parse_date
|
|
12
|
+
|
|
4
13
|
SECOND = 1
|
|
5
14
|
MINUTE = 60 * SECOND
|
|
6
15
|
HOUR = 60 * MINUTE
|
|
@@ -12,6 +21,8 @@ def diff_in_days(from_date: str, to_date: str) -> float:
|
|
|
12
21
|
"""
|
|
13
22
|
Return the difference in days between two dates.
|
|
14
23
|
|
|
24
|
+
Deprecated, use `diff_in` function with `unit = TimeUnit.DAY` instead.
|
|
25
|
+
|
|
15
26
|
Parameters
|
|
16
27
|
----------
|
|
17
28
|
from_date : str
|
|
@@ -32,6 +43,8 @@ def diff_in_years(from_date: str, to_date: str) -> float:
|
|
|
32
43
|
"""
|
|
33
44
|
Return the difference in years between two dates.
|
|
34
45
|
|
|
46
|
+
Deprecated, use `diff_in` function with `unit = TimeUnit.YEAR` instead.
|
|
47
|
+
|
|
35
48
|
Parameters
|
|
36
49
|
----------
|
|
37
50
|
from_date : str
|
|
@@ -51,6 +64,8 @@ def is_in_days(date: str) -> bool:
|
|
|
51
64
|
"""
|
|
52
65
|
Check if the date as a string contains year, month and day.
|
|
53
66
|
|
|
67
|
+
Deprecated, use `validate_datestr_format` with `valid_format = DatestrFormat.YEAR_MONTH_DAY` instead.
|
|
68
|
+
|
|
54
69
|
Parameters
|
|
55
70
|
----------
|
|
56
71
|
date : str
|
|
@@ -71,6 +86,8 @@ def is_in_months(date: str) -> bool:
|
|
|
71
86
|
"""
|
|
72
87
|
Check if the date as a string contains year, month but no day.
|
|
73
88
|
|
|
89
|
+
Deprecated, use `validate_datestr_format` with `valid_format = DatestrFormat.YEAR_MONTH` instead.
|
|
90
|
+
|
|
74
91
|
Parameters
|
|
75
92
|
----------
|
|
76
93
|
date : str
|
|
@@ -84,3 +101,425 @@ def is_in_months(date: str) -> bool:
|
|
|
84
101
|
return (
|
|
85
102
|
date is not None and re.compile(r"^[\d]{4}\-[\d]{2}$").match(date) is not None
|
|
86
103
|
)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Year-only datestr constant.
# NOTE(review): presumably the earliest date the library treats as valid; it is not referenced in the
# visible part of this module, so confirm the intended use against its callers.
OLDEST_DATE = "1800"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class TimeUnit(Enum):
    """
    Units of time understood by the duration-conversion and date-diff helpers in this module.

    Each member's value is the lowercase unit name; those values are the keys of
    `DATETIME_UNIT_CONVERSIONS`.
    """

    YEAR = "year"
    MONTH = "month"
    DAY = "day"
    HOUR = "hour"
    MINUTE = "minute"
    SECOND = "second"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class DatestrFormat(Enum):
    """
    Enum representing ISO date formats permitted by HESTIA.

    Each member's value is the `datetime.strptime` / `datetime.strftime` format string for that layout.

    See: https://en.wikipedia.org/wiki/ISO_8601
    """

    YEAR = r"%Y"  # e.g. "2001"
    YEAR_MONTH = r"%Y-%m"  # e.g. "2001-01"
    YEAR_MONTH_DAY = r"%Y-%m-%d"  # e.g. "2001-01-01"
    YEAR_MONTH_DAY_HOUR_MINUTE_SECOND = r"%Y-%m-%dT%H:%M:%S"  # e.g. "2001-01-01T00:00:00"
    MONTH = r"--%m"  # year-less month, e.g. "--01"
    MONTH_DAY = r"--%m-%d"  # year-less month and day, e.g. "--01-01"
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Strategies for completing a partial datestr: snap to the start, the middle or the end of the period
# it describes. These strings are the keys of `DATESTR_GAPFILL_MODE_TO_GAPFILL_FUNCTION`.
DatestrGapfillMode = Literal["start", "middle", "end"]


# Exact string length of a datestr in each format. `_check_datestr_format` requires this length in
# addition to a successful `strptime` parse.
DATESTR_FORMAT_TO_EXPECTED_LENGTH = {
    DatestrFormat.YEAR: len("2001"),
    DatestrFormat.YEAR_MONTH: len("2001-01"),
    DatestrFormat.YEAR_MONTH_DAY: len("2001-01-01"),
    DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND: len("2001-01-01T00:00:00"),
    DatestrFormat.MONTH: len("--01"),
    DatestrFormat.MONTH_DAY: len("--01-01"),
}


# Maps every TimeUnit to a DatestrFormat; all sub-day units (hour, minute, second) share the full
# timestamp format because no coarser format carries a time component.
TIME_UNIT_TO_DATESTR_FORMAT = {
    TimeUnit.YEAR: DatestrFormat.YEAR,
    TimeUnit.MONTH: DatestrFormat.YEAR_MONTH,
    TimeUnit.DAY: DatestrFormat.YEAR_MONTH_DAY,
    TimeUnit.HOUR: DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND,
    TimeUnit.MINUTE: DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND,
    TimeUnit.SECOND: DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND,
}
"""
Minimum Datestr format required to express DatetimeUnit.
"""
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Building blocks for the conversion table below.
_SECONDS_IN_MINUTE = 60  # 60 seconds in a minute
_MINUTES_IN_HOUR = 60  # 60 minutes in an hour
_HOURS_IN_DAY = 24  # 24 hours in a day
_MONTHS_IN_YEAR = 12  # 12 months in a year

# NOTE(review): `YEAR` is a module-level constant defined outside the visible part of this file; the
# original comment states it is the average number of days in a year (365.2425) - confirm.
_DAYS_IN_YEAR = YEAR  # average days in a year (365.2425)
_DAYS_IN_MONTH = (
    _DAYS_IN_YEAR / _MONTHS_IN_YEAR
)  # average days in a month (365.2425/12)


# Multiplicative factors: `value_in_dest = value_in_src * DATETIME_UNIT_CONVERSIONS[src][dest]`
# (this is how `convert_duration` applies them). Same-unit pairs are deliberately absent, so a lookup
# for them falls through to the caller-supplied default factor (1 by default).
# Year and month factors rely on average lengths, so they are approximations for any specific
# calendar year or month.
DATETIME_UNIT_CONVERSIONS: dict[str, dict[str, float]] = {
    TimeUnit.YEAR.value: {
        TimeUnit.MONTH.value: _MONTHS_IN_YEAR,
        TimeUnit.DAY.value: _DAYS_IN_YEAR,
        TimeUnit.HOUR.value: _DAYS_IN_YEAR * _HOURS_IN_DAY,
        TimeUnit.MINUTE.value: _DAYS_IN_YEAR * _HOURS_IN_DAY * _MINUTES_IN_HOUR,
        TimeUnit.SECOND.value: _DAYS_IN_YEAR
        * _HOURS_IN_DAY
        * _MINUTES_IN_HOUR
        * _SECONDS_IN_MINUTE,
    },
    TimeUnit.MONTH.value: {
        TimeUnit.YEAR.value: 1 / _MONTHS_IN_YEAR,
        TimeUnit.DAY.value: _DAYS_IN_MONTH,
        TimeUnit.HOUR.value: _DAYS_IN_MONTH * _HOURS_IN_DAY,
        TimeUnit.MINUTE.value: _DAYS_IN_MONTH * _HOURS_IN_DAY * _MINUTES_IN_HOUR,
        TimeUnit.SECOND.value: _DAYS_IN_MONTH
        * _HOURS_IN_DAY
        * _MINUTES_IN_HOUR
        * _SECONDS_IN_MINUTE,
    },
    TimeUnit.DAY.value: {
        TimeUnit.YEAR.value: 1 / _DAYS_IN_YEAR,
        TimeUnit.MONTH.value: 1 / _DAYS_IN_MONTH,
        TimeUnit.HOUR.value: _HOURS_IN_DAY,
        TimeUnit.MINUTE.value: _HOURS_IN_DAY * _MINUTES_IN_HOUR,
        TimeUnit.SECOND.value: _HOURS_IN_DAY * _MINUTES_IN_HOUR * _SECONDS_IN_MINUTE,
    },
    TimeUnit.HOUR.value: {
        TimeUnit.YEAR.value: 1 / (_HOURS_IN_DAY * _DAYS_IN_YEAR),
        TimeUnit.MONTH.value: 1 / (_HOURS_IN_DAY * _DAYS_IN_MONTH),
        TimeUnit.DAY.value: 1 / (_HOURS_IN_DAY),
        TimeUnit.MINUTE.value: _MINUTES_IN_HOUR,
        TimeUnit.SECOND.value: _MINUTES_IN_HOUR * _SECONDS_IN_MINUTE,
    },
    TimeUnit.MINUTE.value: {
        TimeUnit.YEAR.value: 1 / (_MINUTES_IN_HOUR * _HOURS_IN_DAY * _DAYS_IN_YEAR),
        TimeUnit.MONTH.value: 1 / (_MINUTES_IN_HOUR * _HOURS_IN_DAY * _DAYS_IN_MONTH),
        TimeUnit.DAY.value: 1 / (_MINUTES_IN_HOUR * _HOURS_IN_DAY),
        TimeUnit.HOUR.value: 1 / _MINUTES_IN_HOUR,
        TimeUnit.SECOND.value: _SECONDS_IN_MINUTE,
    },
    TimeUnit.SECOND.value: {
        TimeUnit.YEAR.value: 1
        / (_SECONDS_IN_MINUTE * _MINUTES_IN_HOUR * _HOURS_IN_DAY * _DAYS_IN_YEAR),
        TimeUnit.MONTH.value: 1
        / (_SECONDS_IN_MINUTE * _MINUTES_IN_HOUR * _HOURS_IN_DAY * _DAYS_IN_MONTH),
        TimeUnit.DAY.value: 1 / (_SECONDS_IN_MINUTE * _MINUTES_IN_HOUR * _HOURS_IN_DAY),
        TimeUnit.HOUR.value: 1 / (_SECONDS_IN_MINUTE * _MINUTES_IN_HOUR),
        TimeUnit.MINUTE.value: 1 / _SECONDS_IN_MINUTE,
    },
}
"""
A dict of TimeUnit conversion factors with format:
```
{
    source (str): {
        dest (str): conversion_factor (float)
    }
}
```
"""
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _get_time_unit_conversion(
    src_unit: TimeUnit, dest_unit: TimeUnit, default_value: float = 1
):
    """
    Look up the factor that converts a quantity expressed in `src_unit` into `dest_unit`.

    Both units may be given either as `TimeUnit` members or as their string values. When no factor is
    registered for the pair (which includes converting a unit to itself), `default_value` is returned.
    """
    # Normalise both units to the string keys used by the conversion table.
    src_key, dest_key = (
        unit if isinstance(unit, str) else unit.value for unit in (src_unit, dest_unit)
    )
    factors_from_src = DATETIME_UNIT_CONVERSIONS.get(src_key, {})
    return factors_from_src.get(dest_key, default_value)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def convert_duration(
    duration: float,
    src_unit: TimeUnit,
    dest_unit: TimeUnit,
    default_conversion_factor: float = 1,
):
    """
    Convert a duration from one time unit to another.

    Parameters
    ----------
    duration : float
        The duration, expressed in `src_unit`.

    src_unit : TimeUnit
        The unit the duration is currently expressed in.

    dest_unit : TimeUnit
        The unit to convert the duration into.

    default_conversion_factor : float, optional, default = `1`
        The factor applied when no conversion is registered for the pair of units (this is the case
        when both units are the same).

    Returns
    -------
    float
        `duration` multiplied by the conversion factor for the pair of units.
    """
    return duration * _get_time_unit_conversion(
        src_unit, dest_unit, default_conversion_factor
    )
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _check_datestr_format(datestr: str, format: DatestrFormat) -> bool:
    """
    Use `datetime.strptime` to determine if a datestr is in a particular ISO format.

    A datestr matches when it has exactly the length registered for the format in
    `DATESTR_FORMAT_TO_EXPECTED_LENGTH` and parses with the format's `strptime` pattern.

    Year-less formats (`--MM`, `--MM-DD`) are validated against a leap year. `strptime` otherwise
    assumes the year 1900, which is not a leap year, and would reject the valid datestr `--02-29`.
    """
    expected_length = DATESTR_FORMAT_TO_EXPECTED_LENGTH.get(format, 0)
    format_str = format.value

    # The length check rejects inputs `strptime` would accept leniently (e.g. non zero-padded months).
    if len(datestr) != expected_length:
        return False

    if "%Y" not in format_str:
        # Pin the year to a leap year so every real month/day combination is accepted.
        datestr, format_str = "2000" + datestr, "%Y" + format_str

    try:
        datetime.strptime(datestr, format_str)
    except ValueError:
        return False
    return True
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def _get_datestr_format(
    datestr: str, default: Optional[Any] = None
) -> Union[DatestrFormat, Any, None]:
    """
    Identify which of the ISO formats permitted by the HESTIA schema a datestr is written in.

    The input is converted with `str` and tested against every `DatestrFormat` member in declaration
    order; the first matching member is returned. If nothing matches, `default` is returned.
    """
    for date_format in DatestrFormat:
        if _check_datestr_format(str(datestr), date_format):
            return date_format
    return default
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def validate_datestr_format(
    datestr: str,
    valid_format: Union[DatestrFormat, list[DatestrFormat]] = (
        DatestrFormat.YEAR,
        DatestrFormat.YEAR_MONTH,
        DatestrFormat.YEAR_MONTH_DAY,
    ),
) -> bool:
    """
    Check whether a datestr is written in one of the accepted formats.

    Parameters
    ----------
    datestr : str
        The datestr to validate.

    valid_format : DatestrFormat | list[DatestrFormat], optional
        A single accepted format, or a list-like of accepted formats. Defaults to the three year-based
        date formats (`YYYY`, `YYYY-MM`, `YYYY-MM-DD`).

    Returns
    -------
    bool
        `True` if the detected format of `datestr` is one of the accepted formats, else `False`.
    """
    # The default is an immutable tuple, so it can never be altered between calls.
    valid_formats = valid_format if is_list_like(valid_format) else [valid_format]
    return _get_datestr_format(datestr) in valid_formats
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _gapfill_datestr_start(datestr: str, *_) -> str:
|
|
299
|
+
"""
|
|
300
|
+
Gapfill an incomplete datestr with the earliest possible date and time.
|
|
301
|
+
|
|
302
|
+
Datestr will snap to the start of the year/month/day as appropriate.
|
|
303
|
+
"""
|
|
304
|
+
return datestr + "YYYY-01-01T00:00:00"[len(datestr) :]
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _days_in_month(datestr: str) -> int:
    """
    Return the number of days in the month the datestr falls in.

    The datestr is parsed with `safe_parse_date`; if that yields a falsy result, the smallest possible
    month length (28) is returned instead.
    """
    parsed = safe_parse_date(datestr)
    if not parsed:
        return 28
    _, number_of_days = monthrange(parsed.year, parsed.month)
    return number_of_days
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _gapfill_datestr_end(datestr: str, format: DatestrFormat) -> str:
    """
    Complete a partial datestr with the latest date and time it can refer to.

    Missing components are copied from the tail of a `YYYY-12-DDT23:59:59` template, so the result
    snaps to the end of the year/month/day as appropriate. For year-month datestrs the day is the real
    length of that month; otherwise 31 is used (it only appears in the result for year-only datestrs,
    where the month is December).
    """
    if format == DatestrFormat.YEAR_MONTH:
        last_day = _days_in_month(datestr)
    else:
        last_day = 31
    template = f"YYYY-12-{last_day}T23:59:59"
    return datestr + template[len(datestr) :]
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _gapfill_datestr_middle(datestr: str, format: DatestrFormat) -> str:
    """
    Complete a partial datestr with the instant halfway between its earliest and latest possible values.

    The result is formatted as `YYYY-MM-DDTHH:MM:SS`, so any sub-second part of the midpoint is dropped.
    """
    full_format = DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND.value
    earliest = datetime.strptime(_gapfill_datestr_start(datestr), full_format)
    latest = datetime.strptime(_gapfill_datestr_end(datestr, format=format), full_format)
    midpoint = earliest + (latest - earliest) / 2
    return midpoint.strftime(full_format)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# Dispatch table from gapfill mode to its implementation. `gapfill_datestr` calls every entry as
# `func(datestr, format)`; the "start" implementation ignores the format argument.
DATESTR_GAPFILL_MODE_TO_GAPFILL_FUNCTION: dict[DatestrGapfillMode, Callable] = {
    "start": _gapfill_datestr_start,
    "middle": _gapfill_datestr_middle,
    "end": _gapfill_datestr_end,
}

# The only formats that get gapfilled. Datestrs in any other format (year-less or already a full
# timestamp) are passed through unchanged by `gapfill_datestr` and `convert_datestr`.
_VALID_GAPFILL_DATE_FORMATS = {
    DatestrFormat.YEAR,
    DatestrFormat.YEAR_MONTH,
    DatestrFormat.YEAR_MONTH_DAY,
}
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def gapfill_datestr(datestr: str, mode: DatestrGapfillMode = "start") -> Optional[str]:
    """
    Gapfill incomplete datestrs and return them in the format `YYYY-MM-DDTHH:mm:ss`.

    Parameters
    ----------
    datestr : str
        The datestr to complete. Non-string values are converted with `str` first.

    mode : DatestrGapfillMode, optional, default = `"start"`
        Whether to snap to the `"start"`, `"middle"` or `"end"` of the period the datestr describes.

    Returns
    -------
    str | None
        `None` if `datestr` is `None`. The completed datestr if the input is in `YYYY`, `YYYY-MM` or
        `YYYY-MM-DD` format. Otherwise the input (as a string) unchanged.

    Raises
    ------
    KeyError
        If the datestr needs gapfilling and `mode` is not a known gapfill mode.
    """
    # Nothing to detect or fill for a missing value.
    if datestr is None:
        return None

    datestr_ = str(datestr)
    format_ = _get_datestr_format(datestr_)

    # Year-less datestrs, full timestamps and unrecognised strings are passed through untouched.
    if format_ not in _VALID_GAPFILL_DATE_FORMATS:
        return datestr_

    return DATESTR_GAPFILL_MODE_TO_GAPFILL_FUNCTION[mode](datestr_, format_)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def convert_datestr(
    datestr: str,
    target_format: DatestrFormat,
    gapfill_mode: DatestrGapfillMode = "start",
) -> str:
    """
    Re-express a datestr in another `DatestrFormat`.

    Datestrs in `YYYY`, `YYYY-MM` or `YYYY-MM-DD` format are gapfilled to a full timestamp using
    `gapfill_mode` and then formatted with `target_format`. Any other input is returned unchanged.
    """
    if not validate_datestr_format(datestr, _VALID_GAPFILL_DATE_FORMATS):
        return datestr

    completed = gapfill_datestr(datestr, gapfill_mode)
    full_format = DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND.value
    return datetime.strptime(completed, full_format).strftime(target_format.value)
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def parse_gapfilled_datestr(
    datestr: str, gapfill_mode: DatestrGapfillMode = "start", default: Any = None
):
    """
    Gapfill a datestr using `gapfill_mode`, then parse the result with `safe_parse_date`.

    `default` is forwarded to `safe_parse_date`.
    """
    completed = gapfill_datestr(datestr, mode=gapfill_mode)
    return safe_parse_date(completed, default=default)
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def datestrs_match(
    datestr_a: str, datestr_b: str, mode: DatestrGapfillMode = "start"
) -> bool:
    """
    Compare two datestrs of possibly different precision by gapfilling both with the same mode.

    Example: with `mode="end"`, `'2010'` matches `'2010-12-31'` but not `'2010-01-01'`.
    """
    filled_a, filled_b = (
        gapfill_datestr(datestr=datestr, mode=mode) for datestr in (datestr_a, datestr_b)
    )
    return filled_a == filled_b
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def _diff_in_years_calendar(a: datetime, b: datetime, *, add_second: bool, **_) -> int:
    """
    Return the number of whole calendar years from `a` to `b` (negative when `b` is before `a`).

    When `add_second` is set, `b` is first moved one second further away from `a`. Extra keyword
    arguments are ignored.
    """
    reverse = a > b
    b_ = b
    if add_second:
        one_second = relativedelta(seconds=1)
        b_ = b - one_second if reverse else b + one_second
    return relativedelta(b_, a).years
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _diff_in_months_calendar(a: datetime, b: datetime, *, add_second: bool, **_) -> int:
    """
    Return the number of whole calendar months from `a` to `b` (negative when `b` is before `a`).

    When `add_second` is set, `b` is first moved one second further away from `a`. Extra keyword
    arguments are ignored.
    """
    reverse = a > b
    b_ = b
    if add_second:
        one_second = relativedelta(seconds=1)
        b_ = b - one_second if reverse else b + one_second
    delta = relativedelta(b_, a)
    return delta.years * 12 + delta.months
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _diff(
    a: datetime, b: datetime, *, unit: TimeUnit, add_second: bool, complete_only: bool
) -> Union[float, int]:
    """
    Return the elapsed time from `a` to `b` expressed in `unit`, using fixed (average) unit lengths.

    The result is negative when `b` is before `a`. When `add_second` is set, `b` is first moved one
    second further away from `a`. When `complete_only` is set, the fractional part is discarded.
    """
    reverse = a > b
    b_ = b
    if add_second:
        one_second = relativedelta(seconds=1)
        b_ = b - one_second if reverse else b + one_second

    diff = convert_duration((b_ - a).total_seconds(), TimeUnit.SECOND, unit)

    # Truncate toward zero so reversed diffs mirror forward ones. `floor` would turn a reversed
    # half-day into -1 complete days, and a reversed leap year with `add_second` into -367 days.
    return int(diff) if complete_only else diff
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
# Specialised diff implementations, looked up by `diff_in`. Any (unit, calendar) pair that is not
# listed here is handled by the generic `_diff` function.
DIFF_FUNCTION = {
    (TimeUnit.YEAR, True): _diff_in_years_calendar,
    (TimeUnit.MONTH, True): _diff_in_months_calendar,
}
"""
(unit: TimeUnit, calendar: bool): Callable
"""
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def diff_in(
    a: Union[datetime, str],
    b: Union[datetime, str],
    unit: TimeUnit,
    add_second=False,
    calendar=False,
    gapfill_mode: DatestrGapfillMode = "start",
):
    """
    Calculate the difference between two dates.

    This function does NOT return the absolute difference. If `b` is before `a` the function will return a negative
    value.

    If dates are passed as datestrings, they will be parsed into datetime objects. Caution is advised when using
    datestrings with formats `--MM` and `--MM-DD` as these might be parsed in unexpected ways.

    Parameters
    ----------
    a : datetime | str
        The first date.

    b : datetime | str
        The second date.

    unit : TimeUnit
        The time unit to calculate the diff in. The string value of a `TimeUnit` member (e.g., `"year"`) is
        accepted and treated exactly like the member itself.

    add_second : bool, optional, default = `False`
        A flag to determine whether to add one second to diff results.

        Set to `True` in cases where you are calculating the duration of nodes with incomplete datestrings.

        For example, a node with `"startDate"` = `"2000"` and `"endDate"` = `"2001"` will ordinarily be assumed to take
        place over the entirety of 2000 and 2001 (i.e., from `"2000-01-01T00:00:00"` to `"2001-12-31T23:59:59"`).
        However, if `add_second` = `False`, the diff in days will be slightly less than 731 because the final second of
        2001-12-31 is not accounted for. If `True` the diff will be exactly 731.

    calendar : bool, optional, default = `False`
        A flag to determine whether to use calendar time units.

        If `True` the diff in years between `"2000"` and `"2001"` will be exactly 1, if `False` the diff will be
        slightly over 1 because a leap year is longer than the average year.

        If `True` the diff in months between `"2000-02"` and `"2000-03"` will be exactly 1, if `False` the diff will be
        approximately 0.95 because February is shorter than the average month.

        For all units, if `True`, only complete units will be counted. For example, the diff in days between
        `"2000-01-01T00:00:00"` and `"2000-01-01T12:00:00"` will be 0. If `False` the diff will be 0.5.

    gapfill_mode : DatestrGapfillMode, optional, default = `"start"`
        How to gapfill incomplete datestrings (`"start"`, `"middle"` or `"end"`). The same mode is applied to both
        `a` and `b`.

    Returns
    -------
    diff : float | int
        The difference between the dates in the selected units.
    """
    a_, b_ = (
        d if isinstance(d, datetime) else parse_gapfilled_datestr(d, gapfill_mode=gapfill_mode)
        for d in (a, b)
    )

    # `DIFF_FUNCTION` is keyed by `TimeUnit` members, so a unit given as its string value must be
    # normalised first. Otherwise `calendar=True` would silently fall back to average-length years
    # and months. Unrecognised units are left as they are, preserving the previous behaviour.
    try:
        unit_ = TimeUnit(unit)
    except ValueError:
        unit_ = unit

    diff_func = DIFF_FUNCTION.get((unit_, calendar))
    if diff_func is None:
        # No calendar-aware implementation for this combination: use the generic diff.
        return _diff(a_, b_, unit=unit_, add_second=add_second, complete_only=calendar)

    return diff_func(a_, b_, unit=unit_, add_second=add_second)
|
hestia_earth/utils/tools.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Generator, Iterable
|
|
2
2
|
from dateutil.parser import parse
|
|
3
|
-
from statistics import mean
|
|
4
3
|
from functools import reduce
|
|
5
|
-
from math import log10, floor
|
|
6
4
|
import numpy
|
|
5
|
+
from math import log10, floor
|
|
6
|
+
from statistics import mean
|
|
7
|
+
import time
|
|
8
|
+
from typing import Literal
|
|
7
9
|
from hestia_earth.schema import NodeType
|
|
8
10
|
|
|
9
11
|
|
|
@@ -268,3 +270,67 @@ def pick(value: dict, keys: list) -> dict:
|
|
|
268
270
|
|
|
269
271
|
def unique_values(values: list, key: str = "@id"):
|
|
270
272
|
return list({v[key]: v for v in values}.values())
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def is_list_like(obj) -> bool:
    """
    Tell whether an object should be treated as a collection of items.

    Return `True` for any `Iterable` (generators included) except `str` and `bytes`, which are
    treated as single values; return `False` for everything else.
    """
    if isinstance(obj, (str, bytes)):
        return False
    return isinstance(obj, (Iterable, Generator))
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
TO_LIST_LIKE_CONSTRUCTOR = {"list": list, "set": set, "tuple": tuple}
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _as_list_like(obj, to: Literal["list", "set", "tuple"] = "list"):
|
|
287
|
+
"""
|
|
288
|
+
Convert an object to either a list, set or tuple.
|
|
289
|
+
|
|
290
|
+
If the object is list-like, convert it to the target iterable. If the object is not list-like, wrap the
|
|
291
|
+
object in the iterable.
|
|
292
|
+
|
|
293
|
+
`str` and `bytes` objects are not consider list-like and, therefore, will be wrapped.
|
|
294
|
+
"""
|
|
295
|
+
constructor = TO_LIST_LIKE_CONSTRUCTOR.get(to, list)
|
|
296
|
+
return (
|
|
297
|
+
obj
|
|
298
|
+
if isinstance(obj, constructor)
|
|
299
|
+
else constructor(obj) if is_list_like(obj) else constructor([obj])
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def as_list(obj) -> list:
    """
    Coerce an object into a list.

    A list is returned as is, any other list-like object is converted into a list, and anything else
    is wrapped in a new list (e.g., `[obj]`).

    `str` and `bytes` objects are not considered list-like and are therefore wrapped
    (e.g., `"abc"` -> `["abc"]`).
    """
    return _as_list_like(obj, to="list")
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def as_set(obj) -> set:
    """
    Coerce an object into a set.

    A set is returned as is, any other list-like object is converted into a set, and anything else
    is wrapped in a new set (e.g., `{obj}`).

    `str` and `bytes` objects are not considered list-like and are therefore wrapped
    (e.g., `"abc"` -> `{"abc"}`).
    """
    return _as_list_like(obj, to="set")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def as_tuple(obj) -> tuple:
    """
    Coerce an object into a tuple.

    A tuple is returned as is, any other list-like object is converted into a tuple, and anything
    else is wrapped in a new tuple (e.g., `(obj, )`).

    `str` and `bytes` objects are not considered list-like and are therefore wrapped
    (e.g., `"abc"` -> `("abc", )`).
    """
    return _as_list_like(obj, to="tuple")
|
hestia_earth/utils/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = "0.16.
|
|
1
|
+
VERSION = "0.16.13"
|
|
@@ -3,7 +3,7 @@ hestia_earth/utils/api.py,sha256=b6g87ylAgdWvwPlDeZDv74UGfXVe1KFXap5-Rv5daSE,992
|
|
|
3
3
|
hestia_earth/utils/blank_node.py,sha256=K_8wWACiwOqEJR71ClQFInzsIdDH3UUSwrtCZoh9V-o,7854
|
|
4
4
|
hestia_earth/utils/calculation_status.py,sha256=f5b05cEFXMfFI1clirIt7v3Y9H2Nja66GYv1NCyZjf0,2381
|
|
5
5
|
hestia_earth/utils/cycle.py,sha256=oo0CesLMblL8ewI4s7eXdyNjr9R9df4Vyr0iRXCYFu4,1326
|
|
6
|
-
hestia_earth/utils/date.py,sha256=
|
|
6
|
+
hestia_earth/utils/date.py,sha256=cwcGtYVh2K5rH7k5uO1Olovy-hzIDrvsfaiR94ZlyO0,16165
|
|
7
7
|
hestia_earth/utils/descriptive_stats.py,sha256=YvDI6EWCcZWw8yCxYhqyzMCDqCu2X8DjvygmMK_AVvc,1633
|
|
8
8
|
hestia_earth/utils/emission.py,sha256=rHHf5vwe-RxTOaOJ9N0MuyJOLpDnPjjxv6MHYSpPgcU,2165
|
|
9
9
|
hestia_earth/utils/lookup.py,sha256=SArKqjqs_Yt5cC6TIk12WVQNToun2spZ_iNUm4fX9FA,8274
|
|
@@ -14,8 +14,8 @@ hestia_earth/utils/request.py,sha256=EfitmS13abPxfl5gSAOyHRUYhk4R2Rfv94fF9lvOT00
|
|
|
14
14
|
hestia_earth/utils/stats.py,sha256=vTNyKcMKmX0DoodM9QEG7HF8qm2Wf-4ckMWQFWZ1VgE,34729
|
|
15
15
|
hestia_earth/utils/table.py,sha256=MOJDo5fQPRDogAty_UXbO9-EXFwz97m0f7--mOM17lQ,2363
|
|
16
16
|
hestia_earth/utils/term.py,sha256=aBVYuYv55nPqJPyt5mN4Fz652s_1hwUPckNUZX0pMP8,1064
|
|
17
|
-
hestia_earth/utils/tools.py,sha256=
|
|
18
|
-
hestia_earth/utils/version.py,sha256=
|
|
17
|
+
hestia_earth/utils/tools.py,sha256=WMx05cBtBR8mYQnLLBA2cgF1x2tI41514diFDeX4gLQ,7533
|
|
18
|
+
hestia_earth/utils/version.py,sha256=YWLgGWDdFT25ECpxkRof587RqHI-fEIwQWo3m9sB62U,20
|
|
19
19
|
hestia_earth/utils/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
hestia_earth/utils/pivot/_shared.py,sha256=8-AZAAitXU53BYpOFFGnuCy5VBSphyClgIqGU-kg6CM,1436
|
|
21
21
|
hestia_earth/utils/pivot/pivot_csv.py,sha256=TtePpo9L_GKISpQMW9dwjBv2tHHE822rb-j6_bflVOA,12251
|
|
@@ -25,9 +25,9 @@ hestia_earth/utils/storage/_azure_client.py,sha256=mseexhzjteRDzzoFe2fEXe9MYLmvj
|
|
|
25
25
|
hestia_earth/utils/storage/_local_client.py,sha256=KbYqTfniIU5R5J1m_unCQip9kOz9EGIGI0OH0QvD8eo,551
|
|
26
26
|
hestia_earth/utils/storage/_s3_client.py,sha256=8TCxiHfxE7G8kdp3CnEFrxgmPwfPyci3-blsowE2T7o,3146
|
|
27
27
|
hestia_earth/utils/storage/_sns_client.py,sha256=pvtXYw-sQ8ns3mlDz7ld9iZp3FYSm0xSXSXMJ5IPnBc,380
|
|
28
|
-
hestia_earth_utils-0.16.
|
|
29
|
-
hestia_earth_utils-0.16.
|
|
30
|
-
hestia_earth_utils-0.16.
|
|
31
|
-
hestia_earth_utils-0.16.
|
|
32
|
-
hestia_earth_utils-0.16.
|
|
33
|
-
hestia_earth_utils-0.16.
|
|
28
|
+
hestia_earth_utils-0.16.13.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
|
|
29
|
+
hestia_earth_utils-0.16.13.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
|
|
30
|
+
hestia_earth_utils-0.16.13.dist-info/METADATA,sha256=9PXZMETvhkITPCS-eqRcbNCAVGy15Ub05rPDqjFzOZw,1870
|
|
31
|
+
hestia_earth_utils-0.16.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
32
|
+
hestia_earth_utils-0.16.13.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
|
|
33
|
+
hestia_earth_utils-0.16.13.dist-info/RECORD,,
|
{hestia_earth_utils-0.16.12.data → hestia_earth_utils-0.16.13.data}/scripts/hestia-format-upload
RENAMED
|
File without changes
|
{hestia_earth_utils-0.16.12.data → hestia_earth_utils-0.16.13.data}/scripts/hestia-pivot-csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|