emmykit 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emmykit/__init__.py +517 -0
- emmykit/_version.py +13 -0
- emmykit/constants.py +65 -0
- emmykit/datetime_utils.py +800 -0
- emmykit/diff_view.py +362 -0
- emmykit/docker_utils.py +118 -0
- emmykit/embedded_scripts.py +560 -0
- emmykit/extensions.py +323 -0
- emmykit/file_io.py +56 -0
- emmykit/files.py +329 -0
- emmykit/hosts.py +153 -0
- emmykit/html_files.py +140 -0
- emmykit/humanize.py +146 -0
- emmykit/inflect_utils.py +179 -0
- emmykit/introspection.py +467 -0
- emmykit/io_subprocess.py +231 -0
- emmykit/json_io.py +335 -0
- emmykit/lint.py +827 -0
- emmykit/llm.py +2158 -0
- emmykit/logging_utils.py +255 -0
- emmykit/media.py +513 -0
- emmykit/net_targets.py +46 -0
- emmykit/network.py +581 -0
- emmykit/numeric_helpers.py +62 -0
- emmykit/options.py +67 -0
- emmykit/paths_ensure.py +25 -0
- emmykit/prompts.py +49 -0
- emmykit/py.typed +0 -0
- emmykit/python_env.py +139 -0
- emmykit/safe_paths.py +316 -0
- emmykit/system.py +225 -0
- emmykit/text.py +205 -0
- emmykit/text_constants.py +21 -0
- emmykit/treeview.py +217 -0
- emmykit-0.3.2.dist-info/METADATA +4461 -0
- emmykit-0.3.2.dist-info/RECORD +38 -0
- emmykit-0.3.2.dist-info/WHEEL +4 -0
- emmykit-0.3.2.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,800 @@
|
|
|
1
|
+
"""datetime_utils — extracted from univ_defs.py."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
from emmykit.inflect_utils import my_plural
|
|
9
|
+
from emmykit.logging_utils import fallback_logging_config
|
|
10
|
+
from emmykit.numeric_helpers import is_float, seconds_in_unit
|
|
11
|
+
|
|
12
|
+
from collections.abc import Sequence
|
|
13
|
+
from typing import Any, Final, TypeAlias
|
|
14
|
+
|
|
15
|
+
def human_timespan(timespan: int | float) -> str:
    """
    Render a duration given in seconds as a human-readable phrase.

    The sign is discarded (negative spans are formatted like positive ones) and the
    value is rounded to the nearest millisecond before being broken down.

    Args:
        timespan: The duration in seconds.

    Returns:
        The non-zero components, largest unit first, joined with ", " and a final
        " and ", e.g. "1 year, 2 weeks, 3 days, 4 hours, 5 minutes and 6.789 seconds".
        Years are 365.25 days long. Whole-unit components are formatted by my_plural();
        the seconds component carries at most three decimals with trailing zeros removed.
        A span that rounds to zero milliseconds yields "0 seconds".

    Raises:
        ValueError: If timespan is NaN (it cannot be rounded to an integer).
        OverflowError: If timespan is infinite.
    """
    # Integer milliseconds keep the divmod chain free of float-modulo artefacts.
    remaining_ms = int(round(abs(float(timespan)) * 1000))
    if not remaining_ms:
        return "0 seconds"

    # (milliseconds per unit, singular unit name), largest unit first.
    unit_table = (
        (31_557_600_000, "year"),  # 365.25 days
        (604_800_000, "week"),
        (86_400_000, "day"),
        (3_600_000, "hour"),
        (60_000, "minute"),
    )

    pieces: list[str] = []
    for unit_ms, unit_name in unit_table:
        count, remaining_ms = divmod(remaining_ms, unit_ms)
        if count:
            pieces.append(my_plural(count, unit_name))

    # Whatever is left is below one minute, i.e. in [0, 60) seconds.
    leftover_seconds = remaining_ms / 1000.0
    if leftover_seconds:
        shown = f"{leftover_seconds:.3f}".rstrip("0").rstrip(".")
        suffix = "" if leftover_seconds == 1.0 else "s"
        pieces.append(f"{shown} second" + suffix)

    # A non-zero span always produces at least one piece.
    *leading, final = pieces
    if not leading:
        return final
    return ", ".join(leading) + " and " + final
|
|
63
|
+
|
|
64
|
+
def format_date_range(date1: dt.datetime, date2: dt.datetime | None = None) -> str:
    """
    Build a compact, human-readable label for the span between two datetimes.

    The two dates may be given in either order; the earlier one is always shown first.
    Months are abbreviated to three letters and the day of month is right-aligned in a
    field of width 2, so single-digit days are preceded by an extra space
    (e.g. 'Jan  7, 2025').

    Args:
        date1: One end of the range.
        date2: The other end of the range. Defaults to date1 when None.

    Returns:
        - Different years:                  'Dec 31, 2024 - Jan  1, 2025'
        - Same year, different months:      'Jan  7 - Feb  3, 2025'
        - Same month, different days:       'Jan  7 - 15, 2025'
        - Same day, different HH:MM:SS:     '06:04:02 - 19:05:39 on Jan  7, 2025'
        - Same day and same HH:MM:SS:       'Jan  7, 2025'

    Raises:
        ValueError: If either date1 or date2 is not a datetime.datetime object.
    """
    import datetime as dt

    abbreviations = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

    if date2 is None:
        date2 = date1

    both_are_datetimes = isinstance(date1, dt.datetime) and isinstance(date2, dt.datetime)
    if not both_are_datetimes:
        raise ValueError(f"Both dates must be datetime.datetime objects, but date1 is {date1} with type {type(date1)} and date2 {date2} with type {type(date2)}.")

    # Put the two ends in chronological order.
    start, end = (date2, date1) if date1 > date2 else (date1, date2)

    start_month = abbreviations[start.month - 1]
    end_month = abbreviations[end.month - 1]

    if start.year != end.year:
        return f"{start_month} {start.day:2d}, {start.year} - {end_month} {end.day:2d}, {end.year}"
    if start.month != end.month:
        return f"{start_month} {start.day:2d} - {end_month} {end.day:2d}, {start.year}"
    if start.day != end.day:
        return f"{start_month} {start.day:2d} - {end.day:2d}, {start.year}"

    # Same calendar day: fall back to the time of day (second resolution).
    start_clock = start.strftime("%H:%M:%S")
    end_clock = end.strftime("%H:%M:%S")
    if start_clock != end_clock:
        return f"{start_clock} - {end_clock} on {start_month} {start.day:2d}, {start.year}"
    return f"{start_month} {start.day:2d}, {start.year}"
|
|
119
|
+
|
|
120
|
+
# Matches a "YYYYMMDD-HHMMSS" stamp immediately followed by a literal ".pkl" at the end of
# the string. The dot is escaped so that only a real ".pkl" extension qualifies (an
# unescaped "." would accept any character in that position, e.g. "...-120000xpkl").
_TIMESTAMP_PATTERN_RE: re.Pattern = re.compile(r"(\d{8}-\d{6})\.pkl$")


def extract_timestamp(the_string: str) -> str | None:
    """
    Extract the trailing timestamp from a string ending in "<YYYYMMDD-HHMMSS>.pkl".

    Only the shape of the stamp is checked (8 digits, a dash, 6 digits); the digits are
    not validated as a real calendar date or time.

    Args:
        the_string: The text to search, typically a pickle file name or path.

    Returns:
        The "YYYYMMDD-HHMMSS" portion, or None if the string does not end with such a
        stamp followed by ".pkl".
    """
    found = _TIMESTAMP_PATTERN_RE.search(the_string)
    return found.group(1) if found else None
|
|
130
|
+
|
|
131
|
+
# Maps upper-case timezone abbreviations to an IANA zone name for the region that uses
# them. Standard and daylight abbreviations of one region share a zone, so the actual
# UTC offset is decided by the zone's own rules for the date in question, not by the
# abbreviation itself. Several abbreviations are ambiguous worldwide; see the warnings.
_TZ_ABBREV_TO_ZONE: dict[str, str] = {
    "UTC"  : "UTC",
    "GMT"  : "Etc/GMT",
    "EST"  : "America/New_York",
    "EDT"  : "America/New_York",
    "CST"  : "America/Chicago",      # WARNING! "CST" can also mean China Standard Time (Asia/Shanghai, UTC+8), so use with caution!
    "CDT"  : "America/Chicago",
    "MST"  : "America/Denver",
    "MDT"  : "America/Denver",
    "PST"  : "America/Los_Angeles",
    "PDT"  : "America/Los_Angeles",
    "HST"  : "Pacific/Honolulu",
    "AKST" : "America/Anchorage",
    "AKDT" : "America/Anchorage",
    "AST"  : "America/Puerto_Rico",  # Atlantic Standard Time
    "ADT"  : "America/Halifax",      # Atlantic Daylight Time (Puerto Rico does not observe daylight time, so it could never yield ADT)
    "NST"  : "America/St_Johns",     # Newfoundland Standard Time
    "NDT"  : "America/St_Johns",     # Newfoundland Daylight Time
    "BST"  : "Europe/London",        # British Summer Time
    "CET"  : "Europe/Berlin",        # Central European Time
    "CEST" : "Europe/Berlin",        # Central European Summer Time
    "EET"  : "Europe/Athens",        # Eastern European Time
    "EEST" : "Europe/Athens",        # Eastern European Summer Time
    "IST"  : "Asia/Kolkata",         # Indian Standard Time - WARNING! "IST" can also mean Irish Standard Time (Europe/Dublin, UTC+1), so use with caution!
    "JST"  : "Asia/Tokyo",           # Japan Standard Time
    "KST"  : "Asia/Seoul",           # Korea Standard Time
    "HKT"  : "Asia/Hong_Kong",       # Hong Kong Time
    "SGT"  : "Asia/Singapore",       # Singapore Time
    "AEST" : "Australia/Sydney",     # Australian Eastern Standard Time
    "AEDT" : "Australia/Sydney",     # Australian Eastern Daylight Time
    "ACST" : "Australia/Adelaide",   # Australian Central Standard Time
    "ACDT" : "Australia/Adelaide",   # Australian Central Daylight Time
    "AWST" : "Australia/Perth",      # Australian Western Standard Time
    "AWDT" : "Australia/Perth",      # Australian Western Daylight Time
    "NZT"  : "Pacific/Auckland",     # New Zealand Time
    "NZST" : "Pacific/Auckland",     # New Zealand Standard Time
    "NZDT" : "Pacific/Auckland",     # New Zealand Daylight Time
    "WET"  : "Europe/Lisbon",        # Western European Time
    "WEST" : "Europe/Lisbon",        # Western European Summer Time
    # ...add any others you need
}
|
|
172
|
+
|
|
173
|
+
# Fixed UTC offsets: a mandatory sign followed by one of the layouts noted per line.
# Exactly one alternative (and therefore one hours group) matches for any given input.
_TZ_OFFSET_RE: re.Pattern = re.compile(r'''
    ^(?P<sign>[+-])
    (?:
        (?P<hours1>\d{1,2})[hH](?P<mins1>\d{1,2})(?:[mM])?   # +5h30m
      | (?P<hours1_only>\d{1,2})[hH]                         # +5h
      | (?P<hours2>\d{1,2}):(?P<mins2>\d{2})                 # +5:30
      | (?P<hours3>\d{1,2})(?P<mins3>\d{2})                  # +0530
      | (?P<hours4>\d{1,2})                                  # +5
    )
    $
''', re.VERBOSE)


def parse_timezone(tz_arg: str | dt.tzinfo | None = None) -> dt.tzinfo | str:
    """
    Turn a timezone specification into a datetime.tzinfo object.

    Accepted inputs:
        - None: UTC.
        - A tzinfo object: returned unchanged.
        - "Naive" (case-insensitive): the input string is returned as is, signalling
          that no timezone should be attached.
        - "Z", "UTC" or "GMT" (case-insensitive): UTC.
        - A fixed offset, optionally prefixed with "UTC" or "GMT":
          "+HH:MM", "+HHMM", "+H", "+Hh", "+HhMMm" (or the minus variants),
          e.g. "+05:30", "-0530", "+5h", "-5h30m", "UTC+5".
        - A known abbreviation from _TZ_ABBREV_TO_ZONE (e.g. "EST", "CET").
        - Any IANA zone name understood by zoneinfo (e.g. "America/New_York").
    Surrounding whitespace in string inputs is ignored.

    Args:
        tz_arg: A timezone string, a datetime.tzinfo object, or None.

    Returns:
        A datetime.tzinfo object, or the original string when it spells "Naive".

    Raises:
        ValueError: If the string is empty, is an offset with minutes >= 60 or a
            magnitude of 24 hours or more, or does not name a known timezone.
        TypeError: If tz_arg is not None, a str, or a tzinfo object.
    """
    import datetime as dt

    if tz_arg is None:
        return dt.timezone.utc
    if isinstance(tz_arg, dt.tzinfo):
        return tz_arg
    if not isinstance(tz_arg, str):
        raise TypeError(f"Expected None, str, or tzinfo; got {type(tz_arg).__name__!r}")

    s = tz_arg.strip()
    up = s.upper()

    # Reject empty input up front instead of handing an empty key to ZoneInfo.
    if not s:
        raise ValueError(f"Unknown timezone {tz_arg!r}: empty timezone string")

    if up == "NAIVE":
        return tz_arg

    if up in ("Z", "UTC", "GMT"):
        return dt.timezone.utc

    # "UTC+5" / "GMT-03:30": only the part after the prefix is tried as an offset.
    offset_text = s[3:].strip() if up.startswith(("UTC", "GMT")) else s

    m = _TZ_OFFSET_RE.fullmatch(offset_text)
    if m:
        sign = 1 if m.group("sign") == "+" else -1
        hours = next(
            int(m.group(name))
            for name in ("hours1", "hours1_only", "hours2", "hours3", "hours4")
            if m.group(name) is not None
        )
        minutes = next(
            (int(m.group(name)) for name in ("mins1", "mins2", "mins3") if m.group(name) is not None),
            0,
        )
        if minutes >= 60:
            raise ValueError(f"Invalid UTC offset {tz_arg!r}: minutes must be less than 60")
        try:
            return dt.timezone(sign * dt.timedelta(hours=hours, minutes=minutes))
        except ValueError as e:  # magnitude of 24 hours or more
            raise ValueError(f"Invalid UTC offset {tz_arg!r}: {e}") from e

    try:
        from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
    except ImportError:  # for Python < 3.9, fall back to backports.zoneinfo
        from backports.zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    # Known abbreviation first, otherwise treat the (stripped) text as an IANA zone name.
    zone_key = _TZ_ABBREV_TO_ZONE.get(up, s)
    try:
        return ZoneInfo(zone_key)
    except (ZoneInfoNotFoundError, OSError) as e:
        # OSError covers keys that resolve to a directory of the tz database (e.g. "America").
        raise ValueError(f"Unknown timezone {tz_arg!r}: {e}") from e
|
|
281
|
+
|
|
282
|
+
def decimal_year_to_datetime(dec: float, use_astropy: bool = False) -> dt.datetime:
    """
    Convert a decimal year (e.g. 2002.291) to a timezone-aware UTC datetime.

    In the default path the integer part selects the calendar year and the fractional
    part is scaled by that year's actual length (365 or 366 days), so x.0 maps to
    January 1st 00:00 UTC of year x.

    Args:
        dec: The decimal year. Years 1 through 9999 are representable.
        use_astropy: If True, delegate the conversion to astropy.time.Time instead.

    Returns:
        A datetime.datetime with tzinfo set to UTC.

    Raises:
        ValueError: If dec is NaN, infinite, or outside the range datetime can
            represent, or if use_astropy is True and astropy is not installed.

    Usage: new_datetime_datetime_object = decimal_year_to_datetime(2002.291)
    """
    import calendar
    import datetime as dt

    if use_astropy:
        try:
            from astropy.time import Time
        except ImportError as e:
            raise ValueError(f"'use_astropy=True' requires the astropy package: {e}") from e
        # NOTE(review): "jyear" reads dec as a Julian epoch year, which is not the same
        # scaling as the calendar-year interpolation below; astropy also offers a
        # "decimalyear" format - confirm which one is intended.
        t = Time(dec, format="jyear", scale="utc")
        return t.to_datetime().replace(tzinfo=dt.timezone.utc)

    try:
        year = int(dec)  # raises ValueError for NaN, OverflowError for infinity
        start_dt = dt.datetime(year, 1, 1, tzinfo=dt.timezone.utc)
        # Derive the year length from the calendar instead of building January 1st of
        # year + 1, which does not exist for year 9999.
        year_secs = (366 if calendar.isleap(year) else 365) * 86_400.0
        return start_dt + dt.timedelta(seconds=(dec - year) * year_secs)
    except (ValueError, OverflowError) as e:
        raise ValueError(f"Failed to convert decimal year {dec} to datetime: {e}") from e
|
|
306
|
+
|
|
307
|
+
def _parse_iso(given_date: str) -> dt.datetime:
    """
    Parse an ISO8601 date string with dateutil's isoparse().

    Args:
        given_date: The ISO8601 text to parse.

    Returns:
        The datetime produced by dateutil.parser.isoparse.

    Raises:
        ValueError: If the string is not valid ISO8601; the message names the
            offending input and the original parser error is chained as the cause.
    """
    from dateutil.parser import isoparse

    try:
        return isoparse(given_date)
    except ValueError as e:
        # isoparse signals bad input with plain ValueError (dateutil's ParserError is a
        # ValueError subclass), so this is the exception that must be caught here.
        raise ValueError(f"Invalid ISO8601 date '{given_date}'") from e
|
|
315
|
+
|
|
316
|
+
# A signed integer or decimal number, optionally prefixed with "JD" or "MJD"
# (case-insensitive) and surrounded by optional whitespace. Used with fullmatch() in
# _should_convert(); note that the prefix is optional, so bare numeric strings match too.
_JD_MJD_SIMPLE_RE: re.Pattern = re.compile(r"\s*(JD|MJD)?\s*[+-]?\d+(\.\d+)?\s*", re.IGNORECASE)

# Same shape as _JD_MJD_SIMPLE_RE, but exposes the optional prefix and the number as the
# named groups "prefix" and "value". Used with fullmatch() in parse_datetime().
_JD_MJD_CAPTURE_RE: re.Pattern = re.compile(r"\s*(?P<prefix>JD|MJD)?\s*(?P<value>[+-]?\d+(?:\.\d+)?)\s*", re.IGNORECASE)

# A trailing "Z", "+HH:MM"/"-HH:MM" or "+HHMM"/"-HHMM". Used with search() in
# _should_convert() to detect an explicit UTC offset at the end of a date string.
# NOTE(review): the "[+-]\d{4}$" alternative also matches a trailing "-YYYY", so a string
# such as "18-10-2002" is reported as carrying an offset - confirm whether that is intended.
_OFFSET_IN_STR_RE: re.Pattern = re.compile(r"(Z|[+-]\d{2}:\d{2}|[+-]\d{4})$")

# Julian Date of the Unix epoch; parse_datetime() subtracts it from a JD and multiplies by
# 86 400 to obtain seconds relative to 1970-01-01T00:00:00 UTC.
_JD_UNIX_EPOCH: float = 2_440_587.5

# Every input type accepted as a date. Kept as a string so that numpy (np), pandas (pd)
# and datetime (dt) do not have to be imported at module level.
AnyDateTimeType: TypeAlias = "str | float | int | np.datetime64 | pd.Timestamp | dt.datetime"
|
|
325
|
+
|
|
326
|
+
def _should_convert(given_date: AnyDateTimeType, format_str: str | None = None) -> bool:
    """
    Decide how a target timezone should be applied to a parsed date.

    Inputs that identify an absolute instant are converted (the wall clock is shifted
    into the target timezone); everything else only gets the timezone attached.

    Args:
        given_date: The original, unparsed date input.
        format_str: The format string supplied by the caller, if any.

    Returns:
        True (convert) when given_date is
            - an int or float (but not a bool),
            - a string spelling "J2000", "UNIX" or "NOW" (case-insensitive),
            - any string when format_str is "JD" or "MJD" (case-insensitive),
            - a string fully matching _JD_MJD_SIMPLE_RE,
            - a string in which _OFFSET_IN_STR_RE finds a trailing offset or "Z",
            - a datetime.datetime with tzinfo set;
        False (attach only) otherwise.
    """
    import datetime as dt

    # Default outcome: attach the timezone without shifting the clock.
    verdict = False
    message = "Given date is not a number, JD/MJD, or aware datetime: %s, so the timezone will be attached without shifting the clock"
    subject = given_date

    if isinstance(given_date, (int, float)) and not isinstance(given_date, bool):
        verdict = True
        message = "Given date is a number: %s, so it will be converted by shifting the clock"
    elif isinstance(given_date, str):
        keyword = given_date.strip().upper()
        if keyword in ("J2000", "UNIX", "NOW"):
            verdict = True
            message = "Given date is a special keyword: %s, so it will be converted by shifting the clock"
            subject = keyword
        elif format_str and format_str.upper() in ("JD", "MJD"):
            verdict = True
            message = "Given date has a format_str: %s, so it will be converted by shifting the clock"
            subject = format_str
        elif _JD_MJD_SIMPLE_RE.fullmatch(given_date):
            verdict = True
            message = "Given date is a JD/MJD: %s, so it will be converted by shifting the clock"
        elif _OFFSET_IN_STR_RE.search(given_date):
            verdict = True
            message = "Given date has an explicit offset or Z: %s, so it will be converted by shifting the clock"
    elif isinstance(given_date, dt.datetime) and given_date.tzinfo is not None:
        verdict = True
        message = "Given date is an aware datetime: %s, so it will be converted by shifting the clock"

    # Only touch the module-level logging call when DEBUG is actually enabled on the root logger.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug(message, subject)
    return verdict
|
|
357
|
+
|
|
358
|
+
def _finalize_datetime(parsed_dt: dt.datetime, original_input: AnyDateTimeType,
                       format_str: str | None, tz_arg: str | dt.tzinfo | None,
                       should_convert: bool | None = None) -> dt.datetime:
    """
    Apply the requested timezone to an already parsed datetime.

    The timezone is either applied by conversion (datetime.astimezone, which shifts the
    wall clock) or merely attached (datetime.replace(tzinfo=...), wall clock untouched).

    Args:
        parsed_dt: The datetime object that has been parsed from the original input.
        original_input: The original input that was used to parse the datetime.
        format_str: The format string used to parse the datetime, if any.
        tz_arg: The timezone argument: a string, a datetime.tzinfo object, or None.
        should_convert: True forces conversion, False forces attach-only. With None the
            choice is delegated to _should_convert(original_input, format_str).

    Returns:
        A datetime.datetime object in the specified timezone. If tz_arg is "Naive"
        (case-insensitive, surrounding whitespace ignored), parsed_dt is returned with
        its tzinfo removed and should_convert is ignored.

    Raises:
        TypeError: If parsed_dt is not a datetime.datetime object.
        ValueError: If tz_arg is not a valid timezone string (raised by parse_timezone).
    """
    import datetime as dt

    # Enforce the documented contract instead of failing later with an AttributeError.
    if not isinstance(parsed_dt, dt.datetime):
        raise TypeError(f"Expected 'parsed_dt' to be a datetime.datetime object, got {type(parsed_dt).__name__!r}")

    if isinstance(tz_arg, str) and tz_arg.strip().upper() == "NAIVE":
        if logging.getLogger().isEnabledFor(logging.DEBUG): logging.debug("Naive timezone requested, returning datetime %s without any timezone info", parsed_dt)
        return parsed_dt.replace(tzinfo=None)

    target_tz = parse_timezone(tz_arg)

    # An explicit True/False wins; the heuristic is consulted only when no override is given.
    if should_convert is True:
        convert = True
    elif should_convert is False:
        convert = False
    else:
        convert = _should_convert(original_input, format_str)

    if convert:
        if logging.getLogger().isEnabledFor(logging.DEBUG): logging.debug("Converting datetime %s to timezone %s by shifting the clock", parsed_dt, target_tz)
        # NOTE(review): astimezone() treats a naive parsed_dt as system local time - confirm
        # that this is the intended reading for naive inputs with should_convert=True.
        return parsed_dt.astimezone(target_tz)

    if logging.getLogger().isEnabledFor(logging.DEBUG): logging.debug("Attaching timezone %s to datetime %s without shifting the clock", target_tz, parsed_dt)
    return parsed_dt.replace(tzinfo=target_tz)
|
|
392
|
+
|
|
393
|
+
def parse_datetime(given_date: AnyDateTimeType, timezone: str | dt.tzinfo | None = None,
|
|
394
|
+
format_str: str | None = None,
|
|
395
|
+
should_convert: bool | None = None) -> dt.datetime:
|
|
396
|
+
"""
|
|
397
|
+
Try parsing the given_date string or number into a datetime.datetime object in the specified timezone.
|
|
398
|
+
|
|
399
|
+
If "format_str" is provided, it will be used to parse the date string. These format types are accepted:
|
|
400
|
+
- "seconds" or "milliseconds" indicating the number of seconds or milliseconds since an epoch (Unix epoch by default).
|
|
401
|
+
- "YYYY-MM-DD" or similar ISO8601 formats such as "YYYY-MM-DDTHH:MM:SS", "MM/DD/YYYY", etc.
|
|
402
|
+
- A custom string following this pattern: "units (optional: since/after epoch)", where "units" can be anything that the function seconds_in_unit() accepts (e.g. "days", "weeks", "months", etc.). The optional epoch time can be a string, float, int, numpy.datetime64, pandas.Timestamp, or datetime.datetime object. Example: "days since 1990", "milliseconds after J2000", "sidereal days since 2000-01-01", etc. If the epoch is not specified, it defaults to the Unix epoch (1970-01-01T00:00:00Z)
|
|
403
|
+
|
|
404
|
+
If a boolean "should_convert" is provided, it will override the default behavior of whether to convert the datetime to the specified timezone by shifting the clock or just attaching the timezone without shifting. If None, the function will determine this based on the type of given_date and format_str.
|
|
405
|
+
|
|
406
|
+
If a given_date starts with "JD" or "MJD", it will be treated as a Julian Date or Modified Julian Date, respectively.
|
|
407
|
+
|
|
408
|
+
Otherwise, if given_date is a float or int, treat it as a decimal year by default if format_str is not provided.
|
|
409
|
+
|
|
410
|
+
Any call that doesn't provide a timezone argument will default to UTC.
|
|
411
|
+
The timezone can be a datetime.tzinfo object or a string that can be converted to a ZoneInfo object (e.g. 'America/New_York').
|
|
412
|
+
If the given_date is an "aware" datetime.datetime object which already has a timezone attached, it will be converted to the specified timezone (which may involve changing its date and time if the specified timezone is different).
|
|
413
|
+
The timezone can also be a fixed‐offset like "+05:30" or "-04:00", or the string "Naive" to indicate that the datetime should be treated as a naive datetime (i.e. without any timezone information).
|
|
414
|
+
|
|
415
|
+
Accepts:
|
|
416
|
+
'NOW' (case-insensitive) → current datetime
|
|
417
|
+
strings in YYYY, YYYY-MM, YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS, or other ISO8601 formats (e.g. '2002-10-18T07:00:00Z', '2002-10-18 07:00:00+00:00').
|
|
418
|
+
If YYYY is provided, it will default to January 1st of that year at midnight.
|
|
419
|
+
If YYYY-MM is provided, it will default to the first day of that month at midnight.
|
|
420
|
+
If YYYY-MM-DD is provided, it will default to midnight on that day.
|
|
421
|
+
fallback to dateutil.parser.parse for free-form strings ("18 Oct 2002", "March 5th, 2020", etc.)
|
|
422
|
+
floats (e.g. 2002.29178082191777) or integer (e.g. 2002) → decimal year
|
|
423
|
+
numpy.datetime64 objects (e.g. np.datetime64('2002-10-18T07:00:00'))
|
|
424
|
+
pandas.Timestamp objects (e.g. pd.Timestamp('2002-10-18 07:00:00'))
|
|
425
|
+
datetime.datetime objects (e.g. datetime.datetime(2002, 10, 18, 7, 0, 0))
|
|
426
|
+
|
|
427
|
+
Args:
|
|
428
|
+
given_date: The date to parse, which can be a string, float, int, numpy.datetime64,
|
|
429
|
+
pandas.Timestamp, or datetime.datetime object.
|
|
430
|
+
timezone: A string or datetime.tzinfo object representing the timezone to convert
|
|
431
|
+
the datetime to. If None, defaults to UTC.
|
|
432
|
+
format_str: A string indicating the format of the date. If None, the function will
|
|
433
|
+
try to infer the format from the given_date.
|
|
434
|
+
should_convert: A boolean indicating whether to convert the datetime to the specified
|
|
435
|
+
timezone by shifting the clock (True) or just attaching the timezone
|
|
436
|
+
without shifting (False). If None, the function will determine this
|
|
437
|
+
based on the type of given_date and format_str.
|
|
438
|
+
|
|
439
|
+
Returns:
|
|
440
|
+
datetime.datetime object in the specified timezone.
|
|
441
|
+
Note that datetime.datetime objects cannot represent dates before 1 January 1, 0001 or after 31 December 9999.
|
|
442
|
+
So dates outside this range will raise a ValueError. Future versions of this code may support a wider range of dates (like 44 BC, 44 BCE, etc.) using libraries like 'astropy.time': https://chatgpt.com/share/685c5157-5cac-8006-b68c-4a0731927a50
|
|
443
|
+
However, this will require the function to return an 'astropy.time.Time' object instead of a 'datetime.datetime' object.
|
|
444
|
+
|
|
445
|
+
Raises:
|
|
446
|
+
ValueError: If the given_date cannot be parsed into a datetime object, or if the timezone is invalid.
|
|
447
|
+
TypeError: If the given_date is not a string, float, int, numpy.datetime64, pandas.Timestamp, or datetime.datetime object.
|
|
448
|
+
"""
|
|
449
|
+
import datetime as dt
|
|
450
|
+
fallback_logging_config() # Ensure logging is configured
|
|
451
|
+
|
|
452
|
+
parsed_tz = parse_timezone(timezone) # Ensure timezone is a valid tzinfo object or string
|
|
453
|
+
|
|
454
|
+
parsed_dt = None
|
|
455
|
+
|
|
456
|
+
# Handle special cases:
|
|
457
|
+
if isinstance(given_date, str):
|
|
458
|
+
if given_date.strip().upper() == "J2000":
|
|
459
|
+
# J2000 is January 1, 2000, 11:58:55.816 UTC
|
|
460
|
+
parsed_dt = dt.datetime(2000, 1, 1, 11, 58, 55, 816_000, tzinfo=dt.timezone.utc)
|
|
461
|
+
if given_date.strip().upper() == "UNIX":
|
|
462
|
+
# UNIX epoch is January 1, 1970, 00:00:00 UTC
|
|
463
|
+
parsed_dt = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)
|
|
464
|
+
if given_date.strip().upper() == "NOW":
|
|
465
|
+
parsed_dt = dt.datetime.now(tz=dt.timezone.utc)
|
|
466
|
+
|
|
467
|
+
# Handle forced or explicit Julian Date (JD) or Modified Julian Date (MJD)
|
|
468
|
+
m: re.Match | None = None
|
|
469
|
+
prefix: str | None = None
|
|
470
|
+
if parsed_dt is None and isinstance(given_date, str):
|
|
471
|
+
m = _JD_MJD_CAPTURE_RE.fullmatch(given_date)
|
|
472
|
+
if m:
|
|
473
|
+
prefix = m.group("prefix")
|
|
474
|
+
|
|
475
|
+
# Trigger JD/MJD branch only if format_str equals "JD" or "MJD", or prefix was provided
|
|
476
|
+
if parsed_dt is None and (prefix is not None or (format_str and (format_str.upper() == "JD" or format_str.upper() == "MJD"))):
|
|
477
|
+
# Determine raw value
|
|
478
|
+
if isinstance(given_date, (int, float)):
|
|
479
|
+
value = float(given_date)
|
|
480
|
+
else:
|
|
481
|
+
if m is not None:
|
|
482
|
+
value = float(m.group("value"))
|
|
483
|
+
else:
|
|
484
|
+
try:
|
|
485
|
+
value = float(given_date.strip())
|
|
486
|
+
except ValueError as e:
|
|
487
|
+
raise ValueError(f"Expected a JD/MJD numeric value, got {given_date!r}") from e
|
|
488
|
+
|
|
489
|
+
# Determine if MJD conversion needed
|
|
490
|
+
use_mjd = bool((format_str and format_str.upper() == "MJD") or (prefix and prefix.upper() == "MJD"))
|
|
491
|
+
|
|
492
|
+
# Convert MJD to JD if necessary, then to datetime via timedelta from Unix epoch
|
|
493
|
+
jd_val = value + (2_400_000.5 if use_mjd else 0.0)
|
|
494
|
+
unix_secs = (jd_val - _JD_UNIX_EPOCH) * 86_400
|
|
495
|
+
parsed_dt = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=unix_secs)
|
|
496
|
+
|
|
497
|
+
# Check if the given_date is a string that can be parsed as a float
|
|
498
|
+
if parsed_dt is None and isinstance(given_date, str) and is_float(given_date):
|
|
499
|
+
given_date = float(given_date) # Convert string to float if it represents a number
|
|
500
|
+
# Check if the given_date is a float or int but NOT a boolean
|
|
501
|
+
if parsed_dt is None and isinstance(given_date, (int, float)) and not isinstance(given_date, bool):
|
|
502
|
+
if format_str is None:
|
|
503
|
+
# If the given_date is a decimal year, convert it to datetime in the specified timezone
|
|
504
|
+
# Note: This will not shift the clock, just attach the tzinfo.
|
|
505
|
+
parsed_dt = decimal_year_to_datetime(float(given_date))
|
|
506
|
+
else: # If format is provided, parse the date using the specified format.
|
|
507
|
+
if not isinstance(format_str, str):
|
|
508
|
+
raise TypeError(f"Expected 'format' to be a string, got {type(format_str).__name__!r}")
|
|
509
|
+
# Make sure the format string is a valid example of "units (optionally: since/after epoch)"
|
|
510
|
+
# Try to split by since or after, whichever works:
|
|
511
|
+
format_parts = re.split(r'\s+(since|after)\s+', format_str, maxsplit=1)
|
|
512
|
+
if logging.getLogger().isEnabledFor(logging.DEBUG): logging.debug("Parsing date with format string: '%s' split into parts: %s", format_str, format_parts)
|
|
513
|
+
if len(format_parts) > 3:
|
|
514
|
+
raise ValueError(f"Invalid format string: '{format_str}'. Expected at most three parts: 'units', 'since/after', and 'epoch'.")
|
|
515
|
+
# The first part should be acceptable by seconds_in_unit():
|
|
516
|
+
try:
|
|
517
|
+
units = format_parts[0].strip()
|
|
518
|
+
multiplier = seconds_in_unit(units) # This will raise ValueError if the unit is unknown
|
|
519
|
+
except ValueError as e:
|
|
520
|
+
raise ValueError(f"Invalid time unit '{units}' in format string '{format_str}': {e}") from e
|
|
521
|
+
# If the format_parts list has only one part, it means the epoch defaults to the Unix epoch (1970-01-01T00:00:00Z).
|
|
522
|
+
if len(format_parts) == 1:
|
|
523
|
+
# If the format_parts list has only one part, it means the format is just "units" (e.g. "days", "weeks", etc.)
|
|
524
|
+
# In this case, we assume the epoch is the Unix epoch (1970-01-01T00:00:00Z).
|
|
525
|
+
epoch_str = "1970-01-01T00:00:00Z"
|
|
526
|
+
else:
|
|
527
|
+
# If the format_parts list has three parts, the third part is the epoch.
|
|
528
|
+
epoch_str = format_parts[2].strip()
|
|
529
|
+
try:
|
|
530
|
+
epoch = parse_datetime(epoch_str, timezone=parsed_tz)
|
|
531
|
+
except ValueError as e:
|
|
532
|
+
raise ValueError(f"Invalid epoch '{epoch}' in format string '{format_str}': {e}") from e
|
|
533
|
+
# Now we can calculate the datetime based on the given_date (and the multiplier from 'units') and the epoch
|
|
534
|
+
parsed_dt = epoch + dt.timedelta(seconds=float(given_date) * multiplier)
|
|
535
|
+
|
|
536
|
+
if parsed_dt is None and type(given_date) is dt.datetime: # Don't use isinstance() here, because it will also match subclasses like Pandas Timestamp
|
|
537
|
+
parsed_dt = given_date
|
|
538
|
+
elif isinstance(given_date, dt.date): # Handle date objects (without time) as midnight
|
|
539
|
+
parsed_dt = dt.datetime.combine(given_date, dt.time.min)
|
|
540
|
+
|
|
541
|
+
if parsed_dt is None:
|
|
542
|
+
try:
|
|
543
|
+
import numpy as np
|
|
544
|
+
except ImportError:
|
|
545
|
+
np = None
|
|
546
|
+
if np is not None and isinstance(given_date, np.datetime64):
|
|
547
|
+
ts_ns = given_date.astype("datetime64[ns]").astype("int64")
|
|
548
|
+
parsed_dt = dt.datetime.fromtimestamp(
|
|
549
|
+
ts_ns / 1e9,
|
|
550
|
+
tz=parsed_tz if isinstance(parsed_tz, dt.tzinfo) else None,
|
|
551
|
+
)
|
|
552
|
+
|
|
553
|
+
if parsed_dt is None:
|
|
554
|
+
try:
|
|
555
|
+
import pandas as pd
|
|
556
|
+
except ImportError:
|
|
557
|
+
pd = None
|
|
558
|
+
if pd is not None and isinstance(given_date, pd.Timestamp):
|
|
559
|
+
parsed_dt = given_date.to_pydatetime()
|
|
560
|
+
|
|
561
|
+
error_message: str = f"The date '{given_date}' is type {type(given_date).__name__!r} in an unknown format. Please use NOW, YYYY, YYYY-MM, YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS, other ISO8601 strings, or a decimal year like 2002.291. Datetimes in pandas.Timestamp, numpy.datetime64, or datetime.datetime formats are also accepted and will be converted to datetime.datetime objects in the specified timezone ({parsed_tz})."
|
|
562
|
+
|
|
563
|
+
if parsed_dt is None and not isinstance(given_date, str):
|
|
564
|
+
raise TypeError(error_message)
|
|
565
|
+
|
|
566
|
+
if parsed_dt is not None:
|
|
567
|
+
# Finalize the datetime object by converting it to the target timezone or just attaching the timezone without shifting the clock
|
|
568
|
+
return _finalize_datetime(parsed_dt, given_date, format_str, parsed_tz, should_convert)
|
|
569
|
+
|
|
570
|
+
# From here on, we know it's a str (we raised or otherwise handled non-str types above)
|
|
571
|
+
assert isinstance(given_date, str)
|
|
572
|
+
given_string = given_date
|
|
573
|
+
|
|
574
|
+
if parsed_dt is None and format_str is not None:
|
|
575
|
+
try:
|
|
576
|
+
parsed_dt = dt.datetime.strptime(given_string, format_str)
|
|
577
|
+
except ValueError as e:
|
|
578
|
+
raise ValueError(f"Invalid date format '{given_string}' with specified format '{format_str}': {e}") from e
|
|
579
|
+
|
|
580
|
+
# Try parsing the date string in various formats
|
|
581
|
+
# Start with RFC 2822 format, then ISO8601, then free-form strings
|
|
582
|
+
# Store any errors encountered in a list to provide feedback if all parsing attempts fail.
|
|
583
|
+
errors: list[str] = []
|
|
584
|
+
|
|
585
|
+
if parsed_dt is None:
|
|
586
|
+
import email.utils
|
|
587
|
+
try:
|
|
588
|
+
# parses "Tue, 25 Jun 2025 14:00:00 GMT"
|
|
589
|
+
parsed_dt = email.utils.parsedate_to_datetime(given_string)
|
|
590
|
+
except (TypeError, ValueError) as e:
|
|
591
|
+
errors.append(f"Failed to parse '{given_string}' as an RFC 2822 date: {e}")
|
|
592
|
+
|
|
593
|
+
if parsed_dt is None:
|
|
594
|
+
try:
|
|
595
|
+
parsed_dt = _parse_iso(given_string)
|
|
596
|
+
except ValueError as e:
|
|
597
|
+
errors.append(f"Failed to parse '{given_string}' as an ISO8601 date: {e}")
|
|
598
|
+
|
|
599
|
+
if parsed_dt is None:
|
|
600
|
+
try:
|
|
601
|
+
from dateutil.parser import parse as parse_fuzzy
|
|
602
|
+
parsed_dt = parse_fuzzy(given_string, default=dt.datetime(1900, 1, 1))
|
|
603
|
+
except ValueError as e:
|
|
604
|
+
errors.append(f"Failed to parse '{given_string}' as a free-form date string: {e}")
|
|
605
|
+
|
|
606
|
+
if parsed_dt is None:
|
|
607
|
+
if np is None:
|
|
608
|
+
errors.append("The numpy package is not installed, so numpy.datetime64 objects cannot be parsed.")
|
|
609
|
+
if pd is None:
|
|
610
|
+
errors.append("The pandas package is not installed, so pandas.Timestamp objects cannot be parsed.")
|
|
611
|
+
else:
|
|
612
|
+
# Finalize the datetime object by converting it to the target timezone or just attaching the timezone without shifting the clock
|
|
613
|
+
return _finalize_datetime(parsed_dt, given_string, format_str, parsed_tz, should_convert)
|
|
614
|
+
|
|
615
|
+
raise ValueError(error_message + "\n".join(map(str, errors)) + "\nPlease check the input format and try again.")
|
|
616
|
+
|
|
617
|
+
class Precision:
    """Named integer levels describing how much of a date a label shows.

    The values increase with granularity: ``YEAR`` (0) is the coarsest level
    and ``SECOND`` (4) the finest. There is deliberately no hour-only level;
    the step after ``DAY`` is ``MINUTE``.
    """

    # Calendar-only levels.
    YEAR: Final[int] = 0
    MONTH: Final[int] = 1
    DAY: Final[int] = 2

    # Levels that also include a time of day.
    MINUTE: Final[int] = 3
    SECOND: Final[int] = 4
|
|
628
|
+
|
|
629
|
+
# Default strftime pattern for each precision level. The list index is the
# level number, so entry 0 is the year-only pattern (Precision.YEAR) and
# entry 4 is the pattern with seconds (Precision.SECOND).
ADAPTIVE_FORMAT_LEVELS: Final[list[str]] = [
    # 0 -> year only, e.g. "2024"
    "%Y",
    # 1 -> year and month, e.g. "2024-03"
    "%Y-%m",
    # 2 -> calendar day, e.g. "2024-03-15"
    "%Y-%m-%d",
    # 3 -> day plus hour and minute, e.g. "2024-03-15 09:30"
    "%Y-%m-%d %H:%M",
    # 4 -> day plus hour, minute and second, e.g. "2024-03-15 09:30:45"
    "%Y-%m-%d %H:%M:%S",
]
|
|
636
|
+
|
|
637
|
+
def _normalize_to_datetime(date: "AnyDateTimeType") -> "dt.datetime | None":
    """Convert a single date value to ``datetime.datetime``.

    Supported inputs:

    * ``datetime.datetime`` (including subclasses such as
      ``pandas.Timestamp``): returned unchanged, except that a "not-a-time"
      value such as ``pandas.NaT`` yields ``None``.
    * ``datetime.date``: promoted to midnight of that day.
    * ``int`` / ``float``: interpreted as a matplotlib date number; the
      result has its tzinfo stripped. NaN yields ``None``.
    * ``numpy.datetime64``: converted to a naive datetime by exact offset
      from 1970-01-01T00:00:00 at microsecond resolution. NaT yields ``None``.

    Args:
        date: A date value in any supported format.

    Returns:
        A datetime.datetime object, or None if the value is NaT/NaN.

    Raises:
        TypeError: If ``date`` is not one of the supported types.
    """
    import datetime as dt

    import numpy as np

    if isinstance(date, dt.datetime):
        # pandas.NaT is a datetime subclass that cannot be formatted with
        # strftime. Like NaN it compares unequal to itself, which lets it be
        # detected here without importing pandas.
        if date != date:  # noqa: PLR0124 - deliberate self-comparison
            return None
        return date
    if isinstance(date, dt.date):
        # A plain calendar date carries no time of day; treat it as midnight.
        return dt.datetime.combine(date, dt.time.min)
    if isinstance(date, (int, float)):
        if np.isnan(date):
            return None
        # Imported lazily so matplotlib is only needed for numeric input.
        import matplotlib.dates as mdates

        return mdates.num2date(date).replace(tzinfo=None)
    if isinstance(date, np.datetime64):
        if np.isnat(date):
            return None
        # Integer microseconds since the epoch plus timedelta arithmetic
        # avoids both the float round-trip and fromtimestamp(), whose valid
        # range depends on the platform C library (e.g. pre-1970 values).
        micros = int(date.astype("datetime64[us]").astype("int64"))
        return dt.datetime(1970, 1, 1) + dt.timedelta(microseconds=micros)
    msg = f"Unsupported date type: {type(date)}"
    raise TypeError(msg)
|
|
666
|
+
|
|
667
|
+
def adaptive_date_labels(
    dates: "Sequence[AnyDateTimeType]",
    *,
    min_precision: int = Precision.YEAR,
    max_precision: int = Precision.SECOND,
    format_levels: "list[str] | None" = None,
) -> list[str]:
    """Format dates at the coarsest precision that produces unique labels.

    Starting at ``min_precision``, every date is formatted with that level's
    pattern; the first level at which all non-empty labels are distinct wins.
    If no level up to ``max_precision`` separates them, the labels for
    ``max_precision`` are returned even though some collide. A single date is
    formatted at the day level, clamped into
    ``[min_precision, max_precision]``.

    Args:
        dates: Date values (datetime.datetime, numpy.datetime64, or
            matplotlib date floats). Any iterable is accepted, including
            numpy arrays and pandas Index/Series objects.
        min_precision: Minimum precision level (default: Precision.YEAR).
            The formatter will never produce labels coarser than this.
        max_precision: Maximum precision level (default: Precision.SECOND).
            The formatter stops refining at this level even if labels collide.
        format_levels: Custom format strings for each level. Must have length
            >= max_precision + 1. Defaults to ADAPTIVE_FORMAT_LEVELS.

    Returns:
        List of formatted date strings, one per input date. Empty strings
        for NaT/NaN values.

    Raises:
        IndexError: If a level that has to be rendered has no entry in the
            list of format strings.
        TypeError: If an element of ``dates`` has an unsupported type.
    """
    import numpy as np

    # Arrays with a zero-length axis are empty regardless of their shape.
    if isinstance(dates, np.ndarray) and dates.size == 0:
        return []

    levels = format_levels if format_levels is not None else ADAPTIVE_FORMAT_LEVELS

    # Decide emptiness on the materialised list rather than on ``dates``:
    # truthiness is ambiguous (ValueError) for pandas Index/Series objects.
    normalized = [_normalize_to_datetime(d) for d in dates]
    if not normalized:
        return []

    def _render(level: int) -> list[str]:
        """Format every normalised date with the pattern for ``level``."""
        fmt = levels[level]
        return [d.strftime(fmt) if d is not None else "" for d in normalized]

    if len(normalized) == 1:
        # A lone label is unique at every level, so aim for the day level
        # and clamp it into the caller's bounds.
        if normalized[0] is None:
            return [""]
        return _render(min(max(min_precision, Precision.DAY), max_precision))

    labels: "list[str] | None" = None
    for level in range(min_precision, max_precision + 1):
        labels = _render(level)
        # NaT/NaN entries all render as "" and must not count as collisions.
        non_empty = [lbl for lbl in labels if lbl != ""]
        if len(set(non_empty)) == len(non_empty):
            return labels

    # No level was unique. ``labels`` already holds the max_precision
    # rendering unless the range was empty (min_precision > max_precision).
    return labels if labels is not None else _render(max_precision)
|
|
722
|
+
|
|
723
|
+
class AdaptiveDateFormatter:
    """Matplotlib Formatter that auto-selects date label precision.

    Uses adaptive disambiguation: labels start at the coarsest level and
    refine until all tick labels are unique. Drop-in replacement for any
    matplotlib axis formatter or colorbar formatter.

    Matplotlib only calls ``format_ticks`` (where all ticks are seen together)
    on genuine ``matplotlib.ticker.Formatter`` instances; any other callable
    passed to ``set_major_formatter`` is wrapped in a ``FuncFormatter`` and
    asked for one label at a time. To keep matplotlib an optional, lazily
    imported dependency while still taking part in that protocol, instances
    are created from a subclass that mixes in ``matplotlib.ticker.Formatter``.
    The subclass is built on first instantiation.
    ``isinstance(obj, AdaptiveDateFormatter)`` still holds.

    Args:
        min_precision: Minimum precision level (default: Precision.YEAR).
        max_precision: Maximum precision level (default: Precision.SECOND).
        format_levels: Custom format strings per level.

    Example:
        >>> ax.xaxis.set_major_formatter(AdaptiveDateFormatter())
        >>> cbar.ax.yaxis.set_major_formatter(AdaptiveDateFormatter())
    """

    def __new__(cls, *args: object, **kwargs: object) -> "AdaptiveDateFormatter":
        """Allocate the instance as a real ``matplotlib.ticker.Formatter``.

        Raises:
            ImportError: If matplotlib is not installed.
        """
        import matplotlib.ticker as mticker

        target = cls
        if not issubclass(cls, mticker.Formatter):
            # Look only in this class's own namespace so that every user
            # subclass gets (and caches) its own Formatter-backed variant.
            target = cls.__dict__.get("_mpl_class")
            if target is None:
                target = type(
                    cls.__name__,
                    (cls, mticker.Formatter),
                    {
                        "__module__": cls.__module__,
                        "__qualname__": cls.__qualname__,
                        "_public_class": cls,
                    },
                )
                cls._mpl_class = target
        return super().__new__(target)

    def __init__(
        self,
        *,
        min_precision: int = Precision.YEAR,
        max_precision: int = Precision.SECOND,
        format_levels: "list[str] | None" = None,
    ) -> None:
        """Initialize the AdaptiveDateFormatter.

        Args:
            min_precision: Minimum precision level.
            max_precision: Maximum precision level.
            format_levels: Custom format strings per level.
        """
        self._min_precision = min_precision
        self._max_precision = max_precision
        self._format_levels = format_levels
        # Labels from the most recent format_ticks() call, keyed by tick value.
        self._cached_labels: dict[float, str] = {}

    def __reduce__(self) -> tuple:
        """Pickle via the importable public class.

        The Formatter-backed class is created at runtime and cannot be looked
        up by name, so unpickling rebuilds a default instance of the public
        class and then restores the saved attributes.
        """
        public = type(self).__dict__.get("_public_class", type(self))
        return (public, (), dict(self.__dict__))

    def format_ticks(self, values: list[float]) -> list[str]:
        """Format all tick values collectively with disambiguation.

        The labels are also cached per tick value so that a later
        ``__call__`` for the same value returns the same text.

        Args:
            values: List of matplotlib date floats (from date2num).

        Returns:
            List of formatted label strings.
        """
        labels = adaptive_date_labels(
            values,
            min_precision=self._min_precision,
            max_precision=self._max_precision,
            format_levels=self._format_levels,
        )
        self._cached_labels = dict(zip(values, labels))
        return labels

    def __call__(self, x: float, pos: "int | None" = None) -> str:
        """Format a single tick value.

        Uses cached results from format_ticks if available, otherwise
        formats the value on its own, which yields the day level clamped to
        the configured minimum and maximum precision.

        Args:
            x: A matplotlib date float.
            pos: Tick position (unused, required by matplotlib protocol).

        Returns:
            Formatted date string.
        """
        if x in self._cached_labels:
            return self._cached_labels[x]
        labels = adaptive_date_labels(
            [x],
            min_precision=self._min_precision,
            max_precision=self._max_precision,
            format_levels=self._format_levels,
        )
        return labels[0] if labels else ""
|