emmykit 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,800 @@
1
+ """datetime_utils — extracted from univ_defs.py."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import re
7
+
8
+ from emmykit.inflect_utils import my_plural
9
+ from emmykit.logging_utils import fallback_logging_config
10
+ from emmykit.numeric_helpers import is_float, seconds_in_unit
11
+
12
+ from collections.abc import Sequence
13
+ from typing import Any, Final, TypeAlias
14
+
15
def human_timespan(timespan: int | float) -> str:
    """
    Render a duration given in seconds as readable English text.

    The sign of the input is discarded, and the value is rounded to whole
    milliseconds before being broken down into units.

    Args:
        timespan: Duration in seconds (int or float).

    Returns:
        Text such as "1 year, 2 weeks, 3 days, 4 hours, 5 minutes and 6.789 seconds".
        Units whose count is zero are omitted. A duration that rounds to zero
        milliseconds yields "0 seconds".
    """
    # Integer milliseconds keep the divmod chain below exact (no float modulo).
    remaining_ms = int(round(abs(float(timespan)) * 1000))
    if remaining_ms == 0:
        return "0 seconds"

    # (unit name, unit length in milliseconds), largest first.
    whole_units = (
        ("year", 31_557_600_000),  # 365.25 days
        ("week", 604_800_000),
        ("day", 86_400_000),
        ("hour", 3_600_000),
        ("minute", 60_000),
    )

    pieces: list[str] = []
    for unit_name, unit_ms in whole_units:
        count, remaining_ms = divmod(remaining_ms, unit_ms)
        if count:
            pieces.append(my_plural(count, unit_name))

    # Whatever is left is below one minute, i.e. in [0, 60) seconds.
    leftover_seconds = remaining_ms / 1000.0
    if leftover_seconds:
        # Up to three decimals, with trailing zeros (and a bare dot) trimmed.
        text = f"{leftover_seconds:.3f}".rstrip("0").rstrip(".")
        suffix = "" if leftover_seconds == 1.0 else "s"
        pieces.append(f"{text} second{suffix}")

    if len(pieces) == 1:
        return pieces[0]
    return ", ".join(pieces[:-1]) + " and " + pieces[-1]
63
+
64
def format_date_range(date1: dt.datetime, date2: dt.datetime | None = None) -> str:
    """
    Build a compact, human-readable label for the span between two datetimes.

    The two dates are put in chronological order first. Parts shared by both
    ends (year, then month, then day) are written only once. Day numbers are
    formatted with a field width of 2, so a single-digit day is preceded by an
    extra space (for example 'Jan  7, 2025').

    Args:
        date1: One end of the range, as a datetime.datetime object.
        date2: The other end of the range. If None, date1 is used for both ends.

    Returns:
        - Different years:       'Dec 31, 2024 - Jan  1, 2025'
        - Same year only:        'Jan  7 - Feb  3, 2025'
        - Same year and month:   'Jan  7 - 15, 2025'
        - Same day, other time:  '06:04:02 - 19:05:39 on Jan  7, 2025'
        - Same day and time (compared to the second): 'Jan  7, 2025'

    Raises:
        ValueError: If either date1 or date2 is not a datetime.datetime object.
    """
    import datetime as dt

    abbreviations = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )

    # A missing second date collapses the range onto the first one.
    if date2 is None:
        date2 = date1

    if not (isinstance(date1, dt.datetime) and isinstance(date2, dt.datetime)):
        raise ValueError(f"Both dates must be datetime.datetime objects, but date1 is {date1} with type {type(date1)} and date2 {date2} with type {type(date2)}.")

    # Chronological order: start <= end.
    start, end = (date2, date1) if date1 > date2 else (date1, date2)

    start_label = f"{abbreviations[start.month - 1]} {start.day:2d}"
    end_month = abbreviations[end.month - 1]

    if start.year != end.year:
        return f"{start_label}, {start.year} - {end_month} {end.day:2d}, {end.year}"
    if start.month != end.month:
        return f"{start_label} - {end_month} {end.day:2d}, {start.year}"
    if start.day != end.day:
        return f"{start_label} - {end.day:2d}, {start.year}"

    # Same calendar day: fall back to the clock times (second resolution).
    clock_start = start.strftime("%H:%M:%S")
    clock_end = end.strftime("%H:%M:%S")
    if clock_start == clock_end:
        return f"{start_label}, {start.year}"
    return f"{clock_start} - {clock_end} on {start_label}, {start.year}"
119
+
120
# Matches a trailing "YYYYMMDD-HHMMSS.pkl". The dot is escaped so that only a
# literal "." may separate the timestamp from the "pkl" extension.
_TIMESTAMP_PATTERN_RE: re.Pattern = re.compile(r"(\d{8}-\d{6})\.pkl$")


def extract_timestamp(the_string: str) -> str | None:
    """
    Extract the timestamp from a string that ends in '<YYYYMMDD-HHMMSS>.pkl'.

    Args:
        the_string: Text (typically a file name or path) to search.

    Returns:
        The 'YYYYMMDD-HHMMSS' portion as a string, or None when the string does
        not end with a timestamp followed by '.pkl'. Only the digit layout is
        checked; the value is not validated as a real date.
    """
    match = _TIMESTAMP_PATTERN_RE.search(the_string)
    return match.group(1) if match else None
130
+
131
# Maps upper-case timezone abbreviations to IANA zone names.
# The mapping is lossy by nature: an abbreviation is resolved to a zone that
# *uses* it, so the offset actually applied follows that zone's rules for the
# date in question (e.g. "EST" and "EDT" both resolve to America/New_York).
_TZ_ABBREV_TO_ZONE: dict[str, str] = {
    "UTC": "UTC",
    "GMT": "Etc/GMT",
    "EST": "America/New_York",
    "EDT": "America/New_York",
    "CST": "America/Chicago",      # WARNING! "CST" can also mean China Standard Time (Asia/Shanghai, UTC+8), so use with caution!
    "CDT": "America/Chicago",
    "MST": "America/Denver",
    "MDT": "America/Denver",
    "PST": "America/Los_Angeles",
    "PDT": "America/Los_Angeles",
    "HST": "Pacific/Honolulu",
    "AKST": "America/Anchorage",
    "AKDT": "America/Anchorage",
    "AST": "America/Puerto_Rico",  # Atlantic Standard Time
    # Atlantic Daylight Time. Puerto Rico does not observe daylight time, so a
    # zone that actually switches to ADT (Halifax) is used here instead.
    "ADT": "America/Halifax",
    "NST": "America/St_Johns",     # Newfoundland Standard Time
    "NDT": "America/St_Johns",     # Newfoundland Daylight Time
    "BST": "Europe/London",        # British Summer Time
    "CET": "Europe/Berlin",        # Central European Time
    "CEST": "Europe/Berlin",       # Central European Summer Time
    "EET": "Europe/Athens",        # Eastern European Time
    "EEST": "Europe/Athens",       # Eastern European Summer Time
    "IST": "Asia/Kolkata",         # Indian Standard Time - WARNING! "IST" can also mean Irish Standard Time (Europe/Dublin, UTC+1), so use with caution!
    "JST": "Asia/Tokyo",           # Japan Standard Time
    "KST": "Asia/Seoul",           # Korea Standard Time
    "HKT": "Asia/Hong_Kong",       # Hong Kong Time
    "SGT": "Asia/Singapore",       # Singapore Time
    "AEST": "Australia/Sydney",    # Australian Eastern Standard Time
    "AEDT": "Australia/Sydney",    # Australian Eastern Daylight Time
    "ACST": "Australia/Adelaide",  # Australian Central Standard Time
    "ACDT": "Australia/Adelaide",  # Australian Central Daylight Time
    "AWST": "Australia/Perth",     # Australian Western Standard Time
    "AWDT": "Australia/Perth",     # Australian Western Daylight Time
    "NZT": "Pacific/Auckland",     # New Zealand Time
    "NZST": "Pacific/Auckland",    # New Zealand Standard Time
    "NZDT": "Pacific/Auckland",    # New Zealand Daylight Time
    "WET": "Europe/Lisbon",        # Western European Time
    "WEST": "Europe/Lisbon",       # Western European Summer Time
    # ...add any others you need
}
172
+
173
# Fixed UTC offsets: a mandatory sign followed by one of five spellings.
# Exactly one "hours*" group is set on a successful match.
_TZ_OFFSET_RE: re.Pattern = re.compile(r'''
    ^(?P<sign>[+-])
    (?:
        (?P<hours1>\d{1,2})[hH](?P<mins1>\d{1,2})(?:[mM])?   # +5h30m
      | (?P<hours1_only>\d{1,2})[hH]                         # +5h
      | (?P<hours2>\d{1,2}):(?P<mins2>\d{2})                 # +5:30
      | (?P<hours3>\d{1,2})(?P<mins3>\d{2})                  # +0530
      | (?P<hours4>\d{1,2})                                  # +5
    )
    $
''', re.VERBOSE)


def parse_timezone(tz_arg: str | dt.tzinfo | None = None) -> dt.tzinfo | str:
    """
    Parse the given timezone string or tzinfo object into a datetime.tzinfo object.

    Accepted inputs:
        - None: UTC is returned.
        - A datetime.tzinfo object: returned unchanged.
        - "Naive" (case-insensitive): the input string is returned as given, as a
          marker that no timezone should be attached.
        - "Z", "UTC" or "GMT" (case-insensitive): UTC.
        - A fixed offset, optionally prefixed with "UTC" or "GMT":
          "+HH:MM", "+HHMM", "+H", "+Hh", "+HhMMm" (or minus variants).
          Examples: "+05:30", "-0530", "+5h", "-5h30m", "UTC+2".
        - A timezone abbreviation listed in _TZ_ABBREV_TO_ZONE (e.g. 'EST', 'CET').
        - An IANA zone name understood by ZoneInfo (e.g. 'America/New_York').

    Surrounding whitespace in string inputs is ignored.

    Args:
        tz_arg: A timezone string, a datetime.tzinfo object, or None.

    Returns:
        A datetime.tzinfo object representing the parsed timezone, or the
        original string if the input was "Naive".

    Raises:
        ValueError: If the string cannot be converted to a valid timezone
            (unknown zone name, minutes of 60 or more, or an offset that
            datetime.timezone rejects as out of range).
        TypeError: If tz_arg is neither None, a str, nor a tzinfo object.
    """
    import datetime as dt

    if tz_arg is None:
        return dt.timezone.utc
    if isinstance(tz_arg, dt.tzinfo):
        return tz_arg
    if not isinstance(tz_arg, str):
        raise TypeError(f"Expected None, str, or tzinfo; got {type(tz_arg).__name__!r}")

    name = tz_arg.strip()
    up = name.upper()

    # "Naive" is passed through so callers can recognise the request.
    if up == "NAIVE":
        return tz_arg

    # Bare UTC/GMT/Z
    if up in ("Z", "UTC", "GMT"):
        return dt.timezone.utc

    # "UTC+5" / "GMT-03:30": drop the prefix and parse what follows as an offset.
    offset_text = name[3:].strip() if up.startswith(("UTC", "GMT")) else name

    m = _TZ_OFFSET_RE.fullmatch(offset_text)
    if m:
        sign = 1 if m.group("sign") == "+" else -1
        hours, minutes = 0, 0
        for hours_group, minutes_group in (
            ("hours1", "mins1"),
            ("hours1_only", None),
            ("hours2", "mins2"),
            ("hours3", "mins3"),
            ("hours4", None),
        ):
            if m.group(hours_group) is not None:
                hours = int(m.group(hours_group))
                minutes = int(m.group(minutes_group)) if minutes_group else 0
                break
        # Without this check "+5:75" would silently become +06:15.
        if minutes >= 60:
            raise ValueError(f"Invalid timezone offset {tz_arg!r}: minutes must be less than 60")
        # datetime.timezone itself raises ValueError for offsets of 24 hours or more.
        return dt.timezone(sign * dt.timedelta(hours=hours, minutes=minutes))

    # Otherwise, fall back to ZoneInfo
    try:
        from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
    except ImportError:  # for Python < 3.9, fall back to backports.zoneinfo
        from backports.zoneinfo import ZoneInfo, ZoneInfoNotFoundError

    # Known abbreviation?
    if up in _TZ_ABBREV_TO_ZONE:
        return ZoneInfo(_TZ_ABBREV_TO_ZONE[up])

    # IANA zone name. The stripped text is used so that " Europe/Paris " works
    # like every other branch above. OSError covers keys that resolve to a
    # directory of the tz database on some Python versions.
    try:
        return ZoneInfo(name)
    except (ZoneInfoNotFoundError, OSError) as e:
        raise ValueError(f"Unknown timezone {tz_arg!r}: {e}") from e
281
+
282
def decimal_year_to_datetime(dec: float, use_astropy: bool = False) -> dt.datetime:
    """
    Convert a decimal year to a timezone-aware (UTC) datetime object.

    Without astropy, the integer part selects the calendar year and the
    fractional part is taken as a fraction of that year's actual length
    (365 or 366 days), measured from 1 January 00:00 UTC.

    Usage: new_datetime_datetime_object = decimal_year_to_datetime(2002.291)

    Args:
        dec: The decimal year, e.g. 2002.291.
        use_astropy: If True, astropy.time is used for the conversion (the
            value is interpreted with format "jyear" on the UTC scale).

    Returns:
        A datetime.datetime object with tzinfo set to UTC.

    Raises:
        ValueError: If astropy is requested but not installed, or if the value
            cannot be represented as a datetime (non-finite input, or a year
            outside the range supported by datetime.datetime).
    """
    import calendar
    import datetime as dt

    if use_astropy:
        try:
            from astropy.time import Time
        except ImportError as e:
            raise ValueError(f"'use_astropy=True' requires the astropy package: {e}") from e
        t = Time(dec, format="jyear", scale="utc")
        return t.to_datetime().replace(tzinfo=dt.timezone.utc)

    try:
        year = int(dec)  # OverflowError for +/-inf, ValueError for NaN
        rem = dec - year
        start_dt = dt.datetime(year, 1, 1, tzinfo=dt.timezone.utc)
        # The year length comes from the calendar rather than from
        # datetime(year + 1, 1, 1), which does not exist for year 9999.
        year_secs = (366 if calendar.isleap(year) else 365) * 86_400
        return start_dt + dt.timedelta(seconds=rem * year_secs)
    except (ValueError, OverflowError) as e:
        raise ValueError(f"Failed to convert decimal year {dec} to datetime: {e}") from e
306
+
307
def _parse_iso(given_date: str) -> dt.datetime:
    """
    Parse an ISO8601 date string and return a datetime object.

    Args:
        given_date: The ISO8601 text to parse.

    Returns:
        The datetime produced by dateutil's isoparse().

    Raises:
        ValueError: If the date string is not valid ISO8601. The underlying
            dateutil error is included in the message and chained as the cause.
    """
    from dateutil.parser import isoparse

    try:
        return isoparse(given_date)
    except ValueError as e:
        # isoparse() signals bad input with a plain ValueError; catching
        # ValueError also covers dateutil's ParserError, which subclasses it.
        raise ValueError(f"Invalid ISO8601 date '{given_date}': {e}") from e
315
+
316
+ _JD_MJD_SIMPLE_RE: re.Pattern = re.compile(r"\s*(JD|MJD)?\s*[+-]?\d+(\.\d+)?\s*", re.IGNORECASE)
317
+
318
+ _JD_MJD_CAPTURE_RE: re.Pattern = re.compile(r"\s*(?P<prefix>JD|MJD)?\s*(?P<value>[+-]?\d+(?:\.\d+)?)\s*", re.IGNORECASE)
319
+
320
+ _OFFSET_IN_STR_RE: re.Pattern = re.compile(r"(Z|[+-]\d{2}:\d{2}|[+-]\d{4})$")
321
+
322
+ _JD_UNIX_EPOCH: float = 2_440_587.5
323
+
324
+ AnyDateTimeType: TypeAlias = "str | float | int | np.datetime64 | pd.Timestamp | dt.datetime"
325
+
326
def _should_convert(given_date: AnyDateTimeType, format_str: str | None = None) -> bool:
    """
    Decide whether a parsed date must be *converted* to the target timezone.

    True means the wall clock is shifted into the target timezone; False means
    the target timezone is merely attached to the existing wall-clock reading.

    Conversion is chosen for:
        - int/float inputs (booleans excluded);
        - strings that are one of the keywords J2000/UNIX/NOW, that come with a
          format_str of "JD"/"MJD", that look like a (prefixed) plain number,
          or that end in an explicit offset or "Z";
        - datetime objects that already carry a tzinfo.

    Args:
        given_date: The original, unparsed input.
        format_str: The caller-supplied format string, if any.

    Returns:
        True to shift the clock, False to attach the timezone only. The
        decision and its reason are logged at DEBUG level.
    """
    import datetime as dt

    reason: str | None = None  # log template for a positive decision
    detail: object = given_date  # value interpolated into the template

    if isinstance(given_date, (int, float)) and not isinstance(given_date, bool):
        reason = "Given date is a number: %s, so it will be converted by shifting the clock"
    elif isinstance(given_date, str):
        keyword = given_date.strip().upper()
        if keyword in ("J2000", "UNIX", "NOW"):
            reason = "Given date is a special keyword: %s, so it will be converted by shifting the clock"
            detail = keyword
        elif format_str and format_str.upper() in ("JD", "MJD"):
            reason = "Given date has a format_str: %s, so it will be converted by shifting the clock"
            detail = format_str
        elif _JD_MJD_SIMPLE_RE.fullmatch(given_date):
            reason = "Given date is a JD/MJD: %s, so it will be converted by shifting the clock"
        elif _OFFSET_IN_STR_RE.search(given_date):
            reason = "Given date has an explicit offset or Z: %s, so it will be converted by shifting the clock"
    elif isinstance(given_date, dt.datetime) and given_date.tzinfo is not None:
        reason = "Given date is an aware datetime: %s, so it will be converted by shifting the clock"

    debug_enabled = logging.getLogger().isEnabledFor(logging.DEBUG)

    if reason is None:
        # Nothing above applied: treat the value as local wall-clock time.
        if debug_enabled:
            logging.debug("Given date is not a number, JD/MJD, or aware datetime: %s, so the timezone will be attached without shifting the clock", given_date)
        return False

    if debug_enabled:
        logging.debug(reason, detail)
    return True
357
+
358
def _finalize_datetime(parsed_dt: dt.datetime, original_input: AnyDateTimeType,
                       format_str: str | None, tz_arg: str | dt.tzinfo | None,
                       should_convert: bool | None = None) -> dt.datetime:
    """
    Give a parsed datetime its final timezone.

    Three outcomes are possible:
        - tz_arg is the string "Naive" (case-insensitive, whitespace ignored):
          any tzinfo is removed and the wall-clock reading is kept.
        - Convert: the datetime is moved into the target timezone with
          astimezone(), which shifts the wall clock.
        - Attach: the target timezone replaces tzinfo and the wall clock is
          left untouched.

    Convert vs. attach is governed by 'should_convert': False always attaches,
    True always converts, and None defers to _should_convert().

    Args:
        parsed_dt: The datetime object that has been parsed from the original input.
        original_input: The original input that was used to parse the datetime.
        format_str: The format string used to parse the datetime, if any.
        tz_arg: The timezone argument: a string, a datetime.tzinfo object, or None.
        should_convert: Explicit override of the convert/attach decision, or
            None to decide from original_input and format_str.

    Returns:
        A datetime.datetime object in the requested timezone, or a naive
        datetime if "Naive" was requested.

    Raises:
        ValueError: If tz_arg is not a valid timezone (raised by parse_timezone()).
    """
    debug_enabled = logging.getLogger().isEnabledFor

    wants_naive = isinstance(tz_arg, str) and tz_arg.strip().upper() == "NAIVE"
    if wants_naive:
        if debug_enabled(logging.DEBUG):
            logging.debug("Naive timezone requested, returning datetime %s without any timezone info", parsed_dt)
        return parsed_dt.replace(tzinfo=None)

    target_tz = parse_timezone(tz_arg)

    if should_convert is False:
        shift_clock = False
    else:
        # The inference runs (and logs) even when the caller forces conversion.
        inferred = _should_convert(original_input, format_str)
        shift_clock = inferred or should_convert is True

    if not shift_clock:
        if debug_enabled(logging.DEBUG):
            logging.debug("Attaching timezone %s to datetime %s without shifting the clock", target_tz, parsed_dt)
        return parsed_dt.replace(tzinfo=target_tz)

    if debug_enabled(logging.DEBUG):
        logging.debug("Converting datetime %s to timezone %s by shifting the clock", parsed_dt, target_tz)
    return parsed_dt.astimezone(target_tz)
392
+
393
def parse_datetime(given_date: AnyDateTimeType, timezone: str | dt.tzinfo | None = None,
                   format_str: str | None = None,
                   should_convert: bool | None = None) -> dt.datetime:
    """
    Try parsing the given_date string, number or date object into a datetime.datetime
    object in the specified timezone.

    If "format_str" is provided, it controls how given_date is interpreted:
        - "JD" or "MJD" (case-insensitive): given_date is a Julian Date or Modified Julian Date.
        - For numbers (including strings that look like numbers): a string following the
          pattern "units (optional: since/after epoch)", where "units" can be anything that
          the function seconds_in_unit() accepts (e.g. "seconds", "days", "weeks", etc.).
          The epoch is itself parsed with parse_datetime(). Examples: "days since 1990",
          "milliseconds after J2000", "sidereal days since 2000-01-01". If the epoch is not
          specified, it defaults to the Unix epoch (1970-01-01T00:00:00Z).
        - For any other string: a datetime.strptime() format such as "%d/%m/%Y".
          Note that a string which looks like a number is converted to a float first and
          therefore takes the "units" path above, not the strptime path.

    If a boolean "should_convert" is provided, it overrides the default decision of whether
    to convert the datetime to the specified timezone by shifting the clock (True) or to just
    attach the timezone without shifting (False). If None, the decision is based on the type
    of given_date and on format_str.

    If a given_date string starts with "JD" or "MJD", it is treated as a Julian Date or
    Modified Julian Date, respectively.

    Otherwise, if given_date is a float or int and format_str is not provided, it is treated
    as a decimal year.

    Any call that doesn't provide a timezone argument will default to UTC.
    The timezone can be a datetime.tzinfo object or a string that can be converted to a
    ZoneInfo object (e.g. 'America/New_York').
    If the given_date is an "aware" datetime.datetime object which already has a timezone
    attached, it will be converted to the specified timezone (which may involve changing its
    date and time if the specified timezone is different).
    The timezone can also be a fixed-offset like "+05:30" or "-04:00", or the string "Naive"
    to indicate that the result should carry no timezone information.

    Accepts:
        'NOW', 'UNIX' or 'J2000' (case-insensitive) → current time, Unix epoch, J2000 epoch
        strings in YYYY, YYYY-MM, YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS, or other ISO8601 formats
            (e.g. '2002-10-18T07:00:00Z', '2002-10-18 07:00:00+00:00').
            If YYYY is provided, it will default to January 1st of that year at midnight.
            If YYYY-MM is provided, it will default to the first day of that month at midnight.
            If YYYY-MM-DD is provided, it will default to midnight on that day.
        RFC 2822 strings (e.g. "Tue, 25 Jun 2025 14:00:00 GMT")
        fallback to dateutil.parser.parse for free-form strings ("18 Oct 2002", "March 5th, 2020", etc.)
        floats (e.g. 2002.29178082191777) or integer (e.g. 2002) → decimal year
        numpy.datetime64 objects (e.g. np.datetime64('2002-10-18T07:00:00')), read as UTC
        pandas.Timestamp objects (e.g. pd.Timestamp('2002-10-18 07:00:00'))
        datetime.datetime objects (e.g. datetime.datetime(2002, 10, 18, 7, 0, 0)) and subclasses
        datetime.date objects → midnight on that day

    Args:
        given_date: The date to parse, which can be a string, float, int, numpy.datetime64,
                    pandas.Timestamp, datetime.date or datetime.datetime object.
        timezone: A string or datetime.tzinfo object representing the timezone to convert
                  the datetime to. If None, defaults to UTC.
        format_str: A string indicating the format of the date. If None, the function will
                    try to infer the format from the given_date.
        should_convert: A boolean indicating whether to convert the datetime to the specified
                        timezone by shifting the clock (True) or just attaching the timezone
                        without shifting (False). If None, the function will determine this
                        based on the type of given_date and format_str.

    Returns:
        datetime.datetime object in the specified timezone.
        Note that datetime.datetime objects cannot represent dates before January 1, 0001 or after December 31, 9999.
        So dates outside this range will raise a ValueError. Future versions of this code may support a wider range of dates (like 44 BC, 44 BCE, etc.) using libraries like 'astropy.time': https://chatgpt.com/share/685c5157-5cac-8006-b68c-4a0731927a50
        However, this will require the function to return an 'astropy.time.Time' object instead of a 'datetime.datetime' object.

    Raises:
        ValueError: If the given_date cannot be parsed into a datetime object, or if the timezone is invalid.
        TypeError: If the given_date is not a string, float, int, numpy.datetime64, pandas.Timestamp,
                   datetime.date or datetime.datetime object, or if format_str is not a string
                   when given_date is a number.
    """
    import datetime as dt
    fallback_logging_config()  # Ensure logging is configured

    parsed_tz = parse_timezone(timezone)  # Ensure timezone is a valid tzinfo object or the "Naive" marker

    unix_epoch = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)
    parsed_dt = None

    # --- Special keywords -------------------------------------------------
    if isinstance(given_date, str):
        keyword = given_date.strip().upper()
        if keyword == "J2000":
            # J2000 is January 1, 2000, 11:58:55.816 UTC
            parsed_dt = dt.datetime(2000, 1, 1, 11, 58, 55, 816_000, tzinfo=dt.timezone.utc)
        elif keyword == "UNIX":
            # UNIX epoch is January 1, 1970, 00:00:00 UTC
            parsed_dt = unix_epoch
        elif keyword == "NOW":
            parsed_dt = dt.datetime.now(tz=dt.timezone.utc)

    # --- Julian Date (JD) / Modified Julian Date (MJD) --------------------
    m: re.Match | None = None
    prefix: str | None = None
    if parsed_dt is None and isinstance(given_date, str):
        m = _JD_MJD_CAPTURE_RE.fullmatch(given_date)
        if m:
            prefix = m.group("prefix")

    format_upper = format_str.upper() if isinstance(format_str, str) else None

    # Trigger JD/MJD branch only if format_str equals "JD" or "MJD", or prefix was provided
    if parsed_dt is None and (prefix is not None or format_upper in ("JD", "MJD")):
        # Determine raw value
        if isinstance(given_date, (int, float)):
            value = float(given_date)
        elif m is not None:
            value = float(m.group("value"))
        else:
            try:
                value = float(given_date.strip())
            except ValueError as e:
                raise ValueError(f"Expected a JD/MJD numeric value, got {given_date!r}") from e

        # Determine if MJD conversion needed
        use_mjd = format_upper == "MJD" or (prefix is not None and prefix.upper() == "MJD")

        # Convert MJD to JD if necessary, then to datetime via timedelta from Unix epoch
        jd_val = value + (2_400_000.5 if use_mjd else 0.0)
        unix_secs = (jd_val - _JD_UNIX_EPOCH) * 86_400
        parsed_dt = unix_epoch + dt.timedelta(seconds=unix_secs)

    # --- Plain numbers: decimal year, or "units since epoch" --------------
    # A string that represents a number is handled exactly like that number.
    if parsed_dt is None and isinstance(given_date, str) and is_float(given_date):
        given_date = float(given_date)
    # Booleans are ints in Python, but are not meaningful dates.
    if parsed_dt is None and isinstance(given_date, (int, float)) and not isinstance(given_date, bool):
        if format_str is None:
            parsed_dt = decimal_year_to_datetime(float(given_date))
        else:
            if not isinstance(format_str, str):
                raise TypeError(f"Expected 'format' to be a string, got {type(format_str).__name__!r}")
            # "units", or "units since|after epoch". With one capture group and
            # maxsplit=1, re.split() yields either 1 or 3 parts.
            format_parts = re.split(r'\s+(since|after)\s+', format_str, maxsplit=1)
            if logging.getLogger().isEnabledFor(logging.DEBUG): logging.debug("Parsing date with format string: '%s' split into parts: %s", format_str, format_parts)
            # The first part should be acceptable by seconds_in_unit():
            units = format_parts[0].strip()
            try:
                multiplier = seconds_in_unit(units)  # This will raise ValueError if the unit is unknown
            except ValueError as e:
                raise ValueError(f"Invalid time unit '{units}' in format string '{format_str}': {e}") from e
            if len(format_parts) == 1:
                # Only "units" was given: the epoch is the Unix epoch.
                epoch_str = "1970-01-01T00:00:00Z"
            else:
                # Three parts: the third one is the epoch.
                epoch_str = format_parts[2].strip()
            try:
                epoch = parse_datetime(epoch_str, timezone=parsed_tz)
            except ValueError as e:
                # Report the epoch *text*; the parsed epoch does not exist when parsing failed.
                raise ValueError(f"Invalid epoch '{epoch_str}' in format string '{format_str}': {e}") from e
            # Now we can calculate the datetime based on the given_date (and the multiplier from 'units') and the epoch
            parsed_dt = epoch + dt.timedelta(seconds=float(given_date) * multiplier)

    # --- Date/time objects -------------------------------------------------
    if parsed_dt is None:
        if type(given_date) is dt.datetime:  # exact type only; subclasses (e.g. pandas Timestamp) are handled below
            parsed_dt = given_date
        elif isinstance(given_date, dt.date) and not isinstance(given_date, dt.datetime):
            # Pure date objects (without time) become midnight. datetime subclasses
            # are excluded here, otherwise their time of day would be discarded.
            parsed_dt = dt.datetime.combine(given_date, dt.time.min)

    if parsed_dt is None:
        try:
            import numpy as np
        except ImportError:
            np = None
        if np is not None and isinstance(given_date, np.datetime64):
            if np.isnat(given_date):
                raise ValueError("Cannot convert numpy NaT (not-a-time) to a datetime.")
            # datetime has microsecond resolution, so microseconds since the epoch suffice.
            micros = int(given_date.astype("datetime64[us]").astype("int64"))
            try:
                utc_dt = unix_epoch + dt.timedelta(microseconds=micros)
            except OverflowError as e:
                raise ValueError(f"The numpy.datetime64 value {given_date} is outside the range supported by datetime.datetime: {e}") from e
            # The value is read as UTC. For a "Naive" request it stays in UTC (the
            # tzinfo is stripped during finalization) instead of depending on the
            # machine's local timezone.
            parsed_dt = utc_dt.astimezone(parsed_tz) if isinstance(parsed_tz, dt.tzinfo) else utc_dt

    if parsed_dt is None:
        try:
            import pandas as pd
        except ImportError:
            pd = None
        if pd is not None:
            if given_date is pd.NaT:
                raise ValueError("Cannot convert pandas NaT (not-a-time) to a datetime.")
            if isinstance(given_date, pd.Timestamp):
                parsed_dt = given_date.to_pydatetime()

    # Any other datetime subclass is used as-is.
    if parsed_dt is None and isinstance(given_date, dt.datetime):
        parsed_dt = given_date

    error_message: str = f"The date '{given_date}' is type {type(given_date).__name__!r} in an unknown format. Please use NOW, YYYY, YYYY-MM, YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS, other ISO8601 strings, or a decimal year like 2002.291. Datetimes in pandas.Timestamp, numpy.datetime64, or datetime.datetime formats are also accepted and will be converted to datetime.datetime objects in the specified timezone ({parsed_tz})."

    if parsed_dt is None and not isinstance(given_date, str):
        raise TypeError(error_message)

    if parsed_dt is not None:
        # Finalize the datetime object by converting it to the target timezone or just attaching the timezone without shifting the clock
        return _finalize_datetime(parsed_dt, given_date, format_str, parsed_tz, should_convert)

    # --- Strings -----------------------------------------------------------
    # From here on, we know it's a str (we raised or otherwise handled non-str types above)
    assert isinstance(given_date, str)
    given_string = given_date

    if format_str is not None:
        try:
            parsed_dt = dt.datetime.strptime(given_string, format_str)
        except ValueError as e:
            raise ValueError(f"Invalid date format '{given_string}' with specified format '{format_str}': {e}") from e

    # Try parsing the date string in various formats:
    # RFC 2822 first, then ISO8601, then free-form strings.
    # Errors are collected so they can all be reported if every attempt fails.
    errors: list[str] = []

    if parsed_dt is None:
        import email.utils
        try:
            # parses "Tue, 25 Jun 2025 14:00:00 GMT"
            parsed_dt = email.utils.parsedate_to_datetime(given_string)
        except (TypeError, ValueError) as e:
            errors.append(f"Failed to parse '{given_string}' as an RFC 2822 date: {e}")

    if parsed_dt is None:
        try:
            parsed_dt = _parse_iso(given_string)
        except ValueError as e:
            errors.append(f"Failed to parse '{given_string}' as an ISO8601 date: {e}")

    if parsed_dt is None:
        try:
            from dateutil.parser import parse as parse_fuzzy
            parsed_dt = parse_fuzzy(given_string, default=dt.datetime(1900, 1, 1))
        except (ValueError, OverflowError) as e:
            errors.append(f"Failed to parse '{given_string}' as a free-form date string: {e}")

    if parsed_dt is not None:
        # Finalize the datetime object by converting it to the target timezone or just attaching the timezone without shifting the clock
        return _finalize_datetime(parsed_dt, given_string, format_str, parsed_tz, should_convert)

    if np is None:
        errors.append("The numpy package is not installed, so numpy.datetime64 objects cannot be parsed.")
    if pd is None:
        errors.append("The pandas package is not installed, so pandas.Timestamp objects cannot be parsed.")

    raise ValueError(error_message + "\n" + "\n".join(map(str, errors)) + "\nPlease check the input format and try again.")
616
+
617
class Precision:
    """Named integer levels of date-formatting precision.

    A larger number means a finer level: YEAR (0) is the coarsest and
    SECOND (4) the finest. There is no separate hour level; DAY is followed
    directly by MINUTE.
    """

    # Coarsest: only the year is shown.
    YEAR: Final[int] = 0
    # Year and month.
    MONTH: Final[int] = 1
    # Full calendar date.
    DAY: Final[int] = 2
    # Calendar date plus hours and minutes.
    MINUTE: Final[int] = 3
    # Finest: calendar date plus hours, minutes and seconds.
    SECOND: Final[int] = 4
628
+
629
# Default strftime patterns, listed from coarsest to finest. The position of
# each pattern in the list is the precision level it renders.
ADAPTIVE_FORMAT_LEVELS: Final[list[str]] = [
    "%Y",                 # level 0 -> year,   e.g. 2024
    "%Y-%m",              # level 1 -> month,  e.g. 2024-03
    "%Y-%m-%d",           # level 2 -> day,    e.g. 2024-03-15
    "%Y-%m-%d %H:%M",     # level 3 -> minute, e.g. 2024-03-15 09:30
    "%Y-%m-%d %H:%M:%S",  # level 4 -> second, e.g. 2024-03-15 09:30:45
]
636
+
637
def _normalize_to_datetime(date: AnyDateTimeType) -> "dt.datetime | None":
    """Convert a single date value to datetime.datetime.

    Args:
        date: One of
            * a ``datetime.datetime`` (returned unchanged),
            * a real number (Python ``int``/``float`` or a NumPy integer or
              floating scalar), interpreted as a matplotlib date float,
            * a ``numpy.datetime64``.

    Returns:
        A ``datetime.datetime`` object, or None if the value is NaN/NaT.
        Values converted from numbers or ``numpy.datetime64`` have no tzinfo.

    Raises:
        TypeError: If ``date`` is not one of the supported types.
    """
    import datetime as dt
    import numpy as np

    if isinstance(date, dt.datetime):
        return date

    # NumPy scalars such as np.float32 / np.int64 are not subclasses of the
    # builtin float / int, so they are listed explicitly.
    if isinstance(date, (int, float, np.integer, np.floating)):
        if np.isnan(date):
            return None
        # Imported lazily so matplotlib is only needed for numeric input.
        import matplotlib.dates as mdates
        return mdates.num2date(date).replace(tzinfo=None)

    if isinstance(date, np.datetime64):
        if np.isnat(date):
            return None
        seconds = (date - np.datetime64("1970-01-01T00:00:00")) / np.timedelta64(1, "s")
        # Use epoch arithmetic rather than datetime.fromtimestamp(): the
        # latter is limited by the platform's C time functions and can fail
        # for values before 1970, whereas timedelta addition is portable.
        return dt.datetime(1970, 1, 1) + dt.timedelta(seconds=float(seconds))

    msg = f"Unsupported date type: {type(date)}"
    raise TypeError(msg)
666
+
667
def adaptive_date_labels(
    dates: "Sequence[AnyDateTimeType]",
    *,
    min_precision: int = Precision.YEAR,
    max_precision: int = Precision.SECOND,
    format_levels: "list[str] | None" = None,
) -> list[str]:
    """Label dates using the coarsest precision that keeps labels distinct.

    Precision levels are tried in order from ``min_precision`` upwards. The
    first level at which no two non-empty labels coincide is used; if none
    qualifies, the labels are rendered at ``max_precision``. A single date is
    rendered at day level, clamped into [min_precision, max_precision].

    Args:
        dates: Sequence of date values (datetime.datetime, numpy.datetime64,
            or matplotlib date floats).
        min_precision: Coarsest precision level that may be used
            (default: Precision.YEAR).
        max_precision: Finest precision level that may be used
            (default: Precision.SECOND). Refinement stops here even if
            labels still collide.
        format_levels: strftime patterns indexed by precision level. Must
            have length >= max_precision + 1. Defaults to
            ADAPTIVE_FORMAT_LEVELS.

    Returns:
        One label per input date, in input order. NaT/NaN inputs yield an
        empty string. An empty input yields an empty list.
    """
    import numpy as np

    # A numpy array has no usable truth value, so its emptiness is read
    # from ``size``; every other sequence relies on ordinary truthiness.
    is_empty = dates.size == 0 if isinstance(dates, np.ndarray) else not dates
    if is_empty:
        return []

    patterns = ADAPTIVE_FORMAT_LEVELS if format_levels is None else format_levels
    moments = [_normalize_to_datetime(value) for value in dates]

    def render(pattern: str) -> list[str]:
        """Format every normalized date with ``pattern`` ('' for missing)."""
        return ["" if moment is None else moment.strftime(pattern) for moment in moments]

    # A lone date has nothing to be disambiguated against.
    if len(moments) == 1:
        if moments[0] is None:
            return [""]
        lone_level = min(max(min_precision, Precision.DAY), max_precision)
        return render(patterns[lone_level])

    for level in range(min_precision, max_precision + 1):
        candidate = render(patterns[level])
        visible = [text for text in candidate if text != ""]
        if len(visible) == len(set(visible)):
            return candidate

    # Either no level produced unique labels or the level range was empty.
    return render(patterns[max_precision])
722
+
723
class AdaptiveDateFormatter:
    """Matplotlib Formatter that auto-selects date label precision.

    Uses adaptive disambiguation: labels start at the coarsest level and
    refine until all tick labels are unique. Drop-in replacement for any
    matplotlib axis formatter or colorbar formatter.

    Instances are genuine ``matplotlib.ticker.Formatter`` objects. This
    matters because ``Axis.set_major_formatter`` wraps any other callable in
    a ``FuncFormatter``, which formats ticks one at a time through
    ``__call__`` and would therefore never reach ``format_ticks``. The
    Formatter base class is mixed in when an instance is created, so
    matplotlib is only imported on instantiation.

    Args:
        min_precision: Minimum precision level (default: Precision.YEAR).
        max_precision: Maximum precision level (default: Precision.SECOND).
        format_levels: Custom format strings per level.

    Example:
        >>> ax.xaxis.set_major_formatter(AdaptiveDateFormatter())
        >>> cbar.ax.yaxis.set_major_formatter(AdaptiveDateFormatter())
    """

    # Cache of generated "<requested class> + matplotlib Formatter" classes,
    # keyed by the requested class, so each combination is built only once.
    _mixed_classes = {}

    def __new__(cls, *args: object, **kwargs: object) -> "AdaptiveDateFormatter":
        """Create an instance whose type also derives from matplotlib's Formatter.

        Args:
            *args: Ignored here; forwarded to ``__init__`` by Python.
            **kwargs: Ignored here; forwarded to ``__init__`` by Python.

        Returns:
            An uninitialized instance of ``cls`` (if it already derives from
            ``matplotlib.ticker.Formatter``) or of a generated subclass of
            ``cls`` that does.

        Raises:
            ImportError: If matplotlib is not installed.
        """
        import matplotlib.ticker as mticker

        if issubclass(cls, mticker.Formatter):
            return object.__new__(cls)

        mixed = AdaptiveDateFormatter._mixed_classes.get(cls)
        if mixed is None:

            def _reduce(self: object) -> tuple:
                # The generated class cannot be located by name when
                # unpickling, so rebuild through the importable class (using
                # its default constructor arguments) and restore the state.
                return (cls, (), dict(vars(self)))

            mixed = type(
                cls.__name__,
                (cls, mticker.Formatter),
                {
                    "__module__": cls.__module__,
                    "__qualname__": cls.__qualname__,
                    "__doc__": cls.__doc__,
                    "__reduce__": _reduce,
                },
            )
            AdaptiveDateFormatter._mixed_classes[cls] = mixed
        return object.__new__(mixed)

    def __init__(
        self,
        *,
        min_precision: int = Precision.YEAR,
        max_precision: int = Precision.SECOND,
        format_levels: "list[str] | None" = None,
    ) -> None:
        """Initialize the AdaptiveDateFormatter.

        Args:
            min_precision: Minimum precision level.
            max_precision: Maximum precision level.
            format_levels: Custom format strings per level.
        """
        self._min_precision = min_precision
        self._max_precision = max_precision
        self._format_levels = format_levels
        # Labels from the most recent format_ticks() call, keyed by tick value.
        self._cached_labels: dict[float, str] = {}

    def format_ticks(self, values: list[float]) -> list[str]:
        """Format all tick values collectively with disambiguation.

        The resulting labels are cached so that a later ``__call__`` for one
        of the same tick values returns a consistent label.

        Args:
            values: List of matplotlib date floats (from date2num).

        Returns:
            List of formatted label strings.
        """
        labels = adaptive_date_labels(
            values,
            min_precision=self._min_precision,
            max_precision=self._max_precision,
            format_levels=self._format_levels,
        )
        self._cached_labels = dict(zip(values, labels))
        return labels

    def __call__(self, x: float, pos: "int | None" = None) -> str:
        """Format a single tick value.

        Uses cached results from format_ticks if available, otherwise
        formats the value on its own through adaptive_date_labels.

        Args:
            x: A matplotlib date float.
            pos: Tick position (unused, required by matplotlib protocol).

        Returns:
            Formatted date string.
        """
        if x in self._cached_labels:
            return self._cached_labels[x]
        labels = adaptive_date_labels(
            [x],
            min_precision=self._min_precision,
            max_precision=self._max_precision,
            format_levels=self._format_levels,
        )
        return labels[0] if labels else ""