datemonkey 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datemonkey/formats.py ADDED
@@ -0,0 +1,133 @@
1
+ """Known date format patterns and detection logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Optional
7
+
8
+ from .models import DateFormat
9
+
10
+ # ── Well-known formats, ordered from most specific to least ──────────────
11
+
12
# ISO 8601 family (year first, dash separated).
ISO_8601 = DateFormat(
    pattern="%Y-%m-%d",
    label="ISO 8601",
    example="2024-03-15",
)
ISO_8601_T = DateFormat(
    pattern="%Y-%m-%dT%H:%M:%S",
    label="ISO 8601 datetime",
    example="2024-03-15T14:30:00",
)
ISO_8601_TZ = DateFormat(
    pattern="%Y-%m-%dT%H:%M:%S%z",
    label="ISO 8601 with timezone",
    example="2024-03-15T14:30:00+00:00",
)

# Month-first numeric formats.
US_SLASH = DateFormat(
    pattern="%m/%d/%Y",
    label="US date (MM/DD/YYYY)",
    example="03/15/2024",
)
US_SLASH_SHORT = DateFormat(
    pattern="%m/%d/%y",
    label="US date short year (MM/DD/YY)",
    example="03/15/24",
)
US_DASH = DateFormat(
    pattern="%m-%d-%Y",
    label="US date dash (MM-DD-YYYY)",
    example="03-15-2024",
)

# Day-first numeric formats.
EU_SLASH = DateFormat(
    pattern="%d/%m/%Y",
    label="European date (DD/MM/YYYY)",
    example="15/03/2024",
)
EU_SLASH_SHORT = DateFormat(
    pattern="%d/%m/%y",
    label="European date short year (DD/MM/YY)",
    example="15/03/24",
)
EU_DASH = DateFormat(
    pattern="%d-%m-%Y",
    label="European date dash (DD-MM-YYYY)",
    example="15-03-2024",
)
EU_DOT = DateFormat(
    pattern="%d.%m.%Y",
    label="European date dot (DD.MM.YYYY)",
    example="15.03.2024",
)
EU_DOT_SHORT = DateFormat(
    pattern="%d.%m.%y",
    label="European date dot short (DD.MM.YY)",
    example="15.03.24",
)

# Year-first formats other than dash-separated ISO.
YYYYMMDD = DateFormat(
    pattern="%Y%m%d",
    label="Compact (YYYYMMDD)",
    example="20240315",
)

YYYY_MM_DD_SLASH = DateFormat(
    pattern="%Y/%m/%d",
    label="Year-first slash (YYYY/MM/DD)",
    example="2024/03/15",
)

# Formats that spell the month out (full name via %B, abbreviation via %b).
MONTH_NAME_DMY = DateFormat(
    pattern="%d %B %Y",
    label="Day Month Year (15 March 2024)",
    example="15 March 2024",
)
MONTH_NAME_MDY = DateFormat(
    pattern="%B %d, %Y",
    label="Month Day, Year (March 15, 2024)",
    example="March 15, 2024",
)
MONTH_ABBR_DMY = DateFormat(
    pattern="%d %b %Y",
    label="Day Mon Year (15 Mar 2024)",
    example="15 Mar 2024",
)
MONTH_ABBR_MDY = DateFormat(
    pattern="%b %d, %Y",
    label="Mon Day, Year (Mar 15, 2024)",
    example="Mar 15, 2024",
)
MONTH_ABBR_MDY_NO_COMMA = DateFormat(
    pattern="%b %d %Y",
    label="Mon Day Year (Mar 15 2024)",
    example="Mar 15 2024",
)
MONTH_NAME_MDY_NO_COMMA = DateFormat(
    pattern="%B %d %Y",
    label="Month Day Year (March 15 2024)",
    example="March 15 2024",
)

# Date plus time-of-day variants.
US_SLASH_TIME = DateFormat(
    pattern="%m/%d/%Y %H:%M:%S",
    label="US datetime",
    example="03/15/2024 14:30:00",
)
US_SLASH_TIME_12 = DateFormat(
    pattern="%m/%d/%Y %I:%M:%S %p",
    label="US datetime 12h",
    example="03/15/2024 02:30:00 PM",
)
EU_SLASH_TIME = DateFormat(
    pattern="%d/%m/%Y %H:%M:%S",
    label="European datetime",
    example="15/03/2024 14:30:00",
)
ISO_SPACE = DateFormat(
    pattern="%Y-%m-%d %H:%M:%S",
    label="ISO datetime (space)",
    example="2024-03-15 14:30:00",
)
42
+
43
+ # ── Format groups ────────────────────────────────────────────────────────
44
+
45
+ # Formats where day and month positions are unambiguous
46
# Formats whose day and month positions cannot be confused with each other.
UNAMBIGUOUS_FORMATS: list[DateFormat] = [
    # ISO variants, longest pattern first
    ISO_8601_TZ,
    ISO_8601_T,
    ISO_8601,
    ISO_SPACE,
    # other year-first layouts
    YYYY_MM_DD_SLASH,
    YYYYMMDD,
    # month written as a word
    MONTH_NAME_MDY,
    MONTH_NAME_DMY,
    MONTH_ABBR_MDY,
    MONTH_ABBR_DMY,
    MONTH_ABBR_MDY_NO_COMMA,
    MONTH_NAME_MDY_NO_COMMA,
]

# Month-first (MM/DD) numeric formats.
US_FORMATS: list[DateFormat] = [
    US_SLASH,
    US_SLASH_SHORT,
    US_DASH,
    US_SLASH_TIME,
    US_SLASH_TIME_12,
]

# Day-first (DD/MM) numeric formats.
EU_FORMATS: list[DateFormat] = [
    EU_SLASH,
    EU_SLASH_SHORT,
    EU_DASH,
    EU_DOT,
    EU_DOT_SHORT,
    EU_SLASH_TIME,
]

# Every known format in priority order: unambiguous first, then US, then EU.
ALL_FORMATS: list[DateFormat] = [
    *UNAMBIGUOUS_FORMATS,
    *US_FORMATS,
    *EU_FORMATS,
]

# (US, EU) pairs that are identical except for swapped day/month positions.
AMBIGUOUS_PAIRS: list[tuple[DateFormat, DateFormat]] = [
    (US_SLASH, EU_SLASH),
    (US_SLASH_SHORT, EU_SLASH_SHORT),
    (US_DASH, EU_DASH),
    (US_SLASH_TIME, EU_SLASH_TIME),
]
91
+
92
+ # ── Regex for quick pre-screening ────────────────────────────────────────
93
+
94
# Shape check for Excel serial candidates: 1-7 digits with an optional decimal
# fraction. The pattern itself does not restrict the numeric range.
EXCEL_SERIAL_RE = re.compile(r"^\d{1,7}(\.\d+)?$")

# Cheap "does this resemble a date at all?" screen. The alternatives are
# joined with "|" inside one non-capturing group; the pattern is unanchored.
DATE_LIKE_RE = re.compile(
    "(?:"
    + "|".join(
        (
            r"\d{1,4}[\-/\.]\d{1,2}[\-/\.]\d{1,4}",  # numeric with separators
            r"\d{8}",  # YYYYMMDD
            r"\d{1,2}\s+\w+\s+\d{2,4}",  # 15 March 2024
            r"\w+\s+\d{1,2},?\s+\d{2,4}",  # March 15, 2024
            r"\d{4}-\d{2}-\d{2}T",  # ISO with T
        )
    )
    + ")",
    flags=re.IGNORECASE,
)
108
+
109
+
110
def is_date_like(value: str) -> bool:
    """Report whether ``value`` contains something shaped like a date.

    Surrounding whitespace is stripped, then ``DATE_LIKE_RE`` is searched
    anywhere in the remaining text.
    """
    candidate = value.strip()
    return DATE_LIKE_RE.search(candidate) is not None
113
+
114
+
115
def could_be_excel_serial(value: str) -> bool:
    """Report whether ``value`` is numeric text within the Excel serial range.

    The stripped text must match ``EXCEL_SERIAL_RE`` and its numeric value
    must lie between 1 and 2958465 inclusive.
    """
    text = value.strip()
    if EXCEL_SERIAL_RE.match(text) is None:
        return False
    # Serial 1 is 1900-01-01 and 2958465 is 9999-12-31; in practice most
    # dates fall between 1 (1900) and roughly 55000 (2050+).
    serial = float(text)
    return 1 <= serial <= 2958465
124
+
125
+
126
def get_ambiguous_partner(fmt: DateFormat) -> Optional[DateFormat]:
    """Return the other member of ``fmt``'s DD/MM vs MM/DD pair, if any.

    ``AMBIGUOUS_PAIRS`` is scanned in order; the first pair containing
    ``fmt`` supplies the answer. Returns ``None`` when no pair contains it.
    """
    partners = (
        right if fmt == left else left
        for left, right in AMBIGUOUS_PAIRS
        if fmt == left or fmt == right
    )
    return next(partners, None)
datemonkey/models.py ADDED
@@ -0,0 +1,196 @@
1
+ """Result models for datemonkey."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ from dataclasses import dataclass, field
7
+ from enum import Enum
8
+ from typing import Any, Optional
9
+
10
+
11
class AmbiguityType(Enum):
    """Kinds of ambiguity that can be reported for date values."""

    # No ambiguity.
    NONE = "none"
    # Day and month could be swapped (DD/MM vs MM/DD).
    DAY_MONTH_SWAP = "day_month_swap"
    # Two-digit year: is 03 the year 2003 or 1903?
    TWO_DIGIT_YEAR = "two_digit_year"
    # More than one format appears in the same batch.
    MIXED_FORMATS = "mixed_formats"
18
+
19
+
20
class Confidence(Enum):
    """How much trust to place in a parsed date or detected format."""

    # Unambiguous parse.
    HIGH = "high"
    # Likely correct, but some ambiguity remains.
    MEDIUM = "medium"
    # Ambiguous; the result could be wrong.
    LOW = "low"
    # The value could not be parsed.
    FAILED = "failed"
27
+
28
+
29
@dataclass(frozen=True)
class DateFormat:
    """Immutable description of one date format.

    Attributes:
        pattern: strftime-compatible format string, e.g. "%Y-%m-%d".
        label: Human-readable name, e.g. "ISO 8601" or "US date".
        example: A sample value written in this format (empty by default).
    """

    pattern: str
    label: str
    example: str = ""

    def __str__(self) -> str:
        # The string form is just the raw pattern.
        return self.pattern
45
+
46
+
47
@dataclass
class DateResult:
    """Outcome of parsing one date value.

    Instances are collected into a BatchResult. Callers are expected to look
    at ``confidence`` and ``warnings`` before trusting ``parsed``.

    Attributes:
        original: The input value exactly as supplied.
        parsed: The resulting datetime, or None when parsing failed.
        format_used: The DateFormat that was applied.
        confidence: Confidence level attached to this parse.
        warnings: Problems noticed while parsing (ambiguity, coercion, etc.).
        row_index: Position of the value in its source batch, if known.
    """

    original: Any
    parsed: Optional[datetime.datetime] = None
    format_used: Optional[DateFormat] = None
    confidence: Confidence = Confidence.FAILED
    warnings: list[str] = field(default_factory=list)
    row_index: Optional[int] = None

    @property
    def ok(self) -> bool:
        """True when a datetime is present and confidence is not FAILED."""
        if self.parsed is None:
            return False
        return self.confidence != Confidence.FAILED

    @property
    def date(self) -> Optional[datetime.date]:
        """The calendar-date part of ``parsed``, or None if nothing parsed."""
        if not self.parsed:
            return None
        return self.parsed.date()

    @property
    def iso(self) -> Optional[str]:
        """``parsed`` rendered via ``isoformat()``, or None if nothing parsed."""
        if not self.parsed:
            return None
        return self.parsed.isoformat()
84
+
85
+
86
@dataclass
class FormatDetectionResult:
    """Outcome of detecting the date format of a batch of values.

    Attributes:
        format: The most likely format, or None if detection failed.
        confidence: Overall confidence in that format.
        ambiguities: Ambiguity types that were found.
        candidates: Every candidate format considered, with match counts.
        sample_size: How many values were analyzed.
        match_count: How many values matched the detected format.
        warnings: Issues raised during detection.
    """

    format: Optional[DateFormat] = None
    confidence: Confidence = Confidence.FAILED
    ambiguities: list[AmbiguityType] = field(default_factory=list)
    candidates: list[FormatCandidate] = field(default_factory=list)
    sample_size: int = 0
    match_count: int = 0
    warnings: list[str] = field(default_factory=list)

    @property
    def is_ambiguous(self) -> bool:
        """True when at least one ambiguity was recorded."""
        return bool(self.ambiguities)

    @property
    def match_ratio(self) -> float:
        """``match_count / sample_size``, or 0.0 for an empty sample."""
        # Guard against dividing by zero when nothing was analyzed.
        return self.match_count / self.sample_size if self.sample_size != 0 else 0.0
119
+
120
+
121
@dataclass
class FormatCandidate:
    """One candidate format together with its match statistics.

    Attributes:
        format: The candidate date format.
        match_count: Number of values that match this format.
        sample_size: Total number of values tested.
        confidence: Confidence that would apply if this format were chosen.
    """

    format: DateFormat
    match_count: int = 0
    sample_size: int = 0
    confidence: Confidence = Confidence.FAILED

    @property
    def match_ratio(self) -> float:
        """``match_count / sample_size``, or 0.0 for an empty sample."""
        # Guard against dividing by zero when nothing was tested.
        return self.match_count / self.sample_size if self.sample_size != 0 else 0.0
142
+
143
+
144
@dataclass
class BatchResult:
    """Result of parsing a batch of date values.

    Attributes:
        results: Per-value parse results.
        detected_format: The format used (detected or specified).
        total: Total number of values in the batch.
        parsed_count: Number successfully parsed.
        failed_count: Number that failed to parse.
        warnings: Batch-level warnings.
        format_detection: Full detection result, if detection was performed.
    """

    results: list[DateResult] = field(default_factory=list)
    detected_format: Optional[DateFormat] = None
    total: int = 0
    parsed_count: int = 0
    failed_count: int = 0
    warnings: list[str] = field(default_factory=list)
    format_detection: Optional[FormatDetectionResult] = None

    @property
    def ok(self) -> bool:
        """True if the batch is non-empty and every value was parsed.

        Checking ``failed_count == 0`` alone is not enough: a batch that was
        never parsed (only ``total`` set, both counters left at 0) would
        otherwise be reported as fully successful. ``parsed_count`` must
        therefore account for every value as well.
        """
        return (
            self.total > 0
            and self.failed_count == 0
            and self.parsed_count == self.total
        )

    @property
    def success_ratio(self) -> float:
        """Fraction of values successfully parsed (0.0 for an empty batch)."""
        if self.total == 0:
            return 0.0
        return self.parsed_count / self.total

    @property
    def dates(self) -> list[Optional[datetime.datetime]]:
        """Parsed datetimes in result order (None for failures)."""
        return [r.parsed for r in self.results]

    @property
    def iso_strings(self) -> list[Optional[str]]:
        """ISO 8601 strings in result order (None for failures)."""
        return [r.iso for r in self.results]

    @property
    def failed(self) -> list[DateResult]:
        """Only the results whose ``ok`` is False."""
        return [r for r in self.results if not r.ok]

    @property
    def succeeded(self) -> list[DateResult]:
        """Only the results whose ``ok`` is True."""
        return [r for r in self.results if r.ok]
datemonkey/parser.py ADDED
@@ -0,0 +1,174 @@
1
+ """Batch date parser with format lock-in.
2
+
3
+ Once a format is detected (or specified), it is enforced across all values
4
+ in the batch. Values that don't match are flagged, not silently re-guessed.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import datetime
10
+ from typing import Any, Optional, Sequence, Union
11
+
12
+ from .detector import detect_format
13
+ from .excel import EXCEL_SERIAL_FORMAT, parse_excel_serial
14
+ from .formats import could_be_excel_serial
15
+ from .models import (
16
+ AmbiguityType,
17
+ BatchResult,
18
+ Confidence,
19
+ DateFormat,
20
+ DateResult,
21
+ FormatDetectionResult,
22
+ )
23
+
24
+
25
def _parse_single(
    value: Any,
    fmt: DateFormat,
    row_index: Optional[int] = None,
) -> DateResult:
    """Parse one value against a fixed format and report the outcome.

    Args:
        value: The raw value to parse.
        fmt: The format to enforce; no other format is tried.
        row_index: Position of the value in its batch, copied to the result.

    Returns:
        A DateResult. Blank/None values and values that do not match ``fmt``
        come back with FAILED confidence and a warning; a successful
        ``strptime`` parse comes back with HIGH confidence. When ``fmt`` is
        the Excel serial format the result is whatever
        ``parse_excel_serial`` returns.
    """
    is_blank = value is None or (isinstance(value, str) and not value.strip())
    if is_blank:
        return DateResult(
            original=value,
            format_used=fmt,
            confidence=Confidence.FAILED,
            warnings=["Blank or None value."],
            row_index=row_index,
        )

    text = str(value).strip()

    # Excel serials are delegated, and receive the raw value rather than text.
    if fmt == EXCEL_SERIAL_FORMAT:
        return parse_excel_serial(value, row_index=row_index)

    try:
        moment = datetime.datetime.strptime(text, fmt.pattern)
    except (ValueError, OverflowError):
        notes = [f"Value does not match format {fmt.label} ({fmt.pattern})."]
        # Point the user at a likely cause when the text is a plain number.
        if could_be_excel_serial(text):
            notes.append("This looks like an Excel serial date number.")
        return DateResult(
            original=value,
            format_used=fmt,
            confidence=Confidence.FAILED,
            warnings=notes,
            row_index=row_index,
        )

    return DateResult(
        original=value,
        parsed=moment,
        format_used=fmt,
        confidence=Confidence.HIGH,
        row_index=row_index,
    )
69
+
70
+
71
def parse_dates(
    values: Sequence[Any],
    *,
    format: Optional[Union[DateFormat, str]] = None,
    locale_preference: Optional[str] = None,
    strict: bool = False,
) -> BatchResult:
    """Parse a batch of date values.

    If ``format`` is provided, it is used directly (format lock-in).
    If not, the format is auto-detected from the batch.

    Args:
        values: Date-like values (strings, ints, floats, None). Lists and
            tuples are used as given; any other iterable is copied into a
            list first so it can be sized and walked more than once.
        format: A DateFormat or strftime pattern string to enforce. The
            string "EXCEL_SERIAL" selects the Excel serial format.
            If None, auto-detect from the values.
        locale_preference: Hint forwarded to format detection for resolving
            DD/MM vs MM/DD ambiguity. "us" for MM/DD, "eu" for DD/MM.
        strict: If True, refuse to parse when auto-detection reports
            DD/MM vs MM/DD ambiguity. Other ambiguity types do not block
            parsing. Has no effect when ``format`` is given.

    Returns:
        BatchResult containing per-value results, detected format,
        and batch-level statistics. When the format cannot be determined,
        or strict mode refuses, the batch has no per-value results and
        every value is counted as failed, so ``ok`` is False.
    """
    # Array-like inputs can raise on truth-testing and one-shot iterators
    # would be exhausted by detection, so normalise to a real sequence.
    if values is None:
        items: Sequence[Any] = []
    elif isinstance(values, (list, tuple)):
        items = values
    else:
        items = list(values)

    if not items:
        return BatchResult(warnings=["Empty input: no values to parse."])

    total = len(items)
    detection_result: Optional[FormatDetectionResult] = None
    resolved_format: Optional[DateFormat] = None

    # Resolve the format to use
    if format is None:
        # Auto-detect
        detection_result = detect_format(
            items,
            locale_preference=locale_preference,
        )
        resolved_format = detection_result.format
    elif isinstance(format, str):
        if format == "EXCEL_SERIAL":
            resolved_format = EXCEL_SERIAL_FORMAT
        else:
            resolved_format = DateFormat(
                pattern=format,
                label=f"Custom ({format})",
            )
    else:
        resolved_format = format

    if resolved_format is None:
        # Nothing was parsed: count every value as failed so that
        # BatchResult.ok does not report success.
        return BatchResult(
            total=total,
            failed_count=total,
            warnings=["Could not determine date format."],
            format_detection=detection_result,
        )

    # In strict mode, refuse to parse if day/month order is ambiguous
    if (
        strict
        and detection_result is not None
        and detection_result.is_ambiguous
        and AmbiguityType.DAY_MONTH_SWAP in detection_result.ambiguities
    ):
        return BatchResult(
            total=total,
            failed_count=total,  # refused values are not successes
            detected_format=resolved_format,
            warnings=[
                "Strict mode: refusing to parse due to DD/MM vs MM/DD "
                "ambiguity. Provide a format or locale_preference to resolve."
            ]
            + detection_result.warnings,
            format_detection=detection_result,
        )

    # Parse each value with the locked-in format
    results = [
        _parse_single(v, resolved_format, row_index=i)
        for i, v in enumerate(items)
    ]
    parsed_count = sum(1 for r in results if r.ok)
    failed_count = total - parsed_count

    batch_warnings: list[str] = []
    if detection_result is not None:
        batch_warnings.extend(detection_result.warnings)

    if failed_count > 0:
        batch_warnings.append(
            f"{failed_count}/{total} values failed to parse "
            f"with format {resolved_format.label}."
        )

    return BatchResult(
        results=results,
        detected_format=resolved_format,
        total=total,
        parsed_count=parsed_count,
        failed_count=failed_count,
        warnings=batch_warnings,
        format_detection=detection_result,
    )
@@ -0,0 +1,198 @@
1
+ Metadata-Version: 2.4
2
+ Name: datemonkey
3
+ Version: 0.1.0
4
+ Summary: Batch date parsing with ambiguity detection, confidence scores, and format lock-in.
5
+ Author-email: RexBytes <pythonic@rexbytes.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/RexBytes/datemonkey
8
+ Project-URL: Repository, https://github.com/RexBytes/datemonkey
9
+ Project-URL: Issues, https://github.com/RexBytes/datemonkey/issues
10
+ Keywords: date,parsing,ambiguity,detection,batch,excel
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Classifier: Topic :: Text Processing
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Dynamic: license-file
25
+
26
+ # datemonkey
27
+
28
+ Batch date parsing with ambiguity detection, confidence scores, and format lock-in.
29
+
30
+ **The problem:** `dateutil.parser.parse("01/02/03")` silently guesses and is often wrong. DD/MM vs MM/DD ambiguity corrupts joins, aggregations, and reports. datemonkey detects ambiguity and tells you about it instead of guessing.
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install datemonkey
36
+ ```
37
+
38
+ ## Quick Start
39
+
40
+ ### Detect format from a column of values
41
+
42
+ ```python
43
+ from datemonkey import detect_format
44
+
45
+ result = detect_format(["15/03/2024", "20/04/2024", "25/12/2024"])
46
+ print(result.format.label) # "European date (DD/MM/YYYY)"
47
+ print(result.confidence) # Confidence.HIGH
48
+ print(result.is_ambiguous) # False — day > 12 resolves it
49
+ ```
50
+
51
+ ### Ambiguity detection
52
+
53
+ ```python
54
+ result = detect_format(["01/02/2024", "03/04/2024", "05/06/2024"])
55
+ print(result.is_ambiguous) # True
56
+ print(result.ambiguities) # [AmbiguityType.DAY_MONTH_SWAP]
57
+ print(result.warnings)
58
+ # ["Ambiguous: cannot distinguish US date (MM/DD/YYYY) from European date (DD/MM/YYYY) ..."]
59
+ ```
60
+
61
+ ### Resolve ambiguity with locale preference
62
+
63
+ ```python
64
+ result = detect_format(["01/02/2024", "03/04/2024"], locale_preference="eu")
65
+ print(result.format.label) # "European date (DD/MM/YYYY)"
66
+ ```
67
+
68
+ ### Parse a batch of dates
69
+
70
+ ```python
71
+ from datemonkey import parse_dates
72
+
73
+ batch = parse_dates(["2024-03-15", "2024-04-20", "2024-12-25"])
74
+ print(batch.ok) # True
75
+ print(batch.dates) # [datetime(2024,3,15), datetime(2024,4,20), datetime(2024,12,25)]
76
+ print(batch.iso_strings) # ["2024-03-15T00:00:00", ...]
77
+ ```
78
+
79
+ ### Format lock-in
80
+
81
+ ```python
82
+ from datemonkey import parse_dates, ISO_8601
83
+
84
+ batch = parse_dates(["2024-03-15", "03/15/2024"], format=ISO_8601)
85
+ print(batch.results[0].ok) # True — matches ISO
86
+ print(batch.results[1].ok) # False — doesn't match, flagged not re-guessed
87
+ ```
88
+
89
+ ### Strict mode
90
+
91
+ ```python
92
+ batch = parse_dates(["01/02/2024", "03/04/2024"], strict=True)
93
+ print(batch.parsed_count) # 0 — refuses to parse ambiguous data
94
+ print(batch.warnings) # ["Strict mode: refusing to parse due to DD/MM vs MM/DD ambiguity..."]
95
+ ```
96
+
97
+ ### Excel serial dates
98
+
99
+ ```python
100
+ from datemonkey import parse_dates, excel_serial_to_datetime
101
+
102
+ # Single value
103
+ dt = excel_serial_to_datetime(45292) # datetime(2024, 1, 1)
104
+
105
+ # Batch — auto-detected
106
+ batch = parse_dates(["45292", "45293", "45294"])
107
+ print(batch.detected_format.label) # "Excel serial date number"
108
+ ```
109
+
110
+ ### Per-value results
111
+
112
+ ```python
113
+ batch = parse_dates(["2024-03-15", "garbage", "2024-12-25"], format="%Y-%m-%d")
114
+ for r in batch.results:
115
+ print(f"{r.original:20s} ok={r.ok} parsed={r.iso} warnings={r.warnings}")
116
+ # 2024-03-15 ok=True parsed=2024-03-15T00:00:00 warnings=[]
117
+ # garbage ok=False parsed=None warnings=[...]
118
+ # 2024-12-25 ok=True parsed=2024-12-25T00:00:00 warnings=[]
119
+ ```
120
+
121
+ ## CLI
122
+
123
+ ```bash
124
+ # Detect format
125
+ datemonkey detect "15/03/2024" "20/04/2024" "25/12/2024"
126
+
127
+ # Detect with JSON output
128
+ datemonkey detect --json "01/02/2024" "03/04/2024"
129
+
130
+ # Parse dates
131
+ datemonkey parse "2024-03-15" "2024-04-20"
132
+
133
+ # Parse from CSV file (column 2, skip header)
134
+ datemonkey parse --file data.csv --column 2 --skip-header
135
+
136
+ # Parse with explicit format
137
+ datemonkey parse --format "%d-%m-%Y" "15-03-2024"
138
+
139
+ # Parse in strict mode
140
+ datemonkey parse --strict "01/02/2024" "03/04/2024"
141
+
142
+ # List known formats
143
+ datemonkey formats
144
+ ```
145
+
146
+ ## API Reference
147
+
148
+ ### `detect_format(values, *, locale_preference=None, formats=None) -> FormatDetectionResult`
149
+
150
+ Analyze a batch and determine the most likely format, reporting ambiguity.
151
+
152
+ - **values**: List of date-like values (strings, ints, floats, None)
153
+ - **locale_preference**: `"us"` for MM/DD, `"eu"` for DD/MM (only used when data alone can't resolve)
154
+ - **formats**: Custom list of `DateFormat` objects to test
155
+
156
+ ### `parse_dates(values, *, format=None, locale_preference=None, strict=False) -> BatchResult`
157
+
158
+ Parse a batch with format lock-in.
159
+
160
+ - **format**: A `DateFormat` object or strftime string. If None, auto-detected.
161
+ - **strict**: If True, refuse to parse when DD/MM vs MM/DD is ambiguous; the returned `BatchResult` then carries warnings but no per-value results.
162
+
163
+ ### `excel_serial_to_datetime(serial) -> datetime | None`
164
+
165
+ Convert an Excel serial date number to a Python datetime.
166
+
167
+ ### Result Objects
168
+
169
+ | Object | Key Properties |
170
+ |---|---|
171
+ | `FormatDetectionResult` | `.format`, `.confidence`, `.is_ambiguous`, `.ambiguities`, `.candidates`, `.warnings` |
172
+ | `BatchResult` | `.ok`, `.results`, `.detected_format`, `.dates`, `.iso_strings`, `.failed`, `.succeeded`, `.success_ratio` |
173
+ | `DateResult` | `.ok`, `.original`, `.parsed`, `.date`, `.iso`, `.confidence`, `.warnings`, `.row_index` |
174
+
175
+ ### Confidence Levels
176
+
177
+ | Level | Meaning |
178
+ |---|---|
179
+ | `HIGH` | Unambiguous parse, format is certain |
180
+ | `MEDIUM` | Likely correct, minor ambiguity (e.g. two-digit year) |
181
+ | `LOW` | Ambiguous — DD/MM vs MM/DD unresolved, or poor match ratio |
182
+ | `FAILED` | Could not parse or detect |
183
+
184
+ ## Design
185
+
186
+ - **Batch-first**: Designed for columns of data, not single strings
187
+ - **No silent guessing**: Ambiguity is reported, not hidden
188
+ - **Format lock-in**: Once detected, the format is enforced — violations are flagged
189
+ - **Structured results**: Every parse returns confidence scores and warnings
190
+ - **Zero dependencies**: Pure Python, stdlib only
191
+
192
+ ## Built for LLMs
193
+
194
+ datemonkey is designed to work well as a tool for large language models. Date parsing is a common source of silent errors in LLM-driven data pipelines — ambiguous formats lead to wrong guesses, wasted tokens on retries, and broken downstream logic. datemonkey reduces that complexity: a single call returns a structured result with the detected format, confidence level, and any ambiguities — no multi-step prompting or validation loops required. Fewer tokens in, reliable answers out.
195
+
196
+ ## License
197
+
198
+ MIT