datemonkey 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 RexBytes
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,198 @@
1
+ Metadata-Version: 2.4
2
+ Name: datemonkey
3
+ Version: 0.1.0
4
+ Summary: Batch date parsing with ambiguity detection, confidence scores, and format lock-in.
5
+ Author-email: RexBytes <pythonic@rexbytes.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/RexBytes/datemonkey
8
+ Project-URL: Repository, https://github.com/RexBytes/datemonkey
9
+ Project-URL: Issues, https://github.com/RexBytes/datemonkey/issues
10
+ Keywords: date,parsing,ambiguity,detection,batch,excel
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Classifier: Topic :: Text Processing
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Dynamic: license-file
25
+
26
+ # datemonkey
27
+
28
+ Batch date parsing with ambiguity detection, confidence scores, and format lock-in.
29
+
30
+ **The problem:** `dateutil.parser.parse("01/02/03")` silently guesses and is often wrong. DD/MM vs MM/DD ambiguity corrupts joins, aggregations, and reports. datemonkey detects ambiguity and tells you about it instead of guessing.
31
+
32
+ ## Install
33
+
34
+ ```bash
35
+ pip install datemonkey
36
+ ```
37
+
38
+ ## Quick Start
39
+
40
+ ### Detect format from a column of values
41
+
42
+ ```python
43
+ from datemonkey import detect_format
44
+
45
+ result = detect_format(["15/03/2024", "20/04/2024", "25/12/2024"])
46
+ print(result.format.label) # "European date (DD/MM/YYYY)"
47
+ print(result.confidence) # Confidence.HIGH
48
+ print(result.is_ambiguous) # False — day > 12 resolves it
49
+ ```
50
+
51
+ ### Ambiguity detection
52
+
53
+ ```python
54
+ result = detect_format(["01/02/2024", "03/04/2024", "05/06/2024"])
55
+ print(result.is_ambiguous) # True
56
+ print(result.ambiguities) # [AmbiguityType.DAY_MONTH_SWAP]
57
+ print(result.warnings)
58
+ # ["Ambiguous: cannot distinguish US date (MM/DD/YYYY) from European date (DD/MM/YYYY) ..."]
59
+ ```
60
+
61
+ ### Resolve ambiguity with locale preference
62
+
63
+ ```python
64
+ result = detect_format(["01/02/2024", "03/04/2024"], locale_preference="eu")
65
+ print(result.format.label) # "European date (DD/MM/YYYY)"
66
+ ```
67
+
68
+ ### Parse a batch of dates
69
+
70
+ ```python
71
+ from datemonkey import parse_dates
72
+
73
+ batch = parse_dates(["2024-03-15", "2024-04-20", "2024-12-25"])
74
+ print(batch.ok) # True
75
+ print(batch.dates) # [datetime(2024,3,15), datetime(2024,4,20), datetime(2024,12,25)]
76
+ print(batch.iso_strings) # ["2024-03-15T00:00:00", ...]
77
+ ```
78
+
79
+ ### Format lock-in
80
+
81
+ ```python
82
+ from datemonkey import parse_dates, ISO_8601
83
+
84
+ batch = parse_dates(["2024-03-15", "03/15/2024"], format=ISO_8601)
85
+ print(batch.results[0].ok) # True — matches ISO
86
+ print(batch.results[1].ok)  # False — doesn't match; flagged, not re-guessed
87
+ ```
88
+
89
+ ### Strict mode
90
+
91
+ ```python
92
+ batch = parse_dates(["01/02/2024", "03/04/2024"], strict=True)
93
+ print(batch.parsed_count) # 0 — refuses to parse ambiguous data
94
+ print(batch.warnings) # ["Strict mode: refusing to parse due to DD/MM vs MM/DD ambiguity..."]
95
+ ```
96
+
97
+ ### Excel serial dates
98
+
99
+ ```python
100
+ from datemonkey import parse_dates, excel_serial_to_datetime
101
+
102
+ # Single value
103
+ dt = excel_serial_to_datetime(45292) # datetime(2024, 1, 1)
104
+
105
+ # Batch — auto-detected
106
+ batch = parse_dates(["45292", "45293", "45294"])
107
+ print(batch.detected_format.label) # "Excel serial date number"
108
+ ```
109
+
110
+ ### Per-value results
111
+
112
+ ```python
113
+ batch = parse_dates(["2024-03-15", "garbage", "2024-12-25"], format="%Y-%m-%d")
114
+ for r in batch.results:
115
+ print(f"{r.original:20s} ok={r.ok} parsed={r.iso} warnings={r.warnings}")
116
+ # 2024-03-15 ok=True parsed=2024-03-15T00:00:00 warnings=[]
117
+ # garbage ok=False parsed=None warnings=[...]
118
+ # 2024-12-25 ok=True parsed=2024-12-25T00:00:00 warnings=[]
119
+ ```
120
+
121
+ ## CLI
122
+
123
+ ```bash
124
+ # Detect format
125
+ datemonkey detect "15/03/2024" "20/04/2024" "25/12/2024"
126
+
127
+ # Detect with JSON output
128
+ datemonkey detect --json "01/02/2024" "03/04/2024"
129
+
130
+ # Parse dates
131
+ datemonkey parse "2024-03-15" "2024-04-20"
132
+
133
+ # Parse from CSV file (column 2, skip header)
134
+ datemonkey parse --file data.csv --column 2 --skip-header
135
+
136
+ # Parse with explicit format
137
+ datemonkey parse --format "%d-%m-%Y" "15-03-2024"
138
+
139
+ # Parse in strict mode
140
+ datemonkey parse --strict "01/02/2024" "03/04/2024"
141
+
142
+ # List known formats
143
+ datemonkey formats
144
+ ```
145
+
146
+ ## API Reference
147
+
148
+ ### `detect_format(values, *, locale_preference=None, formats=None) -> FormatDetectionResult`
149
+
150
+ Analyze a batch and determine the most likely format, reporting ambiguity.
151
+
152
+ - **values**: List of date-like values (strings, ints, floats, None)
153
+ - **locale_preference**: `"us"` for MM/DD, `"eu"` for DD/MM (only used when the data alone can't resolve the DD/MM vs MM/DD ambiguity)
154
+ - **formats**: Custom list of `DateFormat` objects to test
155
+
156
+ ### `parse_dates(values, *, format=None, locale_preference=None, strict=False) -> BatchResult`
157
+
158
+ Parse a batch with format lock-in.
159
+
160
+ - **format**: A `DateFormat` object or strftime string. If None, auto-detected.
161
+ - **strict**: If True, refuse to parse when DD/MM vs MM/DD is ambiguous.
162
+
163
+ ### `excel_serial_to_datetime(serial) -> datetime | None`
164
+
165
+ Convert an Excel serial date number to a Python datetime.
166
+
167
+ ### Result Objects
168
+
169
+ | Object | Key Properties |
170
+ |---|---|
171
+ | `FormatDetectionResult` | `.format`, `.confidence`, `.is_ambiguous`, `.ambiguities`, `.candidates`, `.warnings` |
172
+ | `BatchResult` | `.ok`, `.results`, `.detected_format`, `.dates`, `.iso_strings`, `.failed`, `.succeeded`, `.success_ratio` |
173
+ | `DateResult` | `.ok`, `.original`, `.parsed`, `.date`, `.iso`, `.confidence`, `.warnings`, `.row_index` |
174
+
175
+ ### Confidence Levels
176
+
177
+ | Level | Meaning |
178
+ |---|---|
179
+ | `HIGH` | Unambiguous parse, format is certain |
180
+ | `MEDIUM` | Likely correct, minor ambiguity (e.g. two-digit year) |
181
+ | `LOW` | Ambiguous — DD/MM vs MM/DD unresolved, or poor match ratio |
182
+ | `FAILED` | Could not parse or detect |
183
+
184
+ ## Design
185
+
186
+ - **Batch-first**: Designed for columns of data, not single strings
187
+ - **No silent guessing**: Ambiguity is reported, not hidden
188
+ - **Format lock-in**: Once detected, the format is enforced — violations are flagged
189
+ - **Structured results**: Every parse returns confidence scores and warnings
190
+ - **Zero dependencies**: Pure Python, stdlib only
191
+
192
+ ## Built for LLMs
193
+
194
+ datemonkey is designed to work well as a tool for large language models. Date parsing is a common source of silent errors in LLM-driven data pipelines — ambiguous formats lead to wrong guesses, wasted tokens on retries, and broken downstream logic. datemonkey reduces that complexity: a single call returns a structured result with the detected format, confidence level, and any ambiguities — no multi-step prompting or validation loops required. Fewer tokens in, reliable answers out.
195
+
196
+ ## License
197
+
198
+ MIT
@@ -0,0 +1,173 @@
1
+ # datemonkey
2
+
3
+ Batch date parsing with ambiguity detection, confidence scores, and format lock-in.
4
+
5
+ **The problem:** `dateutil.parser.parse("01/02/03")` silently guesses and is often wrong. DD/MM vs MM/DD ambiguity corrupts joins, aggregations, and reports. datemonkey detects ambiguity and tells you about it instead of guessing.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install datemonkey
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ### Detect format from a column of values
16
+
17
+ ```python
18
+ from datemonkey import detect_format
19
+
20
+ result = detect_format(["15/03/2024", "20/04/2024", "25/12/2024"])
21
+ print(result.format.label) # "European date (DD/MM/YYYY)"
22
+ print(result.confidence) # Confidence.HIGH
23
+ print(result.is_ambiguous) # False — day > 12 resolves it
24
+ ```
25
+
26
+ ### Ambiguity detection
27
+
28
+ ```python
29
+ result = detect_format(["01/02/2024", "03/04/2024", "05/06/2024"])
30
+ print(result.is_ambiguous) # True
31
+ print(result.ambiguities) # [AmbiguityType.DAY_MONTH_SWAP]
32
+ print(result.warnings)
33
+ # ["Ambiguous: cannot distinguish US date (MM/DD/YYYY) from European date (DD/MM/YYYY) ..."]
34
+ ```
35
+
36
+ ### Resolve ambiguity with locale preference
37
+
38
+ ```python
39
+ result = detect_format(["01/02/2024", "03/04/2024"], locale_preference="eu")
40
+ print(result.format.label) # "European date (DD/MM/YYYY)"
41
+ ```
42
+
43
+ ### Parse a batch of dates
44
+
45
+ ```python
46
+ from datemonkey import parse_dates
47
+
48
+ batch = parse_dates(["2024-03-15", "2024-04-20", "2024-12-25"])
49
+ print(batch.ok) # True
50
+ print(batch.dates) # [datetime(2024,3,15), datetime(2024,4,20), datetime(2024,12,25)]
51
+ print(batch.iso_strings) # ["2024-03-15T00:00:00", ...]
52
+ ```
53
+
54
+ ### Format lock-in
55
+
56
+ ```python
57
+ from datemonkey import parse_dates, ISO_8601
58
+
59
+ batch = parse_dates(["2024-03-15", "03/15/2024"], format=ISO_8601)
60
+ print(batch.results[0].ok) # True — matches ISO
61
+ print(batch.results[1].ok)  # False — doesn't match; flagged, not re-guessed
62
+ ```
63
+
64
+ ### Strict mode
65
+
66
+ ```python
67
+ batch = parse_dates(["01/02/2024", "03/04/2024"], strict=True)
68
+ print(batch.parsed_count) # 0 — refuses to parse ambiguous data
69
+ print(batch.warnings) # ["Strict mode: refusing to parse due to DD/MM vs MM/DD ambiguity..."]
70
+ ```
71
+
72
+ ### Excel serial dates
73
+
74
+ ```python
75
+ from datemonkey import parse_dates, excel_serial_to_datetime
76
+
77
+ # Single value
78
+ dt = excel_serial_to_datetime(45292) # datetime(2024, 1, 1)
79
+
80
+ # Batch — auto-detected
81
+ batch = parse_dates(["45292", "45293", "45294"])
82
+ print(batch.detected_format.label) # "Excel serial date number"
83
+ ```
84
+
85
+ ### Per-value results
86
+
87
+ ```python
88
+ batch = parse_dates(["2024-03-15", "garbage", "2024-12-25"], format="%Y-%m-%d")
89
+ for r in batch.results:
90
+ print(f"{r.original:20s} ok={r.ok} parsed={r.iso} warnings={r.warnings}")
91
+ # 2024-03-15 ok=True parsed=2024-03-15T00:00:00 warnings=[]
92
+ # garbage ok=False parsed=None warnings=[...]
93
+ # 2024-12-25 ok=True parsed=2024-12-25T00:00:00 warnings=[]
94
+ ```
95
+
96
+ ## CLI
97
+
98
+ ```bash
99
+ # Detect format
100
+ datemonkey detect "15/03/2024" "20/04/2024" "25/12/2024"
101
+
102
+ # Detect with JSON output
103
+ datemonkey detect --json "01/02/2024" "03/04/2024"
104
+
105
+ # Parse dates
106
+ datemonkey parse "2024-03-15" "2024-04-20"
107
+
108
+ # Parse from CSV file (column 2, skip header)
109
+ datemonkey parse --file data.csv --column 2 --skip-header
110
+
111
+ # Parse with explicit format
112
+ datemonkey parse --format "%d-%m-%Y" "15-03-2024"
113
+
114
+ # Parse in strict mode
115
+ datemonkey parse --strict "01/02/2024" "03/04/2024"
116
+
117
+ # List known formats
118
+ datemonkey formats
119
+ ```
120
+
121
+ ## API Reference
122
+
123
+ ### `detect_format(values, *, locale_preference=None, formats=None) -> FormatDetectionResult`
124
+
125
+ Analyze a batch and determine the most likely format, reporting ambiguity.
126
+
127
+ - **values**: List of date-like values (strings, ints, floats, None)
128
+ - **locale_preference**: `"us"` for MM/DD, `"eu"` for DD/MM (only used when the data alone can't resolve the DD/MM vs MM/DD ambiguity)
129
+ - **formats**: Custom list of `DateFormat` objects to test
130
+
131
+ ### `parse_dates(values, *, format=None, locale_preference=None, strict=False) -> BatchResult`
132
+
133
+ Parse a batch with format lock-in.
134
+
135
+ - **format**: A `DateFormat` object or strftime string. If None, auto-detected.
136
+ - **strict**: If True, refuse to parse when DD/MM vs MM/DD is ambiguous.
137
+
138
+ ### `excel_serial_to_datetime(serial) -> datetime | None`
139
+
140
+ Convert an Excel serial date number to a Python datetime.
141
+
142
+ ### Result Objects
143
+
144
+ | Object | Key Properties |
145
+ |---|---|
146
+ | `FormatDetectionResult` | `.format`, `.confidence`, `.is_ambiguous`, `.ambiguities`, `.candidates`, `.warnings` |
147
+ | `BatchResult` | `.ok`, `.results`, `.detected_format`, `.dates`, `.iso_strings`, `.failed`, `.succeeded`, `.success_ratio` |
148
+ | `DateResult` | `.ok`, `.original`, `.parsed`, `.date`, `.iso`, `.confidence`, `.warnings`, `.row_index` |
149
+
150
+ ### Confidence Levels
151
+
152
+ | Level | Meaning |
153
+ |---|---|
154
+ | `HIGH` | Unambiguous parse, format is certain |
155
+ | `MEDIUM` | Likely correct, minor ambiguity (e.g. two-digit year) |
156
+ | `LOW` | Ambiguous — DD/MM vs MM/DD unresolved, or poor match ratio |
157
+ | `FAILED` | Could not parse or detect |
158
+
159
+ ## Design
160
+
161
+ - **Batch-first**: Designed for columns of data, not single strings
162
+ - **No silent guessing**: Ambiguity is reported, not hidden
163
+ - **Format lock-in**: Once detected, the format is enforced — violations are flagged
164
+ - **Structured results**: Every parse returns confidence scores and warnings
165
+ - **Zero dependencies**: Pure Python, stdlib only
166
+
167
+ ## Built for LLMs
168
+
169
+ datemonkey is designed to work well as a tool for large language models. Date parsing is a common source of silent errors in LLM-driven data pipelines — ambiguous formats lead to wrong guesses, wasted tokens on retries, and broken downstream logic. datemonkey reduces that complexity: a single call returns a structured result with the detected format, confidence level, and any ambiguities — no multi-step prompting or validation loops required. Fewer tokens in, reliable answers out.
170
+
171
+ ## License
172
+
173
+ MIT
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "datemonkey"
7
+ version = "0.1.0"
8
+ description = "Batch date parsing with ambiguity detection, confidence scores, and format lock-in."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ { name = "RexBytes", email = "pythonic@rexbytes.com" },
14
+ ]
15
+ keywords = ["date", "parsing", "ambiguity", "detection", "batch", "excel"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Topic :: Software Development :: Libraries",
26
+ "Topic :: Text Processing",
27
+ ]
28
+
29
+ [project.scripts]
30
+ datemonkey = "datemonkey.cli:main"
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/RexBytes/datemonkey"
34
+ Repository = "https://github.com/RexBytes/datemonkey"
35
+ Issues = "https://github.com/RexBytes/datemonkey/issues"
36
+
37
+ [tool.setuptools.packages.find]
38
+ where = ["src"]
39
+
40
+ [tool.pytest.ini_options]
41
+ testpaths = ["tests"]
42
+ pythonpath = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,57 @@
1
+ """datemonkey — Batch date parsing with ambiguity detection.
2
+
3
+ >>> from datemonkey import detect_format, parse_dates
4
+ >>> result = detect_format(["15/03/2024", "20/04/2024", "25/12/2024"])
5
+ >>> result.format.label
6
+ 'European date (DD/MM/YYYY)'
7
+ >>> batch = parse_dates(["2024-03-15", "2024-04-20", "2024-12-25"])
8
+ >>> batch.ok
9
+ True
10
+ """
11
+
12
+ from .detector import detect_format
13
+ from .excel import excel_serial_to_datetime
14
+ from .formats import (
15
+ EU_DASH,
16
+ EU_DOT,
17
+ EU_SLASH,
18
+ ISO_8601,
19
+ ISO_8601_T,
20
+ US_DASH,
21
+ US_SLASH,
22
+ DateFormat,
23
+ )
24
+ from .models import (
25
+ AmbiguityType,
26
+ BatchResult,
27
+ Confidence,
28
+ DateResult,
29
+ FormatCandidate,
30
+ FormatDetectionResult,
31
+ )
32
+ from .parser import parse_dates
33
+
34
+ __version__ = "0.1.0"
35
+
36
+ __all__ = [
37
+ # Core API
38
+ "detect_format",
39
+ "parse_dates",
40
+ "excel_serial_to_datetime",
41
+ # Models
42
+ "DateResult",
43
+ "BatchResult",
44
+ "FormatDetectionResult",
45
+ "FormatCandidate",
46
+ "DateFormat",
47
+ "AmbiguityType",
48
+ "Confidence",
49
+ # Common formats
50
+ "ISO_8601",
51
+ "ISO_8601_T",
52
+ "US_SLASH",
53
+ "US_DASH",
54
+ "EU_SLASH",
55
+ "EU_DASH",
56
+ "EU_DOT",
57
+ ]