convi-lab 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convi_lab-0.1.0/LICENSE +21 -0
- convi_lab-0.1.0/PKG-INFO +166 -0
- convi_lab-0.1.0/README.md +155 -0
- convi_lab-0.1.0/pyproject.toml +10 -0
- convi_lab-0.1.0/setup.cfg +4 -0
- convi_lab-0.1.0/src/convi_lab/__init__.py +76 -0
- convi_lab-0.1.0/src/convi_lab/conversion_kernel/__init__.py +5 -0
- convi_lab-0.1.0/src/convi_lab/conversion_kernel/constants.py +123 -0
- convi_lab-0.1.0/src/convi_lab/conversion_kernel/errors.py +255 -0
- convi_lab-0.1.0/src/convi_lab/conversion_kernel/pattern_parsers.py +53 -0
- convi_lab-0.1.0/src/convi_lab/conversion_kernel/utils.py +81 -0
- convi_lab-0.1.0/src/convi_lab/conversions/__init__.py +11 -0
- convi_lab-0.1.0/src/convi_lab/conversions/convert_clock_format.py +70 -0
- convi_lab-0.1.0/src/convi_lab/conversions/convert_datetime.py +92 -0
- convi_lab-0.1.0/src/convi_lab/conversions/convert_name.py +67 -0
- convi_lab-0.1.0/src/convi_lab/parsers/__init__.py +11 -0
- convi_lab-0.1.0/src/convi_lab/parsers/parse_datetime.py +127 -0
- convi_lab-0.1.0/src/convi_lab/parsers/parse_day_month_time.py +85 -0
- convi_lab-0.1.0/src/convi_lab/parsers/parse_day_time.py +146 -0
- convi_lab-0.1.0/src/convi_lab.egg-info/PKG-INFO +166 -0
- convi_lab-0.1.0/src/convi_lab.egg-info/SOURCES.txt +22 -0
- convi_lab-0.1.0/src/convi_lab.egg-info/dependency_links.txt +1 -0
- convi_lab-0.1.0/src/convi_lab.egg-info/requires.txt +2 -0
- convi_lab-0.1.0/src/convi_lab.egg-info/top_level.txt +1 -0
convi_lab-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Mosquito-Lab
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
convi_lab-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: convi-lab
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Data conversion/normalization in a nutshell
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: logger-lab>=0.1.1
|
|
9
|
+
Requires-Dist: rapidfuzz>=3.0.0
|
|
10
|
+
Dynamic: license-file
|
|
11
|
+
|
|
12
|
+
# convi-lab
|
|
13
|
+
|
|
14
|
+
Data conversion and normalization utilities. The current focus is datetime
|
|
15
|
+
parsing — turning messily formatted date/time strings into a single
|
|
16
|
+
normalized `"YYYY-MM-DD HH:MM:SS"` output — with fuzzy name matching as a
|
|
17
|
+
first-class primitive that powers the date logic and is exposed for general use.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install convi-lab
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
**Runtime dependencies:** `logger-lab >= 0.1.1`, `rapidfuzz >= 3.0.0`
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Quick start
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from convi_lab import convert_date_time, convert_name, DAYS, MONTHS
|
|
35
|
+
|
|
36
|
+
# Datetime normalization
|
|
37
|
+
convert_date_time("Tuesday 12:30 PM") # "2026-05-12 12:30:00"
|
|
38
|
+
convert_date_time("17 Jul - 02:00 PM") # "2026-07-17 14:00:00"
|
|
39
|
+
convert_date_time("12.04.2025 22:15") # "2026-04-12 22:15:00"
|
|
40
|
+
convert_date_time("Today 09:00") # "2026-05-09 09:00:00"
|
|
41
|
+
|
|
42
|
+
# Fuzzy name matching
|
|
43
|
+
convert_name("Mon", DAYS) # "Monday"
|
|
44
|
+
convert_name("Jul", MONTHS) # "July"
|
|
45
|
+
convert_name("Celts", ["Boston Celtics", "Brooklyn Nets"]) # "Boston Celtics"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Supported datetime formats
|
|
51
|
+
|
|
52
|
+
All formats accept an optional `AM`/`PM` suffix (case-insensitive).
|
|
53
|
+
Spaces between components are stripped automatically.
|
|
54
|
+
|
|
55
|
+
| Format | Example input | Parsed as |
|
|
56
|
+
|------------------------|-------------------------|----------------------|
|
|
57
|
+
| `dd/mm HH:MM` | `"12/04 22:15"` | April 12, 22:15 |
|
|
58
|
+
| `dd/mm/yy HH:MM` | `"12/04/25 10:15 PM"` | April 12 2025, 22:15 |
|
|
59
|
+
| `dd.mm.yyyy HH:MM` | `"12.04.2025 12:15 pm"` | April 12 2025, 12:15 |
|
|
60
|
+
| `Day HH:MM` | `"Tuesday 12:30"` | Next Tuesday, 12:30 |
|
|
61
|
+
| `Today/Tomorrow HH:MM` | `"Today 14:30"` | Today at 14:30 |
|
|
62
|
+
| `ddMon[-]HH:MM` | `"17 Jul - 02:00 PM"` | July 17, 14:00 |
|
|
63
|
+
|
|
64
|
+
**Year boundary rule:** if the resolved date is in the past (date only,
|
|
65
|
+
not time), the year is automatically bumped to the next calendar year.
|
|
66
|
+
|
|
67
|
+
**Day rule:** weekday names always resolve to the *next* occurrence. If
|
|
68
|
+
today is Tuesday and you pass `"Tuesday 12:30"`, the result is next
|
|
69
|
+
Tuesday, not today.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Error handling
|
|
74
|
+
|
|
75
|
+
All exceptions inherit from `LabError`. You can catch broadly or precisely:
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from convi_lab import convert_date_time, LabError, ParserError
|
|
79
|
+
|
|
80
|
+
# convert_date_time is the safe boundary — it returns "" on any failure
|
|
81
|
+
result = convert_date_time("garbage input")
|
|
82
|
+
assert result == ""
|
|
83
|
+
|
|
84
|
+
# Call process_patterns directly to get typed exceptions
|
|
85
|
+
from convi_lab import process_patterns
|
|
86
|
+
from convi_lab.conversion_kernel.utils import remove_spaces
|
|
87
|
+
|
|
88
|
+
try:
|
|
89
|
+
dt = process_patterns(remove_spaces("17 Jly - 02:00"))
|
|
90
|
+
except ParserError as exc:
|
|
91
|
+
print(f"Parse failed: {exc}")
|
|
92
|
+
except LabError as exc:
|
|
93
|
+
print(f"Conversion failed: {exc}")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Exception hierarchy
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
LabError
|
|
100
|
+
├── ParserError
|
|
101
|
+
│ ├── PatternMatchError no pattern matched the input
|
|
102
|
+
│ ├── DayResolutionError weekday/relative-day string unresolvable
|
|
103
|
+
│ ├── MonthResolutionError month name unresolvable
|
|
104
|
+
│ └── DateComponentError numeric date fragment malformed
|
|
105
|
+
└── ConversionError
|
|
106
|
+
├── ClockFormatError HH:MM[AM|PM] string failed to parse
|
|
107
|
+
├── FuzzyMatchError rapidfuzz returned no result
|
|
108
|
+
└── InvalidInputError bad type or empty string at entry point
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Fuzzy name matching
|
|
114
|
+
|
|
115
|
+
`convert_name` is a general-purpose utility — pass any query and any
|
|
116
|
+
reference list:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from convi_lab import convert_name
|
|
120
|
+
|
|
121
|
+
teams = ["Atlanta Hawks", "Boston Celtics", "Brooklyn Nets"]
|
|
122
|
+
convert_name("A. Hawks", teams) # "Atlanta Hawks"
|
|
123
|
+
convert_name("Nets", teams) # "Brooklyn Nets"
|
|
124
|
+
|
|
125
|
+
countries = ["United States", "United Kingdom", "Australia"]
|
|
126
|
+
convert_name("USA", countries) # "United States"
|
|
127
|
+
convert_name("Aus", countries) # "Australia"
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
It raises `FuzzyMatchError` if rapidfuzz finds nothing, so you always
|
|
131
|
+
get a typed failure rather than a silent empty string.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Roadmap
|
|
136
|
+
|
|
137
|
+
The library is intentionally small today. Planned additions:
|
|
138
|
+
|
|
139
|
+
### Number and currency
|
|
140
|
+
- **Locale-aware number parsing** — `"1.234,56"` (DE) / `"1,234.56"` (US) → `float`
|
|
141
|
+
- **Currency string normalization** — `"$1,200.00"`, `"€ 1.200"` → `Decimal`
|
|
142
|
+
|
|
143
|
+
### Identity and contact data
|
|
144
|
+
- **Phone number normalization** — `"+1 (800) 555-0100"`, `"08001234"` → E.164 format
|
|
145
|
+
- **Email normalization** — lowercase, strip display names, validate structure
|
|
146
|
+
- **Name normalization** — `"DR. JOHN A. SMITH"` → `{"prefix": "Dr.", "first": "John", "last": "Smith"}`
|
|
147
|
+
|
|
148
|
+
### Units and measurements
|
|
149
|
+
- **Temperature** — `"98.6 F"` / `"37 C"` / `"310 K"` → normalized `(value, unit)`
|
|
150
|
+
- **Distance** — km, miles, nautical miles with conversion helpers
|
|
151
|
+
- **Weight** — kg, lb, oz
|
|
152
|
+
|
|
153
|
+
### Geolocation
|
|
154
|
+
- **Country/locale normalization** — `"USA"`, `"US"`, `"United States of America"` → ISO 3166-1 alpha-2
|
|
155
|
+
- **Timezone string normalization** — `"EST"`, `"Eastern Time"`, `"America/New_York"` → IANA key
|
|
156
|
+
|
|
157
|
+
### Data quality
|
|
158
|
+
- **Boolean parsing** — `"yes"`, `"true"`, `"1"`, `"on"`, `"enabled"` → `bool`
|
|
159
|
+
- **Null/empty detection** — `"N/A"`, `"none"`, `"–"`, `""` → `None`
|
|
160
|
+
- **Whitespace and encoding normalization** — strip invisible Unicode, normalize line endings
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## License
|
|
165
|
+
|
|
166
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# convi-lab
|
|
2
|
+
|
|
3
|
+
Data conversion and normalization utilities. The current focus is datetime
|
|
4
|
+
parsing — turning messily formatted date/time strings into a single
|
|
5
|
+
normalized `"YYYY-MM-DD HH:MM:SS"` output — with fuzzy name matching as a
|
|
6
|
+
first-class primitive that powers the date logic and is exposed for general use.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install convi-lab
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
**Runtime dependencies:** `logger-lab >= 0.1.1`, `rapidfuzz >= 3.0.0`
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Quick start
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from convi_lab import convert_date_time, convert_name, DAYS, MONTHS
|
|
24
|
+
|
|
25
|
+
# Datetime normalization
|
|
26
|
+
convert_date_time("Tuesday 12:30 PM") # "2026-05-12 12:30:00"
|
|
27
|
+
convert_date_time("17 Jul - 02:00 PM") # "2026-07-17 14:00:00"
|
|
28
|
+
convert_date_time("12.04.2025 22:15") # "2026-04-12 22:15:00"
|
|
29
|
+
convert_date_time("Today 09:00") # "2026-05-09 09:00:00"
|
|
30
|
+
|
|
31
|
+
# Fuzzy name matching
|
|
32
|
+
convert_name("Mon", DAYS) # "Monday"
|
|
33
|
+
convert_name("Jul", MONTHS) # "July"
|
|
34
|
+
convert_name("Celts", ["Boston Celtics", "Brooklyn Nets"]) # "Boston Celtics"
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Supported datetime formats
|
|
40
|
+
|
|
41
|
+
All formats accept an optional `AM`/`PM` suffix (case-insensitive).
|
|
42
|
+
Spaces between components are stripped automatically.
|
|
43
|
+
|
|
44
|
+
| Format | Example input | Parsed as |
|
|
45
|
+
|------------------------|-------------------------|----------------------|
|
|
46
|
+
| `dd/mm HH:MM` | `"12/04 22:15"` | April 12, 22:15 |
|
|
47
|
+
| `dd/mm/yy HH:MM` | `"12/04/25 10:15 PM"` | April 12 2025, 22:15 |
|
|
48
|
+
| `dd.mm.yyyy HH:MM` | `"12.04.2025 12:15 pm"` | April 12 2025, 12:15 |
|
|
49
|
+
| `Day HH:MM` | `"Tuesday 12:30"` | Next Tuesday, 12:30 |
|
|
50
|
+
| `Today/Tomorrow HH:MM` | `"Today 14:30"` | Today at 14:30 |
|
|
51
|
+
| `ddMon[-]HH:MM` | `"17 Jul - 02:00 PM"` | July 17, 14:00 |
|
|
52
|
+
|
|
53
|
+
**Year boundary rule:** if the resolved date is in the past (date only,
|
|
54
|
+
not time), the year is automatically bumped to the next calendar year.
|
|
55
|
+
|
|
56
|
+
**Day rule:** weekday names always resolve to the *next* occurrence. If
|
|
57
|
+
today is Tuesday and you pass `"Tuesday 12:30"`, the result is next
|
|
58
|
+
Tuesday, not today.
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Error handling
|
|
63
|
+
|
|
64
|
+
All exceptions inherit from `LabError`. You can catch broadly or precisely:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from convi_lab import convert_date_time, LabError, ParserError
|
|
68
|
+
|
|
69
|
+
# convert_date_time is the safe boundary — it returns "" on any failure
|
|
70
|
+
result = convert_date_time("garbage input")
|
|
71
|
+
assert result == ""
|
|
72
|
+
|
|
73
|
+
# Call process_patterns directly to get typed exceptions
|
|
74
|
+
from convi_lab import process_patterns
|
|
75
|
+
from convi_lab.conversion_kernel.utils import remove_spaces
|
|
76
|
+
|
|
77
|
+
try:
|
|
78
|
+
dt = process_patterns(remove_spaces("17 Jly - 02:00"))
|
|
79
|
+
except ParserError as exc:
|
|
80
|
+
print(f"Parse failed: {exc}")
|
|
81
|
+
except LabError as exc:
|
|
82
|
+
print(f"Conversion failed: {exc}")
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Exception hierarchy
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
LabError
|
|
89
|
+
├── ParserError
|
|
90
|
+
│ ├── PatternMatchError no pattern matched the input
|
|
91
|
+
│ ├── DayResolutionError weekday/relative-day string unresolvable
|
|
92
|
+
│ ├── MonthResolutionError month name unresolvable
|
|
93
|
+
│ └── DateComponentError numeric date fragment malformed
|
|
94
|
+
└── ConversionError
|
|
95
|
+
├── ClockFormatError HH:MM[AM|PM] string failed to parse
|
|
96
|
+
├── FuzzyMatchError rapidfuzz returned no result
|
|
97
|
+
└── InvalidInputError bad type or empty string at entry point
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Fuzzy name matching
|
|
103
|
+
|
|
104
|
+
`convert_name` is a general-purpose utility — pass any query and any
|
|
105
|
+
reference list:
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from convi_lab import convert_name
|
|
109
|
+
|
|
110
|
+
teams = ["Atlanta Hawks", "Boston Celtics", "Brooklyn Nets"]
|
|
111
|
+
convert_name("A. Hawks", teams) # "Atlanta Hawks"
|
|
112
|
+
convert_name("Nets", teams) # "Brooklyn Nets"
|
|
113
|
+
|
|
114
|
+
countries = ["United States", "United Kingdom", "Australia"]
|
|
115
|
+
convert_name("USA", countries) # "United States"
|
|
116
|
+
convert_name("Aus", countries) # "Australia"
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
It raises `FuzzyMatchError` if rapidfuzz finds nothing, so you always
|
|
120
|
+
get a typed failure rather than a silent empty string.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Roadmap
|
|
125
|
+
|
|
126
|
+
The library is intentionally small today. Planned additions:
|
|
127
|
+
|
|
128
|
+
### Number and currency
|
|
129
|
+
- **Locale-aware number parsing** — `"1.234,56"` (DE) / `"1,234.56"` (US) → `float`
|
|
130
|
+
- **Currency string normalization** — `"$1,200.00"`, `"€ 1.200"` → `Decimal`
|
|
131
|
+
|
|
132
|
+
### Identity and contact data
|
|
133
|
+
- **Phone number normalization** — `"+1 (800) 555-0100"`, `"08001234"` → E.164 format
|
|
134
|
+
- **Email normalization** — lowercase, strip display names, validate structure
|
|
135
|
+
- **Name normalization** — `"DR. JOHN A. SMITH"` → `{"prefix": "Dr.", "first": "John", "last": "Smith"}`
|
|
136
|
+
|
|
137
|
+
### Units and measurements
|
|
138
|
+
- **Temperature** — `"98.6 F"` / `"37 C"` / `"310 K"` → normalized `(value, unit)`
|
|
139
|
+
- **Distance** — km, miles, nautical miles with conversion helpers
|
|
140
|
+
- **Weight** — kg, lb, oz
|
|
141
|
+
|
|
142
|
+
### Geolocation
|
|
143
|
+
- **Country/locale normalization** — `"USA"`, `"US"`, `"United States of America"` → ISO 3166-1 alpha-2
|
|
144
|
+
- **Timezone string normalization** — `"EST"`, `"Eastern Time"`, `"America/New_York"` → IANA key
|
|
145
|
+
|
|
146
|
+
### Data quality
|
|
147
|
+
- **Boolean parsing** — `"yes"`, `"true"`, `"1"`, `"on"`, `"enabled"` → `bool`
|
|
148
|
+
- **Null/empty detection** — `"N/A"`, `"none"`, `"–"`, `""` → `None`
|
|
149
|
+
- **Whitespace and encoding normalization** — strip invisible Unicode, normalize line endings
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
convi-lab — Data conversion and normalization utilities.
|
|
3
|
+
|
|
4
|
+
Public API
|
|
5
|
+
----------
|
|
6
|
+
Conversions:
|
|
7
|
+
convert_to_24_format – Normalize any time string to 24-hour datetime
|
|
8
|
+
convert_name – Fuzzy-match a name against a reference list
|
|
9
|
+
convert_date_time – Master entry point: raw string → "YYYY-MM-DD HH:MM:SS"
|
|
10
|
+
|
|
11
|
+
Parsers (lower-level, used internally by convert_date_time):
|
|
12
|
+
parse_day_month_time – "17Jul-02:00PM" style strings
|
|
13
|
+
parse_day_time – "Tuesday12:30", "Today14:30" style strings
|
|
14
|
+
parse_date_time – "12/04/2522:15", "12.04.202512:15pm" style strings
|
|
15
|
+
|
|
16
|
+
Errors:
|
|
17
|
+
LabError – base; catch this for any convi-lab exception
|
|
18
|
+
ParserError – base for all parse-time failures
|
|
19
|
+
ConversionError – base for all conversion-time failures
|
|
20
|
+
PatternMatchError, DayResolutionError, MonthResolutionError, DateComponentError
|
|
21
|
+
ClockFormatError, FuzzyMatchError, InvalidInputError
|
|
22
|
+
|
|
23
|
+
Constants / helpers:
|
|
24
|
+
DAYS, MONTHS, RELATIVE_DAYS, WEEKDAY_MAP, TIME_PATTERNS, TEST_CASES
|
|
25
|
+
process_patterns
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from convi_lab.conversions import convert_to_24_format, convert_name, convert_date_time
|
|
29
|
+
from convi_lab.parsers import parse_day_month_time, parse_day_time, parse_date_time
|
|
30
|
+
from convi_lab.conversion_kernel.errors import (
|
|
31
|
+
LabError,
|
|
32
|
+
ParserError,
|
|
33
|
+
PatternMatchError,
|
|
34
|
+
DayResolutionError,
|
|
35
|
+
MonthResolutionError,
|
|
36
|
+
DateComponentError,
|
|
37
|
+
ConversionError,
|
|
38
|
+
ClockFormatError,
|
|
39
|
+
FuzzyMatchError,
|
|
40
|
+
InvalidInputError,
|
|
41
|
+
)
|
|
42
|
+
from convi_lab.conversion_kernel.constants import DAYS, MONTHS, RELATIVE_DAYS, WEEKDAY_MAP, TIME_PATTERNS, TEST_CASES
|
|
43
|
+
from convi_lab.conversion_kernel.pattern_parsers import process_patterns
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
# Conversions
|
|
47
|
+
"convert_to_24_format",
|
|
48
|
+
"convert_name",
|
|
49
|
+
"convert_date_time",
|
|
50
|
+
# Parsers
|
|
51
|
+
"parse_day_month_time",
|
|
52
|
+
"parse_day_time",
|
|
53
|
+
"parse_date_time",
|
|
54
|
+
# Errors — root
|
|
55
|
+
"LabError",
|
|
56
|
+
# Errors — parsers
|
|
57
|
+
"ParserError",
|
|
58
|
+
"PatternMatchError",
|
|
59
|
+
"DayResolutionError",
|
|
60
|
+
"MonthResolutionError",
|
|
61
|
+
"DateComponentError",
|
|
62
|
+
# Errors — conversions
|
|
63
|
+
"ConversionError",
|
|
64
|
+
"ClockFormatError",
|
|
65
|
+
"FuzzyMatchError",
|
|
66
|
+
"InvalidInputError",
|
|
67
|
+
# Constants
|
|
68
|
+
"DAYS",
|
|
69
|
+
"MONTHS",
|
|
70
|
+
"RELATIVE_DAYS",
|
|
71
|
+
"WEEKDAY_MAP",
|
|
72
|
+
"TIME_PATTERNS",
|
|
73
|
+
"TEST_CASES",
|
|
74
|
+
# Helpers
|
|
75
|
+
"process_patterns",
|
|
76
|
+
]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
###########################
|
|
4
|
+
# Day / month name tables #
|
|
5
|
+
###########################
|
|
6
|
+
|
|
7
|
+
# All available days for the day time parser
|
|
8
|
+
DAYS = [
|
|
9
|
+
'Today', 'Tomorrow', 'Monday', 'Tuesday',
|
|
10
|
+
'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
# All calendar months for pattern parsers
|
|
14
|
+
MONTHS = [
|
|
15
|
+
'January', 'February', 'March', 'April', 'May', 'June',
|
|
16
|
+
'July', 'August', 'September', 'October', 'November', 'December'
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
# Relative days specific to parse_relative_time()
|
|
20
|
+
RELATIVE_DAYS = [
|
|
21
|
+
'Today','Tomorrow'
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
# Map days to datetime standard
|
|
25
|
+
WEEKDAY_MAP = {
|
|
26
|
+
"Monday": 0, "Tuesday": 1, "Wednesday": 2, "Thursday": 3,
|
|
27
|
+
"Friday": 4, "Saturday": 5, "Sunday": 6
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
###############################################
|
|
31
|
+
# Abbreviation tables (used to build regexes) #
|
|
32
|
+
###############################################
|
|
33
|
+
|
|
34
|
+
# Day abbreviations
|
|
35
|
+
SHORTFORM_DAYS = [
|
|
36
|
+
'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
# Month abbreviations
|
|
40
|
+
SHORTFORM_MONTHS = [
|
|
41
|
+
'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
|
|
42
|
+
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
# The remaining part of day abbreviations
|
|
46
|
+
SHORTFORM_DAY_ENDERS = [
|
|
47
|
+
'day', 'sday', 'nesday', 'rsday', 'urday'
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# The remaining part of month abbreviations
|
|
52
|
+
SHORTFORM_MONTH_ENDERS = [
|
|
53
|
+
'uary', 'ruary', 'ch', 'il',
|
|
54
|
+
'y', 'ust', 'tember', 'ober', 'ember',
|
|
55
|
+
'e'
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
##########################
|
|
59
|
+
# Atomic regex fragments #
|
|
60
|
+
##########################
|
|
61
|
+
|
|
62
|
+
# Date regular expressions
|
|
63
|
+
re_num_day = r"^(?:0[1-9]|[12][0-9]|3[01])"  # anchored two-digit day 01-31; "00" is rejected, matching re_num_month's 0[1-9]
|
|
64
|
+
re_num_month = r"[./](?:0[1-9]|1[0-2])"
|
|
65
|
+
re_year = r"([./]2[5-9]|[./]202[5-9])?"  # optional year incl. its separator; only 25-29 / 2025-2029 match
|
|
66
|
+
|
|
67
|
+
# Time regular expressions
|
|
68
|
+
re_time = r"((?:[01]?\d|2[0-3]):[0-5]\d)"
|
|
69
|
+
re_am_pm = r"([AaPp][Mm])?$"
|
|
70
|
+
|
|
71
|
+
######################
|
|
72
|
+
# Compound fragments #
|
|
73
|
+
######################
|
|
74
|
+
|
|
75
|
+
# Compound regular expressions
|
|
76
|
+
_date_re = rf"({re_num_day}{re_num_month})"
|
|
77
|
+
_day_re = rf"^((?:Today|Tomorrow|{'|'.join(SHORTFORM_DAYS)})(?:{'|'.join(SHORTFORM_DAY_ENDERS)})?)"
|
|
78
|
+
_month_re = rf"((?:{'|'.join(SHORTFORM_MONTHS)})(?:{'|'.join(SHORTFORM_MONTH_ENDERS)})?)"
|
|
79
|
+
|
|
80
|
+
#####################
|
|
81
|
+
# Compiled patterns #
|
|
82
|
+
#####################
|
|
83
|
+
|
|
84
|
+
# "12/04/2523:15", "31/08/202503:15AM"
|
|
85
|
+
_datetime_pattern = re.compile(rf'{_date_re}{re_year}{re_time}{re_am_pm}') # "12/04/2523:15", "31/08/202503:15AM"
|
|
86
|
+
|
|
87
|
+
# "Tuesday12:30AM", "Today20:00"
|
|
88
|
+
_day_time_pattern = re.compile(rf'{_day_re}{re_time}{re_am_pm}') # "Tuesday12:30AM", "Today20:00"
|
|
89
|
+
|
|
90
|
+
# "17Jul-02:00PM"
|
|
91
|
+
_day_month_time_pattern = re.compile(rf'^(0[1-9]|[12][0-9]|3[01]){_month_re}-?{re_time}{re_am_pm}') # "17Jul-02:00PM"; day is two digits 01-31 ("00" is rejected)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Event_data patterns (all without spaces)
|
|
95
|
+
TIME_PATTERNS: dict[str, re.Pattern] = {
|
|
96
|
+
'day_time': _day_time_pattern,
|
|
97
|
+
'day_month_time': _day_month_time_pattern,
|
|
98
|
+
'datetime': _datetime_pattern
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
#################
|
|
102
|
+
# Test fixtures #
|
|
103
|
+
#################
|
|
104
|
+
|
|
105
|
+
# Unit test cases
|
|
106
|
+
TEST_CASES = [
|
|
107
|
+
"12/04 22:15", # 24-hour, no AM/PM, with space
|
|
108
|
+
"12/04 10:15 PM", # 12-hour with PM, with spaces
|
|
109
|
+
"Tuesday 12:30", # 24-hour weekday, with space
|
|
110
|
+
"Tuesday 12:30 PM", # 12-hour weekday, with space
|
|
111
|
+
"12.04.2025 12:15", # 24-hour full date, with space
|
|
112
|
+
"12.04.2025 12:15 pm", # 12-hour full date, with spaces
|
|
113
|
+
"Tuesday 02:30 PM", # 12-hour with space
|
|
114
|
+
"17 Jul - 02:00", # 24-hour with month, with spaces
|
|
115
|
+
"17 Jul - 02:00 PM", # 12-hour with month, with spaces
|
|
116
|
+
"Today 14:30", # 24-hour today, with space
|
|
117
|
+
"Tomorrow 03:30 PM", # 12-hour tomorrow, with space
|
|
118
|
+
# No-space versions
|
|
119
|
+
"12/0422:15",
|
|
120
|
+
"Tuesday12:30PM",
|
|
121
|
+
"17Jul-02:00PM",
|
|
122
|
+
"Wed1:30am"
|
|
123
|
+
]
|