convi-lab 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convi_lab/__init__.py +76 -0
- convi_lab/conversion_kernel/__init__.py +5 -0
- convi_lab/conversion_kernel/constants.py +123 -0
- convi_lab/conversion_kernel/errors.py +255 -0
- convi_lab/conversion_kernel/pattern_parsers.py +53 -0
- convi_lab/conversion_kernel/utils.py +81 -0
- convi_lab/conversions/__init__.py +11 -0
- convi_lab/conversions/convert_clock_format.py +70 -0
- convi_lab/conversions/convert_datetime.py +92 -0
- convi_lab/conversions/convert_name.py +67 -0
- convi_lab/parsers/__init__.py +11 -0
- convi_lab/parsers/parse_datetime.py +127 -0
- convi_lab/parsers/parse_day_month_time.py +85 -0
- convi_lab/parsers/parse_day_time.py +146 -0
- convi_lab-0.1.0.dist-info/METADATA +166 -0
- convi_lab-0.1.0.dist-info/RECORD +19 -0
- convi_lab-0.1.0.dist-info/WHEEL +5 -0
- convi_lab-0.1.0.dist-info/licenses/LICENSE +21 -0
- convi_lab-0.1.0.dist-info/top_level.txt +1 -0
convi_lab/__init__.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
convi-lab — Data conversion and normalization utilities.
|
|
3
|
+
|
|
4
|
+
Public API
|
|
5
|
+
----------
|
|
6
|
+
Conversions:
|
|
7
|
+
convert_to_24_format – Normalize any time string to 24-hour datetime
|
|
8
|
+
convert_name – Fuzzy-match a name against a reference list
|
|
9
|
+
convert_date_time – Master entry point: raw string → "YYYY-MM-DD HH:MM:SS"
|
|
10
|
+
|
|
11
|
+
Parsers (lower-level, used internally by convert_date_time):
|
|
12
|
+
parse_day_month_time – "17Jul-02:00PM" style strings
|
|
13
|
+
parse_day_time – "Tuesday12:30", "Today14:30" style strings
|
|
14
|
+
parse_date_time – "12/04/2522:15", "12.04.202512:15pm" style strings
|
|
15
|
+
|
|
16
|
+
Errors:
|
|
17
|
+
LabError – base; catch this for any convi-lab exception
|
|
18
|
+
ParserError – base for all parse-time failures
|
|
19
|
+
ConversionError – base for all conversion-time failures
|
|
20
|
+
PatternMatchError, DayResolutionError, MonthResolutionError, DateComponentError
|
|
21
|
+
ClockFormatError, FuzzyMatchError, InvalidInputError
|
|
22
|
+
|
|
23
|
+
Constants / helpers:
|
|
24
|
+
DAYS, MONTHS, RELATIVE_DAYS, WEEKDAY_MAP, TIME_PATTERNS, TEST_CASES
|
|
25
|
+
process_patterns
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from convi_lab.conversions import convert_to_24_format, convert_name, convert_date_time
|
|
29
|
+
from convi_lab.parsers import parse_day_month_time, parse_day_time, parse_date_time
|
|
30
|
+
from convi_lab.conversion_kernel.errors import (
|
|
31
|
+
LabError,
|
|
32
|
+
ParserError,
|
|
33
|
+
PatternMatchError,
|
|
34
|
+
DayResolutionError,
|
|
35
|
+
MonthResolutionError,
|
|
36
|
+
DateComponentError,
|
|
37
|
+
ConversionError,
|
|
38
|
+
ClockFormatError,
|
|
39
|
+
FuzzyMatchError,
|
|
40
|
+
InvalidInputError,
|
|
41
|
+
)
|
|
42
|
+
from convi_lab.conversion_kernel.constants import DAYS, MONTHS, RELATIVE_DAYS, WEEKDAY_MAP, TIME_PATTERNS, TEST_CASES
|
|
43
|
+
from convi_lab.conversion_kernel.pattern_parsers import process_patterns
|
|
44
|
+
|
|
45
|
+
__all__ = [
    # Conversions
    "convert_to_24_format", "convert_name", "convert_date_time",
    # Parsers
    "parse_day_month_time", "parse_day_time", "parse_date_time",
    # Errors: hierarchy root
    "LabError",
    # Errors: raised while parsing
    "ParserError", "PatternMatchError", "DayResolutionError",
    "MonthResolutionError", "DateComponentError",
    # Errors: raised while converting
    "ConversionError", "ClockFormatError", "FuzzyMatchError",
    "InvalidInputError",
    # Constants
    "DAYS", "MONTHS", "RELATIVE_DAYS", "WEEKDAY_MAP", "TIME_PATTERNS",
    "TEST_CASES",
    # Helpers
    "process_patterns",
]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
###########################
|
|
4
|
+
# Day / month name tables #
|
|
5
|
+
###########################
|
|
6
|
+
|
|
7
|
+
# All available days for the day time parser
|
|
8
|
+
DAYS = [
    'Today',
    'Tomorrow',
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]

# Full calendar month names, January first.
MONTHS = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]

# The relative (non-weekday) entries are the first two items of DAYS.
RELATIVE_DAYS = DAYS[:2]

# Weekday name -> index, Monday = 0 ... Sunday = 6 (the numbering used by
# ``datetime.date.weekday()``). The weekdays are DAYS minus the relative pair.
WEEKDAY_MAP = {weekday: index for index, weekday in enumerate(DAYS[2:])}

# --------------------------------------------------------------------------
# Abbreviation tables (only used to assemble the regexes below)
# --------------------------------------------------------------------------

# Three-letter weekday prefixes: Mon ... Sun
SHORTFORM_DAYS = [weekday[:3] for weekday in DAYS[2:]]

# Three-letter month prefixes: Jan ... Dec
SHORTFORM_MONTHS = [month[:3] for month in MONTHS]

# Suffixes that complete a weekday prefix into the full name
# (Mon+day, Tue+sday, Wed+nesday, Thu+rsday, Sat+urday).
SHORTFORM_DAY_ENDERS = ['day', 'sday', 'nesday', 'rsday', 'urday']

# Suffixes that complete a month prefix into the full name
# (Jan+uary, Feb+ruary, Mar+ch, Apr+il, Jul+y, Aug+ust, Sep+tember,
#  Oct+ober, Nov/Dec+ember, Jun+e).
SHORTFORM_MONTH_ENDERS = [
    'uary', 'ruary', 'ch', 'il',
    'y', 'ust', 'tember', 'ober', 'ember',
    'e',
]

# --------------------------------------------------------------------------
# Atomic regex fragments
# --------------------------------------------------------------------------

# Two-digit day at the start of the string, 00-31 (non-capturing).
re_num_day = r"^(?:0[0-9]|[12][0-9]|3[01])"
# Separator ('.' or '/') followed by a two-digit month, 01-12.
re_num_month = r"[./](?:0[1-9]|1[0-2])"
# Optional separator + year: two digits 25-29 or four digits 2025-2029.
re_year = r"([./]2[5-9]|[./]202[5-9])?"

# H:MM or HH:MM with hours 0-23 and minutes 00-59.
re_time = r"((?:[01]?\d|2[0-3]):[0-5]\d)"
# Optional AM/PM marker (any letter case) that must end the string.
re_am_pm = r"([AaPp][Mm])?$"

# --------------------------------------------------------------------------
# Compound fragments
# --------------------------------------------------------------------------

# Captures "DD<sep>MM" as a single group.
_date_re = "(" + re_num_day + re_num_month + ")"

# Captures "Today", "Tomorrow", or a weekday prefix with an optional ender.
_day_re = (
    "^((?:"
    + "|".join(["Today", "Tomorrow", *SHORTFORM_DAYS])
    + ")(?:"
    + "|".join(SHORTFORM_DAY_ENDERS)
    + ")?)"
)

# Captures a month prefix with an optional ender.
_month_re = (
    "((?:"
    + "|".join(SHORTFORM_MONTHS)
    + ")(?:"
    + "|".join(SHORTFORM_MONTH_ENDERS)
    + ")?)"
)

# --------------------------------------------------------------------------
# Compiled patterns (inputs are expected without spaces)
# --------------------------------------------------------------------------

# e.g. "12/04/2523:15", "31/08/202503:15AM"
_datetime_pattern = re.compile(_date_re + re_year + re_time + re_am_pm)

# e.g. "Tuesday12:30AM", "Today20:00"
_day_time_pattern = re.compile(_day_re + re_time + re_am_pm)

# e.g. "17Jul-02:00PM" -- the day is captured here, unlike in re_num_day.
_day_month_time_pattern = re.compile(
    r"^(0[0-9]|[12][0-9]|3[01])" + _month_re + "-?" + re_time + re_am_pm
)

# Pattern name -> compiled regex, in the order the patterns are tried.
TIME_PATTERNS: dict[str, re.Pattern] = {
    'day_time': _day_time_pattern,
    'day_month_time': _day_month_time_pattern,
    'datetime': _datetime_pattern,
}

# --------------------------------------------------------------------------
# Test fixtures
# --------------------------------------------------------------------------

# Sample raw inputs used by the unit tests.
TEST_CASES = [
    # Spaced variants
    "12/04 22:15",          # numeric date, 24-hour
    "12/04 10:15 PM",       # numeric date, 12-hour
    "Tuesday 12:30",        # weekday, 24-hour
    "Tuesday 12:30 PM",     # weekday, 12-hour
    "12.04.2025 12:15",     # full date, 24-hour
    "12.04.2025 12:15 pm",  # full date, 12-hour, lower-case marker
    "Tuesday 02:30 PM",     # weekday, 12-hour
    "17 Jul - 02:00",       # day + month, 24-hour
    "17 Jul - 02:00 PM",    # day + month, 12-hour
    "Today 14:30",          # relative day, 24-hour
    "Tomorrow 03:30 PM",    # relative day, 12-hour
    # Variants with the spaces already removed
    "12/0422:15",
    "Tuesday12:30PM",
    "17Jul-02:00PM",
    "Wed1:30am",
]
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""
|
|
2
|
+
errors.py — Exception hierarchy for convi-lab.
|
|
3
|
+
|
|
4
|
+
All exceptions inherit from :class:`LabError` so library consumers can
|
|
5
|
+
write a single broad ``except LabError`` or catch specific subclasses
|
|
6
|
+
for fine-grained handling.
|
|
7
|
+
|
|
8
|
+
Hierarchy
|
|
9
|
+
---------
|
|
10
|
+
::
|
|
11
|
+
|
|
12
|
+
LabError
|
|
13
|
+
├── ParserError
|
|
14
|
+
│ ├── PatternMatchError no TIME_PATTERNS entry matched the raw input
|
|
15
|
+
│ ├── DayResolutionError weekday or relative-day string is unresolvable
|
|
16
|
+
│ ├── MonthResolutionError month name string is unresolvable
|
|
17
|
+
│ └── DateComponentError numeric date fragment (day/month/year) is malformed
|
|
18
|
+
└── ConversionError
|
|
19
|
+
├── ClockFormatError HH:MM[AM|PM] string failed to parse
|
|
20
|
+
├── FuzzyMatchError rapidfuzz returned no result or raised internally
|
|
21
|
+
└── InvalidInputError bad type or empty string passed to convert_date_time
|
|
22
|
+
|
|
23
|
+
Design notes
|
|
24
|
+
------------
|
|
25
|
+
* Every leaf class accepts the minimum arguments needed to produce a
|
|
26
|
+
self-describing message so callers never have to format strings
|
|
27
|
+
themselves.
|
|
28
|
+
* ``errors.py`` imports nothing from the rest of the package — it sits
|
|
29
|
+
at the base of the dependency graph and is safe to import from anywhere.
|
|
30
|
+
* ``convert_date_time`` is the only public function that swallows
|
|
31
|
+
``LabError``; all other callables let errors propagate so the caller
|
|
32
|
+
can decide how to handle them.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
########
|
|
36
|
+
# Root #
|
|
37
|
+
########
|
|
38
|
+
|
|
39
|
+
class LabError(Exception):
    """
    Root of the convi-lab exception hierarchy.

    Every exception class defined in this module derives from ``LabError``,
    so a single handler is enough to intercept all of them::

        try:
            result = convert_date_time(raw)
        except LabError as exc:
            print(f' convi-lab error: {exc}')
    """
|
|
50
|
+
|
|
51
|
+
##################
|
|
52
|
+
# Parser errors #
|
|
53
|
+
##################
|
|
54
|
+
|
|
55
|
+
class ParserError(LabError):
    """
    Common parent of every parse-time failure.

    Signals that a raw string could not be turned into a ``datetime``:
    either none of the regex patterns matched it, or a piece that did
    match (day, month, numeric date fragment) could not be resolved to a
    concrete value. Subclasses identify which of those happened.
    """
|
|
64
|
+
|
|
65
|
+
class PatternMatchError(ParserError):
    """
    Raised when no entry in ``TIME_PATTERNS`` matches the input string.

    Example:
        raise PatternMatchError("badstring", ["day_time", "day_month_time", "datetime"])
        # PatternMatchError: 'badstring' did not match any known pattern.
        #   Available patterns: day_time, day_month_time, datetime

    Args:
        input_str: The cleaned (space-stripped) string that was tested.
        available_patterns: Names of the patterns that were tried.

    Attributes:
        input_str: The value passed as *input_str*.
        available_patterns: The pattern names, stored as a tuple.
    """

    def __init__(self, input_str: str, available_patterns: list[str]) -> None:
        self.input_str = input_str
        # Materialise once so a one-shot iterable survives both the join
        # below and later inspection through the attribute.
        self.available_patterns = tuple(available_patterns)

        patterns = ", ".join(self.available_patterns)

        super().__init__(
            f"'{input_str}' did not match any known pattern. "
            f"Available patterns: {patterns}"
        )

    def __reduce__(self) -> tuple:
        # ``args`` only holds the formatted message, so the default
        # exception reduction would call ``__init__`` with one argument and
        # fail. Rebuild from the original constructor arguments instead so
        # pickle/copy round-trips work.
        return (type(self), (self.input_str, list(self.available_patterns)))
|
|
87
|
+
|
|
88
|
+
class DayResolutionError(ParserError):
    """
    Raised when a day string cannot be resolved to a weekday or relative day.

    Intended for ``parse_day_time()`` when the resolved value is neither a
    key of ``WEEKDAY_MAP`` nor a member of ``RELATIVE_DAYS``.

    Example:
        raise DayResolutionError("Xyz")
        # DayResolutionError: 'Xyz' could not be resolved to a weekday or
        #   relative day. Valid values: Today, Tomorrow, Monday, Tuesday,
        #   Wednesday, Thursday, Friday, Saturday, Sunday

    Args:
        day_str: The raw day string that could not be resolved
                 (e.g. ``"Mnday"``, ``"Xyz"``).

    Attributes:
        day_str: The value passed as *day_str*.
    """

    # Kept local (not imported from constants) so this module stays
    # dependency-free within the package.
    _VALID = (
        "Today", "Tomorrow",
        "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
    )

    def __init__(self, day_str: str) -> None:
        self.day_str = day_str
        valid = ", ".join(self._VALID)

        super().__init__(
            f"'{day_str}' could not be resolved to a weekday or relative day. "
            f"Valid values: {valid}"
        )

    def __reduce__(self) -> tuple:
        # The default reduction would feed the already-formatted message
        # back into ``__init__`` and wrap it a second time; rebuild from the
        # original argument so pickle/copy preserve the message.
        return (type(self), (self.day_str,))
|
|
116
|
+
|
|
117
|
+
class MonthResolutionError(ParserError):
    """
    Raised when a month string cannot be resolved to a calendar month.

    Intended for ``parse_day_month_time()`` when no month can be resolved
    for the month component.

    Example:
        raise MonthResolutionError("Jly")
        # MonthResolutionError: 'Jly' could not be resolved to a calendar
        #   month. Valid values: January, February, March, April, May, June,
        #   July, August, September, October, November, December

    Args:
        month_str: The raw month string that could not be resolved
                   (e.g. ``"Jly"``, ``"Xyz"``).

    Attributes:
        month_str: The value passed as *month_str*.
    """

    # Kept local (not imported from constants) so this module stays
    # dependency-free within the package.
    _VALID = (
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    )

    def __init__(self, month_str: str) -> None:
        self.month_str = month_str
        valid = ", ".join(self._VALID)

        super().__init__(
            f"'{month_str}' could not be resolved to a calendar month. "
            f"Valid values: {valid}"
        )

    def __reduce__(self) -> tuple:
        # The default reduction would feed the already-formatted message
        # back into ``__init__`` and wrap it a second time; rebuild from the
        # original argument so pickle/copy preserve the message.
        return (type(self), (self.month_str,))
|
|
145
|
+
|
|
146
|
+
class DateComponentError(ParserError):
    """
    Raised when a numeric date fragment is structurally malformed.

    Meant for year fragments (``"/99"``, ``".20255"``) and for day or month
    numbers that get past regex screening but are rejected by
    ``datetime.strptime``.

    Example:
        raise DateComponentError("/999", "year fragment must be 2 or 4 digits")
        # DateComponentError: date fragment '/999' is malformed:
        #   year fragment must be 2 or 4 digits

    Args:
        fragment: The raw string fragment that could not be parsed
                  (e.g. ``"/999"``, ``"32/13"``).
        reason: Human-readable explanation of what went wrong.

    Attributes:
        fragment: The value passed as *fragment*.
        reason: The value passed as *reason*.
    """

    def __init__(self, fragment: str, reason: str) -> None:
        self.fragment = fragment
        self.reason = reason

        super().__init__(
            f"date fragment '{fragment}' is malformed: {reason}"
        )

    def __reduce__(self) -> tuple:
        # ``args`` only holds the formatted message, so the default
        # exception reduction would call ``__init__`` with one argument and
        # fail. Rebuild from the original constructor arguments instead.
        return (type(self), (self.fragment, self.reason))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
######################
|
|
171
|
+
# Conversion errors #
|
|
172
|
+
######################
|
|
173
|
+
|
|
174
|
+
class ConversionError(LabError):
    """
    Common parent of every conversion-time failure.

    Signals that a component which was structurally acceptable (a time
    string, a fuzzy name query, or the value handed to the entry point)
    could not be converted to its target type. Subclasses identify which
    conversion failed.
    """
|
|
182
|
+
|
|
183
|
+
class ClockFormatError(ConversionError):
    """
    Raised when an HH:MM[AM|PM] string cannot be parsed.

    Raised by ``convert_to_24_format()`` when ``datetime.strptime()``
    rejects the assembled time string.

    Example:
        raise ClockFormatError("25:00", None)
        # ClockFormatError: cannot parse '25:00' as a 24-hour time (HH:MM)

        raise ClockFormatError("09:99", "PM")
        # ClockFormatError: cannot parse '09:99PM' as a 12-hour time (HH:MM AM/PM)

    Args:
        time: The raw ``HH:MM`` fragment (e.g. ``"25:00"``, ``"9:99"``).
        am_pm: The AM/PM suffix if 12-hour mode was requested, else ``None``.

    Attributes:
        time: The value passed as *time*.
        am_pm: The value passed as *am_pm* (not upper-cased).
    """

    def __init__(self, time: str, am_pm: str | None) -> None:
        self.time = time
        self.am_pm = am_pm

        # A falsy suffix (``None`` or ``""``) means 24-hour mode.
        if am_pm:
            raw = f"{time}{am_pm.upper()}"
            fmt = "12-hour time (HH:MM AM/PM)"
        else:
            raw = time
            fmt = "24-hour time (HH:MM)"

        super().__init__(f"cannot parse '{raw}' as a {fmt}")

    def __reduce__(self) -> tuple:
        # ``args`` only holds the formatted message, so the default
        # exception reduction would call ``__init__`` with one argument and
        # fail. Rebuild from the original constructor arguments instead.
        return (type(self), (self.time, self.am_pm))
|
|
210
|
+
|
|
211
|
+
class FuzzyMatchError(ConversionError):
    """
    Raised when fuzzy matching finds no result for the given query.

    Intended for ``convert_name()`` when ``process.extractOne`` returns
    ``None`` or raises internally.

    Example:
        raise FuzzyMatchError("xyz", 12)
        # FuzzyMatchError: fuzzy match found no result for 'xyz'
        #   in a reference list of 12 items

    Args:
        name: The query string that could not be matched.
        ref_count: Number of items in the reference list (aids debugging).

    Attributes:
        name: The value passed as *name*.
        ref_count: The value passed as *ref_count*.
    """

    def __init__(self, name: str, ref_count: int) -> None:
        self.name = name
        self.ref_count = ref_count

        # Pluralise "item" for every count except exactly one.
        plural = 's' if ref_count != 1 else ''

        super().__init__(
            f"fuzzy match found no result for '{name}' "
            f"in a reference list of {ref_count} item{plural}"
        )

    def __reduce__(self) -> tuple:
        # ``args`` only holds the formatted message, so the default
        # exception reduction would call ``__init__`` with one argument and
        # fail. Rebuild from the original constructor arguments instead.
        return (type(self), (self.name, self.ref_count))
|
|
232
|
+
|
|
233
|
+
class InvalidInputError(ConversionError):
    """
    Raised when the value passed to ``convert_date_time()`` is not a
    non-empty string.

    Example:
        raise InvalidInputError(None)
        # InvalidInputError: expected a non-empty str, got NoneType: None

        raise InvalidInputError("")
        # InvalidInputError: expected a non-empty str, got an empty string

    Args:
        value: The invalid value that was provided.

    Attributes:
        value: The value passed as *value*.
    """

    def __init__(self, value: object) -> None:
        self.value = value

        # Check the type before comparing: ``value == ""`` on an arbitrary
        # object runs that object's ``__eq__``, which may return something
        # whose truth value is ambiguous or raise.
        if isinstance(value, str) and value == "":
            detail = "got an empty string"
        else:
            detail = f"got {type(value).__name__}: {value!r}"

        super().__init__(f"expected a non-empty str, {detail}")

    def __reduce__(self) -> tuple:
        # The default reduction would feed the already-formatted message
        # back into ``__init__`` and wrap it a second time; rebuild from the
        # original argument so pickle/copy preserve the message.
        # NOTE(review): this requires ``value`` itself to be picklable.
        return (type(self), (self.value,))
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from logger_lab import lab
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional, Callable
|
|
4
|
+
|
|
5
|
+
from convi_lab.conversion_kernel.constants import TIME_PATTERNS
|
|
6
|
+
from convi_lab.parsers import parse_day_month_time, parse_day_time, parse_date_time
|
|
7
|
+
|
|
8
|
+
# Module logger, built with the "INVESTIGATOR" profile at level "ERROR".
logger = lab().with_profile("INVESTIGATOR").with_level("ERROR").build(__name__)

# Pattern name (a key of TIME_PATTERNS) -> parser that consumes the match.
_PATTERN_HANDLERS: dict[str, Callable] = {
    "day_time": parse_day_time,
    "day_month_time": parse_day_month_time,
    "datetime": parse_date_time,
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Match and parse pattern
|
|
24
|
+
def process_patterns(event_date: str) -> Optional[datetime]:
    """
    Run *event_date* through ``TIME_PATTERNS`` and parse the first match.

    Patterns are tried in the dictionary's iteration order. The match
    object of the first pattern that matches is handed to the parser
    registered under the same name in ``_PATTERN_HANDLERS``, and that
    parser's result is returned. Exceptions raised by the parser are not
    caught here.

    Note:
        Spaces must already be stripped from *event_date* (see
        ``remove_spaces()``) before calling this function.

    Args:
        event_date: Date/time string with all spaces already removed.
    Returns:
        The handler's result for the first matching pattern, or ``None``
        when no pattern matches.
    """

    for key, regex in TIME_PATTERNS.items():
        found = regex.match(event_date)

        # Not this pattern -- move on to the next one.
        if not found:
            continue

        parse = _PATTERN_HANDLERS[key]
        parsed = parse(found)

        logger.debug(f"Matched pattern '{key}' for input '{event_date}'")

        return parsed

    # Nothing matched.
    return None
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
from convi_lab.conversion_kernel.constants import TIME_PATTERNS
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Returns a match object for unit tests
|
|
9
|
+
def match_pattern(pattern_name: str, case: str) -> re.Match | None:
    """
    Match *case* against the named entry of ``TIME_PATTERNS``.

    Used in unit tests and standalone ``__main__`` blocks.

    Args:
        pattern_name: Key into ``TIME_PATTERNS`` (e.g. ``"datetime"``).
        case: Raw date/time string (spaces already removed).
    Returns:
        The ``re.Match`` when the pattern matches at the start of *case*,
        otherwise ``None``.
    Raises:
        KeyError:
            If *pattern_name* is not in ``TIME_PATTERNS``.
    """

    return TIME_PATTERNS[pattern_name].match(case)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Test the output of a regex
|
|
28
|
+
def _test_pattern(pattern: re.Pattern, case: str) -> tuple[str, ...]:
    """
    Return the groups captured when *case* is matched against *pattern*.

    Args:
        pattern: A compiled regex pattern.
        case: Input string to test.
    Returns:
        The tuple produced by ``Match.groups()``.
    Raises:
        ValueError:
            If *pattern* does not match at the start of *case*.
    """

    found = pattern.match(case)

    # Guard clause: report the failure together with the offending pattern.
    if not found:
        raise ValueError(f"{case} is not a valid pattern\n Valid pattern: {pattern}")

    return found.groups()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Remove all spaces from string
|
|
49
|
+
def remove_spaces(value: str) -> str:
    """
    Strip all whitespace characters from *value*.

    Removes every run of whitespace (spaces, tabs, newlines, and other
    characters ``str.split()`` treats as whitespace), wherever it occurs.

    Args:
        value: The string to clean.
    Returns:
        *value* with all whitespace removed; ``""`` stays ``""``.
    """

    # split() with no separator breaks on any whitespace run and drops the
    # empty pieces, so re-joining yields the string with whitespace removed.
    return "".join(value.split())
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Used in a parser to build the datetime object
|
|
56
|
+
def parser_handler(date_object: datetime, time_str: str, am_pm: str | None) -> datetime:
    """
    Combine a parsed date with a parsed clock time into one ``datetime``.

    The time is obtained from ``convert_to_24_format(time_str, am_pm)``.
    If the date part of *date_object* lies before today's date, the year of
    the result is replaced with next calendar year (current year + 1).

    Args:
        date_object: Object whose ``.date()`` supplies the calendar date.
        time_str: ``HH:MM`` string
        am_pm: ``"AM"`` / ``"PM"`` / ``None``
    Returns:
        The combined ``datetime``, year-adjusted when the date is in the past.
    Raises:
        Whatever ``convert_to_24_format`` raises for an unparseable time.
    """

    # Imported lazily to avoid a circular import with the package root.
    from convi_lab import convert_to_24_format

    time_object = convert_to_24_format(time=time_str, am_pm=am_pm)

    # Combine date and time
    parsed_date = date_object.date()
    _datetime = datetime.combine(parsed_date, time_object.time())

    # Take a single snapshot of "today" so the past-date check and the year
    # bump cannot disagree if the clock crosses midnight on New Year's Eve
    # between two separate now() calls.
    today = datetime.now().date()

    # Handle year boundary (if parsed date is in the past, assume next year).
    # Only the date part is compared, not the time.
    # NOTE(review): replace() raises ValueError for 29 February when the
    # target year is not a leap year -- confirm the desired policy.
    if parsed_date < today:
        _datetime = _datetime.replace(year=today.year + 1)

    return _datetime
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
""" DATA CONVERSION HANDLERS """
|
|
2
|
+
|
|
3
|
+
from .convert_clock_format import convert_to_24_format
|
|
4
|
+
from .convert_name import convert_name
|
|
5
|
+
from .convert_datetime import convert_date_time
|
|
6
|
+
|
|
7
|
+
__all__ = [
    "convert_to_24_format",  # clock-format normalisation
    "convert_name",          # name conversion
    "convert_date_time",     # date/time conversion
]
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""
|
|
2
|
+
convert_clock_format.py — Normalise any HH:MM[AM|PM] string to a 24-hour datetime.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from logger_lab import lab
|
|
6
|
+
from typing import Optional
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
from convi_lab.conversion_kernel.errors import ClockFormatError
|
|
10
|
+
|
|
11
|
+
# Module logger, built with the "INVESTIGATOR" profile at level "ERROR".
logger = lab().with_profile("INVESTIGATOR").with_level("ERROR").build(__name__)
|
|
17
|
+
|
|
18
|
+
# RUN CONVERT FORMAT
|
|
19
|
+
|
|
20
|
+
# Convert 12-hour to 24-hour format
|
|
21
|
+
def convert_to_24_format(time: str, am_pm: Optional[str] = None) -> datetime:
    """
    Parse a time string into a ``datetime`` object, normalising to 24-hour time.

    When *am_pm* is provided (and non-empty) the input is treated as a
    12-hour clock value; otherwise it is parsed as 24-hour time.

    Examples:
        convert_to_24_format("14:30")           # → datetime(... 14, 30)
        convert_to_24_format("02:30", "PM")     # → datetime(... 14, 30)
        convert_to_24_format("12:00", "AM")     # → datetime(...  0,  0)
    Args:
        time: Time in ``HH:MM`` format, no spaces (e.g. ``"02:30"``).
        am_pm: Optional AM/PM suffix — case-insensitive
               (``"AM"``, ``"pm"``, ``"Pm"`` …). Pass ``None`` for 24-hour input.
    Returns:
        ``datetime`` produced by ``datetime.strptime``; only the time fields
        come from the input (the date part is strptime's default).
    Raises:
        ClockFormatError:
            If the string cannot be parsed under the chosen format. The
            underlying ``ValueError`` is attached as ``__cause__``.
    """

    if am_pm:
        # 12-hour format conversion
        am_pm = am_pm.upper()
        try:
            # NOTE(review): %p matches the current locale's AM/PM strings;
            # confirm behaviour if a non-English locale can be active.
            return datetime.strptime(f"{time}{am_pm}", "%I:%M%p")

        except ValueError as exc:
            logger.error(f"Failed to parse 12-hour time: {time}{am_pm}")
            # Chain explicitly so the traceback shows the strptime failure
            # as the direct cause rather than an error during handling.
            raise ClockFormatError(time, am_pm) from exc

    # 24-hour format
    try:
        return datetime.strptime(time, "%H:%M")

    except ValueError as exc:
        logger.error(f"Failed to parse 24-hour time: {time}")
        raise ClockFormatError(time, None) from exc
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Example usage and test case
|
|
64
|
+
def test_case():
    """Convert the sample 12-hour time ``12:36 AM`` and log the result at debug level."""
    sample = convert_to_24_format('12:36', 'AM')
    logger.debug(sample)


# Run the sample conversion when the module is executed as a script.
if __name__ == "__main__":
    test_case()
|