dateparser 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateparser/__init__.py +82 -0
- dateparser/calendars/__init__.py +144 -0
- dateparser/calendars/hijri.py +6 -0
- dateparser/calendars/hijri_parser.py +60 -0
- dateparser/calendars/jalali.py +9 -0
- dateparser/calendars/jalali_parser.py +184 -0
- dateparser/conf.py +267 -0
- dateparser/custom_language_detection/__init__.py +0 -0
- dateparser/custom_language_detection/fasttext.py +43 -0
- dateparser/custom_language_detection/langdetect.py +37 -0
- dateparser/custom_language_detection/language_mapping.py +18 -0
- dateparser/data/__init__.py +2 -0
- dateparser/data/date_translation_data/__init__.py +0 -0
- dateparser/data/date_translation_data/af.py +242 -0
- dateparser/data/date_translation_data/agq.py +169 -0
- dateparser/data/date_translation_data/ak.py +169 -0
- dateparser/data/date_translation_data/am.py +222 -0
- dateparser/data/date_translation_data/ar.py +574 -0
- dateparser/data/date_translation_data/as.py +164 -0
- dateparser/data/date_translation_data/asa.py +168 -0
- dateparser/data/date_translation_data/ast.py +280 -0
- dateparser/data/date_translation_data/az-Cyrl.py +168 -0
- dateparser/data/date_translation_data/az-Latn.py +217 -0
- dateparser/data/date_translation_data/az.py +217 -0
- dateparser/data/date_translation_data/bas.py +169 -0
- dateparser/data/date_translation_data/be.py +340 -0
- dateparser/data/date_translation_data/bem.py +161 -0
- dateparser/data/date_translation_data/bez.py +169 -0
- dateparser/data/date_translation_data/bg.py +345 -0
- dateparser/data/date_translation_data/bm.py +167 -0
- dateparser/data/date_translation_data/bn.py +241 -0
- dateparser/data/date_translation_data/bo.py +185 -0
- dateparser/data/date_translation_data/br.py +226 -0
- dateparser/data/date_translation_data/brx.py +157 -0
- dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
- dateparser/data/date_translation_data/bs-Latn.py +248 -0
- dateparser/data/date_translation_data/bs.py +248 -0
- dateparser/data/date_translation_data/ca.py +313 -0
- dateparser/data/date_translation_data/ce.py +225 -0
- dateparser/data/date_translation_data/cgg.py +169 -0
- dateparser/data/date_translation_data/chr.py +240 -0
- dateparser/data/date_translation_data/ckb.py +154 -0
- dateparser/data/date_translation_data/cs.py +316 -0
- dateparser/data/date_translation_data/cy.py +217 -0
- dateparser/data/date_translation_data/da.py +296 -0
- dateparser/data/date_translation_data/dav.py +169 -0
- dateparser/data/date_translation_data/de.py +357 -0
- dateparser/data/date_translation_data/dje.py +167 -0
- dateparser/data/date_translation_data/dsb.py +270 -0
- dateparser/data/date_translation_data/dua.py +169 -0
- dateparser/data/date_translation_data/dyo.py +168 -0
- dateparser/data/date_translation_data/dz.py +225 -0
- dateparser/data/date_translation_data/ebu.py +169 -0
- dateparser/data/date_translation_data/ee.py +233 -0
- dateparser/data/date_translation_data/el.py +279 -0
- dateparser/data/date_translation_data/en.py +851 -0
- dateparser/data/date_translation_data/eo.py +169 -0
- dateparser/data/date_translation_data/es.py +499 -0
- dateparser/data/date_translation_data/et.py +233 -0
- dateparser/data/date_translation_data/eu.py +219 -0
- dateparser/data/date_translation_data/ewo.py +169 -0
- dateparser/data/date_translation_data/fa.py +270 -0
- dateparser/data/date_translation_data/ff.py +179 -0
- dateparser/data/date_translation_data/fi.py +345 -0
- dateparser/data/date_translation_data/fil.py +223 -0
- dateparser/data/date_translation_data/fo.py +256 -0
- dateparser/data/date_translation_data/fr.py +520 -0
- dateparser/data/date_translation_data/fur.py +223 -0
- dateparser/data/date_translation_data/fy.py +223 -0
- dateparser/data/date_translation_data/ga.py +238 -0
- dateparser/data/date_translation_data/gd.py +277 -0
- dateparser/data/date_translation_data/gl.py +253 -0
- dateparser/data/date_translation_data/gsw.py +179 -0
- dateparser/data/date_translation_data/gu.py +216 -0
- dateparser/data/date_translation_data/guz.py +170 -0
- dateparser/data/date_translation_data/gv.py +166 -0
- dateparser/data/date_translation_data/ha.py +176 -0
- dateparser/data/date_translation_data/haw.py +168 -0
- dateparser/data/date_translation_data/he.py +371 -0
- dateparser/data/date_translation_data/hi.py +261 -0
- dateparser/data/date_translation_data/hr.py +378 -0
- dateparser/data/date_translation_data/hsb.py +271 -0
- dateparser/data/date_translation_data/hu.py +297 -0
- dateparser/data/date_translation_data/hy.py +246 -0
- dateparser/data/date_translation_data/id.py +272 -0
- dateparser/data/date_translation_data/ig.py +168 -0
- dateparser/data/date_translation_data/ii.py +157 -0
- dateparser/data/date_translation_data/is.py +242 -0
- dateparser/data/date_translation_data/it.py +282 -0
- dateparser/data/date_translation_data/ja.py +286 -0
- dateparser/data/date_translation_data/jgo.py +188 -0
- dateparser/data/date_translation_data/jmc.py +168 -0
- dateparser/data/date_translation_data/ka.py +241 -0
- dateparser/data/date_translation_data/kab.py +169 -0
- dateparser/data/date_translation_data/kam.py +169 -0
- dateparser/data/date_translation_data/kde.py +169 -0
- dateparser/data/date_translation_data/kea.py +230 -0
- dateparser/data/date_translation_data/khq.py +167 -0
- dateparser/data/date_translation_data/ki.py +169 -0
- dateparser/data/date_translation_data/kk.py +228 -0
- dateparser/data/date_translation_data/kl.py +213 -0
- dateparser/data/date_translation_data/kln.py +171 -0
- dateparser/data/date_translation_data/km.py +198 -0
- dateparser/data/date_translation_data/kn.py +225 -0
- dateparser/data/date_translation_data/ko.py +207 -0
- dateparser/data/date_translation_data/kok.py +157 -0
- dateparser/data/date_translation_data/ks.py +152 -0
- dateparser/data/date_translation_data/ksb.py +168 -0
- dateparser/data/date_translation_data/ksf.py +169 -0
- dateparser/data/date_translation_data/ksh.py +192 -0
- dateparser/data/date_translation_data/kw.py +169 -0
- dateparser/data/date_translation_data/ky.py +240 -0
- dateparser/data/date_translation_data/lag.py +169 -0
- dateparser/data/date_translation_data/lb.py +233 -0
- dateparser/data/date_translation_data/lg.py +169 -0
- dateparser/data/date_translation_data/lkt.py +194 -0
- dateparser/data/date_translation_data/ln.py +179 -0
- dateparser/data/date_translation_data/lo.py +228 -0
- dateparser/data/date_translation_data/lrc.py +154 -0
- dateparser/data/date_translation_data/lt.py +263 -0
- dateparser/data/date_translation_data/lu.py +169 -0
- dateparser/data/date_translation_data/luo.py +169 -0
- dateparser/data/date_translation_data/luy.py +168 -0
- dateparser/data/date_translation_data/lv.py +257 -0
- dateparser/data/date_translation_data/mas.py +173 -0
- dateparser/data/date_translation_data/mer.py +168 -0
- dateparser/data/date_translation_data/mfe.py +166 -0
- dateparser/data/date_translation_data/mg.py +168 -0
- dateparser/data/date_translation_data/mgh.py +169 -0
- dateparser/data/date_translation_data/mgo.py +151 -0
- dateparser/data/date_translation_data/mk.py +234 -0
- dateparser/data/date_translation_data/ml.py +217 -0
- dateparser/data/date_translation_data/mn.py +224 -0
- dateparser/data/date_translation_data/mr.py +229 -0
- dateparser/data/date_translation_data/ms.py +242 -0
- dateparser/data/date_translation_data/mt.py +175 -0
- dateparser/data/date_translation_data/mua.py +169 -0
- dateparser/data/date_translation_data/my.py +203 -0
- dateparser/data/date_translation_data/mzn.py +199 -0
- dateparser/data/date_translation_data/naq.py +169 -0
- dateparser/data/date_translation_data/nb.py +261 -0
- dateparser/data/date_translation_data/nd.py +169 -0
- dateparser/data/date_translation_data/ne.py +207 -0
- dateparser/data/date_translation_data/nl.py +273 -0
- dateparser/data/date_translation_data/nmg.py +169 -0
- dateparser/data/date_translation_data/nn.py +231 -0
- dateparser/data/date_translation_data/nnh.py +150 -0
- dateparser/data/date_translation_data/nus.py +166 -0
- dateparser/data/date_translation_data/nyn.py +169 -0
- dateparser/data/date_translation_data/om.py +173 -0
- dateparser/data/date_translation_data/or.py +157 -0
- dateparser/data/date_translation_data/os.py +203 -0
- dateparser/data/date_translation_data/pa-Arab.py +150 -0
- dateparser/data/date_translation_data/pa-Guru.py +221 -0
- dateparser/data/date_translation_data/pa.py +221 -0
- dateparser/data/date_translation_data/pl.py +416 -0
- dateparser/data/date_translation_data/ps.py +150 -0
- dateparser/data/date_translation_data/pt.py +981 -0
- dateparser/data/date_translation_data/qu.py +176 -0
- dateparser/data/date_translation_data/rm.py +166 -0
- dateparser/data/date_translation_data/rn.py +169 -0
- dateparser/data/date_translation_data/ro.py +270 -0
- dateparser/data/date_translation_data/rof.py +157 -0
- dateparser/data/date_translation_data/ru.py +442 -0
- dateparser/data/date_translation_data/rw.py +169 -0
- dateparser/data/date_translation_data/rwk.py +168 -0
- dateparser/data/date_translation_data/sah.py +219 -0
- dateparser/data/date_translation_data/saq.py +169 -0
- dateparser/data/date_translation_data/sbp.py +169 -0
- dateparser/data/date_translation_data/se.py +280 -0
- dateparser/data/date_translation_data/seh.py +169 -0
- dateparser/data/date_translation_data/ses.py +167 -0
- dateparser/data/date_translation_data/sg.py +169 -0
- dateparser/data/date_translation_data/shi-Latn.py +169 -0
- dateparser/data/date_translation_data/shi-Tfng.py +169 -0
- dateparser/data/date_translation_data/shi.py +169 -0
- dateparser/data/date_translation_data/si.py +220 -0
- dateparser/data/date_translation_data/sk.py +327 -0
- dateparser/data/date_translation_data/sl.py +244 -0
- dateparser/data/date_translation_data/smn.py +176 -0
- dateparser/data/date_translation_data/sn.py +169 -0
- dateparser/data/date_translation_data/so.py +179 -0
- dateparser/data/date_translation_data/sq.py +237 -0
- dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
- dateparser/data/date_translation_data/sr-Latn.py +306 -0
- dateparser/data/date_translation_data/sr.py +255 -0
- dateparser/data/date_translation_data/sv.py +309 -0
- dateparser/data/date_translation_data/sw.py +231 -0
- dateparser/data/date_translation_data/ta.py +264 -0
- dateparser/data/date_translation_data/te.py +239 -0
- dateparser/data/date_translation_data/teo.py +173 -0
- dateparser/data/date_translation_data/th.py +300 -0
- dateparser/data/date_translation_data/ti.py +173 -0
- dateparser/data/date_translation_data/tl.py +137 -0
- dateparser/data/date_translation_data/to.py +216 -0
- dateparser/data/date_translation_data/tr.py +259 -0
- dateparser/data/date_translation_data/twq.py +167 -0
- dateparser/data/date_translation_data/tzm.py +169 -0
- dateparser/data/date_translation_data/ug.py +203 -0
- dateparser/data/date_translation_data/uk.py +502 -0
- dateparser/data/date_translation_data/ur.py +256 -0
- dateparser/data/date_translation_data/uz-Arab.py +167 -0
- dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
- dateparser/data/date_translation_data/uz-Latn.py +216 -0
- dateparser/data/date_translation_data/uz.py +216 -0
- dateparser/data/date_translation_data/vi.py +260 -0
- dateparser/data/date_translation_data/vun.py +168 -0
- dateparser/data/date_translation_data/wae.py +224 -0
- dateparser/data/date_translation_data/xog.py +169 -0
- dateparser/data/date_translation_data/yav.py +169 -0
- dateparser/data/date_translation_data/yi.py +178 -0
- dateparser/data/date_translation_data/yo.py +263 -0
- dateparser/data/date_translation_data/yue.py +203 -0
- dateparser/data/date_translation_data/zgh.py +169 -0
- dateparser/data/date_translation_data/zh-Hans.py +240 -0
- dateparser/data/date_translation_data/zh-Hant.py +402 -0
- dateparser/data/date_translation_data/zh.py +273 -0
- dateparser/data/date_translation_data/zu.py +196 -0
- dateparser/data/languages_info.py +826 -0
- dateparser/date.py +599 -0
- dateparser/date_parser.py +55 -0
- dateparser/freshness_date_parser.py +156 -0
- dateparser/languages/__init__.py +2 -0
- dateparser/languages/dictionary.py +352 -0
- dateparser/languages/loader.py +224 -0
- dateparser/languages/locale.py +625 -0
- dateparser/languages/validation.py +467 -0
- dateparser/parser.py +742 -0
- dateparser/search/__init__.py +71 -0
- dateparser/search/detection.py +78 -0
- dateparser/search/search.py +297 -0
- dateparser/search/text_detection.py +89 -0
- dateparser/timezone_parser.py +91 -0
- dateparser/timezones.py +469 -0
- dateparser/utils/__init__.py +257 -0
- dateparser/utils/strptime.py +108 -0
- dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
- dateparser-1.2.1.dist-info/LICENSE +12 -0
- dateparser-1.2.1.dist-info/METADATA +864 -0
- dateparser-1.2.1.dist-info/RECORD +256 -0
- dateparser-1.2.1.dist-info/WHEEL +5 -0
- dateparser-1.2.1.dist-info/entry_points.txt +2 -0
- dateparser-1.2.1.dist-info/top_level.txt +4 -0
- dateparser_cli/__init__.py +0 -0
- dateparser_cli/cli.py +36 -0
- dateparser_cli/exceptions.py +2 -0
- dateparser_cli/fasttext_manager.py +42 -0
- dateparser_cli/utils.py +27 -0
- dateparser_data/__init__.py +0 -0
- dateparser_data/settings.py +33 -0
- dateparser_scripts/__init__.py +0 -0
- dateparser_scripts/get_cldr_data.py +567 -0
- dateparser_scripts/order_languages.py +217 -0
- dateparser_scripts/update_supported_languages_and_locales.py +48 -0
- dateparser_scripts/utils.py +73 -0
- dateparser_scripts/write_complete_data.py +129 -0
dateparser/parser.py
ADDED
|
@@ -0,0 +1,742 @@
|
|
|
1
|
+
import calendar
|
|
2
|
+
from collections import OrderedDict
|
|
3
|
+
from datetime import datetime, timedelta, timezone
|
|
4
|
+
from io import StringIO
|
|
5
|
+
|
|
6
|
+
import pytz
|
|
7
|
+
import regex as re
|
|
8
|
+
|
|
9
|
+
from dateparser.utils import (
|
|
10
|
+
_get_missing_parts,
|
|
11
|
+
get_last_day_of_month,
|
|
12
|
+
get_next_leap_year,
|
|
13
|
+
get_previous_leap_year,
|
|
14
|
+
get_timezone_from_tz_string,
|
|
15
|
+
set_correct_day_from_settings,
|
|
16
|
+
set_correct_month_from_settings,
|
|
17
|
+
)
|
|
18
|
+
from dateparser.utils.strptime import strptime
|
|
19
|
+
|
|
20
|
+
# Finds the first run of non-digit characters in a date string; used by
# no_space_parser_eligibile() to decide whether the no-space parser applies.
NSP_COMPATIBLE = re.compile(r"\D+")
# Finds a lowercase "am" / "pm" marker anywhere inside a token.
MERIDIAN = re.compile(r"am|pm")
# Finds the first run of one to six digits (a fractional-second value).
MICROSECOND = re.compile(r"\d{1,6}")
# Matches a string consisting of eight digits.
EIGHT_DIGIT = re.compile(r"^\d{8}$")
# Matches "H:MM" or "HH:MM" with hours 0-23 and minutes 00-59.
HOUR_MINUTE_REGEX = re.compile(r"^([0-9]|0[0-9]|1[0-9]|2[0-3]):[0-5][0-9]$")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def no_space_parser_eligibile(datestring):
    """Tell whether *datestring* may be handed to the no-space parser.

    The string qualifies when it contains no non-digit characters at all,
    or when the first run of non-digit characters is exactly a single ":".
    """
    first_non_digits = NSP_COMPATIBLE.search(datestring)
    return not first_non_digits or first_non_digits.group() == ":"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_unresolved_attrs(parser_object):
    """Partition the date fields of *parser_object* by whether they are set.

    Returns a ``(seen, unseen)`` pair of lists. ``seen`` holds the names among
    ``year``, ``month`` and ``day`` (in that order) whose attribute exists and
    is not ``None``; ``unseen`` holds the remaining names.
    """
    partition = {True: [], False: []}
    for field in ("year", "month", "day"):
        is_set = getattr(parser_object, field, None) is not None
        partition[is_set].append(field)
    return partition[True], partition[False]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Maps each supported DATE_ORDER code to the matching concatenated strptime
# format (two-digit year, no separators). resolve_date_order() returns these
# values, and _no_spaces_parser uses them as keys of its ``date_formats``.
date_order_chart = {
    "DMY": "%d%m%y",
    "DYM": "%d%y%m",
    "MDY": "%m%d%y",
    "MYD": "%m%y%d",
    "YDM": "%y%d%m",
    "YMD": "%y%m%d",
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def resolve_date_order(order, lst=None):
    """Translate a date-order code such as ``"DMY"``.

    With a truthy *lst* the result is a fresh list of component names
    (``"day"``, ``"month"``, ``"year"``) in the given order; otherwise it is
    the strptime format stored in ``date_order_chart``.

    Raises ``KeyError`` when *order* is not one of the six supported codes.
    """
    if lst:
        component_names = {"D": "day", "M": "month", "Y": "year"}
        supported_codes = ("DMY", "DYM", "MDY", "MYD", "YDM", "YMD")
        components_by_code = {
            code: [component_names[letter] for letter in code]
            for code in supported_codes
        }
        return components_by_code[order]
    return date_order_chart[order]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _parse_absolute(datestring, settings, tz=None):
    """Parse *datestring* by delegating to ``_parser.parse``.

    All three arguments are forwarded positionally and the delegate's
    result is returned unchanged.
    """
    parsed = _parser.parse(datestring, settings, tz)
    return parsed
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _parse_nospaces(datestring, settings, tz=None):
    """Parse *datestring* by delegating to ``_no_spaces_parser.parse``.

    *tz* is accepted so the signature matches ``_parse_absolute`` but it is
    not forwarded or otherwise used.
    """
    parsed = _no_spaces_parser.parse(datestring, settings)
    return parsed
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class _time_parser:
    """Callable that parses a time string by trying a fixed list of formats."""

    # Tried strictly in this order; the first format that parses wins.
    time_directives = [
        "%H:%M:%S",
        "%I:%M:%S %p",
        "%H:%M",
        "%I:%M %p",
        "%I %p",
        "%H:%M:%S.%f",
        "%I:%M:%S.%f %p",
        "%H:%M %p",
    ]

    def __call__(self, timestring):
        """Return the time parsed from *timestring*.

        Surrounding whitespace is ignored. Raises ``ValueError`` when none
        of ``time_directives`` matches.
        """
        candidate = timestring.strip()
        for fmt in self.time_directives:
            try:
                parsed = strptime(candidate, fmt)
            except ValueError:
                continue
            return parsed.time()
        raise ValueError("%s does not seem to be a valid time string" % timestring)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# Module-level _time_parser instance; call it as ``time_parser(timestring)``.
time_parser = _time_parser()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class _no_spaces_parser:
    """Parser for date strings made of digits only (colons are tolerated).

    Parsing is brute force: candidate strptime formats are tried one after
    another, in an order that depends on the configured date order.
    """

    # Every ordering of year/month/day, first with a four-digit year (%Y),
    # then with a two-digit year (%y).
    _dateformats = [
        "%Y%m%d",
        "%Y%d%m",
        "%m%Y%d",
        "%m%d%Y",
        "%d%Y%m",
        "%d%m%Y",
        "%y%m%d",
        "%y%d%m",
        "%m%y%d",
        "%m%d%y",
        "%d%y%m",
        "%d%m%y",
    ]

    # Date+time formats placed ahead of everything else, but only for the
    # "%m%d%y" order (see __init__).
    _preferred_formats = ["%Y%m%d%H%M", "%Y%m%d%H%M%S", "%Y%m%d%H%M%S.%f"]

    # Formats tried, in this order, by _find_best_matching_date().
    _preferred_formats_ordered_8_digit = [
        "%m%d%Y",
        "%d%m%Y",
        "%Y%m%d",
        "%Y%d%m",
        "%m%Y%d",
        "%d%Y%m",
    ]

    # Time-only formats; also appended to every date format in __init__.
    _timeformats = ["%H%M%S.%f", "%H%M%S", "%H%M", "%H"]

    # Period name -> directives whose presence in a format selects that period.
    period = {"day": ["%d", "%H", "%M", "%S"], "month": ["%m"]}

    # Format key used when settings.DATE_ORDER is falsy.
    _default_order = resolve_date_order("MDY")

    def __init__(self, *args, **kwargs):
        """Build the per-order lists of candidate formats.

        Positional and keyword arguments are accepted but ignored.
        """
        # All candidates: date-only, every date+time combination, time-only.
        self._all = (
            self._dateformats
            + [x + y for x in self._dateformats for y in self._timeformats]
            + self._timeformats
        )

        # For each order key, the formats whose lower-cased text starts with
        # the key are moved to the front. Lower-casing makes both %Y and %y
        # variants count as a match; sorted() is stable, so the relative
        # order inside each group is the order of self._all.
        self.date_formats = {
            "%m%d%y": (
                self._preferred_formats
                + sorted(
                    self._all,
                    key=lambda x: x.lower().startswith("%m%d%y"),
                    reverse=True,
                )
            ),
            "%m%y%d": sorted(
                self._all, key=lambda x: x.lower().startswith("%m%y%d"), reverse=True
            ),
            "%y%m%d": sorted(
                self._all, key=lambda x: x.lower().startswith("%y%m%d"), reverse=True
            ),
            "%y%d%m": sorted(
                self._all, key=lambda x: x.lower().startswith("%y%d%m"), reverse=True
            ),
            "%d%m%y": sorted(
                self._all, key=lambda x: x.lower().startswith("%d%m%y"), reverse=True
            ),
            "%d%y%m": sorted(
                self._all, key=lambda x: x.lower().startswith("%d%y%m"), reverse=True
            ),
        }

    @classmethod
    def _get_period(cls, format_string):
        """Return the period name for *format_string*.

        Period names are visited in alphabetical order ("day" before
        "month"); the first one with a directive contained in the format is
        returned. "year" is returned when nothing matches.
        """
        for pname, pdrv in sorted(cls.period.items(), key=lambda x: x[0]):
            for drv in pdrv:
                if drv in format_string:
                    return pname
        else:
            # The loop has no ``break``, so this runs whenever no directive matched.
            return "year"

    @classmethod
    def _find_best_matching_date(cls, datestring):
        """Try the preferred eight-digit formats against *datestring*.

        Returns a ``(parsed_value, period)`` tuple for the first format that
        parses and yields a four-digit year, or ``None`` when none does.
        """
        for fmt in cls._preferred_formats_ordered_8_digit:
            try:
                dt = strptime(datestring, fmt), cls._get_period(fmt)
                if len(str(dt[0].year)) == 4:
                    return dt
            except Exception:
                # Any failure simply means this format does not fit.
                pass
        return None

    @classmethod
    def parse(cls, datestring, settings):
        """Parse a digits-only *datestring* into ``(parsed_value, period)``.

        Raises ``ValueError`` when the string is not eligible for this parser,
        is empty once colons are removed, or matches no candidate format.
        """
        if not no_space_parser_eligibile(datestring):
            raise ValueError("Unable to parse date from: %s" % datestring)

        datestring = datestring.replace(":", "")
        if not datestring:
            raise ValueError("Empty string")
        # NOTE(review): ``tokenizer`` is not defined in the visible part of
        # this file; presumably it is declared further down — confirm.
        tokens = tokenizer(datestring)
        if settings.DATE_ORDER:
            order = resolve_date_order(settings.DATE_ORDER)
        else:
            order = cls._default_order
        # Eight-digit strings get a shortcut through the preferred formats.
        if EIGHT_DIGIT.match(datestring):
            dt = cls._find_best_matching_date(datestring)
            if dt is not None:
                return dt
        nsp = cls()
        ambiguous_date = None
        for token, _ in tokens.tokenize():
            for fmt in nsp.date_formats[order]:
                try:
                    dt = strptime(token, fmt), cls._get_period(fmt)
                    if len(str(dt[0].year)) < 4:
                        # Keep as a fallback (the last such match wins) and
                        # keep looking for a result with a four-digit year.
                        ambiguous_date = dt
                        continue

                    missing = _get_missing_parts(fmt)
                    _check_strict_parsing(missing, settings)
                    return dt
                except Exception:
                    # Swallows parse failures and also the ValueError raised
                    # by _check_strict_parsing(); the next format is tried.
                    pass
        else:
            # The loop has no ``break``, so this runs once every token and
            # format has been tried without returning.
            if ambiguous_date:
                return ambiguous_date
            else:
                raise ValueError("Unable to parse date from: %s" % datestring)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _get_missing_error(missing):
    """Build the error message listing the date fields in *missing*.

    *missing* is an iterable of field-name strings; the names are joined
    with ``", "``.
    """
    template = "Fields missing from the date string: {}"
    field_list = ", ".join(missing)
    return template.format(field_list)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _check_strict_parsing(missing, settings):
    """Raise ``ValueError`` when *missing* violates the strictness settings.

    With ``settings.STRICT_PARSING`` any missing field is an error. Otherwise
    only fields listed in ``settings.REQUIRE_PARTS`` are reported. Returns
    ``None`` when nothing is violated.
    """
    if settings.STRICT_PARSING and missing:
        raise ValueError(_get_missing_error(missing))

    required_parts = settings.REQUIRE_PARTS
    if not (required_parts and missing):
        return

    absent_required = [part for part in required_parts if part in missing]
    if absent_required:
        raise ValueError(_get_missing_error(absent_required))
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class _parser:
|
|
242
|
+
alpha_directives = OrderedDict(
|
|
243
|
+
[
|
|
244
|
+
("weekday", ["%A", "%a"]),
|
|
245
|
+
("month", ["%B", "%b"]),
|
|
246
|
+
]
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
num_directives = {
|
|
250
|
+
"month": ["%m"],
|
|
251
|
+
"day": ["%d"],
|
|
252
|
+
"year": ["%y", "%Y"],
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
def __init__(self, tokens, settings):
    """Classify *tokens* into time, day, month and year components.

    *tokens* is an iterable of ``(text, type)`` pairs. Only tokens whose
    type is ``<= 1`` are examined. The first token recognised as a time
    (possibly merged with a following minute, fractional-second or am/pm
    token) is stored lazily in ``self.time``; every other examined token
    is passed to ``self._parse``.
    """
    self.settings = settings
    self.tokens = [(t[0].strip(), t[1]) for t in list(tokens)]
    # (text, type, index into self.tokens) for the tokens worth examining.
    self.filtered_tokens = [
        (t[0], t[1], i) for i, t in enumerate(self.tokens) if t[1] <= 1
    ]

    self.unset_tokens = []

    self.day = None
    self.month = None
    self.year = None
    self.time = None

    self.auto_order = []

    self._token_day = None
    self._token_month = None
    self._token_year = None
    self._token_time = None

    # Numeric directives arranged according to settings.DATE_ORDER.
    self.ordered_num_directives = OrderedDict(
        (k, self.num_directives[k])
        for k in (resolve_date_order(settings.DATE_ORDER, lst=True))
    )

    # Positions in filtered_tokens already consumed as part of a time.
    skip_index = []
    skip_component = None
    skip_tokens = ["t", "year", "hour", "minute"]

    for index, token_type_original_index in enumerate(self.filtered_tokens):
        if index in skip_index:
            continue

        token, type, original_index = token_type_original_index

        if token in skip_tokens:
            continue

        if self.time is None:
            meridian_index = index + 1

            try:
                # try case where hours and minutes are separated by a period. Example: 13.20.
                _is_before_period = self.tokens[original_index + 1][0] == "."
                _is_after_period = (
                    original_index != 0
                    and self.tokens[original_index - 1][0] == "."
                )

                if _is_before_period and not _is_after_period:
                    index_next_token = index + 1
                    next_token = self.filtered_tokens[index_next_token][0]
                    index_in_tokens_for_next_token = self.filtered_tokens[
                        index_next_token
                    ][2]

                    next_token_is_last = (
                        index_next_token == len(self.filtered_tokens) - 1
                    )
                    # Merge only when the next token is not itself followed
                    # by another period.
                    if (
                        next_token_is_last
                        or self.tokens[index_in_tokens_for_next_token + 1][0] != "."
                    ):
                        new_token = token + ":" + next_token
                        if re.match(HOUR_MINUTE_REGEX, new_token):
                            token = new_token
                            skip_index.append(index + 1)
                            meridian_index += 1
            except Exception:
                # Typically an IndexError past the end of a token list:
                # there is nothing to merge.
                pass

            try:
                microsecond = MICROSECOND.search(
                    self.filtered_tokens[index + 1][0]
                ).group()
                # Is after time token? raise ValueError if ':' can't be found:
                token.index(":")
                # Is after period? raise ValueError if '.' can't be found:
                self.tokens[self.tokens.index((token, 0)) + 1][0].index(".")
            except Exception:
                # Any failed check above means no fractional seconds follow.
                microsecond = None

            if microsecond:
                # The am/pm marker, if any, comes after the fraction token.
                meridian_index += 1

            try:
                meridian = MERIDIAN.search(
                    self.filtered_tokens[meridian_index][0]
                ).group()
            except Exception:
                meridian = None

            if any([":" in token, meridian, microsecond]):
                if meridian and not microsecond:
                    self._token_time = "%s %s" % (token, meridian)
                    skip_index.append(meridian_index)
                elif microsecond and not meridian:
                    self._token_time = "%s.%s" % (token, microsecond)
                    skip_index.append(index + 1)
                elif meridian and microsecond:
                    self._token_time = "%s.%s %s" % (token, microsecond, meridian)
                    skip_index.append(index + 1)
                    skip_index.append(meridian_index)
                else:
                    self._token_time = token
                # Deferred: the time text is only parsed when self.time()
                # is called.
                self.time = lambda: time_parser(self._token_time)
                continue

        # NOTE(review): self._parse is defined outside the visible part of
        # this class; it is iterated here as (attribute_name, value) pairs.
        results = self._parse(type, token, skip_component=skip_component)
        for res in results:
            if len(token) == 4 and res[0] == "year":
                # After a four-character year token, later _parse calls are
                # told to skip the year component.
                skip_component = "year"
            setattr(self, *res)

    known, unknown = get_unresolved_attrs(self)
    params = {}
    for attr in known:
        params.update({attr: getattr(self, attr)})
    # Fill each unresolved field from the unset tokens of type 0; when
    # several qualify, the last one in unset_tokens wins.
    for attr in unknown:
        for token, type, _ in self.unset_tokens:
            if type == 0:
                params.update({attr: int(token)})
                setattr(self, "_token_%s" % attr, token)
                setattr(self, attr, int(token))
|
|
380
|
+
|
|
381
|
+
def _get_period(self):
|
|
382
|
+
if self.settings.RETURN_TIME_AS_PERIOD:
|
|
383
|
+
if getattr(self, "time", None):
|
|
384
|
+
return "time"
|
|
385
|
+
|
|
386
|
+
for period in ["time", "day"]:
|
|
387
|
+
if getattr(self, period, None):
|
|
388
|
+
return "day"
|
|
389
|
+
|
|
390
|
+
for period in ["month", "year"]:
|
|
391
|
+
if getattr(self, period, None):
|
|
392
|
+
return period
|
|
393
|
+
|
|
394
|
+
if self._results():
|
|
395
|
+
return "day"
|
|
396
|
+
|
|
397
|
+
def _get_datetime_obj(self, **params):
|
|
398
|
+
try:
|
|
399
|
+
return datetime(**params)
|
|
400
|
+
except ValueError as e:
|
|
401
|
+
error_text = e.__str__()
|
|
402
|
+
error_msgs = ["day is out of range", "day must be in"]
|
|
403
|
+
if error_msgs[0] in error_text or error_msgs[1] in error_text:
|
|
404
|
+
if not (self._token_day or hasattr(self, "_token_weekday")):
|
|
405
|
+
# if day is not available put last day of the month
|
|
406
|
+
params["day"] = get_last_day_of_month(
|
|
407
|
+
params["year"], params["month"]
|
|
408
|
+
)
|
|
409
|
+
return datetime(**params)
|
|
410
|
+
elif (
|
|
411
|
+
not self._token_year
|
|
412
|
+
and params["day"] == 29
|
|
413
|
+
and params["month"] == 2
|
|
414
|
+
and not calendar.isleap(params["year"])
|
|
415
|
+
):
|
|
416
|
+
# fix the year when year is not present and it is 29 of February
|
|
417
|
+
params["year"] = self._get_correct_leap_year(
|
|
418
|
+
self.settings.PREFER_DATES_FROM, params["year"]
|
|
419
|
+
)
|
|
420
|
+
return datetime(**params)
|
|
421
|
+
raise e
|
|
422
|
+
|
|
423
|
+
def _get_correct_leap_year(self, prefer_dates_from, current_year):
    """Pick a leap year to stand in for *current_year*.

    ``"future"`` selects the next leap year and ``"past"`` the previous one.
    Any other value selects whichever of the two is closer; on a tie the
    previous leap year is returned.
    """
    if prefer_dates_from == "future":
        return get_next_leap_year(current_year)
    if prefer_dates_from == "past":
        return get_previous_leap_year(current_year)

    # Default case ('current_period'): return closer leap year
    following = get_next_leap_year(current_year)
    preceding = get_previous_leap_year(current_year)
    years_ahead = following - current_year
    years_behind = current_year - preceding
    if years_ahead < years_behind:
        return following
    return preceding
|
|
436
|
+
|
|
437
|
+
def _set_relative_base(self):
|
|
438
|
+
self.now = self.settings.RELATIVE_BASE
|
|
439
|
+
if not self.now:
|
|
440
|
+
self.now = datetime.now(tz=timezone.utc).replace(tzinfo=None)
|
|
441
|
+
|
|
442
|
+
def _get_datetime_obj_params(self):
|
|
443
|
+
if not self.now:
|
|
444
|
+
self._set_relative_base()
|
|
445
|
+
|
|
446
|
+
params = {
|
|
447
|
+
"day": self.day or self.now.day,
|
|
448
|
+
"month": self.month or self.now.month,
|
|
449
|
+
"year": self.year or self.now.year,
|
|
450
|
+
"hour": 0,
|
|
451
|
+
"minute": 0,
|
|
452
|
+
"second": 0,
|
|
453
|
+
"microsecond": 0,
|
|
454
|
+
}
|
|
455
|
+
return params
|
|
456
|
+
|
|
457
|
+
def _get_date_obj(self, token, directive):
    """Parse *token* with the single strptime *directive*.

    The result of ``strptime`` is returned unchanged and any exception it
    raises propagates to the caller.
    """
    parsed = strptime(token, directive)
    return parsed
|
|
459
|
+
|
|
460
|
+
def _results(self):
    """Assemble the final ``datetime`` from the parsed components.

    Falsy ``day``/``month``/``year`` attributes are first checked against the
    strictness settings (which may raise ``ValueError``). The relative base
    is then refreshed, the deferred time (if any) is evaluated, and the
    combined fields are handed to ``_get_datetime_obj``.
    """
    absent_fields = []
    for name in ("day", "month", "year"):
        if not getattr(self, name):
            absent_fields.append(name)
    _check_strict_parsing(absent_fields, self.settings)
    self._set_relative_base()

    # self.time is a zero-argument callable when a time token was found.
    parsed_time = None if self.time is None else self.time()
    params = self._get_datetime_obj_params()

    if parsed_time:
        params.update(
            hour=parsed_time.hour,
            minute=parsed_time.minute,
            second=parsed_time.second,
            microsecond=parsed_time.microsecond,
        )

    return self._get_datetime_obj(**params)
|
|
481
|
+
|
|
482
|
+
def _correct_for_time_frame(self, dateobj, tz):
    """Shift ``dateobj`` according to ``settings.PREFER_DATES_FROM``.

    Four independent adjustments are applied, in this order:

    1. Only a weekday token was given (no year/month/day token): move to
       the matching weekday, forwards when the preference contains
       ``"future"``, backwards otherwise.
    2. A month is set but no year: move one year back or forward so the
       result falls on the preferred side of ``self.now``.
    3. The year token has two characters: move by a century so the result
       falls on the preferred side of ``self.now``.
    4. Only a time token was given: move by one day so the result falls on
       the preferred side of ``self.now``.

    :param dateobj: datetime to adjust.
    :param tz: object providing ``utcoffset(dt)``, or a falsy value, in
        which case the timezone is looked up from ``settings.TIMEZONE``.
        Only used by adjustment 4.
    :return: the adjusted datetime, carrying the same ``tzinfo`` that
        ``dateobj`` had after adjustment 1.
    :raises ValueError: re-raised from ``datetime.replace`` in adjustment 2
        unless the date is 29 February.
    """
    days = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]

    token_weekday, _ = getattr(self, "_token_weekday", (None, None))

    # --- 1. weekday-only input ------------------------------------------
    if token_weekday and not (
        self._token_year or self._token_month or self._token_day
    ):
        day_index = calendar.weekday(dateobj.year, dateobj.month, dateobj.day)
        day = token_weekday[:3].lower()
        steps = 0
        if "future" in self.settings.PREFER_DATES_FROM:
            if days[day_index] == day:
                # Same weekday as dateobj: jump a full week ahead.
                steps = 7
            else:
                # Count days forward until the requested weekday is reached.
                while days[day_index] != day:
                    day_index = (day_index + 1) % 7
                    steps += 1
            delta = timedelta(days=steps)
        else:
            if days[day_index] == day:
                # Same weekday: a week back only for an explicit "past"
                # preference; any other preference keeps dateobj as is.
                if self.settings.PREFER_DATES_FROM == "past":
                    steps = 7
                else:
                    steps = 0
            else:
                # Count days backward; negative indices wrap around the
                # end of ``days``, so no modulo is needed here.
                while days[day_index] != day:
                    day_index -= 1
                    steps += 1
            delta = timedelta(days=-steps)

        dateobj = dateobj + delta

    # NOTE: If this assert fires, self.now needs to be made offset-aware in a similar
    # way that dateobj is temporarily made offset-aware.
    assert not (self.now.tzinfo is None and dateobj.tzinfo is not None), (
        "`self.now` doesn't have `tzinfo`. Review comment in code for details."
    )

    # Store the original dateobj values so that upon subsequent parsing everything is not
    # treated as offset-aware if offset awareness is changed.
    original_dateobj = dateobj

    # Since date comparisons must be either offset-naive or offset-aware, normalize dateobj
    # to be offset-aware if one or the other is already offset-aware.
    if self.now.tzinfo is not None and dateobj.tzinfo is None:
        dateobj = pytz.utc.localize(dateobj)

    # --- 2. month given, year missing -----------------------------------
    if self.month and not self.year:
        try:
            if self.now < dateobj:
                if self.settings.PREFER_DATES_FROM == "past":
                    dateobj = dateobj.replace(year=dateobj.year - 1)
            else:
                if self.settings.PREFER_DATES_FROM == "future":
                    dateobj = dateobj.replace(year=dateobj.year + 1)
        except ValueError as e:
            # ``replace`` rejects 29 February in a non-leap year; in that
            # case ask the leap-year helper for a valid year instead.
            if dateobj.day == 29 and dateobj.month == 2:
                valid_year = self._get_correct_leap_year(
                    self.settings.PREFER_DATES_FROM, dateobj.year
                )
                dateobj = dateobj.replace(year=valid_year)
            else:
                raise e

    # --- 3. two-character year token ------------------------------------
    if self._token_year and len(self._token_year[0]) == 2:
        if self.now < dateobj:
            if "past" in self.settings.PREFER_DATES_FROM:
                dateobj = dateobj.replace(year=dateobj.year - 100)
        else:
            if "future" in self.settings.PREFER_DATES_FROM:
                dateobj = dateobj.replace(year=dateobj.year + 100)

    # --- 4. time-only input ---------------------------------------------
    # Note that the weekday check is ``hasattr``: the mere presence of the
    # ``_token_weekday`` attribute disables this adjustment.
    if self._token_time and not any(
        [
            self._token_year,
            self._token_month,
            self._token_day,
            hasattr(self, "_token_weekday"),
        ]
    ):
        # Convert dateobj to utc time to compare with self.now
        try:
            tz = tz or get_timezone_from_tz_string(self.settings.TIMEZONE)
            tz_offset = tz.utcoffset(dateobj)
        except (pytz.UnknownTimeZoneError, pytz.NonExistentTimeError):
            # Fall back to a zero offset when the timezone cannot be used.
            tz_offset = timedelta(hours=0)

        if "past" in self.settings.PREFER_DATES_FROM:
            if self.now < dateobj - tz_offset:
                dateobj = dateobj + timedelta(days=-1)
        if "future" in self.settings.PREFER_DATES_FROM:
            if self.now > dateobj - tz_offset:
                dateobj = dateobj + timedelta(days=1)

    # Reset dateobj to the original value, thus removing any offset awareness that may
    # have been set earlier.
    dateobj = dateobj.replace(tzinfo=original_dateobj.tzinfo)

    return dateobj
|
|
582
|
+
|
|
583
|
+
def _correct_for_day(self, dateobj):
|
|
584
|
+
if (
|
|
585
|
+
getattr(self, "_token_day", None)
|
|
586
|
+
or getattr(self, "_token_weekday", None)
|
|
587
|
+
or getattr(self, "_token_time", None)
|
|
588
|
+
):
|
|
589
|
+
return dateobj
|
|
590
|
+
|
|
591
|
+
dateobj = set_correct_day_from_settings(
|
|
592
|
+
dateobj, self.settings, current_day=self.now.day
|
|
593
|
+
)
|
|
594
|
+
return dateobj
|
|
595
|
+
|
|
596
|
+
def _correct_for_month(self, dateobj):
|
|
597
|
+
relative_base = getattr(self.settings, "RELATIVE_BASE", None)
|
|
598
|
+
relative_base_month = (
|
|
599
|
+
relative_base.month if hasattr(relative_base, "month") else relative_base
|
|
600
|
+
)
|
|
601
|
+
|
|
602
|
+
if getattr(self, "_token_month", None):
|
|
603
|
+
return dateobj
|
|
604
|
+
|
|
605
|
+
dateobj = set_correct_month_from_settings(
|
|
606
|
+
dateobj, self.settings, relative_base_month
|
|
607
|
+
)
|
|
608
|
+
return dateobj
|
|
609
|
+
|
|
610
|
+
@classmethod
def parse(cls, datestring, settings, tz=None):
    """Parse ``datestring`` and return ``(datetime, period)``.

    The string is tokenized, a parser instance is built from the tokens,
    and the raw result is passed through the preference corrections in a
    fixed order: time frame, then month, then day.

    :param datestring: text to parse.
    :param settings: settings object handed to the parser instance.
    :param tz: optional timezone forwarded to the time-frame correction.
    :return: tuple of the corrected date object and the value of
        ``_get_period()``.
    """
    parser = cls(tokenizer(datestring).tokenize(), settings)
    result = parser._results()

    # Past/future preference first.
    result = parser._correct_for_time_frame(result, tz)

    # Month preference must run before the day preference so that the
    # day is derived from the correct month.
    result = parser._correct_for_month(result)

    # Day preference last.
    result = parser._correct_for_day(result)

    return result, parser._get_period()
|
|
629
|
+
|
|
630
|
+
def _parse(self, type, token, skip_component=None):
    """Interpret one token as a date component.

    :param type: token kind; ``0`` dispatches to the numeric handler and
        ``1`` to the alphabetic handler. Any other value raises
        ``KeyError`` from the dispatch table.
    :param token: token text to interpret.
    :param skip_component: name of a component that must not be tried.
    :return: list of ``(component, value)`` tuples.
    :raises ValueError: ``"Unable to parse: <token>"`` when no directive of
        any candidate component yields a result.
    """

    def set_and_return(token, type, component, dateobj, skip_date_order=False):
        # Record which token produced ``component`` (as ``_token_<component>``)
        # and, unless told otherwise, remember the order in which components
        # were recognised. Returns the single (component, value) pair.
        if not skip_date_order:
            self.auto_order.append(component)
        setattr(self, "_token_%s" % component, (token, type))
        return [(component, getattr(dateobj, component))]

    def parse_number(token, skip_component=None):
        # Try the token against every numeric directive, component by
        # component, in the order given by ``self.ordered_num_directives``.
        type = 0

        for component, directives in self.ordered_num_directives.items():
            if skip_component == component:
                continue
            for directive in directives:
                try:
                    do = self._get_date_obj(token, directive)
                    prev_value = getattr(self, component, None)
                    if not prev_value:
                        # Component still free: this token claims it.
                        return set_and_return(token, type, component, do)
                    else:
                        # Component already taken. Re-check the earlier token
                        # against the current directive; if it does not fit,
                        # queue it in ``unset_tokens`` and let the new token
                        # take over the component. If it does fit, fall
                        # through and try the next directive.
                        try:
                            prev_token, prev_type = getattr(
                                self, "_token_%s" % component
                            )
                            if prev_type == type:
                                do = self._get_date_obj(prev_token, directive)
                        except ValueError:
                            self.unset_tokens.append(
                                (prev_token, prev_type, component)
                            )
                            return set_and_return(token, type, component, do)
                except ValueError:
                    # Token does not match this directive; try the next one.
                    pass
        else:
            # The loop has no ``break``, so this runs whenever every
            # component was tried without returning.
            raise ValueError("Unable to parse: %s" % token)

    def parse_alpha(token, skip_component=None):
        # Try the token against every alphabetic directive in
        # ``self.alpha_directives``.
        type = 1

        for component, directives in self.alpha_directives.items():
            if skip_component == component:
                continue
            for directive in directives:
                try:
                    do = self._get_date_obj(token, directive)
                    prev_value = getattr(self, component, None)
                    if not prev_value:
                        # First hit for this component; it is deliberately
                        # not added to ``auto_order``.
                        return set_and_return(
                            token, type, component, do, skip_date_order=True
                        )
                    elif component == "month":
                        # A month is already set but this token is a month
                        # too: reinterpret the earlier month value as the
                        # day and let this token become the month.
                        index = self.auto_order.index("month")
                        self.auto_order[index] = "day"
                        setattr(self, "_token_day", self._token_month)
                        setattr(self, "_token_month", (token, type))
                        return [
                            (component, getattr(do, component)),
                            ("day", prev_value),
                        ]
                # NOTE(review): this swallows every exception, including the
                # ValueError that ``auto_order.index("month")`` raises when
                # "month" is absent -- confirm that this is intended.
                except Exception:
                    pass
        else:
            # No ``break`` in the loop: reached when nothing matched.
            raise ValueError("Unable to parse: %s" % token)

    handlers = {0: parse_number, 1: parse_alpha}
    return handlers[type](token, skip_component)
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
class tokenizer:
    """Split a date string into runs of digits, letters and other characters.

    Each run is yielded as ``(text, kind)`` where ``kind`` is ``0`` for
    digit runs (``:`` counts as a digit so that ``10:30`` stays together),
    ``1`` for ASCII-letter runs and ``2`` for everything else.
    """

    digits = "0123456789:"
    letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    def _isletter(self, tkn):
        """Return True if ``tkn`` occurs in :attr:`letters`."""
        return tkn in self.letters

    def _isdigit(self, tkn):
        """Return True if ``tkn`` occurs in :attr:`digits`."""
        return tkn in self.digits

    def __init__(self, ds):
        """Wrap the date string ``ds`` in a character stream."""
        self.instream = StringIO(ds)

    def _switch(self, chara, charb):
        """Classify ``chara`` and tell whether ``charb`` starts a new run.

        :return: ``(kind, boundary)`` where ``kind`` is the class of
            ``chara`` (0 digit, 1 letter, 2 other) and ``boundary`` is True
            when ``charb`` belongs to a different run than ``chara``.
        """
        if self._isdigit(chara):
            return 0, not self._isdigit(charb)

        if self._isletter(chara):
            return 1, not self._isletter(charb)

        # Runs of "other" characters end as soon as a digit or letter shows up.
        return 2, self._isdigit(charb) or self._isletter(charb)

    def tokenize(self):
        """Yield ``(text, kind)`` pairs for the wrapped string.

        An empty input yields nothing. The previous implementation indexed
        ``token[-1]`` on an empty token in that case and raised
        ``IndexError``.
        """
        token = ""

        while True:
            nextchar = self.instream.read(1)

            if not nextchar:
                # End of input: flush the pending run, if there is one.
                if token:
                    kind, _ = self._switch(token[-1], nextchar)
                    yield token, kind
                return

            if not token:
                # First character of the stream starts the first run.
                token = nextchar
                continue

            kind, boundary = self._switch(token[-1], nextchar)
            if boundary:
                yield token, kind
                token = nextchar
            else:
                token += nextchar
|