dateparser 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. dateparser/__init__.py +82 -0
  2. dateparser/calendars/__init__.py +144 -0
  3. dateparser/calendars/hijri.py +6 -0
  4. dateparser/calendars/hijri_parser.py +60 -0
  5. dateparser/calendars/jalali.py +9 -0
  6. dateparser/calendars/jalali_parser.py +184 -0
  7. dateparser/conf.py +267 -0
  8. dateparser/custom_language_detection/__init__.py +0 -0
  9. dateparser/custom_language_detection/fasttext.py +43 -0
  10. dateparser/custom_language_detection/langdetect.py +37 -0
  11. dateparser/custom_language_detection/language_mapping.py +18 -0
  12. dateparser/data/__init__.py +2 -0
  13. dateparser/data/date_translation_data/__init__.py +0 -0
  14. dateparser/data/date_translation_data/af.py +242 -0
  15. dateparser/data/date_translation_data/agq.py +169 -0
  16. dateparser/data/date_translation_data/ak.py +169 -0
  17. dateparser/data/date_translation_data/am.py +222 -0
  18. dateparser/data/date_translation_data/ar.py +574 -0
  19. dateparser/data/date_translation_data/as.py +164 -0
  20. dateparser/data/date_translation_data/asa.py +168 -0
  21. dateparser/data/date_translation_data/ast.py +280 -0
  22. dateparser/data/date_translation_data/az-Cyrl.py +168 -0
  23. dateparser/data/date_translation_data/az-Latn.py +217 -0
  24. dateparser/data/date_translation_data/az.py +217 -0
  25. dateparser/data/date_translation_data/bas.py +169 -0
  26. dateparser/data/date_translation_data/be.py +340 -0
  27. dateparser/data/date_translation_data/bem.py +161 -0
  28. dateparser/data/date_translation_data/bez.py +169 -0
  29. dateparser/data/date_translation_data/bg.py +345 -0
  30. dateparser/data/date_translation_data/bm.py +167 -0
  31. dateparser/data/date_translation_data/bn.py +241 -0
  32. dateparser/data/date_translation_data/bo.py +185 -0
  33. dateparser/data/date_translation_data/br.py +226 -0
  34. dateparser/data/date_translation_data/brx.py +157 -0
  35. dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
  36. dateparser/data/date_translation_data/bs-Latn.py +248 -0
  37. dateparser/data/date_translation_data/bs.py +248 -0
  38. dateparser/data/date_translation_data/ca.py +313 -0
  39. dateparser/data/date_translation_data/ce.py +225 -0
  40. dateparser/data/date_translation_data/cgg.py +169 -0
  41. dateparser/data/date_translation_data/chr.py +240 -0
  42. dateparser/data/date_translation_data/ckb.py +154 -0
  43. dateparser/data/date_translation_data/cs.py +316 -0
  44. dateparser/data/date_translation_data/cy.py +217 -0
  45. dateparser/data/date_translation_data/da.py +296 -0
  46. dateparser/data/date_translation_data/dav.py +169 -0
  47. dateparser/data/date_translation_data/de.py +357 -0
  48. dateparser/data/date_translation_data/dje.py +167 -0
  49. dateparser/data/date_translation_data/dsb.py +270 -0
  50. dateparser/data/date_translation_data/dua.py +169 -0
  51. dateparser/data/date_translation_data/dyo.py +168 -0
  52. dateparser/data/date_translation_data/dz.py +225 -0
  53. dateparser/data/date_translation_data/ebu.py +169 -0
  54. dateparser/data/date_translation_data/ee.py +233 -0
  55. dateparser/data/date_translation_data/el.py +279 -0
  56. dateparser/data/date_translation_data/en.py +851 -0
  57. dateparser/data/date_translation_data/eo.py +169 -0
  58. dateparser/data/date_translation_data/es.py +499 -0
  59. dateparser/data/date_translation_data/et.py +233 -0
  60. dateparser/data/date_translation_data/eu.py +219 -0
  61. dateparser/data/date_translation_data/ewo.py +169 -0
  62. dateparser/data/date_translation_data/fa.py +270 -0
  63. dateparser/data/date_translation_data/ff.py +179 -0
  64. dateparser/data/date_translation_data/fi.py +345 -0
  65. dateparser/data/date_translation_data/fil.py +223 -0
  66. dateparser/data/date_translation_data/fo.py +256 -0
  67. dateparser/data/date_translation_data/fr.py +520 -0
  68. dateparser/data/date_translation_data/fur.py +223 -0
  69. dateparser/data/date_translation_data/fy.py +223 -0
  70. dateparser/data/date_translation_data/ga.py +238 -0
  71. dateparser/data/date_translation_data/gd.py +277 -0
  72. dateparser/data/date_translation_data/gl.py +253 -0
  73. dateparser/data/date_translation_data/gsw.py +179 -0
  74. dateparser/data/date_translation_data/gu.py +216 -0
  75. dateparser/data/date_translation_data/guz.py +170 -0
  76. dateparser/data/date_translation_data/gv.py +166 -0
  77. dateparser/data/date_translation_data/ha.py +176 -0
  78. dateparser/data/date_translation_data/haw.py +168 -0
  79. dateparser/data/date_translation_data/he.py +371 -0
  80. dateparser/data/date_translation_data/hi.py +261 -0
  81. dateparser/data/date_translation_data/hr.py +378 -0
  82. dateparser/data/date_translation_data/hsb.py +271 -0
  83. dateparser/data/date_translation_data/hu.py +297 -0
  84. dateparser/data/date_translation_data/hy.py +246 -0
  85. dateparser/data/date_translation_data/id.py +272 -0
  86. dateparser/data/date_translation_data/ig.py +168 -0
  87. dateparser/data/date_translation_data/ii.py +157 -0
  88. dateparser/data/date_translation_data/is.py +242 -0
  89. dateparser/data/date_translation_data/it.py +282 -0
  90. dateparser/data/date_translation_data/ja.py +286 -0
  91. dateparser/data/date_translation_data/jgo.py +188 -0
  92. dateparser/data/date_translation_data/jmc.py +168 -0
  93. dateparser/data/date_translation_data/ka.py +241 -0
  94. dateparser/data/date_translation_data/kab.py +169 -0
  95. dateparser/data/date_translation_data/kam.py +169 -0
  96. dateparser/data/date_translation_data/kde.py +169 -0
  97. dateparser/data/date_translation_data/kea.py +230 -0
  98. dateparser/data/date_translation_data/khq.py +167 -0
  99. dateparser/data/date_translation_data/ki.py +169 -0
  100. dateparser/data/date_translation_data/kk.py +228 -0
  101. dateparser/data/date_translation_data/kl.py +213 -0
  102. dateparser/data/date_translation_data/kln.py +171 -0
  103. dateparser/data/date_translation_data/km.py +198 -0
  104. dateparser/data/date_translation_data/kn.py +225 -0
  105. dateparser/data/date_translation_data/ko.py +207 -0
  106. dateparser/data/date_translation_data/kok.py +157 -0
  107. dateparser/data/date_translation_data/ks.py +152 -0
  108. dateparser/data/date_translation_data/ksb.py +168 -0
  109. dateparser/data/date_translation_data/ksf.py +169 -0
  110. dateparser/data/date_translation_data/ksh.py +192 -0
  111. dateparser/data/date_translation_data/kw.py +169 -0
  112. dateparser/data/date_translation_data/ky.py +240 -0
  113. dateparser/data/date_translation_data/lag.py +169 -0
  114. dateparser/data/date_translation_data/lb.py +233 -0
  115. dateparser/data/date_translation_data/lg.py +169 -0
  116. dateparser/data/date_translation_data/lkt.py +194 -0
  117. dateparser/data/date_translation_data/ln.py +179 -0
  118. dateparser/data/date_translation_data/lo.py +228 -0
  119. dateparser/data/date_translation_data/lrc.py +154 -0
  120. dateparser/data/date_translation_data/lt.py +263 -0
  121. dateparser/data/date_translation_data/lu.py +169 -0
  122. dateparser/data/date_translation_data/luo.py +169 -0
  123. dateparser/data/date_translation_data/luy.py +168 -0
  124. dateparser/data/date_translation_data/lv.py +257 -0
  125. dateparser/data/date_translation_data/mas.py +173 -0
  126. dateparser/data/date_translation_data/mer.py +168 -0
  127. dateparser/data/date_translation_data/mfe.py +166 -0
  128. dateparser/data/date_translation_data/mg.py +168 -0
  129. dateparser/data/date_translation_data/mgh.py +169 -0
  130. dateparser/data/date_translation_data/mgo.py +151 -0
  131. dateparser/data/date_translation_data/mk.py +234 -0
  132. dateparser/data/date_translation_data/ml.py +217 -0
  133. dateparser/data/date_translation_data/mn.py +224 -0
  134. dateparser/data/date_translation_data/mr.py +229 -0
  135. dateparser/data/date_translation_data/ms.py +242 -0
  136. dateparser/data/date_translation_data/mt.py +175 -0
  137. dateparser/data/date_translation_data/mua.py +169 -0
  138. dateparser/data/date_translation_data/my.py +203 -0
  139. dateparser/data/date_translation_data/mzn.py +199 -0
  140. dateparser/data/date_translation_data/naq.py +169 -0
  141. dateparser/data/date_translation_data/nb.py +261 -0
  142. dateparser/data/date_translation_data/nd.py +169 -0
  143. dateparser/data/date_translation_data/ne.py +207 -0
  144. dateparser/data/date_translation_data/nl.py +273 -0
  145. dateparser/data/date_translation_data/nmg.py +169 -0
  146. dateparser/data/date_translation_data/nn.py +231 -0
  147. dateparser/data/date_translation_data/nnh.py +150 -0
  148. dateparser/data/date_translation_data/nus.py +166 -0
  149. dateparser/data/date_translation_data/nyn.py +169 -0
  150. dateparser/data/date_translation_data/om.py +173 -0
  151. dateparser/data/date_translation_data/or.py +157 -0
  152. dateparser/data/date_translation_data/os.py +203 -0
  153. dateparser/data/date_translation_data/pa-Arab.py +150 -0
  154. dateparser/data/date_translation_data/pa-Guru.py +221 -0
  155. dateparser/data/date_translation_data/pa.py +221 -0
  156. dateparser/data/date_translation_data/pl.py +416 -0
  157. dateparser/data/date_translation_data/ps.py +150 -0
  158. dateparser/data/date_translation_data/pt.py +981 -0
  159. dateparser/data/date_translation_data/qu.py +176 -0
  160. dateparser/data/date_translation_data/rm.py +166 -0
  161. dateparser/data/date_translation_data/rn.py +169 -0
  162. dateparser/data/date_translation_data/ro.py +270 -0
  163. dateparser/data/date_translation_data/rof.py +157 -0
  164. dateparser/data/date_translation_data/ru.py +442 -0
  165. dateparser/data/date_translation_data/rw.py +169 -0
  166. dateparser/data/date_translation_data/rwk.py +168 -0
  167. dateparser/data/date_translation_data/sah.py +219 -0
  168. dateparser/data/date_translation_data/saq.py +169 -0
  169. dateparser/data/date_translation_data/sbp.py +169 -0
  170. dateparser/data/date_translation_data/se.py +280 -0
  171. dateparser/data/date_translation_data/seh.py +169 -0
  172. dateparser/data/date_translation_data/ses.py +167 -0
  173. dateparser/data/date_translation_data/sg.py +169 -0
  174. dateparser/data/date_translation_data/shi-Latn.py +169 -0
  175. dateparser/data/date_translation_data/shi-Tfng.py +169 -0
  176. dateparser/data/date_translation_data/shi.py +169 -0
  177. dateparser/data/date_translation_data/si.py +220 -0
  178. dateparser/data/date_translation_data/sk.py +327 -0
  179. dateparser/data/date_translation_data/sl.py +244 -0
  180. dateparser/data/date_translation_data/smn.py +176 -0
  181. dateparser/data/date_translation_data/sn.py +169 -0
  182. dateparser/data/date_translation_data/so.py +179 -0
  183. dateparser/data/date_translation_data/sq.py +237 -0
  184. dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
  185. dateparser/data/date_translation_data/sr-Latn.py +306 -0
  186. dateparser/data/date_translation_data/sr.py +255 -0
  187. dateparser/data/date_translation_data/sv.py +309 -0
  188. dateparser/data/date_translation_data/sw.py +231 -0
  189. dateparser/data/date_translation_data/ta.py +264 -0
  190. dateparser/data/date_translation_data/te.py +239 -0
  191. dateparser/data/date_translation_data/teo.py +173 -0
  192. dateparser/data/date_translation_data/th.py +300 -0
  193. dateparser/data/date_translation_data/ti.py +173 -0
  194. dateparser/data/date_translation_data/tl.py +137 -0
  195. dateparser/data/date_translation_data/to.py +216 -0
  196. dateparser/data/date_translation_data/tr.py +259 -0
  197. dateparser/data/date_translation_data/twq.py +167 -0
  198. dateparser/data/date_translation_data/tzm.py +169 -0
  199. dateparser/data/date_translation_data/ug.py +203 -0
  200. dateparser/data/date_translation_data/uk.py +502 -0
  201. dateparser/data/date_translation_data/ur.py +256 -0
  202. dateparser/data/date_translation_data/uz-Arab.py +167 -0
  203. dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
  204. dateparser/data/date_translation_data/uz-Latn.py +216 -0
  205. dateparser/data/date_translation_data/uz.py +216 -0
  206. dateparser/data/date_translation_data/vi.py +260 -0
  207. dateparser/data/date_translation_data/vun.py +168 -0
  208. dateparser/data/date_translation_data/wae.py +224 -0
  209. dateparser/data/date_translation_data/xog.py +169 -0
  210. dateparser/data/date_translation_data/yav.py +169 -0
  211. dateparser/data/date_translation_data/yi.py +178 -0
  212. dateparser/data/date_translation_data/yo.py +263 -0
  213. dateparser/data/date_translation_data/yue.py +203 -0
  214. dateparser/data/date_translation_data/zgh.py +169 -0
  215. dateparser/data/date_translation_data/zh-Hans.py +240 -0
  216. dateparser/data/date_translation_data/zh-Hant.py +402 -0
  217. dateparser/data/date_translation_data/zh.py +273 -0
  218. dateparser/data/date_translation_data/zu.py +196 -0
  219. dateparser/data/languages_info.py +826 -0
  220. dateparser/date.py +599 -0
  221. dateparser/date_parser.py +55 -0
  222. dateparser/freshness_date_parser.py +156 -0
  223. dateparser/languages/__init__.py +2 -0
  224. dateparser/languages/dictionary.py +352 -0
  225. dateparser/languages/loader.py +224 -0
  226. dateparser/languages/locale.py +625 -0
  227. dateparser/languages/validation.py +467 -0
  228. dateparser/parser.py +742 -0
  229. dateparser/search/__init__.py +71 -0
  230. dateparser/search/detection.py +78 -0
  231. dateparser/search/search.py +297 -0
  232. dateparser/search/text_detection.py +89 -0
  233. dateparser/timezone_parser.py +91 -0
  234. dateparser/timezones.py +469 -0
  235. dateparser/utils/__init__.py +257 -0
  236. dateparser/utils/strptime.py +108 -0
  237. dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
  238. dateparser-1.2.1.dist-info/LICENSE +12 -0
  239. dateparser-1.2.1.dist-info/METADATA +864 -0
  240. dateparser-1.2.1.dist-info/RECORD +256 -0
  241. dateparser-1.2.1.dist-info/WHEEL +5 -0
  242. dateparser-1.2.1.dist-info/entry_points.txt +2 -0
  243. dateparser-1.2.1.dist-info/top_level.txt +4 -0
  244. dateparser_cli/__init__.py +0 -0
  245. dateparser_cli/cli.py +36 -0
  246. dateparser_cli/exceptions.py +2 -0
  247. dateparser_cli/fasttext_manager.py +42 -0
  248. dateparser_cli/utils.py +27 -0
  249. dateparser_data/__init__.py +0 -0
  250. dateparser_data/settings.py +33 -0
  251. dateparser_scripts/__init__.py +0 -0
  252. dateparser_scripts/get_cldr_data.py +567 -0
  253. dateparser_scripts/order_languages.py +217 -0
  254. dateparser_scripts/update_supported_languages_and_locales.py +48 -0
  255. dateparser_scripts/utils.py +73 -0
  256. dateparser_scripts/write_complete_data.py +129 -0
dateparser/parser.py ADDED
@@ -0,0 +1,742 @@
1
+ import calendar
2
+ from collections import OrderedDict
3
+ from datetime import datetime, timedelta, timezone
4
+ from io import StringIO
5
+
6
+ import pytz
7
+ import regex as re
8
+
9
+ from dateparser.utils import (
10
+ _get_missing_parts,
11
+ get_last_day_of_month,
12
+ get_next_leap_year,
13
+ get_previous_leap_year,
14
+ get_timezone_from_tz_string,
15
+ set_correct_day_from_settings,
16
+ set_correct_month_from_settings,
17
+ )
18
+ from dateparser.utils.strptime import strptime
19
+
20
# Matches a run of one or more non-digit characters; used by
# no_space_parser_eligibile() to inspect the first such run in a date string.
NSP_COMPATIBLE = re.compile(r"\D+")
# Matches the meridian markers "am" or "pm" anywhere in a token.
MERIDIAN = re.compile(r"am|pm")
# Matches one to six consecutive digits (candidate fractional-second value).
MICROSECOND = re.compile(r"\d{1,6}")
# Matches a string consisting of exactly eight digits.
EIGHT_DIGIT = re.compile(r"^\d{8}$")
# Matches "H:MM" or "HH:MM" with an hour of 0-23 and a minute of 00-59.
HOUR_MINUTE_REGEX = re.compile(r"^([0-9]|0[0-9]|1[0-9]|2[0-3]):[0-5][0-9]$")
25
+
26
+
27
def no_space_parser_eligibile(datestring):
    """Tell whether *datestring* may be handed to the no-spaces parser.

    Only the first run of non-digit characters is inspected: the string is
    eligible when there is no such run at all, or when that run is exactly
    a single colon.
    """
    first_non_digit_run = NSP_COMPATIBLE.search(datestring)
    return first_non_digit_run is None or first_non_digit_run.group() == ":"
32
+
33
+
34
def get_unresolved_attrs(parser_object):
    """Split ``year``/``month``/``day`` into resolved and unresolved names.

    An attribute counts as resolved when it exists on *parser_object* and is
    not ``None``.

    Returns:
        A ``(seen, unseen)`` pair of lists, each keeping the year, month,
        day ordering.
    """
    seen, unseen = [], []
    for name in ("year", "month", "day"):
        bucket = unseen if getattr(parser_object, name, None) is None else seen
        bucket.append(name)
    return seen, unseen
44
+
45
+
46
# Maps a DATE_ORDER code to the matching concatenated two-digit-year format.
date_order_chart = {
    "DMY": "%d%m%y",
    "DYM": "%d%y%m",
    "MDY": "%m%d%y",
    "MYD": "%m%y%d",
    "YDM": "%y%d%m",
    "YMD": "%y%m%d",
}


def resolve_date_order(order, lst=None):
    """Translate a date-order code such as ``"MDY"``.

    Args:
        order: One of the six three-letter order codes.
        lst: When truthy, return the component names as a list; otherwise
            return the format string from ``date_order_chart``.

    Raises:
        KeyError: If *order* is not a known code.
    """
    if not lst:
        return date_order_chart[order]

    # Built per call so every caller receives its own fresh list.
    components_by_order = {
        "DMY": ["day", "month", "year"],
        "DYM": ["day", "year", "month"],
        "MDY": ["month", "day", "year"],
        "MYD": ["month", "year", "day"],
        "YDM": ["year", "day", "month"],
        "YMD": ["year", "month", "day"],
    }
    return components_by_order[order]
67
+
68
+
69
def _parse_absolute(datestring, settings, tz=None):
    """Parse *datestring* with ``_parser`` and return whatever it returns."""
    parsed = _parser.parse(datestring, settings, tz)
    return parsed
71
+
72
+
73
def _parse_nospaces(datestring, settings, tz=None):
    """Parse *datestring* with ``_no_spaces_parser``.

    *tz* is accepted so the signature matches ``_parse_absolute``, but it is
    not forwarded to the parser.
    """
    parsed = _no_spaces_parser.parse(datestring, settings)
    return parsed
75
+
76
+
77
class _time_parser:
    """Callable that parses a time string by trying a fixed list of formats."""

    # Formats are attempted in this order; the first one that parses wins.
    time_directives = [
        "%H:%M:%S",
        "%I:%M:%S %p",
        "%H:%M",
        "%I:%M %p",
        "%I %p",
        "%H:%M:%S.%f",
        "%I:%M:%S.%f %p",
        "%H:%M %p",
    ]

    def __call__(self, timestring):
        """Return the time parsed from *timestring*.

        Surrounding whitespace is ignored.

        Raises:
            ValueError: If none of ``time_directives`` matches.
        """
        candidate = timestring.strip()
        for fmt in self.time_directives:
            try:
                return strptime(candidate, fmt).time()
            except ValueError:
                continue
        raise ValueError("%s does not seem to be a valid time string" % timestring)


# Shared module-level instance used as a plain function.
time_parser = _time_parser()
101
+
102
+
103
class _no_spaces_parser:
    """Parser for date strings written as digits without separators."""

    # Date-only formats: every ordering of year/month/day, first with a
    # four-digit year (%Y) and then with a two-digit year (%y).
    _dateformats = [
        "%Y%m%d",
        "%Y%d%m",
        "%m%Y%d",
        "%m%d%Y",
        "%d%Y%m",
        "%d%m%Y",
        "%y%m%d",
        "%y%d%m",
        "%m%y%d",
        "%m%d%y",
        "%d%y%m",
        "%d%m%y",
    ]

    # Formats placed ahead of everything else for the "%m%d%y" order only
    # (see __init__).
    _preferred_formats = ["%Y%m%d%H%M", "%Y%m%d%H%M%S", "%Y%m%d%H%M%S.%f"]

    # Formats tried, in this order, for strings of exactly eight digits.
    _preferred_formats_ordered_8_digit = [
        "%m%d%Y",
        "%d%m%Y",
        "%Y%m%d",
        "%Y%d%m",
        "%m%Y%d",
        "%d%Y%m",
    ]

    _timeformats = ["%H%M%S.%f", "%H%M%S", "%H%M", "%H"]

    # Directives whose presence in a format string implies the given period.
    period = {"day": ["%d", "%H", "%M", "%S"], "month": ["%m"]}

    # Order used by parse() when settings.DATE_ORDER is falsy.
    _default_order = resolve_date_order("MDY")

    def __init__(self, *args, **kwargs):
        """Build the per-order candidate format lists.

        Positional and keyword arguments are accepted but unused.
        """
        # Every date format, every date+time combination, then time-only formats.
        self._all = (
            self._dateformats
            + [x + y for x in self._dateformats for y in self._timeformats]
            + self._timeformats
        )

        # For each order key, formats beginning with that order (ignoring the
        # case of the directive, so %Y matches %y) are moved to the front;
        # the sort key is a boolean and reverse=True puts True first.
        self.date_formats = {
            "%m%d%y": (
                self._preferred_formats
                + sorted(
                    self._all,
                    key=lambda x: x.lower().startswith("%m%d%y"),
                    reverse=True,
                )
            ),
            "%m%y%d": sorted(
                self._all, key=lambda x: x.lower().startswith("%m%y%d"), reverse=True
            ),
            "%y%m%d": sorted(
                self._all, key=lambda x: x.lower().startswith("%y%m%d"), reverse=True
            ),
            "%y%d%m": sorted(
                self._all, key=lambda x: x.lower().startswith("%y%d%m"), reverse=True
            ),
            "%d%m%y": sorted(
                self._all, key=lambda x: x.lower().startswith("%d%m%y"), reverse=True
            ),
            "%d%y%m": sorted(
                self._all, key=lambda x: x.lower().startswith("%d%y%m"), reverse=True
            ),
        }

    @classmethod
    def _get_period(cls, format_string):
        """Return the period name implied by *format_string*.

        Period names are checked in alphabetical order ("day" before
        "month"); the first one with a directive contained in
        *format_string* is returned, and "year" is returned when none match.
        """
        for pname, pdrv in sorted(cls.period.items(), key=lambda x: x[0]):
            for drv in pdrv:
                if drv in format_string:
                    return pname
        else:
            # NOTE(review): this else is taken to belong to the outer loop,
            # i.e. it runs only after every period has been checked - confirm.
            return "year"

    @classmethod
    def _find_best_matching_date(cls, datestring):
        """Try the eight-digit formats in order against *datestring*.

        Returns:
            A ``(parsed, period)`` tuple for the first format that parses
            with a four-digit year, or ``None`` when no format qualifies.
        """
        for fmt in cls._preferred_formats_ordered_8_digit:
            try:
                dt = strptime(datestring, fmt), cls._get_period(fmt)
                if len(str(dt[0].year)) == 4:
                    return dt
            except Exception:
                # Any failure just means this format does not fit; try the next.
                pass
        return None

    @classmethod
    def parse(cls, datestring, settings):
        """Parse a separator-free date string.

        Args:
            datestring: Text to parse; colons are removed before parsing.
            settings: Object providing ``DATE_ORDER`` here, and passed on to
                ``_check_strict_parsing``.

        Returns:
            A ``(parsed, period)`` tuple.

        Raises:
            ValueError: If the string is not eligible, is empty once colons
                are removed, or matches no candidate format.
        """
        if not no_space_parser_eligibile(datestring):
            raise ValueError("Unable to parse date from: %s" % datestring)

        datestring = datestring.replace(":", "")
        if not datestring:
            raise ValueError("Empty string")
        tokens = tokenizer(datestring)
        if settings.DATE_ORDER:
            order = resolve_date_order(settings.DATE_ORDER)
        else:
            order = cls._default_order
        if EIGHT_DIGIT.match(datestring):
            # Eight-digit strings get a dedicated, fixed-order attempt first.
            dt = cls._find_best_matching_date(datestring)
            if dt is not None:
                return dt
        nsp = cls()
        ambiguous_date = None
        for token, _ in tokens.tokenize():
            for fmt in nsp.date_formats[order]:
                try:
                    dt = strptime(token, fmt), cls._get_period(fmt)
                    if len(str(dt[0].year)) < 4:
                        # Remember a result with a short year as a fallback
                        # and keep looking for a better match.
                        ambiguous_date = dt
                        continue

                    missing = _get_missing_parts(fmt)
                    _check_strict_parsing(missing, settings)
                    return dt
                except Exception:
                    # Covers both a format mismatch and a strict-parsing
                    # failure; either way the next format is tried.
                    pass
        else:
            # NOTE(review): this else is taken to belong to the outer token
            # loop, i.e. it runs once all tokens are exhausted - confirm.
            if ambiguous_date:
                return ambiguous_date
            else:
                raise ValueError("Unable to parse date from: %s" % datestring)
226
+
227
+
228
+ def _get_missing_error(missing):
229
+ return "Fields missing from the date string: {}".format(", ".join(missing))
230
+
231
+
232
+ def _check_strict_parsing(missing, settings):
233
+ if settings.STRICT_PARSING and missing:
234
+ raise ValueError(_get_missing_error(missing))
235
+ elif settings.REQUIRE_PARTS and missing:
236
+ errors = [part for part in settings.REQUIRE_PARTS if part in missing]
237
+ if errors:
238
+ raise ValueError(_get_missing_error(errors))
239
+
240
+
241
+ class _parser:
242
# Format directives keyed by alphabetic date component; insertion order
# (weekday, then month) is preserved by the OrderedDict.
alpha_directives = OrderedDict(
    [
        ("weekday", ["%A", "%a"]),
        ("month", ["%B", "%b"]),
    ]
)

# Format directives keyed by numeric date component. __init__ re-orders
# these according to settings.DATE_ORDER into `ordered_num_directives`.
num_directives = {
    "month": ["%m"],
    "day": ["%d"],
    "year": ["%y", "%Y"],
}
254
+
255
def __init__(self, tokens, settings):
    """Scan *tokens* and populate day/month/year/time on the instance.

    Args:
        tokens: Iterable of ``(text, type)`` pairs. Only pairs whose type
            is <= 1 are scanned for date parts.
            NOTE(review): the meaning of the type codes comes from the
            tokenizer, which is not visible here - confirm.
        settings: Settings object; ``DATE_ORDER`` is read here and the
            object is stored on ``self.settings``.
    """
    self.settings = settings
    # Token text is stripped; the type code is kept as-is.
    self.tokens = [(t[0].strip(), t[1]) for t in list(tokens)]
    # (text, type, index into self.tokens) for tokens with type <= 1.
    self.filtered_tokens = [
        (t[0], t[1], i) for i, t in enumerate(self.tokens) if t[1] <= 1
    ]

    # Not filled in this method; read by the final loop below.
    # NOTE(review): presumably appended to by self._parse - confirm.
    self.unset_tokens = []

    self.day = None
    self.month = None
    self.year = None
    self.time = None

    self.auto_order = []

    self._token_day = None
    self._token_month = None
    self._token_year = None
    self._token_time = None

    # num_directives re-keyed in the order given by settings.DATE_ORDER.
    self.ordered_num_directives = OrderedDict(
        (k, self.num_directives[k])
        for k in (resolve_date_order(settings.DATE_ORDER, lst=True))
    )

    # Positions in filtered_tokens already consumed as part of a time value.
    skip_index = []
    skip_component = None
    skip_tokens = ["t", "year", "hour", "minute"]

    for index, token_type_original_index in enumerate(self.filtered_tokens):
        if index in skip_index:
            continue

        token, type, original_index = token_type_original_index

        if token in skip_tokens:
            continue

        # Time detection is attempted only until a time has been found.
        if self.time is None:
            meridian_index = index + 1

            try:
                # Try the case where hours and minutes are separated by a
                # period. Example: 13.20.
                _is_before_period = self.tokens[original_index + 1][0] == "."
                _is_after_period = (
                    original_index != 0
                    and self.tokens[original_index - 1][0] == "."
                )

                if _is_before_period and not _is_after_period:
                    index_next_token = index + 1
                    next_token = self.filtered_tokens[index_next_token][0]
                    index_in_tokens_for_next_token = self.filtered_tokens[
                        index_next_token
                    ][2]

                    next_token_is_last = (
                        index_next_token == len(self.filtered_tokens) - 1
                    )
                    # Only join when the next token is not itself followed
                    # by another period.
                    if (
                        next_token_is_last
                        or self.tokens[index_in_tokens_for_next_token + 1][0] != "."
                    ):
                        new_token = token + ":" + next_token
                        if re.match(HOUR_MINUTE_REGEX, new_token):
                            token = new_token
                            skip_index.append(index + 1)
                            meridian_index += 1
            except Exception:
                # Any lookup failure (e.g. running off the end of a list)
                # means this is not an "H.MM" time; carry on unchanged.
                pass

            try:
                microsecond = MICROSECOND.search(
                    self.filtered_tokens[index + 1][0]
                ).group()
                # Is after time token? raise ValueError if ':' can't be found:
                token.index(":")
                # Is after period? raise ValueError if '.' can't be found:
                self.tokens[self.tokens.index((token, 0)) + 1][0].index(".")
            except Exception:
                # Any failed check above means there is no microsecond part.
                microsecond = None

            if microsecond:
                # The meridian, if any, sits one position further on.
                meridian_index += 1

            try:
                meridian = MERIDIAN.search(
                    self.filtered_tokens[meridian_index][0]
                ).group()
            except Exception:
                meridian = None

            if any([":" in token, meridian, microsecond]):
                # Assemble the textual time and mark the consumed positions.
                if meridian and not microsecond:
                    self._token_time = "%s %s" % (token, meridian)
                    skip_index.append(meridian_index)
                elif microsecond and not meridian:
                    self._token_time = "%s.%s" % (token, microsecond)
                    skip_index.append(index + 1)
                elif meridian and microsecond:
                    self._token_time = "%s.%s %s" % (token, microsecond, meridian)
                    skip_index.append(index + 1)
                    skip_index.append(meridian_index)
                else:
                    self._token_time = token
                # Parsing is deferred: time_parser runs only when
                # self.time() is called.
                self.time = lambda: time_parser(self._token_time)
                continue

        results = self._parse(type, token, skip_component=skip_component)
        for res in results:
            # Once a four-character token resolved to a year, later
            # _parse calls are told to skip the year component.
            if len(token) == 4 and res[0] == "year":
                skip_component = "year"
            setattr(self, *res)

    known, unknown = get_unresolved_attrs(self)
    # NOTE(review): `params` is filled below but never read in this method.
    params = {}
    for attr in known:
        params.update({attr: getattr(self, attr)})
    for attr in unknown:
        # Every type-0 unset token is assigned in turn, so the last such
        # token wins for each still-unknown attribute.
        for token, type, _ in self.unset_tokens:
            if type == 0:
                params.update({attr: int(token)})
                setattr(self, "_token_%s" % attr, token)
                setattr(self, attr, int(token))
380
+
381
+ def _get_period(self):
382
+ if self.settings.RETURN_TIME_AS_PERIOD:
383
+ if getattr(self, "time", None):
384
+ return "time"
385
+
386
+ for period in ["time", "day"]:
387
+ if getattr(self, period, None):
388
+ return "day"
389
+
390
+ for period in ["month", "year"]:
391
+ if getattr(self, period, None):
392
+ return period
393
+
394
+ if self._results():
395
+ return "day"
396
+
397
+ def _get_datetime_obj(self, **params):
398
+ try:
399
+ return datetime(**params)
400
+ except ValueError as e:
401
+ error_text = e.__str__()
402
+ error_msgs = ["day is out of range", "day must be in"]
403
+ if error_msgs[0] in error_text or error_msgs[1] in error_text:
404
+ if not (self._token_day or hasattr(self, "_token_weekday")):
405
+ # if day is not available put last day of the month
406
+ params["day"] = get_last_day_of_month(
407
+ params["year"], params["month"]
408
+ )
409
+ return datetime(**params)
410
+ elif (
411
+ not self._token_year
412
+ and params["day"] == 29
413
+ and params["month"] == 2
414
+ and not calendar.isleap(params["year"])
415
+ ):
416
+ # fix the year when year is not present and it is 29 of February
417
+ params["year"] = self._get_correct_leap_year(
418
+ self.settings.PREFER_DATES_FROM, params["year"]
419
+ )
420
+ return datetime(**params)
421
+ raise e
422
+
423
def _get_correct_leap_year(self, prefer_dates_from, current_year):
    """Pick a leap year relative to *current_year*.

    "future" selects the next leap year and "past" the previous one. Any
    other preference selects whichever is closer, with the previous leap
    year winning a tie.
    """
    if prefer_dates_from in ("future", "past"):
        finder = (
            get_next_leap_year
            if prefer_dates_from == "future"
            else get_previous_leap_year
        )
        return finder(current_year)

    # Default case ('current_period'): return closer leap year
    later = get_next_leap_year(current_year)
    earlier = get_previous_leap_year(current_year)
    if later - current_year < current_year - earlier:
        return later
    return earlier
436
+
437
+ def _set_relative_base(self):
438
+ self.now = self.settings.RELATIVE_BASE
439
+ if not self.now:
440
+ self.now = datetime.now(tz=timezone.utc).replace(tzinfo=None)
441
+
442
+ def _get_datetime_obj_params(self):
443
+ if not self.now:
444
+ self._set_relative_base()
445
+
446
+ params = {
447
+ "day": self.day or self.now.day,
448
+ "month": self.month or self.now.month,
449
+ "year": self.year or self.now.year,
450
+ "hour": 0,
451
+ "minute": 0,
452
+ "second": 0,
453
+ "microsecond": 0,
454
+ }
455
+ return params
456
+
457
def _get_date_obj(self, token, directive):
    """Parse *token* with the single format *directive* via ``strptime``."""
    parsed = strptime(token, directive)
    return parsed
459
+
460
def _results(self):
    """Assemble the final ``datetime`` from the parsed components.

    Falsy day/month/year fields are first checked against the
    strict-parsing settings; the relative base is then refreshed and any
    parsed time is merged into the date parameters.

    Raises:
        ValueError: From the strict-parsing check or from building the
            datetime.
    """
    absent = [name for name in ("day", "month", "year") if not getattr(self, name)]
    _check_strict_parsing(absent, self.settings)
    self._set_relative_base()

    parsed_time = None if self.time is None else self.time()
    params = self._get_datetime_obj_params()

    if parsed_time:
        params.update(
            hour=parsed_time.hour,
            minute=parsed_time.minute,
            second=parsed_time.second,
            microsecond=parsed_time.microsecond,
        )

    return self._get_datetime_obj(**params)
481
+
482
def _correct_for_time_frame(self, dateobj, tz):
    """Shift *dateobj* according to ``settings.PREFER_DATES_FROM``.

    Four independent adjustments are applied in order: weekday-only input,
    month without year, two-digit year, and time-only input.

    Args:
        dateobj: The datetime produced by ``_results``.
        tz: Timezone object used for the time-only adjustment; when falsy
            it is derived from ``settings.TIMEZONE``.

    Returns:
        The adjusted datetime, carrying the same tzinfo it had after the
        weekday adjustment.
    """
    days = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]

    # _token_weekday is only present when set elsewhere; default to "none".
    token_weekday, _ = getattr(self, "_token_weekday", (None, None))

    # Weekday given without any year/month/day token: move to that weekday.
    if token_weekday and not (
        self._token_year or self._token_month or self._token_day
    ):
        day_index = calendar.weekday(dateobj.year, dateobj.month, dateobj.day)
        # NOTE(review): assumes the first three letters are an English
        # weekday abbreviation; otherwise the loops below cannot match.
        day = token_weekday[:3].lower()
        steps = 0
        if "future" in self.settings.PREFER_DATES_FROM:
            if days[day_index] == day:
                # Same weekday: jump a full week ahead.
                steps = 7
            else:
                # Walk forward until the requested weekday is reached.
                while days[day_index] != day:
                    day_index = (day_index + 1) % 7
                    steps += 1
            delta = timedelta(days=steps)
        else:
            if days[day_index] == day:
                if self.settings.PREFER_DATES_FROM == "past":
                    steps = 7
                else:
                    steps = 0
            else:
                # Walk backward; negative list indices wrap around the week.
                while days[day_index] != day:
                    day_index -= 1
                    steps += 1
            delta = timedelta(days=-steps)

        dateobj = dateobj + delta

    # NOTE: If this assert fires, self.now needs to be made offset-aware in a similar
    # way that dateobj is temporarily made offset-aware.
    assert not (self.now.tzinfo is None and dateobj.tzinfo is not None), (
        "`self.now` doesn't have `tzinfo`. Review comment in code for details."
    )

    # Store the original dateobj values so that upon subsequent parsing everything is not
    # treated as offset-aware if offset awareness is changed.
    original_dateobj = dateobj

    # Since date comparisons must be either offset-naive or offset-aware, normalize dateobj
    # to be offset-aware if one or the other is already offset-aware.
    if self.now.tzinfo is not None and dateobj.tzinfo is None:
        dateobj = pytz.utc.localize(dateobj)

    # Month known but year missing: move one year back or forward so the
    # date falls on the preferred side of self.now.
    if self.month and not self.year:
        try:
            if self.now < dateobj:
                if self.settings.PREFER_DATES_FROM == "past":
                    dateobj = dateobj.replace(year=dateobj.year - 1)
            else:
                if self.settings.PREFER_DATES_FROM == "future":
                    dateobj = dateobj.replace(year=dateobj.year + 1)
        except ValueError as e:
            # replace() fails when 29 February is moved to a non-leap year;
            # pick a valid leap year instead.
            if dateobj.day == 29 and dateobj.month == 2:
                valid_year = self._get_correct_leap_year(
                    self.settings.PREFER_DATES_FROM, dateobj.year
                )
                dateobj = dateobj.replace(year=valid_year)
            else:
                raise e

    # Two-character year token: shift by a century towards the preferred side.
    if self._token_year and len(self._token_year[0]) == 2:
        if self.now < dateobj:
            if "past" in self.settings.PREFER_DATES_FROM:
                dateobj = dateobj.replace(year=dateobj.year - 100)
        else:
            if "future" in self.settings.PREFER_DATES_FROM:
                dateobj = dateobj.replace(year=dateobj.year + 100)

    # Time given without any date or weekday token: shift by one day.
    if self._token_time and not any(
        [
            self._token_year,
            self._token_month,
            self._token_day,
            hasattr(self, "_token_weekday"),
        ]
    ):
        # Convert dateobj to utc time to compare with self.now
        try:
            tz = tz or get_timezone_from_tz_string(self.settings.TIMEZONE)
            tz_offset = tz.utcoffset(dateobj)
        except (pytz.UnknownTimeZoneError, pytz.NonExistentTimeError):
            # Fall back to a zero offset when the timezone cannot be used.
            tz_offset = timedelta(hours=0)

        if "past" in self.settings.PREFER_DATES_FROM:
            if self.now < dateobj - tz_offset:
                dateobj = dateobj + timedelta(days=-1)
        if "future" in self.settings.PREFER_DATES_FROM:
            if self.now > dateobj - tz_offset:
                dateobj = dateobj + timedelta(days=1)

    # Reset dateobj to the original value, thus removing any offset awareness that may
    # have been set earlier.
    dateobj = dateobj.replace(tzinfo=original_dateobj.tzinfo)

    return dateobj
582
+
583
+ def _correct_for_day(self, dateobj):
584
+ if (
585
+ getattr(self, "_token_day", None)
586
+ or getattr(self, "_token_weekday", None)
587
+ or getattr(self, "_token_time", None)
588
+ ):
589
+ return dateobj
590
+
591
+ dateobj = set_correct_day_from_settings(
592
+ dateobj, self.settings, current_day=self.now.day
593
+ )
594
+ return dateobj
595
+
596
+ def _correct_for_month(self, dateobj):
597
+ relative_base = getattr(self.settings, "RELATIVE_BASE", None)
598
+ relative_base_month = (
599
+ relative_base.month if hasattr(relative_base, "month") else relative_base
600
+ )
601
+
602
+ if getattr(self, "_token_month", None):
603
+ return dateobj
604
+
605
+ dateobj = set_correct_month_from_settings(
606
+ dateobj, self.settings, relative_base_month
607
+ )
608
+ return dateobj
609
+
610
@classmethod
def parse(cls, datestring, settings, tz=None):
    """Tokenize ``datestring``, parse it and return ``(dateobj, period)``.

    The raw result of the parser is adjusted in three steps: first for
    the past/future preference, then for the month preference, then for
    the day preference.  ``tz`` is forwarded to the time-frame correction.
    """
    token_stream = tokenizer(datestring).tokenize()
    parser = cls(token_stream, settings)

    result = parser._results()

    # Past/future correction, if applicable.
    result = parser._correct_for_time_frame(result, tz)

    # Month preference (beginning, current, end) must be applied before the
    # day preference so that the day is derived from the correct month.
    result = parser._correct_for_month(result)

    # Day preference (beginning, current, end).
    result = parser._correct_for_day(result)

    return result, parser._get_period()
629
+
630
def _parse(self, type, token, skip_component=None):
    """Interpret one token as a date component.

    ``type`` selects the handler: 0 dispatches to ``parse_number`` and 1 to
    ``parse_alpha``; any other value raises ``KeyError`` from the handler
    lookup.  ``skip_component`` names a component that must not be tried.

    Returns a list of ``(component, value)`` pairs.  Raises ``ValueError``
    ("Unable to parse: ...") when no directive yields a result.
    """

    def set_and_return(token, type, component, dateobj, skip_date_order=False):
        # Record the component in the detected order unless told not to,
        # remember which token produced it, and hand back its value.
        if not skip_date_order:
            self.auto_order.append(component)
        setattr(self, "_token_%s" % component, (token, type))
        return [(component, getattr(dateobj, component))]

    def parse_number(token, skip_component=None):
        type = 0

        for component, directives in self.ordered_num_directives.items():
            if skip_component == component:
                continue
            for directive in directives:
                try:
                    do = self._get_date_obj(token, directive)
                    prev_value = getattr(self, component, None)
                    if not prev_value:
                        # Component not set yet (or falsy): take this token.
                        return set_and_return(token, type, component, do)
                    else:
                        try:
                            prev_token, prev_type = getattr(
                                self, "_token_%s" % component
                            )
                            # Re-check the earlier numeric token against
                            # this directive; a ValueError here lets the
                            # new token claim the component instead.
                            if prev_type == type:
                                do = self._get_date_obj(prev_token, directive)
                        except ValueError:
                            self.unset_tokens.append(
                                (prev_token, prev_type, component)
                            )
                            return set_and_return(token, type, component, do)
                except ValueError:
                    # This directive did not accept the token; try the next.
                    pass
        else:
            # for/else: the loops contain no ``break``, so this runs whenever
            # every component/directive was tried without returning.
            raise ValueError("Unable to parse: %s" % token)

    def parse_alpha(token, skip_component=None):
        type = 1

        for component, directives in self.alpha_directives.items():
            if skip_component == component:
                continue
            for directive in directives:
                try:
                    do = self._get_date_obj(token, directive)
                    prev_value = getattr(self, component, None)
                    if not prev_value:
                        return set_and_return(
                            token, type, component, do, skip_date_order=True
                        )
                    elif component == "month":
                        # A month was already set but this alphabetic token
                        # also parses as a month: the earlier value is
                        # re-labelled as the day and this token becomes
                        # the month.
                        index = self.auto_order.index("month")
                        self.auto_order[index] = "day"
                        setattr(self, "_token_day", self._token_month)
                        setattr(self, "_token_month", (token, type))
                        return [
                            (component, getattr(do, component)),
                            ("day", prev_value),
                        ]
                except Exception:
                    # Any failure (including "month" missing from
                    # auto_order) is swallowed and the next directive tried.
                    pass
        else:
            # for/else: reached whenever no directive returned a result.
            raise ValueError("Unable to parse: %s" % token)

    # Token type -> handler: 0 is numeric, 1 is alphabetic.
    handlers = {0: parse_number, 1: parse_alpha}
    return handlers[type](token, skip_component)
696
+
697
+
698
class tokenizer:
    """Split a date string into runs of digits, ASCII letters and other text.

    ``tokenize()`` yields ``(token, type)`` pairs where ``type`` is:

    * ``0`` - a run of characters from ``digits`` (``:`` is included, so
      ``10:30`` stays a single token),
    * ``1`` - a run of ASCII letters,
    * ``2`` - a run of any other characters (spaces, punctuation,
      non-ASCII text).
    """

    digits = "0123456789:"
    letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    def _isletter(self, tkn):
        """Return True when ``tkn`` occurs in ``letters``."""
        return tkn in self.letters

    def _isdigit(self, tkn):
        """Return True when ``tkn`` occurs in ``digits``."""
        return tkn in self.digits

    def __init__(self, ds):
        """Wrap the date string ``ds`` in a stream read one character at a time."""
        self.instream = StringIO(ds)

    def _switch(self, chara, charb):
        """Classify ``chara`` and tell whether ``charb`` starts a new token.

        Returns ``(type, switch)``: ``type`` is the token type of ``chara``
        and ``switch`` is True when ``charb`` does not belong to the same run.
        """
        if self._isdigit(chara):
            return 0, not self._isdigit(charb)

        if self._isletter(chara):
            return 1, not self._isletter(charb)

        return 2, self._isdigit(charb) or self._isletter(charb)

    def tokenize(self):
        """Yield ``(token, type)`` pairs for the wrapped string.

        An empty input yields nothing (previously it raised ``IndexError``
        because the last character of an empty token was inspected at EOF).
        """
        token = ""

        # read(1) returns "" at end of stream, which ends the iteration.
        for nextchar in iter(lambda: self.instream.read(1), ""):
            if not token:
                token = nextchar
                continue

            token_type, switch = self._switch(token[-1], nextchar)
            if switch:
                yield token, token_type
                token = nextchar
            else:
                token += nextchar

        # Flush the final run; skipped when the input was empty.
        if token:
            token_type, _ = self._switch(token[-1], "")
            yield token, token_type