dateparser 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. dateparser/__init__.py +82 -0
  2. dateparser/calendars/__init__.py +144 -0
  3. dateparser/calendars/hijri.py +6 -0
  4. dateparser/calendars/hijri_parser.py +60 -0
  5. dateparser/calendars/jalali.py +9 -0
  6. dateparser/calendars/jalali_parser.py +184 -0
  7. dateparser/conf.py +267 -0
  8. dateparser/custom_language_detection/__init__.py +0 -0
  9. dateparser/custom_language_detection/fasttext.py +43 -0
  10. dateparser/custom_language_detection/langdetect.py +37 -0
  11. dateparser/custom_language_detection/language_mapping.py +18 -0
  12. dateparser/data/__init__.py +2 -0
  13. dateparser/data/date_translation_data/__init__.py +0 -0
  14. dateparser/data/date_translation_data/af.py +242 -0
  15. dateparser/data/date_translation_data/agq.py +169 -0
  16. dateparser/data/date_translation_data/ak.py +169 -0
  17. dateparser/data/date_translation_data/am.py +222 -0
  18. dateparser/data/date_translation_data/ar.py +574 -0
  19. dateparser/data/date_translation_data/as.py +164 -0
  20. dateparser/data/date_translation_data/asa.py +168 -0
  21. dateparser/data/date_translation_data/ast.py +280 -0
  22. dateparser/data/date_translation_data/az-Cyrl.py +168 -0
  23. dateparser/data/date_translation_data/az-Latn.py +217 -0
  24. dateparser/data/date_translation_data/az.py +217 -0
  25. dateparser/data/date_translation_data/bas.py +169 -0
  26. dateparser/data/date_translation_data/be.py +340 -0
  27. dateparser/data/date_translation_data/bem.py +161 -0
  28. dateparser/data/date_translation_data/bez.py +169 -0
  29. dateparser/data/date_translation_data/bg.py +345 -0
  30. dateparser/data/date_translation_data/bm.py +167 -0
  31. dateparser/data/date_translation_data/bn.py +241 -0
  32. dateparser/data/date_translation_data/bo.py +185 -0
  33. dateparser/data/date_translation_data/br.py +226 -0
  34. dateparser/data/date_translation_data/brx.py +157 -0
  35. dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
  36. dateparser/data/date_translation_data/bs-Latn.py +248 -0
  37. dateparser/data/date_translation_data/bs.py +248 -0
  38. dateparser/data/date_translation_data/ca.py +313 -0
  39. dateparser/data/date_translation_data/ce.py +225 -0
  40. dateparser/data/date_translation_data/cgg.py +169 -0
  41. dateparser/data/date_translation_data/chr.py +240 -0
  42. dateparser/data/date_translation_data/ckb.py +154 -0
  43. dateparser/data/date_translation_data/cs.py +316 -0
  44. dateparser/data/date_translation_data/cy.py +217 -0
  45. dateparser/data/date_translation_data/da.py +296 -0
  46. dateparser/data/date_translation_data/dav.py +169 -0
  47. dateparser/data/date_translation_data/de.py +357 -0
  48. dateparser/data/date_translation_data/dje.py +167 -0
  49. dateparser/data/date_translation_data/dsb.py +270 -0
  50. dateparser/data/date_translation_data/dua.py +169 -0
  51. dateparser/data/date_translation_data/dyo.py +168 -0
  52. dateparser/data/date_translation_data/dz.py +225 -0
  53. dateparser/data/date_translation_data/ebu.py +169 -0
  54. dateparser/data/date_translation_data/ee.py +233 -0
  55. dateparser/data/date_translation_data/el.py +279 -0
  56. dateparser/data/date_translation_data/en.py +851 -0
  57. dateparser/data/date_translation_data/eo.py +169 -0
  58. dateparser/data/date_translation_data/es.py +499 -0
  59. dateparser/data/date_translation_data/et.py +233 -0
  60. dateparser/data/date_translation_data/eu.py +219 -0
  61. dateparser/data/date_translation_data/ewo.py +169 -0
  62. dateparser/data/date_translation_data/fa.py +270 -0
  63. dateparser/data/date_translation_data/ff.py +179 -0
  64. dateparser/data/date_translation_data/fi.py +345 -0
  65. dateparser/data/date_translation_data/fil.py +223 -0
  66. dateparser/data/date_translation_data/fo.py +256 -0
  67. dateparser/data/date_translation_data/fr.py +520 -0
  68. dateparser/data/date_translation_data/fur.py +223 -0
  69. dateparser/data/date_translation_data/fy.py +223 -0
  70. dateparser/data/date_translation_data/ga.py +238 -0
  71. dateparser/data/date_translation_data/gd.py +277 -0
  72. dateparser/data/date_translation_data/gl.py +253 -0
  73. dateparser/data/date_translation_data/gsw.py +179 -0
  74. dateparser/data/date_translation_data/gu.py +216 -0
  75. dateparser/data/date_translation_data/guz.py +170 -0
  76. dateparser/data/date_translation_data/gv.py +166 -0
  77. dateparser/data/date_translation_data/ha.py +176 -0
  78. dateparser/data/date_translation_data/haw.py +168 -0
  79. dateparser/data/date_translation_data/he.py +371 -0
  80. dateparser/data/date_translation_data/hi.py +261 -0
  81. dateparser/data/date_translation_data/hr.py +378 -0
  82. dateparser/data/date_translation_data/hsb.py +271 -0
  83. dateparser/data/date_translation_data/hu.py +297 -0
  84. dateparser/data/date_translation_data/hy.py +246 -0
  85. dateparser/data/date_translation_data/id.py +272 -0
  86. dateparser/data/date_translation_data/ig.py +168 -0
  87. dateparser/data/date_translation_data/ii.py +157 -0
  88. dateparser/data/date_translation_data/is.py +242 -0
  89. dateparser/data/date_translation_data/it.py +282 -0
  90. dateparser/data/date_translation_data/ja.py +286 -0
  91. dateparser/data/date_translation_data/jgo.py +188 -0
  92. dateparser/data/date_translation_data/jmc.py +168 -0
  93. dateparser/data/date_translation_data/ka.py +241 -0
  94. dateparser/data/date_translation_data/kab.py +169 -0
  95. dateparser/data/date_translation_data/kam.py +169 -0
  96. dateparser/data/date_translation_data/kde.py +169 -0
  97. dateparser/data/date_translation_data/kea.py +230 -0
  98. dateparser/data/date_translation_data/khq.py +167 -0
  99. dateparser/data/date_translation_data/ki.py +169 -0
  100. dateparser/data/date_translation_data/kk.py +228 -0
  101. dateparser/data/date_translation_data/kl.py +213 -0
  102. dateparser/data/date_translation_data/kln.py +171 -0
  103. dateparser/data/date_translation_data/km.py +198 -0
  104. dateparser/data/date_translation_data/kn.py +225 -0
  105. dateparser/data/date_translation_data/ko.py +207 -0
  106. dateparser/data/date_translation_data/kok.py +157 -0
  107. dateparser/data/date_translation_data/ks.py +152 -0
  108. dateparser/data/date_translation_data/ksb.py +168 -0
  109. dateparser/data/date_translation_data/ksf.py +169 -0
  110. dateparser/data/date_translation_data/ksh.py +192 -0
  111. dateparser/data/date_translation_data/kw.py +169 -0
  112. dateparser/data/date_translation_data/ky.py +240 -0
  113. dateparser/data/date_translation_data/lag.py +169 -0
  114. dateparser/data/date_translation_data/lb.py +233 -0
  115. dateparser/data/date_translation_data/lg.py +169 -0
  116. dateparser/data/date_translation_data/lkt.py +194 -0
  117. dateparser/data/date_translation_data/ln.py +179 -0
  118. dateparser/data/date_translation_data/lo.py +228 -0
  119. dateparser/data/date_translation_data/lrc.py +154 -0
  120. dateparser/data/date_translation_data/lt.py +263 -0
  121. dateparser/data/date_translation_data/lu.py +169 -0
  122. dateparser/data/date_translation_data/luo.py +169 -0
  123. dateparser/data/date_translation_data/luy.py +168 -0
  124. dateparser/data/date_translation_data/lv.py +257 -0
  125. dateparser/data/date_translation_data/mas.py +173 -0
  126. dateparser/data/date_translation_data/mer.py +168 -0
  127. dateparser/data/date_translation_data/mfe.py +166 -0
  128. dateparser/data/date_translation_data/mg.py +168 -0
  129. dateparser/data/date_translation_data/mgh.py +169 -0
  130. dateparser/data/date_translation_data/mgo.py +151 -0
  131. dateparser/data/date_translation_data/mk.py +234 -0
  132. dateparser/data/date_translation_data/ml.py +217 -0
  133. dateparser/data/date_translation_data/mn.py +224 -0
  134. dateparser/data/date_translation_data/mr.py +229 -0
  135. dateparser/data/date_translation_data/ms.py +242 -0
  136. dateparser/data/date_translation_data/mt.py +175 -0
  137. dateparser/data/date_translation_data/mua.py +169 -0
  138. dateparser/data/date_translation_data/my.py +203 -0
  139. dateparser/data/date_translation_data/mzn.py +199 -0
  140. dateparser/data/date_translation_data/naq.py +169 -0
  141. dateparser/data/date_translation_data/nb.py +261 -0
  142. dateparser/data/date_translation_data/nd.py +169 -0
  143. dateparser/data/date_translation_data/ne.py +207 -0
  144. dateparser/data/date_translation_data/nl.py +273 -0
  145. dateparser/data/date_translation_data/nmg.py +169 -0
  146. dateparser/data/date_translation_data/nn.py +231 -0
  147. dateparser/data/date_translation_data/nnh.py +150 -0
  148. dateparser/data/date_translation_data/nus.py +166 -0
  149. dateparser/data/date_translation_data/nyn.py +169 -0
  150. dateparser/data/date_translation_data/om.py +173 -0
  151. dateparser/data/date_translation_data/or.py +157 -0
  152. dateparser/data/date_translation_data/os.py +203 -0
  153. dateparser/data/date_translation_data/pa-Arab.py +150 -0
  154. dateparser/data/date_translation_data/pa-Guru.py +221 -0
  155. dateparser/data/date_translation_data/pa.py +221 -0
  156. dateparser/data/date_translation_data/pl.py +416 -0
  157. dateparser/data/date_translation_data/ps.py +150 -0
  158. dateparser/data/date_translation_data/pt.py +981 -0
  159. dateparser/data/date_translation_data/qu.py +176 -0
  160. dateparser/data/date_translation_data/rm.py +166 -0
  161. dateparser/data/date_translation_data/rn.py +169 -0
  162. dateparser/data/date_translation_data/ro.py +270 -0
  163. dateparser/data/date_translation_data/rof.py +157 -0
  164. dateparser/data/date_translation_data/ru.py +442 -0
  165. dateparser/data/date_translation_data/rw.py +169 -0
  166. dateparser/data/date_translation_data/rwk.py +168 -0
  167. dateparser/data/date_translation_data/sah.py +219 -0
  168. dateparser/data/date_translation_data/saq.py +169 -0
  169. dateparser/data/date_translation_data/sbp.py +169 -0
  170. dateparser/data/date_translation_data/se.py +280 -0
  171. dateparser/data/date_translation_data/seh.py +169 -0
  172. dateparser/data/date_translation_data/ses.py +167 -0
  173. dateparser/data/date_translation_data/sg.py +169 -0
  174. dateparser/data/date_translation_data/shi-Latn.py +169 -0
  175. dateparser/data/date_translation_data/shi-Tfng.py +169 -0
  176. dateparser/data/date_translation_data/shi.py +169 -0
  177. dateparser/data/date_translation_data/si.py +220 -0
  178. dateparser/data/date_translation_data/sk.py +327 -0
  179. dateparser/data/date_translation_data/sl.py +244 -0
  180. dateparser/data/date_translation_data/smn.py +176 -0
  181. dateparser/data/date_translation_data/sn.py +169 -0
  182. dateparser/data/date_translation_data/so.py +179 -0
  183. dateparser/data/date_translation_data/sq.py +237 -0
  184. dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
  185. dateparser/data/date_translation_data/sr-Latn.py +306 -0
  186. dateparser/data/date_translation_data/sr.py +255 -0
  187. dateparser/data/date_translation_data/sv.py +309 -0
  188. dateparser/data/date_translation_data/sw.py +231 -0
  189. dateparser/data/date_translation_data/ta.py +264 -0
  190. dateparser/data/date_translation_data/te.py +239 -0
  191. dateparser/data/date_translation_data/teo.py +173 -0
  192. dateparser/data/date_translation_data/th.py +300 -0
  193. dateparser/data/date_translation_data/ti.py +173 -0
  194. dateparser/data/date_translation_data/tl.py +137 -0
  195. dateparser/data/date_translation_data/to.py +216 -0
  196. dateparser/data/date_translation_data/tr.py +259 -0
  197. dateparser/data/date_translation_data/twq.py +167 -0
  198. dateparser/data/date_translation_data/tzm.py +169 -0
  199. dateparser/data/date_translation_data/ug.py +203 -0
  200. dateparser/data/date_translation_data/uk.py +502 -0
  201. dateparser/data/date_translation_data/ur.py +256 -0
  202. dateparser/data/date_translation_data/uz-Arab.py +167 -0
  203. dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
  204. dateparser/data/date_translation_data/uz-Latn.py +216 -0
  205. dateparser/data/date_translation_data/uz.py +216 -0
  206. dateparser/data/date_translation_data/vi.py +260 -0
  207. dateparser/data/date_translation_data/vun.py +168 -0
  208. dateparser/data/date_translation_data/wae.py +224 -0
  209. dateparser/data/date_translation_data/xog.py +169 -0
  210. dateparser/data/date_translation_data/yav.py +169 -0
  211. dateparser/data/date_translation_data/yi.py +178 -0
  212. dateparser/data/date_translation_data/yo.py +263 -0
  213. dateparser/data/date_translation_data/yue.py +203 -0
  214. dateparser/data/date_translation_data/zgh.py +169 -0
  215. dateparser/data/date_translation_data/zh-Hans.py +240 -0
  216. dateparser/data/date_translation_data/zh-Hant.py +402 -0
  217. dateparser/data/date_translation_data/zh.py +273 -0
  218. dateparser/data/date_translation_data/zu.py +196 -0
  219. dateparser/data/languages_info.py +826 -0
  220. dateparser/date.py +599 -0
  221. dateparser/date_parser.py +55 -0
  222. dateparser/freshness_date_parser.py +156 -0
  223. dateparser/languages/__init__.py +2 -0
  224. dateparser/languages/dictionary.py +352 -0
  225. dateparser/languages/loader.py +224 -0
  226. dateparser/languages/locale.py +625 -0
  227. dateparser/languages/validation.py +467 -0
  228. dateparser/parser.py +742 -0
  229. dateparser/search/__init__.py +71 -0
  230. dateparser/search/detection.py +78 -0
  231. dateparser/search/search.py +297 -0
  232. dateparser/search/text_detection.py +89 -0
  233. dateparser/timezone_parser.py +91 -0
  234. dateparser/timezones.py +469 -0
  235. dateparser/utils/__init__.py +257 -0
  236. dateparser/utils/strptime.py +108 -0
  237. dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
  238. dateparser-1.2.1.dist-info/LICENSE +12 -0
  239. dateparser-1.2.1.dist-info/METADATA +864 -0
  240. dateparser-1.2.1.dist-info/RECORD +256 -0
  241. dateparser-1.2.1.dist-info/WHEEL +5 -0
  242. dateparser-1.2.1.dist-info/entry_points.txt +2 -0
  243. dateparser-1.2.1.dist-info/top_level.txt +4 -0
  244. dateparser_cli/__init__.py +0 -0
  245. dateparser_cli/cli.py +36 -0
  246. dateparser_cli/exceptions.py +2 -0
  247. dateparser_cli/fasttext_manager.py +42 -0
  248. dateparser_cli/utils.py +27 -0
  249. dateparser_data/__init__.py +0 -0
  250. dateparser_data/settings.py +33 -0
  251. dateparser_scripts/__init__.py +0 -0
  252. dateparser_scripts/get_cldr_data.py +567 -0
  253. dateparser_scripts/order_languages.py +217 -0
  254. dateparser_scripts/update_supported_languages_and_locales.py +48 -0
  255. dateparser_scripts/utils.py +73 -0
  256. dateparser_scripts/write_complete_data.py +129 -0
dateparser/date.py ADDED
@@ -0,0 +1,599 @@
1
+ import collections
2
+ from collections.abc import Set
3
+ from datetime import datetime, timedelta
4
+
5
+ import regex as re
6
+ from dateutil.relativedelta import relativedelta
7
+ from tzlocal import get_localzone
8
+
9
+ from dateparser.conf import apply_settings, check_settings
10
+ from dateparser.custom_language_detection.language_mapping import map_languages
11
+ from dateparser.date_parser import date_parser
12
+ from dateparser.freshness_date_parser import freshness_date_parser
13
+ from dateparser.languages.loader import LocaleDataLoader
14
+ from dateparser.parser import _parse_absolute, _parse_nospaces
15
+ from dateparser.timezone_parser import pop_tz_offset_from_string
16
+ from dateparser.utils import (
17
+ apply_timezone_from_settings,
18
+ get_timezone_from_tz_string,
19
+ set_correct_day_from_settings,
20
+ set_correct_month_from_settings,
21
+ )
22
+
23
# Unicode characters that resemble an apostrophe. RE_SANITIZE_APOSTROPHE is
# built from this list and sanitize_date() replaces every match with a plain "'".
+ APOSTROPHE_LOOK_ALIKE_CHARS = [
24
+ "\N{RIGHT SINGLE QUOTATION MARK}", # '\u2019'
25
+ "\N{MODIFIER LETTER APOSTROPHE}", # '\u02bc'
26
+ "\N{MODIFIER LETTER TURNED COMMA}", # '\u02bb'
27
+ "\N{ARMENIAN APOSTROPHE}", # '\u055a'
28
+ "\N{LATIN SMALL LETTER SALTILLO}", # '\ua78c'
29
+ "\N{PRIME}", # '\u2032'
30
+ "\N{REVERSED PRIME}", # '\u2035'
31
+ "\N{MODIFIER LETTER PRIME}", # '\u02b9'
32
+ "\N{FULLWIDTH APOSTROPHE}", # '\uff07'
33
+ ]
34
+
35
# Whitespace helpers used by sanitize_spaces(): a non-breaking space (U+00A0),
# any run of whitespace, and a string wrapped in whitespace.
# NOTE: RE_TRIM_SPACES only matches when whitespace is present on BOTH ends.
+ RE_NBSP = re.compile("\xa0", flags=re.UNICODE)
36
+ RE_SPACES = re.compile(r"\s+")
37
+ RE_TRIM_SPACES = re.compile(r"^\s+(\S.*?)\s+$")
38
# Captures the text that precedes any trailing colons.
+ RE_TRIM_COLONS = re.compile(r"(\S.*?):*$")
39
+
40
# Sequences sanitize_date() replaces with a single space: tab, newline,
# carriage return, U+00BB, a comma + whitespace + U+0432 at a word boundary,
# the direction marks U+200E / U+200F, U+00B7, and U+064E / U+064F.
+ RE_SANITIZE_SKIP = re.compile(
41
+ r"\t|\n|\r|\u00bb|,\s\u0432\b|\u200e|\xb7|\u200f|\u064e|\u064f", flags=re.M
42
+ )
43
# U+0433 followed by "." when preceded by a non-word character or a digit;
# the preceding character is captured so it can be kept.
+ RE_SANITIZE_RUSSIAN = re.compile(r"([\W\d])\u0433\.", flags=re.I | re.U)
44
# Three dot-terminated numbers ("d. m. y.") with an optional trailing " u".
+ RE_SANITIZE_CROATIAN = re.compile(
45
+ r"(\d+)\.\s?(\d+)\.\s?(\d+)\.( u)?", flags=re.I | re.U
46
+ )
47
# A period that directly follows a character that is neither a digit nor whitespace.
+ RE_SANITIZE_PERIOD = re.compile(r"(?<=[^0-9\s])\.", flags=re.U)
48
# Everything up to the first "on:" plus following whitespace; the remainder is captured.
+ RE_SANITIZE_ON = re.compile(r"^.*?on:\s+(.*)")
49
+ RE_SANITIZE_APOSTROPHE = re.compile("|".join(APOSTROPHE_LOOK_ALIKE_CHARS))
50
+
51
# Timestamps at the start of the string: 10 digits (read as seconds by
# get_date_from_timestamp), optionally followed by 3 digits (milliseconds) and
# 3 more digits (microseconds). The trailing "(?![^.])" only allows a "." or the
# end of the string right after the digits. The second pattern additionally
# requires a leading "-".
+ RE_SEARCH_TIMESTAMP = re.compile(r"^(\d{10})(\d{3})?(\d{3})?(?![^.])")
52
+ RE_SEARCH_NEGATIVE_TIMESTAMP = re.compile(r"^([-]\d{10})(\d{3})?(\d{3})?(?![^.])")
53
+
54
+
55
def sanitize_spaces(date_string):
    """Normalise the whitespace of ``date_string``.

    Non-breaking spaces become regular spaces, every run of whitespace is
    collapsed into one space, and the surrounding whitespace is removed when
    ``RE_TRIM_SPACES`` matches (it requires whitespace on both ends).

    :param date_string: the text to clean up.
    :return: the cleaned string.
    """
    without_nbsp = RE_NBSP.sub(" ", date_string)
    collapsed = RE_SPACES.sub(" ", without_nbsp)
    return RE_TRIM_SPACES.sub(r"\1", collapsed)
60
+
61
+
62
def date_range(begin, end, **kwargs):
    """Yield dates from ``begin`` (inclusive) up to ``end`` (exclusive).

    The step is ``relativedelta(**kwargs)``, or one day when no keyword
    arguments are given.

    :param begin: first value to yield.
    :param end: upper bound; values are yielded while they are ``< end``.
    :param kwargs: ``relativedelta`` keyword arguments describing the step.
        Only the plural (relative) names are accepted.
    :raises ValueError: on first iteration, if a singular ``relativedelta``
        argument (``year``, ``month``, ...) is passed; those set absolute
        values rather than a step and are easy to mistake for the plural form.
    """
    singular_names = ("year", "month", "week", "day", "hour", "minute", "second")
    offending = next((name for name in singular_names if name in kwargs), None)
    if offending is not None:
        raise ValueError("Invalid argument: %s" % offending)

    if kwargs:
        step = relativedelta(**kwargs)
    else:
        step = relativedelta(days=1)

    current = begin
    while current < end:
        yield current
        current += step

    # When stepping by months the loop can overshoot ``end`` while still
    # being inside ``end``'s month; in that case ``end`` itself is emitted.
    stepping_by_months = kwargs.get("months", 0) > 0
    if stepping_by_months and (current.year, current.month) == (end.year, end.month):
        yield end
86
+
87
+
88
def get_intersecting_periods(low, high, period="day"):
    """Yield the start of every ``period`` that intersects ``[low, high)``.

    The first value is ``low`` truncated to the beginning of its period;
    following values advance by one period while they are ``< high``.

    :param low: lower bound (date or datetime).
    :param high: upper bound; nothing is yielded when ``high <= low``.
    :param period: one of ``year``, ``month``, ``week``, ``day``, ``hour``,
        ``minute``, ``second`` or ``microsecond``.
    :raises ValueError: on first iteration, if ``period`` is not supported.
    """
    sub_day_units = ("microsecond", "second", "minute", "hour")
    if period not in ("year", "month", "week", "day") + sub_day_units:
        raise ValueError("Invalid period: {}".format(period))

    if high <= low:
        return

    step = relativedelta(**{period + "s": 1})

    start = low
    if isinstance(start, datetime):
        # Zero every time field that is finer than the requested period.
        if period in sub_day_units:
            finer_units = sub_day_units[: sub_day_units.index(period)]
        else:
            finer_units = sub_day_units
        start = start.replace(**dict.fromkeys(finer_units, 0))

    if period == "year":
        start = start.replace(month=1, day=1)
    elif period == "month":
        start = start.replace(day=1)
    elif period == "week":
        # weekday() is 0 for Monday, so this rewinds to the week's Monday.
        start = start - timedelta(days=start.weekday())

    while start < high:
        yield start
        start += step
128
+
129
+
130
def sanitize_date(date_string):
    """Strip noise from ``date_string`` before it is handed to the parsers.

    The substitutions run in a fixed order; later ones rely on the whitespace
    normalisation done by :func:`sanitize_spaces`.

    :param date_string: raw input text.
    :return: the cleaned, stripped string.
    """
    cleaned = RE_SANITIZE_SKIP.sub(" ", date_string)
    # Remove the Russian year abbreviation, but not when it is part of a word.
    cleaned = RE_SANITIZE_RUSSIAN.sub(r"\1 ", cleaned)
    # The extra '.' and 'u' interfere with parsing relative fractional dates.
    cleaned = RE_SANITIZE_CROATIAN.sub(r"\1.\2.\3 ", cleaned)
    cleaned = sanitize_spaces(cleaned)

    remaining_substitutions = (
        (RE_SANITIZE_PERIOD, ""),
        (RE_SANITIZE_ON, r"\1"),
        (RE_TRIM_COLONS, r"\1"),
        (RE_SANITIZE_APOSTROPHE, "'"),
    )
    for pattern, replacement in remaining_substitutions:
        cleaned = pattern.sub(replacement, cleaned)

    return cleaned.strip()
145
+
146
+
147
def get_date_from_timestamp(date_string, settings, negative=False):
    """Interpret ``date_string`` as a Unix timestamp.

    The string must start with 10 digits (seconds), optionally followed by
    3 digits of milliseconds and 3 digits of microseconds.

    :param date_string: candidate timestamp text.
    :param settings: settings object, or ``None``; its ``TIMEZONE`` selects
        the time zone used to convert the timestamp.
    :param negative: when true, match timestamps with a leading ``-``.
    :return: the value returned by ``apply_timezone_from_settings`` for the
        converted datetime, or ``None`` when the string is not a timestamp.
    """
    pattern = RE_SEARCH_NEGATIVE_TIMESTAMP if negative else RE_SEARCH_TIMESTAMP
    match = pattern.search(date_string)
    if not match:
        return None

    # Use the local time zone when the setting is unset or mentions 'local';
    # otherwise use the time zone named in the settings.
    use_local_zone = (
        settings is None
        or settings.TIMEZONE is None
        or "local" in settings.TIMEZONE.lower()
    )
    if use_local_zone:
        timezone = get_localzone()
    else:
        timezone = get_timezone_from_tz_string(settings.TIMEZONE)

    seconds, millis, micros = (int(digits or 0) for digits in match.groups())

    # NOTE(review): for negative timestamps the sub-second digits are still
    # applied as a positive offset to the whole second - confirm intended.
    naive = datetime.fromtimestamp(seconds, timezone).replace(
        microsecond=millis * 1000 + micros, tzinfo=None
    )
    return apply_timezone_from_settings(naive, settings)
174
+
175
+
176
# strptime directive letters that, once parsed, determine a date component.
# Besides the obvious ones, "%j" (day of year), "%c" / "%x" (locale date
# representations) and "%G" (ISO year, which strptime only accepts together
# with an ISO week and a weekday) all produce a complete month and day, and
# "%c" / "%x" / "%G" also carry the year.
_MONTH_DIRECTIVES = frozenset("mbBjcxG")
_DAY_DIRECTIVES = frozenset("djcxG")
_YEAR_DIRECTIVES = frozenset("yYcxG")


def _format_directives(date_format):
    """Return the set of directive letters used in ``date_format``.

    ``%%`` is consumed as a literal percent sign, so ``"%%d"`` does not count
    as the ``%d`` directive.
    """
    directives = set()
    position = date_format.find("%")
    while 0 <= position < len(date_format) - 1:
        directives.add(date_format[position + 1])
        position = date_format.find("%", position + 2)
    return directives


def parse_with_formats(date_string, date_formats, settings):
    """Parse ``date_string`` with the first matching ``strptime`` format.

    Components the matching format does not provide are filled in through
    ``set_correct_month_from_settings`` / ``set_correct_day_from_settings``,
    and a missing year is replaced by the current year.

    :param date_string: text to parse.
    :param date_formats: iterable of ``strptime`` format strings, tried in order.
    :param settings: settings object forwarded to the helper functions.
    :returns: a :class:`DateData`; its ``date_obj`` is ``None`` when no format
        matches. ``period`` is ``'year'`` when the month is missing,
        ``'month'`` when only the day is missing, and ``'day'`` otherwise.
    """
    period = "day"
    for date_format in date_formats:
        try:
            date_obj = datetime.strptime(date_string, date_format)
        except ValueError:
            continue
        else:
            directives = _format_directives(date_format)
            missing_month = directives.isdisjoint(_MONTH_DIRECTIVES)
            missing_day = directives.isdisjoint(_DAY_DIRECTIVES)
            if missing_month and missing_day:
                period = "year"
                date_obj = set_correct_month_from_settings(date_obj, settings)
                date_obj = set_correct_day_from_settings(date_obj, settings)

            elif missing_month:
                period = "year"
                date_obj = set_correct_month_from_settings(date_obj, settings)

            elif missing_day:
                period = "month"
                date_obj = set_correct_day_from_settings(date_obj, settings)

            if directives.isdisjoint(_YEAR_DIRECTIVES):
                # strptime defaults the year to 1900 when it is not parsed.
                today = datetime.today()
                date_obj = date_obj.replace(year=today.year)

            date_obj = apply_timezone_from_settings(date_obj, settings)

            return DateData(date_obj=date_obj, period=period)
    else:
        return DateData(date_obj=None, period=period)
213
+
214
+
215
class _DateLocaleParser:
    """Try the configured parsers on one date string for one locale.

    The parsers named in ``settings.PARSERS`` are run in order and the first
    one producing valid :class:`DateData` wins.
    """

    def __init__(self, locale, date_string, date_formats, settings=None):
        """
        :param locale: locale object used to translate ``date_string``.
        :param date_string: the text to parse.
        :param date_formats: ``None`` or a list/tuple/set of format strings.
        :param settings: settings object consulted by the parsers.
        :raises TypeError: if ``date_formats`` has an unsupported type.
        """
        self._settings = settings
        if not (date_formats is None or isinstance(date_formats, (list, tuple, Set))):
            raise TypeError("Date formats should be list, tuple or set of strings")

        self.locale = locale
        self.date_string = date_string
        self.date_formats = date_formats
        # Translations are computed lazily and cached per instance.
        self._translated_date = None
        self._translated_date_with_formatting = None
        # Maps the names allowed in settings.PARSERS to their implementation.
        self._parsers = {
            "timestamp": self._try_timestamp,
            "negative-timestamp": self._try_negative_timestamp,
            "relative-time": self._try_freshness_parser,
            "custom-formats": self._try_given_formats,
            "absolute-time": self._try_absolute_parser,
            "no-spaces-time": self._try_nospaces_parser,
        }

    @classmethod
    def parse(cls, locale, date_string, date_formats=None, settings=None):
        """Build a parser for the arguments and return its result.

        :return: a valid :class:`DateData`, or ``None`` if no parser succeeds.
        """
        instance = cls(locale, date_string, date_formats, settings)
        return instance._parse()

    def _parse(self):
        """Run the parsers listed in ``settings.PARSERS`` until one succeeds."""
        for parser_name in self._settings.PARSERS:
            date_data = self._parsers[parser_name]()
            if self._is_valid_date_data(date_data):
                return date_data
        else:
            return None

    def _try_timestamp_parser(self, negative=False):
        """Parse the raw (untranslated) string as a Unix timestamp."""
        return DateData(
            date_obj=get_date_from_timestamp(
                self.date_string, self._settings, negative=negative
            ),
            period="time" if self._settings.RETURN_TIME_AS_PERIOD else "day",
        )

    def _try_timestamp(self):
        return self._try_timestamp_parser()

    def _try_negative_timestamp(self):
        return self._try_timestamp_parser(negative=True)

    def _try_freshness_parser(self):
        """Parse relative expressions; overflow/value errors mean 'no match'."""
        try:
            return freshness_date_parser.get_date_data(
                self._get_translated_date(), self._settings
            )
        except (OverflowError, ValueError):
            return None

    def _try_absolute_parser(self):
        return self._try_parser(parse_method=_parse_absolute)

    def _try_nospaces_parser(self):
        return self._try_parser(parse_method=_parse_nospaces)

    def _try_parser(self, parse_method):
        """Run ``date_parser.parse`` with ``parse_method`` on the translated date.

        When ``PREFER_LOCALE_DATE_ORDER`` is set and ``DATE_ORDER`` was not
        explicitly modified, the locale's date order is applied for the
        duration of the call.

        :return: :class:`DateData`, or ``None`` if parsing raises ``ValueError``.
        """
        original_order = self._settings.DATE_ORDER
        try:
            if self._settings.PREFER_LOCALE_DATE_ORDER:
                if "DATE_ORDER" not in self._settings._mod_settings:
                    self._settings.DATE_ORDER = self.locale.info.get(
                        "date_order", original_order
                    )
            date_obj, period = date_parser.parse(
                self._get_translated_date(),
                parse_method=parse_method,
                settings=self._settings,
            )
        except ValueError:
            return None
        finally:
            # The settings object is shared with the caller, so the temporary
            # override must be undone on every exit path, including
            # exceptions other than ValueError that propagate.
            self._settings.DATE_ORDER = original_order
        return DateData(
            date_obj=date_obj,
            period=period,
        )

    def _try_given_formats(self):
        """Parse with the caller-supplied formats, if any were given."""
        if not self.date_formats:
            return

        return parse_with_formats(
            self._get_translated_date_with_formatting(),
            self.date_formats,
            settings=self._settings,
        )

    def _get_translated_date(self):
        """Return the locale translation of the date string (cached)."""
        if self._translated_date is None:
            self._translated_date = self.locale.translate(
                self.date_string, keep_formatting=False, settings=self._settings
            )
        return self._translated_date

    def _get_translated_date_with_formatting(self):
        """Return the translation that keeps the original formatting (cached)."""
        if self._translated_date_with_formatting is None:
            self._translated_date_with_formatting = self.locale.translate(
                self.date_string, keep_formatting=True, settings=self._settings
            )
        return self._translated_date_with_formatting

    def _is_valid_date_data(self, date_data):
        """Check that ``date_data`` is a DateData with a datetime and a known period."""
        if not isinstance(date_data, DateData):
            return False
        if not date_data["date_obj"] or not date_data["period"]:
            return False
        if date_data["date_obj"] and not isinstance(date_data["date_obj"], datetime):
            return False
        if date_data["period"] not in ("time", "day", "week", "month", "year"):
            return False
        return True
332
+
333
+
334
class DateData:
    """
    Container for a parse result: ``date_obj``, ``period`` and ``locale``.

    Besides attribute access, values can be read and written with square
    brackets, like a dict; unknown keys raise ``KeyError``.
    """

    def __init__(self, *, date_obj=None, period=None, locale=None):
        self.date_obj = date_obj
        self.period = period
        self.locale = locale

    def __getitem__(self, k):
        if hasattr(self, k):
            return getattr(self, k)
        raise KeyError(k)

    def __setitem__(self, k, v):
        if hasattr(self, k):
            setattr(self, k, v)
            return
        raise KeyError(k)

    def __repr__(self):
        rendered = [
            "{}={}".format(name, value.__repr__())
            for name, value in self.__dict__.items()
        ]
        return "{}({})".format(self.__class__.__name__, ", ".join(rendered))
361
+
362
+
363
class DateDataParser:
    """
    Class which handles language detection, translation and subsequent generic parsing of
    string representing date and/or time.

    :param languages:
        A list of language codes, e.g. ['en', 'es', 'zh-Hant'].
        If locales are not given, languages and region are
        used to construct locales for translation.
    :type languages: list

    :param locales:
        A list of locale codes, e.g. ['fr-PF', 'qu-EC', 'af-NA'].
        The parser uses only these locales to translate date string.
    :type locales: list

    :param region:
        A region code, e.g. 'IN', '001', 'NE'.
        If locales are not given, languages and region are
        used to construct locales for translation.
    :type region: str

    :param try_previous_locales:
        If True, locales previously used to translate date are tried first.
    :type try_previous_locales: bool

    :param use_given_order:
        If True, locales are tried for translation of date string
        in the order in which they are given.
    :type use_given_order: bool

    :param settings:
        Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
    :type settings: dict

    :param detect_languages_function:
        A function for language detection that takes as input a `text` and a `confidence_threshold`,
        and returns a list of detected language codes.
        It is called for every parsed string.
        Note: this function is only used if ``languages`` and ``locales`` are not provided.
    :type detect_languages_function: function

    :return: A parser instance

    :raises:
        ``ValueError``: Unknown Language, ``TypeError``: Languages argument must be a list,
        ``SettingValidationError``: A provided setting is not valid.
    """

    # Lazily created LocaleDataLoader shared by all instances,
    # see _get_locale_loader().
    locale_loader = None

    @apply_settings
    def __init__(
        self,
        languages=None,
        locales=None,
        region=None,
        try_previous_locales=False,
        use_given_order=False,
        settings=None,
        detect_languages_function=None,
    ):
        if languages is not None and not isinstance(languages, (list, tuple, Set)):
            raise TypeError(
                "languages argument must be a list (%r given)" % type(languages)
            )

        if locales is not None and not isinstance(locales, (list, tuple, Set)):
            raise TypeError(
                "locales argument must be a list (%r given)" % type(locales)
            )

        if region is not None and not isinstance(region, str):
            raise TypeError("region argument must be str (%r given)" % type(region))

        if not isinstance(try_previous_locales, bool):
            raise TypeError(
                "try_previous_locales argument must be a boolean (%r given)"
                % type(try_previous_locales)
            )

        if not isinstance(use_given_order, bool):
            raise TypeError(
                "use_given_order argument must be a boolean (%r given)"
                % type(use_given_order)
            )

        if not locales and not languages and use_given_order:
            raise ValueError(
                "locales or languages must be given if use_given_order is True"
            )

        check_settings(settings)

        self._settings = settings
        self.try_previous_locales = try_previous_locales
        self.use_given_order = use_given_order
        self.languages = list(languages) if languages else None
        self.locales = locales
        self.region = region
        self.detect_languages_function = detect_languages_function
        # Used as an ordered set of locales that produced a successful parse.
        self.previous_locales = collections.OrderedDict()

    def get_date_data(self, date_string, date_formats=None):
        """
        Parse string representing date and/or time in recognizable localized formats.
        Supports parsing multiple languages and timezones.

        :param date_string:
            A string representing date and/or time in a recognizably valid format.
        :type date_string: str
        :param date_formats:
            A list of format strings using directives as given
            `here <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`_.
            The parser applies formats one by one, taking into account the detected languages.
        :type date_formats: list

        :return: a ``DateData`` object.

        :raises: ValueError - Unknown Language, TypeError - ``date_string`` is not a str

        .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year', 'time'.

        *Period* represents the granularity of date parsed from the given string.

        In the example below, since no day information is present, the day is assumed to be current
        day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
        Hence, the level of precision is ``month``:

            >>> DateDataParser().get_date_data('March 2015')
            DateData(date_obj=datetime.datetime(2015, 3, 16, 0, 0), period='month', locale='en')

        Similarly, for date strings with no day and month information present, level of precision
        is ``year`` and day ``16`` and month ``6`` are from *current_date*.

            >>> DateDataParser().get_date_data('2014')
            DateData(date_obj=datetime.datetime(2014, 6, 16, 0, 0), period='year', locale='en')

        Dates with time zone indications or UTC offsets keep that time zone information,
        as in the example below, unless configured otherwise using
        `Settings <https://dateparser.readthedocs.io/en/latest/settings.html#settings>`__.

            >>> DateDataParser().get_date_data('23 March 2000, 1:21 PM CET')
            DateData(date_obj=datetime.datetime(2000, 3, 23, 13, 21, tzinfo=<StaticTzInfo 'CET'>),
            period='day', locale='en')

        """
        if not isinstance(date_string, str):
            raise TypeError("Input type must be str")

        # First try the caller's formats on the raw string, before any
        # sanitising or translation takes place.
        res = parse_with_formats(date_string, date_formats or [], self._settings)
        if res["date_obj"]:
            return res

        date_string = sanitize_date(date_string)

        for locale in self._get_applicable_locales(date_string):
            parsed_date = _DateLocaleParser.parse(
                locale, date_string, date_formats, settings=self._settings
            )
            if parsed_date:
                parsed_date["locale"] = locale.shortname
                if self.try_previous_locales:
                    self.previous_locales[locale] = None
                return parsed_date
        else:
            return DateData(date_obj=None, period="day", locale=None)

    def get_date_tuple(self, *args, **kwargs):
        """Same as :meth:`get_date_data`, but return the result as a namedtuple."""
        date_data = self.get_date_data(*args, **kwargs)
        fields = date_data.__dict__.keys()
        date_tuple = collections.namedtuple("DateData", fields)
        return date_tuple(**date_data.__dict__)

    def _get_applicable_locales(self, date_string):
        """Yield the candidate locales for ``date_string``.

        Order: previously successful locales (if enabled), then the locales
        built from the configured or detected languages, then the locales of
        ``settings.DEFAULT_LANGUAGES`` without an applicability check.
        """
        pop_tz_cache = []

        def date_strings():
            """A generator instead of a static list to avoid calling
            pop_tz_offset_from_string if the first locale matches on unmodified
            date_string.
            """
            yield date_string
            if not pop_tz_cache:
                stripped_date_string, _ = pop_tz_offset_from_string(
                    date_string, as_offset=False
                )
                if stripped_date_string == date_string:
                    stripped_date_string = None
                pop_tz_cache[:] = [stripped_date_string]
            (stripped_date_string,) = pop_tz_cache
            if stripped_date_string is not None:
                yield stripped_date_string

        if self.try_previous_locales:
            for locale in self.previous_locales.keys():
                for s in date_strings():
                    if self._is_applicable_locale(locale, s):
                        yield locale

        languages = self.languages
        if self.detect_languages_function and not languages and not self.locales:
            detected_languages = self.detect_languages_function(
                text=date_string,
                confidence_threshold=self._settings.LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD,
            )

            # Keep the detection result local to this call: storing it on the
            # instance would disable detection for every later string and
            # make them reuse the languages detected for the first one.
            languages = map_languages(detected_languages)

        for locale in self._get_locale_loader().get_locales(
            languages=languages,
            locales=self.locales,
            region=self.region,
            use_given_order=self.use_given_order,
        ):
            for s in date_strings():
                if self._is_applicable_locale(locale, s):
                    yield locale

        if self._settings.DEFAULT_LANGUAGES:
            for locale in self._get_locale_loader().get_locales(
                languages=self._settings.DEFAULT_LANGUAGES,
                locales=None,
                region=self.region,
                use_given_order=self.use_given_order,
            ):
                yield locale

    def _is_applicable_locale(self, locale, date_string):
        return locale.is_applicable(
            date_string,
            strip_timezone=False,  # it is stripped outside
            settings=self._settings,
        )

    @classmethod
    def _get_locale_loader(cls):
        """Return the class-wide LocaleDataLoader, creating it on first use."""
        if not cls.locale_loader:
            cls.locale_loader = LocaleDataLoader()
        return cls.locale_loader
dateparser/date_parser.py ADDED
@@ -0,0 +1,55 @@
1
+ import sys
2
+
3
+ from tzlocal import get_localzone
4
+
5
+ from .conf import apply_settings
6
+ from .timezone_parser import pop_tz_offset_from_string
7
+ from .utils import apply_timezone, localize_timezone, strip_braces
8
+
9
+
10
class DateParser:
    """Parse a date string with a given parse method and apply the
    timezone-related settings to the result."""

    @apply_settings
    def parse(self, date_string, parse_method, settings=None):
        """Parse ``date_string`` and return ``(date_obj, period)``.

        :param date_string: text to parse; it is converted with ``str()``.
        :param parse_method: callable invoked as
            ``parse_method(text, settings=settings, tz=parsed_tz)`` that
            returns a ``(date_obj, period)`` pair.
        :param settings: settings object supplying ``TIMEZONE``,
            ``TO_TIMEZONE`` and ``RETURN_AS_TIMEZONE_AWARE``.
        :raises ValueError: if the string is empty or only whitespace.
        """
        text = str(date_string)
        if not text.strip():
            raise ValueError("Empty string")

        text, parsed_tz = pop_tz_offset_from_string(strip_braces(text))
        date_obj, period = parse_method(text, settings=settings, tz=parsed_tz)

        settings_use_local = "local" in settings.TIMEZONE.lower()

        if parsed_tz:
            # A time zone found in the string takes precedence.
            if hasattr(parsed_tz, "localize"):
                date_obj = parsed_tz.localize(date_obj)
            else:
                date_obj = date_obj.replace(tzinfo=parsed_tz)
            if not settings_use_local:
                date_obj = apply_timezone(date_obj, settings.TIMEZONE)
        elif settings_use_local:
            local_tz = get_localzone()
            if hasattr(local_tz, "localize") and sys.version_info < (3, 6):
                date_obj = local_tz.localize(date_obj)
            else:
                date_obj = date_obj.replace(tzinfo=local_tz)
        else:
            date_obj = localize_timezone(date_obj, settings.TIMEZONE)

        if settings.TO_TIMEZONE:
            date_obj = apply_timezone(date_obj, settings.TO_TIMEZONE)

        # Return a naive datetime when awareness is disabled, or when it is
        # left at "default" and the string itself carried no time zone.
        awareness = settings.RETURN_AS_TIMEZONE_AWARE
        if not awareness or (awareness == "default" and not parsed_tz):
            date_obj = date_obj.replace(tzinfo=None)

        return date_obj, period


date_parser = DateParser()