dateparser 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. dateparser/__init__.py +82 -0
  2. dateparser/calendars/__init__.py +144 -0
  3. dateparser/calendars/hijri.py +6 -0
  4. dateparser/calendars/hijri_parser.py +60 -0
  5. dateparser/calendars/jalali.py +9 -0
  6. dateparser/calendars/jalali_parser.py +184 -0
  7. dateparser/conf.py +267 -0
  8. dateparser/custom_language_detection/__init__.py +0 -0
  9. dateparser/custom_language_detection/fasttext.py +43 -0
  10. dateparser/custom_language_detection/langdetect.py +37 -0
  11. dateparser/custom_language_detection/language_mapping.py +18 -0
  12. dateparser/data/__init__.py +2 -0
  13. dateparser/data/date_translation_data/__init__.py +0 -0
  14. dateparser/data/date_translation_data/af.py +242 -0
  15. dateparser/data/date_translation_data/agq.py +169 -0
  16. dateparser/data/date_translation_data/ak.py +169 -0
  17. dateparser/data/date_translation_data/am.py +222 -0
  18. dateparser/data/date_translation_data/ar.py +574 -0
  19. dateparser/data/date_translation_data/as.py +164 -0
  20. dateparser/data/date_translation_data/asa.py +168 -0
  21. dateparser/data/date_translation_data/ast.py +280 -0
  22. dateparser/data/date_translation_data/az-Cyrl.py +168 -0
  23. dateparser/data/date_translation_data/az-Latn.py +217 -0
  24. dateparser/data/date_translation_data/az.py +217 -0
  25. dateparser/data/date_translation_data/bas.py +169 -0
  26. dateparser/data/date_translation_data/be.py +340 -0
  27. dateparser/data/date_translation_data/bem.py +161 -0
  28. dateparser/data/date_translation_data/bez.py +169 -0
  29. dateparser/data/date_translation_data/bg.py +345 -0
  30. dateparser/data/date_translation_data/bm.py +167 -0
  31. dateparser/data/date_translation_data/bn.py +241 -0
  32. dateparser/data/date_translation_data/bo.py +185 -0
  33. dateparser/data/date_translation_data/br.py +226 -0
  34. dateparser/data/date_translation_data/brx.py +157 -0
  35. dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
  36. dateparser/data/date_translation_data/bs-Latn.py +248 -0
  37. dateparser/data/date_translation_data/bs.py +248 -0
  38. dateparser/data/date_translation_data/ca.py +313 -0
  39. dateparser/data/date_translation_data/ce.py +225 -0
  40. dateparser/data/date_translation_data/cgg.py +169 -0
  41. dateparser/data/date_translation_data/chr.py +240 -0
  42. dateparser/data/date_translation_data/ckb.py +154 -0
  43. dateparser/data/date_translation_data/cs.py +316 -0
  44. dateparser/data/date_translation_data/cy.py +217 -0
  45. dateparser/data/date_translation_data/da.py +296 -0
  46. dateparser/data/date_translation_data/dav.py +169 -0
  47. dateparser/data/date_translation_data/de.py +357 -0
  48. dateparser/data/date_translation_data/dje.py +167 -0
  49. dateparser/data/date_translation_data/dsb.py +270 -0
  50. dateparser/data/date_translation_data/dua.py +169 -0
  51. dateparser/data/date_translation_data/dyo.py +168 -0
  52. dateparser/data/date_translation_data/dz.py +225 -0
  53. dateparser/data/date_translation_data/ebu.py +169 -0
  54. dateparser/data/date_translation_data/ee.py +233 -0
  55. dateparser/data/date_translation_data/el.py +279 -0
  56. dateparser/data/date_translation_data/en.py +851 -0
  57. dateparser/data/date_translation_data/eo.py +169 -0
  58. dateparser/data/date_translation_data/es.py +499 -0
  59. dateparser/data/date_translation_data/et.py +233 -0
  60. dateparser/data/date_translation_data/eu.py +219 -0
  61. dateparser/data/date_translation_data/ewo.py +169 -0
  62. dateparser/data/date_translation_data/fa.py +270 -0
  63. dateparser/data/date_translation_data/ff.py +179 -0
  64. dateparser/data/date_translation_data/fi.py +345 -0
  65. dateparser/data/date_translation_data/fil.py +223 -0
  66. dateparser/data/date_translation_data/fo.py +256 -0
  67. dateparser/data/date_translation_data/fr.py +520 -0
  68. dateparser/data/date_translation_data/fur.py +223 -0
  69. dateparser/data/date_translation_data/fy.py +223 -0
  70. dateparser/data/date_translation_data/ga.py +238 -0
  71. dateparser/data/date_translation_data/gd.py +277 -0
  72. dateparser/data/date_translation_data/gl.py +253 -0
  73. dateparser/data/date_translation_data/gsw.py +179 -0
  74. dateparser/data/date_translation_data/gu.py +216 -0
  75. dateparser/data/date_translation_data/guz.py +170 -0
  76. dateparser/data/date_translation_data/gv.py +166 -0
  77. dateparser/data/date_translation_data/ha.py +176 -0
  78. dateparser/data/date_translation_data/haw.py +168 -0
  79. dateparser/data/date_translation_data/he.py +371 -0
  80. dateparser/data/date_translation_data/hi.py +261 -0
  81. dateparser/data/date_translation_data/hr.py +378 -0
  82. dateparser/data/date_translation_data/hsb.py +271 -0
  83. dateparser/data/date_translation_data/hu.py +297 -0
  84. dateparser/data/date_translation_data/hy.py +246 -0
  85. dateparser/data/date_translation_data/id.py +272 -0
  86. dateparser/data/date_translation_data/ig.py +168 -0
  87. dateparser/data/date_translation_data/ii.py +157 -0
  88. dateparser/data/date_translation_data/is.py +242 -0
  89. dateparser/data/date_translation_data/it.py +282 -0
  90. dateparser/data/date_translation_data/ja.py +286 -0
  91. dateparser/data/date_translation_data/jgo.py +188 -0
  92. dateparser/data/date_translation_data/jmc.py +168 -0
  93. dateparser/data/date_translation_data/ka.py +241 -0
  94. dateparser/data/date_translation_data/kab.py +169 -0
  95. dateparser/data/date_translation_data/kam.py +169 -0
  96. dateparser/data/date_translation_data/kde.py +169 -0
  97. dateparser/data/date_translation_data/kea.py +230 -0
  98. dateparser/data/date_translation_data/khq.py +167 -0
  99. dateparser/data/date_translation_data/ki.py +169 -0
  100. dateparser/data/date_translation_data/kk.py +228 -0
  101. dateparser/data/date_translation_data/kl.py +213 -0
  102. dateparser/data/date_translation_data/kln.py +171 -0
  103. dateparser/data/date_translation_data/km.py +198 -0
  104. dateparser/data/date_translation_data/kn.py +225 -0
  105. dateparser/data/date_translation_data/ko.py +207 -0
  106. dateparser/data/date_translation_data/kok.py +157 -0
  107. dateparser/data/date_translation_data/ks.py +152 -0
  108. dateparser/data/date_translation_data/ksb.py +168 -0
  109. dateparser/data/date_translation_data/ksf.py +169 -0
  110. dateparser/data/date_translation_data/ksh.py +192 -0
  111. dateparser/data/date_translation_data/kw.py +169 -0
  112. dateparser/data/date_translation_data/ky.py +240 -0
  113. dateparser/data/date_translation_data/lag.py +169 -0
  114. dateparser/data/date_translation_data/lb.py +233 -0
  115. dateparser/data/date_translation_data/lg.py +169 -0
  116. dateparser/data/date_translation_data/lkt.py +194 -0
  117. dateparser/data/date_translation_data/ln.py +179 -0
  118. dateparser/data/date_translation_data/lo.py +228 -0
  119. dateparser/data/date_translation_data/lrc.py +154 -0
  120. dateparser/data/date_translation_data/lt.py +263 -0
  121. dateparser/data/date_translation_data/lu.py +169 -0
  122. dateparser/data/date_translation_data/luo.py +169 -0
  123. dateparser/data/date_translation_data/luy.py +168 -0
  124. dateparser/data/date_translation_data/lv.py +257 -0
  125. dateparser/data/date_translation_data/mas.py +173 -0
  126. dateparser/data/date_translation_data/mer.py +168 -0
  127. dateparser/data/date_translation_data/mfe.py +166 -0
  128. dateparser/data/date_translation_data/mg.py +168 -0
  129. dateparser/data/date_translation_data/mgh.py +169 -0
  130. dateparser/data/date_translation_data/mgo.py +151 -0
  131. dateparser/data/date_translation_data/mk.py +234 -0
  132. dateparser/data/date_translation_data/ml.py +217 -0
  133. dateparser/data/date_translation_data/mn.py +224 -0
  134. dateparser/data/date_translation_data/mr.py +229 -0
  135. dateparser/data/date_translation_data/ms.py +242 -0
  136. dateparser/data/date_translation_data/mt.py +175 -0
  137. dateparser/data/date_translation_data/mua.py +169 -0
  138. dateparser/data/date_translation_data/my.py +203 -0
  139. dateparser/data/date_translation_data/mzn.py +199 -0
  140. dateparser/data/date_translation_data/naq.py +169 -0
  141. dateparser/data/date_translation_data/nb.py +261 -0
  142. dateparser/data/date_translation_data/nd.py +169 -0
  143. dateparser/data/date_translation_data/ne.py +207 -0
  144. dateparser/data/date_translation_data/nl.py +273 -0
  145. dateparser/data/date_translation_data/nmg.py +169 -0
  146. dateparser/data/date_translation_data/nn.py +231 -0
  147. dateparser/data/date_translation_data/nnh.py +150 -0
  148. dateparser/data/date_translation_data/nus.py +166 -0
  149. dateparser/data/date_translation_data/nyn.py +169 -0
  150. dateparser/data/date_translation_data/om.py +173 -0
  151. dateparser/data/date_translation_data/or.py +157 -0
  152. dateparser/data/date_translation_data/os.py +203 -0
  153. dateparser/data/date_translation_data/pa-Arab.py +150 -0
  154. dateparser/data/date_translation_data/pa-Guru.py +221 -0
  155. dateparser/data/date_translation_data/pa.py +221 -0
  156. dateparser/data/date_translation_data/pl.py +416 -0
  157. dateparser/data/date_translation_data/ps.py +150 -0
  158. dateparser/data/date_translation_data/pt.py +981 -0
  159. dateparser/data/date_translation_data/qu.py +176 -0
  160. dateparser/data/date_translation_data/rm.py +166 -0
  161. dateparser/data/date_translation_data/rn.py +169 -0
  162. dateparser/data/date_translation_data/ro.py +270 -0
  163. dateparser/data/date_translation_data/rof.py +157 -0
  164. dateparser/data/date_translation_data/ru.py +442 -0
  165. dateparser/data/date_translation_data/rw.py +169 -0
  166. dateparser/data/date_translation_data/rwk.py +168 -0
  167. dateparser/data/date_translation_data/sah.py +219 -0
  168. dateparser/data/date_translation_data/saq.py +169 -0
  169. dateparser/data/date_translation_data/sbp.py +169 -0
  170. dateparser/data/date_translation_data/se.py +280 -0
  171. dateparser/data/date_translation_data/seh.py +169 -0
  172. dateparser/data/date_translation_data/ses.py +167 -0
  173. dateparser/data/date_translation_data/sg.py +169 -0
  174. dateparser/data/date_translation_data/shi-Latn.py +169 -0
  175. dateparser/data/date_translation_data/shi-Tfng.py +169 -0
  176. dateparser/data/date_translation_data/shi.py +169 -0
  177. dateparser/data/date_translation_data/si.py +220 -0
  178. dateparser/data/date_translation_data/sk.py +327 -0
  179. dateparser/data/date_translation_data/sl.py +244 -0
  180. dateparser/data/date_translation_data/smn.py +176 -0
  181. dateparser/data/date_translation_data/sn.py +169 -0
  182. dateparser/data/date_translation_data/so.py +179 -0
  183. dateparser/data/date_translation_data/sq.py +237 -0
  184. dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
  185. dateparser/data/date_translation_data/sr-Latn.py +306 -0
  186. dateparser/data/date_translation_data/sr.py +255 -0
  187. dateparser/data/date_translation_data/sv.py +309 -0
  188. dateparser/data/date_translation_data/sw.py +231 -0
  189. dateparser/data/date_translation_data/ta.py +264 -0
  190. dateparser/data/date_translation_data/te.py +239 -0
  191. dateparser/data/date_translation_data/teo.py +173 -0
  192. dateparser/data/date_translation_data/th.py +300 -0
  193. dateparser/data/date_translation_data/ti.py +173 -0
  194. dateparser/data/date_translation_data/tl.py +137 -0
  195. dateparser/data/date_translation_data/to.py +216 -0
  196. dateparser/data/date_translation_data/tr.py +259 -0
  197. dateparser/data/date_translation_data/twq.py +167 -0
  198. dateparser/data/date_translation_data/tzm.py +169 -0
  199. dateparser/data/date_translation_data/ug.py +203 -0
  200. dateparser/data/date_translation_data/uk.py +502 -0
  201. dateparser/data/date_translation_data/ur.py +256 -0
  202. dateparser/data/date_translation_data/uz-Arab.py +167 -0
  203. dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
  204. dateparser/data/date_translation_data/uz-Latn.py +216 -0
  205. dateparser/data/date_translation_data/uz.py +216 -0
  206. dateparser/data/date_translation_data/vi.py +260 -0
  207. dateparser/data/date_translation_data/vun.py +168 -0
  208. dateparser/data/date_translation_data/wae.py +224 -0
  209. dateparser/data/date_translation_data/xog.py +169 -0
  210. dateparser/data/date_translation_data/yav.py +169 -0
  211. dateparser/data/date_translation_data/yi.py +178 -0
  212. dateparser/data/date_translation_data/yo.py +263 -0
  213. dateparser/data/date_translation_data/yue.py +203 -0
  214. dateparser/data/date_translation_data/zgh.py +169 -0
  215. dateparser/data/date_translation_data/zh-Hans.py +240 -0
  216. dateparser/data/date_translation_data/zh-Hant.py +402 -0
  217. dateparser/data/date_translation_data/zh.py +273 -0
  218. dateparser/data/date_translation_data/zu.py +196 -0
  219. dateparser/data/languages_info.py +826 -0
  220. dateparser/date.py +599 -0
  221. dateparser/date_parser.py +55 -0
  222. dateparser/freshness_date_parser.py +156 -0
  223. dateparser/languages/__init__.py +2 -0
  224. dateparser/languages/dictionary.py +352 -0
  225. dateparser/languages/loader.py +224 -0
  226. dateparser/languages/locale.py +625 -0
  227. dateparser/languages/validation.py +467 -0
  228. dateparser/parser.py +742 -0
  229. dateparser/search/__init__.py +71 -0
  230. dateparser/search/detection.py +78 -0
  231. dateparser/search/search.py +297 -0
  232. dateparser/search/text_detection.py +89 -0
  233. dateparser/timezone_parser.py +91 -0
  234. dateparser/timezones.py +469 -0
  235. dateparser/utils/__init__.py +257 -0
  236. dateparser/utils/strptime.py +108 -0
  237. dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
  238. dateparser-1.2.1.dist-info/LICENSE +12 -0
  239. dateparser-1.2.1.dist-info/METADATA +864 -0
  240. dateparser-1.2.1.dist-info/RECORD +256 -0
  241. dateparser-1.2.1.dist-info/WHEEL +5 -0
  242. dateparser-1.2.1.dist-info/entry_points.txt +2 -0
  243. dateparser-1.2.1.dist-info/top_level.txt +4 -0
  244. dateparser_cli/__init__.py +0 -0
  245. dateparser_cli/cli.py +36 -0
  246. dateparser_cli/exceptions.py +2 -0
  247. dateparser_cli/fasttext_manager.py +42 -0
  248. dateparser_cli/utils.py +27 -0
  249. dateparser_data/__init__.py +0 -0
  250. dateparser_data/settings.py +33 -0
  251. dateparser_scripts/__init__.py +0 -0
  252. dateparser_scripts/get_cldr_data.py +567 -0
  253. dateparser_scripts/order_languages.py +217 -0
  254. dateparser_scripts/update_supported_languages_and_locales.py +48 -0
  255. dateparser_scripts/utils.py +73 -0
  256. dateparser_scripts/write_complete_data.py +129 -0
dateparser/conf.py ADDED
@@ -0,0 +1,267 @@
1
+ import hashlib
2
+ from datetime import datetime
3
+ from functools import wraps
4
+
5
+ from dateparser.data.languages_info import language_order
6
+
7
+ from .parser import date_order_chart
8
+ from .utils import registry
9
+
10
+
11
@registry
class Settings:
    """Hold the configuration values that steer dateparser's parsing.

    An instance exposes every setting as a plain attribute. The settings
    currently supported are:

    * `DATE_ORDER`
    * `PREFER_LOCALE_DATE_ORDER`
    * `TIMEZONE`
    * `TO_TIMEZONE`
    * `RETURN_AS_TIMEZONE_AWARE`
    * `PREFER_MONTH_OF_YEAR`
    * `PREFER_DAY_OF_MONTH`
    * `PREFER_DATES_FROM`
    * `RELATIVE_BASE`
    * `STRICT_PARSING`
    * `REQUIRE_PARTS`
    * `SKIP_TOKENS`
    * `NORMALIZE`
    * `RETURN_TIME_AS_PERIOD`
    * `PARSERS`
    * `DEFAULT_LANGUAGES`
    * `LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD`
    * `CACHE_SIZE_LIMIT`
    """

    # True on instances built from the packaged defaults; `replace` sets it
    # to False on the copies it creates.
    _default = True
    # Class-level cache of the defaults read from `dateparser_data.settings`.
    _pyfile_data = None
    # The user-supplied overrides, as stored by `replace`.
    _mod_settings = dict()

    def __init__(self, settings=None):
        """Populate attributes from `settings`, or from the packaged defaults
        when `settings` is missing or empty."""
        source = settings if settings else self._get_settings_from_pyfile()
        self._updateall(source.items())

    @classmethod
    def get_key(cls, settings=None):
        """Return a stable identifier for a settings mapping.

        Falsy input yields ``"default"``; otherwise the sorted
        ``"<key>-<value>"`` pairs are concatenated and MD5-hashed.
        """
        if not settings:
            return "default"

        pairs = sorted("%s-%s" % (key, str(settings[key])) for key in settings)
        joined = "".join(pairs)
        return hashlib.md5(joined.encode("utf-8")).hexdigest()

    @classmethod
    def _get_settings_from_pyfile(cls):
        """Return the packaged default settings, importing them on first use."""
        if cls._pyfile_data:
            return cls._pyfile_data

        from dateparser_data import settings as packaged_settings

        cls._pyfile_data = packaged_settings.settings
        return cls._pyfile_data

    def _updateall(self, iterable):
        """Set one attribute on this instance per ``(name, value)`` pair."""
        for name, value in iterable:
            setattr(self, name, value)

    def replace(self, mod_settings=None, **kwds):
        """Return a new instance with `kwds` layered over this one.

        :raises TypeError: if any keyword value is ``None``.
        """
        for name, value in kwds.items():
            if value is None:
                raise TypeError('Invalid {{"{}": {}}}'.format(name, value))

        # Carry over the current value of every known setting that the
        # caller did not override.
        for name in self._get_settings_from_pyfile().keys():
            kwds.setdefault(name, getattr(self, name))

        kwds["_default"] = False
        if mod_settings:
            kwds["_mod_settings"] = mod_settings

        return self.__class__(settings=kwds)


settings = Settings()
82
+
83
+
84
def apply_settings(f):
    """Decorator that normalises the ``settings`` keyword argument of `f`.

    A missing or falsy ``settings`` becomes the module-level default
    instance; a ``dict`` is turned into a `Settings` instance through
    ``settings.replace``. Anything else that is not a `Settings` instance is
    rejected with ``TypeError``.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        mod_settings = kwargs.get("settings")
        resolved = mod_settings or settings

        if isinstance(resolved, dict):
            # Keep the raw dict around as `mod_settings` so that only the
            # user-modified values can be validated later.
            resolved = settings.replace(mod_settings=mod_settings, **resolved)

        if not isinstance(resolved, Settings):
            raise TypeError(
                "settings can only be either dict or instance of Settings class"
            )

        kwargs["settings"] = resolved
        return f(*args, **kwargs)

    return wrapper
103
+
104
+
105
+ class SettingValidationError(ValueError):
106
+ pass
107
+
108
+
109
+ def _check_repeated_values(setting_name, setting_value):
110
+ if len(setting_value) != len(set(setting_value)):
111
+ raise SettingValidationError(
112
+ 'There are repeated values in the "{}" setting'.format(setting_name)
113
+ )
114
+ return
115
+
116
+
117
+ def _check_require_part(setting_name, setting_value):
118
+ """Returns `True` if the provided list of parts contains valid values"""
119
+ invalid_values = set(setting_value) - {"day", "month", "year"}
120
+ if invalid_values:
121
+ raise SettingValidationError(
122
+ '"{}" setting contains invalid values: {}'.format(
123
+ setting_name, ", ".join(invalid_values)
124
+ )
125
+ )
126
+ _check_repeated_values(setting_name, setting_value)
127
+
128
+
129
+ def _check_parsers(setting_name, setting_value):
130
+ """Returns `True` if the provided list of parsers contains valid values"""
131
+ existing_parsers = [
132
+ "timestamp",
133
+ "relative-time",
134
+ "custom-formats",
135
+ "absolute-time",
136
+ "no-spaces-time",
137
+ "negative-timestamp",
138
+ ] # FIXME: Extract the list of existing parsers from another place (#798)
139
+ unknown_parsers = set(setting_value) - set(existing_parsers)
140
+ if unknown_parsers:
141
+ raise SettingValidationError(
142
+ 'Found unknown parsers in the "{}" setting: {}'.format(
143
+ setting_name, ", ".join(unknown_parsers)
144
+ )
145
+ )
146
+ _check_repeated_values(setting_name, setting_value)
147
+
148
+
149
def _check_default_languages(setting_name, setting_value):
    """Validate a list of default language codes.

    Every entry must be present in ``language_order`` and appear at most
    once.

    :raises SettingValidationError: on unsupported or repeated entries.
    """
    unsupported_languages = set(setting_value).difference(language_order)
    if unsupported_languages:
        listing = ", ".join(repr(language) for language in unsupported_languages)
        raise SettingValidationError(
            "Found invalid languages in the '{}' setting: {}".format(
                setting_name, listing
            )
        )
    _check_repeated_values(setting_name, setting_value)
158
+
159
+
160
def _check_between_0_and_1(setting_name, setting_value):
    """Ensure `setting_value` lies within the closed interval [0, 1].

    :raises SettingValidationError: if the value is outside that interval.
    """
    if 0 <= setting_value <= 1:
        return

    message = "{} is not a valid value for {}. It can take values between 0 and 1."
    raise SettingValidationError(message.format(setting_value, setting_name))
170
+
171
+
172
def check_settings(settings):
    """
    Validate the user-modified settings of a `Settings` instance.

    Only the entries in ``settings._mod_settings`` are inspected. Each one
    must be a known setting name, have the expected type, belong to the set
    of allowed values (when one is declared) and pass its setting-specific
    check (when one is declared).

    :raises SettingValidationError: on the first violation found.
    """
    period_choices = ("current", "first", "last")

    # For every known setting: the expected "type", optionally the closed
    # set of allowed "values", and optionally an "extra_check" callable
    # invoked as extra_check(setting_name, setting_value).
    known_settings = {
        "DATE_ORDER": {
            "values": tuple(date_order_chart.keys()),
            "type": str,
        },
        # Invalid timezones are not checked here because they already raise
        # an error.
        "TIMEZONE": {"type": str},
        # Defaults to None, but None is not allowed to be passed directly;
        # the set of values is open-ended.
        "TO_TIMEZONE": {"type": str},
        # Defaults to 'default', but that is not allowed to be passed
        # directly.
        "RETURN_AS_TIMEZONE_AWARE": {"type": bool},
        "PREFER_MONTH_OF_YEAR": {"values": period_choices, "type": str},
        "PREFER_DAY_OF_MONTH": {"values": period_choices, "type": str},
        "PREFER_DATES_FROM": {
            "values": ("current_period", "past", "future"),
            "type": str,
        },
        # The set of values is open-ended.
        "RELATIVE_BASE": {"type": datetime},
        "STRICT_PARSING": {"type": bool},
        # Allowed values are enforced by the extra check.
        "REQUIRE_PARTS": {"type": list, "extra_check": _check_require_part},
        # The set of values is open-ended.
        "SKIP_TOKENS": {"type": list},
        "NORMALIZE": {"type": bool},
        "RETURN_TIME_AS_PERIOD": {"type": bool},
        # Allowed values are enforced by the extra check.
        "PARSERS": {"type": list, "extra_check": _check_parsers},
        "FUZZY": {"type": bool},
        "PREFER_LOCALE_DATE_ORDER": {"type": bool},
        "DEFAULT_LANGUAGES": {
            "type": list,
            "extra_check": _check_default_languages,
        },
        "LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD": {
            "type": float,
            "extra_check": _check_between_0_and_1,
        },
        "CACHE_SIZE_LIMIT": {"type": int},
    }

    overrides = settings._mod_settings  # check only modified settings

    # First pass: reject unknown setting names before looking at any value.
    for name in overrides:
        if name not in known_settings:
            raise SettingValidationError('"{}" is not a valid setting'.format(name))

    # Second pass: type, allowed values, then setting-specific checks.
    for name, value in overrides.items():
        spec = known_settings[name]
        expected_type = spec["type"]

        if not isinstance(value, expected_type):
            raise SettingValidationError(
                '"{}" must be "{}", not "{}".'.format(
                    name, expected_type.__name__, type(value).__name__
                )
            )

        allowed = spec.get("values")
        if allowed and value not in allowed:
            raise SettingValidationError(
                '"{}" is not a valid value for "{}", it should be: "{}" or "{}"'.format(
                    value,
                    name,
                    '", "'.join(allowed[:-1]),
                    allowed[-1],
                )
            )

        extra_check = spec.get("extra_check")
        if extra_check:
            extra_check(name, value)
File without changes
@@ -0,0 +1,43 @@
1
+ import os
2
+
3
+ import fasttext
4
+
5
+ from dateparser_cli.exceptions import FastTextModelNotFoundException
6
+ from dateparser_cli.fasttext_manager import fasttext_downloader
7
+ from dateparser_cli.utils import create_data_model_home, dateparser_model_home
8
+
9
_supported_models = ["large.bin", "small.bin"]
_DEFAULT_MODEL = "small"


class _FastTextCache:
    # Module-wide slot for the loaded fastText model so that the model file
    # is read at most once per process.
    model = None


def _list_downloaded_models():
    """Return the supported model file names present in the model directory."""
    return [
        file for file in os.listdir(dateparser_model_home) if file in _supported_models
    ]


def _load_fasttext_model():
    """Return the fastText language-identification model, loading it if needed.

    The loaded model is cached on `_FastTextCache`. When no supported model
    file is present in ``dateparser_model_home``, the default model is
    requested from ``fasttext_downloader`` and the directory is listed once
    more.

    :raises FastTextModelNotFoundException: if no usable model file exists,
        including after the download attempt.
    """
    if _FastTextCache.model:
        return _FastTextCache.model

    # NOTE(review): presumably ensures the model directory exists before it
    # is listed; confirm against dateparser_cli.utils.
    create_data_model_home()

    downloaded_models = _list_downloaded_models()
    if not downloaded_models:
        fasttext_downloader(_DEFAULT_MODEL)
        # Re-check exactly once. Recursing here would never terminate if the
        # download did not leave a supported model file behind.
        downloaded_models = _list_downloaded_models()
        if not downloaded_models:
            raise FastTextModelNotFoundException("Fasttext model file not found")

    model_path = os.path.join(dateparser_model_home, downloaded_models[0])
    if not os.path.isfile(model_path):
        raise FastTextModelNotFoundException("Fasttext model file not found")

    _FastTextCache.model = fasttext.load_model(model_path)
    return _FastTextCache.model
32
+
33
+
34
def detect_languages(text, confidence_threshold):
    """Return the language codes fastText predicts for `text`.

    Line breaks are removed from `text` before prediction. Only predictions
    whose probability is strictly greater than `confidence_threshold` are
    kept, with the ``__label__`` prefix stripped from each label.
    """
    model = _load_fasttext_model()
    single_line = text.replace("\n", " ").replace("\r", "")

    prediction = model.predict(single_line)
    probabilities = prediction[1]
    labels = prediction[0]

    return [
        labels[position].replace("__label__", "")
        for position, probability in enumerate(probabilities)
        if probability > confidence_threshold
    ]
@@ -0,0 +1,37 @@
1
+ import langdetect
2
+
3
+ # The below _Factory is set to prevent setting global state of the library
4
+ # but still get consistent results.
5
+ # Refer : https://github.com/Mimino666/langdetect
6
+
7
+
8
class _Factory:
    # Module-private DetectorFactory instance, created on first use by
    # `_init_factory`.
    data = None


def _init_factory():
    """Create and cache the private, seeded ``DetectorFactory`` on first use.

    The factory is stored in ``_Factory.data`` only after its profiles have
    been loaded and its seed set. If ``load_profile`` raises, nothing is
    cached and the next call retries instead of reusing a half-initialised
    factory.
    """
    if _Factory.data is not None:
        return

    factory = langdetect.detector_factory.DetectorFactory()
    factory.load_profile(langdetect.detector_factory.PROFILES_DIRECTORY)
    factory.seed = 0
    _Factory.data = factory
17
+
18
+
19
def _get_language_probablities(text):
    """Return the language candidates langdetect reports for `text`.

    A fresh detector is created from the module's private factory, fed
    `text`, and asked for its probabilities.
    """
    _init_factory()
    language_detector = _Factory.data.create()
    language_detector.append(text)
    probabilities = language_detector.get_probabilities()
    return probabilities
24
+
25
+
26
def detect_languages(text, confidence_threshold):
    """Return the language codes langdetect assigns to `text`.

    Only candidates whose probability is strictly greater than
    `confidence_threshold` are returned. Detection failures yield whatever
    was collected so far (normally an empty list).
    """
    language_codes = []
    try:
        language_codes.extend(
            candidate.lang
            for candidate in _get_language_probablities(text)
            if candidate.prob > confidence_threshold
        )
    except langdetect.lang_detect_exception.LangDetectException:
        # Raised for empty strings or inputs without letters such as
        # `10-10-2021`. Those inputs are common, so they are ignored.
        pass
    return language_codes
@@ -0,0 +1,18 @@
1
+ from dateparser.data.languages_info import language_map
2
+
3
+
4
def map_languages(language_codes):
    """
    Translate detected language codes into the supported language codes.

    Codes that are not keys of ``language_map`` are dropped; every other code
    contributes all of its mapped entries, in input order.

    :param language_codes:
        A list of language codes, e.g. ['en', 'es'] in ISO 639 Standard.
    :type language_codes: list
    :return: Returns list[str] representing supported languages
    :rtype: list[str]
    """
    supported = []
    for detected_code in language_codes:
        if detected_code in language_map:
            supported.extend(language_map[detected_code])
    return supported
@@ -0,0 +1,2 @@
1
+ from dateparser.data import date_translation_data
2
+ from .languages_info import language_order, language_locale_dict
File without changes
@@ -0,0 +1,242 @@
1
# Afrikaans ("af") date translation data.
#
# Each English term maps to its Afrikaans spellings. "relative-type" holds
# fixed phrases, and "relative-type-regex" holds patterns whose first capture
# group is the numeric amount substituted for "\1" in the key.
#
# The minute pattern "oor N minuut" is listed under "in \1 minute" only, and
# "in \1 month" carries the "maand"/"maande" forms that mirror the
# "\1 month ago" entries.
info = {
    "name": "af",
    "date_order": "YMD",
    "january": [
        "jan",
        "januarie"
    ],
    "february": [
        "feb",
        "februarie"
    ],
    "march": [
        "maart",
        "mrt"
    ],
    "april": [
        "apr",
        "april"
    ],
    "may": [
        "mei"
    ],
    "june": [
        "jun",
        "junie"
    ],
    "july": [
        "jul",
        "julie"
    ],
    "august": [
        "aug",
        "augustus"
    ],
    "september": [
        "sep",
        "september"
    ],
    "october": [
        "okt",
        "oktober"
    ],
    "november": [
        "nov",
        "november"
    ],
    "december": [
        "des",
        "desember"
    ],
    "monday": [
        "ma",
        "maandag"
    ],
    "tuesday": [
        "di",
        "dinsdag"
    ],
    "wednesday": [
        "wo",
        "woensdag"
    ],
    "thursday": [
        "do",
        "donderdag"
    ],
    "friday": [
        "vr",
        "vrydag"
    ],
    "saturday": [
        "sa",
        "saterdag"
    ],
    "sunday": [
        "so",
        "sondag"
    ],
    "am": [
        "vm"
    ],
    "pm": [
        "nm"
    ],
    "year": [
        "j",
        "jaar"
    ],
    "month": [
        "maand",
        "md"
    ],
    "week": [
        "week",
        "wk"
    ],
    "day": [
        "d",
        "dag"
    ],
    "hour": [
        "u",
        "uur"
    ],
    "minute": [
        "m",
        "min",
        "minuut"
    ],
    "second": [
        "s",
        "sek",
        "sekonde"
    ],
    "relative-type": {
        "0 day ago": [
            "vandag"
        ],
        "0 hour ago": [
            "hierdie uur"
        ],
        "0 minute ago": [
            "hierdie minuut"
        ],
        "0 month ago": [
            "vandeesmaand"
        ],
        "0 second ago": [
            "nou"
        ],
        "0 week ago": [
            "vandeesweek"
        ],
        "0 year ago": [
            "hierdie jaar"
        ],
        "1 day ago": [
            "gister"
        ],
        "1 month ago": [
            "verlede maand"
        ],
        "1 week ago": [
            "verlede week"
        ],
        "1 year ago": [
            "verlede jaar"
        ],
        "in 1 day": [
            "môre"
        ],
        "in 1 month": [
            "volgende maand"
        ],
        "in 1 week": [
            "volgende week"
        ],
        "in 1 year": [
            "volgende jaar"
        ]
    },
    "relative-type-regex": {
        "\\1 day ago": [
            "(\\d+[.,]?\\d*) dae gelede",
            "(\\d+[.,]?\\d*) dag gelede"
        ],
        "\\1 hour ago": [
            "(\\d+[.,]?\\d*) uur gelede"
        ],
        "\\1 minute ago": [
            "(\\d+[.,]?\\d*) min gelede",
            "(\\d+[.,]?\\d*) minute gelede",
            "(\\d+[.,]?\\d*) minuut gelede"
        ],
        "\\1 month ago": [
            "(\\d+[.,]?\\d*) maand gelede",
            "(\\d+[.,]?\\d*) maande gelede",
            "(\\d+[.,]?\\d*) md gelede"
        ],
        "\\1 second ago": [
            "(\\d+[.,]?\\d*) sek gelede",
            "(\\d+[.,]?\\d*) sekonde gelede",
            "(\\d+[.,]?\\d*) sekondes gelede"
        ],
        "\\1 week ago": [
            "(\\d+[.,]?\\d*) w gelede",
            "(\\d+[.,]?\\d*) week gelede",
            "(\\d+[.,]?\\d*) weke gelede"
        ],
        "\\1 year ago": [
            "(\\d+[.,]?\\d*) jaar gelede"
        ],
        "in \\1 day": [
            "oor (\\d+[.,]?\\d*) dae",
            "oor (\\d+[.,]?\\d*) dag"
        ],
        "in \\1 hour": [
            "oor (\\d+[.,]?\\d*) uur"
        ],
        "in \\1 minute": [
            "oor (\\d+[.,]?\\d*) min",
            "oor (\\d+[.,]?\\d*) minuut"
        ],
        "in \\1 month": [
            "oor (\\d+[.,]?\\d*) maand",
            "oor (\\d+[.,]?\\d*) maande",
            "oor (\\d+[.,]?\\d*) md"
        ],
        "in \\1 second": [
            "oor (\\d+[.,]?\\d*) sek",
            "oor (\\d+[.,]?\\d*) sekonde",
            "oor (\\d+[.,]?\\d*) sekondes"
        ],
        "in \\1 week": [
            "oor (\\d+[.,]?\\d*) w",
            "oor (\\d+[.,]?\\d*) week",
            "oor (\\d+[.,]?\\d*) weke"
        ],
        "in \\1 year": [
            "oor (\\d+[.,]?\\d*) jaar"
        ]
    },
    "locale_specific": {
        "af-NA": {
            "name": "af-NA"
        }
    },
    "skip": [
        " ",
        "'",
        ",",
        "-",
        ".",
        "/",
        ";",
        "@",
        "[",
        "]",
        "|",
        ","
    ]
}