dateparser 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateparser/__init__.py +82 -0
- dateparser/calendars/__init__.py +144 -0
- dateparser/calendars/hijri.py +6 -0
- dateparser/calendars/hijri_parser.py +60 -0
- dateparser/calendars/jalali.py +9 -0
- dateparser/calendars/jalali_parser.py +184 -0
- dateparser/conf.py +267 -0
- dateparser/custom_language_detection/__init__.py +0 -0
- dateparser/custom_language_detection/fasttext.py +43 -0
- dateparser/custom_language_detection/langdetect.py +37 -0
- dateparser/custom_language_detection/language_mapping.py +18 -0
- dateparser/data/__init__.py +2 -0
- dateparser/data/date_translation_data/__init__.py +0 -0
- dateparser/data/date_translation_data/af.py +242 -0
- dateparser/data/date_translation_data/agq.py +169 -0
- dateparser/data/date_translation_data/ak.py +169 -0
- dateparser/data/date_translation_data/am.py +222 -0
- dateparser/data/date_translation_data/ar.py +574 -0
- dateparser/data/date_translation_data/as.py +164 -0
- dateparser/data/date_translation_data/asa.py +168 -0
- dateparser/data/date_translation_data/ast.py +280 -0
- dateparser/data/date_translation_data/az-Cyrl.py +168 -0
- dateparser/data/date_translation_data/az-Latn.py +217 -0
- dateparser/data/date_translation_data/az.py +217 -0
- dateparser/data/date_translation_data/bas.py +169 -0
- dateparser/data/date_translation_data/be.py +340 -0
- dateparser/data/date_translation_data/bem.py +161 -0
- dateparser/data/date_translation_data/bez.py +169 -0
- dateparser/data/date_translation_data/bg.py +345 -0
- dateparser/data/date_translation_data/bm.py +167 -0
- dateparser/data/date_translation_data/bn.py +241 -0
- dateparser/data/date_translation_data/bo.py +185 -0
- dateparser/data/date_translation_data/br.py +226 -0
- dateparser/data/date_translation_data/brx.py +157 -0
- dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
- dateparser/data/date_translation_data/bs-Latn.py +248 -0
- dateparser/data/date_translation_data/bs.py +248 -0
- dateparser/data/date_translation_data/ca.py +313 -0
- dateparser/data/date_translation_data/ce.py +225 -0
- dateparser/data/date_translation_data/cgg.py +169 -0
- dateparser/data/date_translation_data/chr.py +240 -0
- dateparser/data/date_translation_data/ckb.py +154 -0
- dateparser/data/date_translation_data/cs.py +316 -0
- dateparser/data/date_translation_data/cy.py +217 -0
- dateparser/data/date_translation_data/da.py +296 -0
- dateparser/data/date_translation_data/dav.py +169 -0
- dateparser/data/date_translation_data/de.py +357 -0
- dateparser/data/date_translation_data/dje.py +167 -0
- dateparser/data/date_translation_data/dsb.py +270 -0
- dateparser/data/date_translation_data/dua.py +169 -0
- dateparser/data/date_translation_data/dyo.py +168 -0
- dateparser/data/date_translation_data/dz.py +225 -0
- dateparser/data/date_translation_data/ebu.py +169 -0
- dateparser/data/date_translation_data/ee.py +233 -0
- dateparser/data/date_translation_data/el.py +279 -0
- dateparser/data/date_translation_data/en.py +851 -0
- dateparser/data/date_translation_data/eo.py +169 -0
- dateparser/data/date_translation_data/es.py +499 -0
- dateparser/data/date_translation_data/et.py +233 -0
- dateparser/data/date_translation_data/eu.py +219 -0
- dateparser/data/date_translation_data/ewo.py +169 -0
- dateparser/data/date_translation_data/fa.py +270 -0
- dateparser/data/date_translation_data/ff.py +179 -0
- dateparser/data/date_translation_data/fi.py +345 -0
- dateparser/data/date_translation_data/fil.py +223 -0
- dateparser/data/date_translation_data/fo.py +256 -0
- dateparser/data/date_translation_data/fr.py +520 -0
- dateparser/data/date_translation_data/fur.py +223 -0
- dateparser/data/date_translation_data/fy.py +223 -0
- dateparser/data/date_translation_data/ga.py +238 -0
- dateparser/data/date_translation_data/gd.py +277 -0
- dateparser/data/date_translation_data/gl.py +253 -0
- dateparser/data/date_translation_data/gsw.py +179 -0
- dateparser/data/date_translation_data/gu.py +216 -0
- dateparser/data/date_translation_data/guz.py +170 -0
- dateparser/data/date_translation_data/gv.py +166 -0
- dateparser/data/date_translation_data/ha.py +176 -0
- dateparser/data/date_translation_data/haw.py +168 -0
- dateparser/data/date_translation_data/he.py +371 -0
- dateparser/data/date_translation_data/hi.py +261 -0
- dateparser/data/date_translation_data/hr.py +378 -0
- dateparser/data/date_translation_data/hsb.py +271 -0
- dateparser/data/date_translation_data/hu.py +297 -0
- dateparser/data/date_translation_data/hy.py +246 -0
- dateparser/data/date_translation_data/id.py +272 -0
- dateparser/data/date_translation_data/ig.py +168 -0
- dateparser/data/date_translation_data/ii.py +157 -0
- dateparser/data/date_translation_data/is.py +242 -0
- dateparser/data/date_translation_data/it.py +282 -0
- dateparser/data/date_translation_data/ja.py +286 -0
- dateparser/data/date_translation_data/jgo.py +188 -0
- dateparser/data/date_translation_data/jmc.py +168 -0
- dateparser/data/date_translation_data/ka.py +241 -0
- dateparser/data/date_translation_data/kab.py +169 -0
- dateparser/data/date_translation_data/kam.py +169 -0
- dateparser/data/date_translation_data/kde.py +169 -0
- dateparser/data/date_translation_data/kea.py +230 -0
- dateparser/data/date_translation_data/khq.py +167 -0
- dateparser/data/date_translation_data/ki.py +169 -0
- dateparser/data/date_translation_data/kk.py +228 -0
- dateparser/data/date_translation_data/kl.py +213 -0
- dateparser/data/date_translation_data/kln.py +171 -0
- dateparser/data/date_translation_data/km.py +198 -0
- dateparser/data/date_translation_data/kn.py +225 -0
- dateparser/data/date_translation_data/ko.py +207 -0
- dateparser/data/date_translation_data/kok.py +157 -0
- dateparser/data/date_translation_data/ks.py +152 -0
- dateparser/data/date_translation_data/ksb.py +168 -0
- dateparser/data/date_translation_data/ksf.py +169 -0
- dateparser/data/date_translation_data/ksh.py +192 -0
- dateparser/data/date_translation_data/kw.py +169 -0
- dateparser/data/date_translation_data/ky.py +240 -0
- dateparser/data/date_translation_data/lag.py +169 -0
- dateparser/data/date_translation_data/lb.py +233 -0
- dateparser/data/date_translation_data/lg.py +169 -0
- dateparser/data/date_translation_data/lkt.py +194 -0
- dateparser/data/date_translation_data/ln.py +179 -0
- dateparser/data/date_translation_data/lo.py +228 -0
- dateparser/data/date_translation_data/lrc.py +154 -0
- dateparser/data/date_translation_data/lt.py +263 -0
- dateparser/data/date_translation_data/lu.py +169 -0
- dateparser/data/date_translation_data/luo.py +169 -0
- dateparser/data/date_translation_data/luy.py +168 -0
- dateparser/data/date_translation_data/lv.py +257 -0
- dateparser/data/date_translation_data/mas.py +173 -0
- dateparser/data/date_translation_data/mer.py +168 -0
- dateparser/data/date_translation_data/mfe.py +166 -0
- dateparser/data/date_translation_data/mg.py +168 -0
- dateparser/data/date_translation_data/mgh.py +169 -0
- dateparser/data/date_translation_data/mgo.py +151 -0
- dateparser/data/date_translation_data/mk.py +234 -0
- dateparser/data/date_translation_data/ml.py +217 -0
- dateparser/data/date_translation_data/mn.py +224 -0
- dateparser/data/date_translation_data/mr.py +229 -0
- dateparser/data/date_translation_data/ms.py +242 -0
- dateparser/data/date_translation_data/mt.py +175 -0
- dateparser/data/date_translation_data/mua.py +169 -0
- dateparser/data/date_translation_data/my.py +203 -0
- dateparser/data/date_translation_data/mzn.py +199 -0
- dateparser/data/date_translation_data/naq.py +169 -0
- dateparser/data/date_translation_data/nb.py +261 -0
- dateparser/data/date_translation_data/nd.py +169 -0
- dateparser/data/date_translation_data/ne.py +207 -0
- dateparser/data/date_translation_data/nl.py +273 -0
- dateparser/data/date_translation_data/nmg.py +169 -0
- dateparser/data/date_translation_data/nn.py +231 -0
- dateparser/data/date_translation_data/nnh.py +150 -0
- dateparser/data/date_translation_data/nus.py +166 -0
- dateparser/data/date_translation_data/nyn.py +169 -0
- dateparser/data/date_translation_data/om.py +173 -0
- dateparser/data/date_translation_data/or.py +157 -0
- dateparser/data/date_translation_data/os.py +203 -0
- dateparser/data/date_translation_data/pa-Arab.py +150 -0
- dateparser/data/date_translation_data/pa-Guru.py +221 -0
- dateparser/data/date_translation_data/pa.py +221 -0
- dateparser/data/date_translation_data/pl.py +416 -0
- dateparser/data/date_translation_data/ps.py +150 -0
- dateparser/data/date_translation_data/pt.py +981 -0
- dateparser/data/date_translation_data/qu.py +176 -0
- dateparser/data/date_translation_data/rm.py +166 -0
- dateparser/data/date_translation_data/rn.py +169 -0
- dateparser/data/date_translation_data/ro.py +270 -0
- dateparser/data/date_translation_data/rof.py +157 -0
- dateparser/data/date_translation_data/ru.py +442 -0
- dateparser/data/date_translation_data/rw.py +169 -0
- dateparser/data/date_translation_data/rwk.py +168 -0
- dateparser/data/date_translation_data/sah.py +219 -0
- dateparser/data/date_translation_data/saq.py +169 -0
- dateparser/data/date_translation_data/sbp.py +169 -0
- dateparser/data/date_translation_data/se.py +280 -0
- dateparser/data/date_translation_data/seh.py +169 -0
- dateparser/data/date_translation_data/ses.py +167 -0
- dateparser/data/date_translation_data/sg.py +169 -0
- dateparser/data/date_translation_data/shi-Latn.py +169 -0
- dateparser/data/date_translation_data/shi-Tfng.py +169 -0
- dateparser/data/date_translation_data/shi.py +169 -0
- dateparser/data/date_translation_data/si.py +220 -0
- dateparser/data/date_translation_data/sk.py +327 -0
- dateparser/data/date_translation_data/sl.py +244 -0
- dateparser/data/date_translation_data/smn.py +176 -0
- dateparser/data/date_translation_data/sn.py +169 -0
- dateparser/data/date_translation_data/so.py +179 -0
- dateparser/data/date_translation_data/sq.py +237 -0
- dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
- dateparser/data/date_translation_data/sr-Latn.py +306 -0
- dateparser/data/date_translation_data/sr.py +255 -0
- dateparser/data/date_translation_data/sv.py +309 -0
- dateparser/data/date_translation_data/sw.py +231 -0
- dateparser/data/date_translation_data/ta.py +264 -0
- dateparser/data/date_translation_data/te.py +239 -0
- dateparser/data/date_translation_data/teo.py +173 -0
- dateparser/data/date_translation_data/th.py +300 -0
- dateparser/data/date_translation_data/ti.py +173 -0
- dateparser/data/date_translation_data/tl.py +137 -0
- dateparser/data/date_translation_data/to.py +216 -0
- dateparser/data/date_translation_data/tr.py +259 -0
- dateparser/data/date_translation_data/twq.py +167 -0
- dateparser/data/date_translation_data/tzm.py +169 -0
- dateparser/data/date_translation_data/ug.py +203 -0
- dateparser/data/date_translation_data/uk.py +502 -0
- dateparser/data/date_translation_data/ur.py +256 -0
- dateparser/data/date_translation_data/uz-Arab.py +167 -0
- dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
- dateparser/data/date_translation_data/uz-Latn.py +216 -0
- dateparser/data/date_translation_data/uz.py +216 -0
- dateparser/data/date_translation_data/vi.py +260 -0
- dateparser/data/date_translation_data/vun.py +168 -0
- dateparser/data/date_translation_data/wae.py +224 -0
- dateparser/data/date_translation_data/xog.py +169 -0
- dateparser/data/date_translation_data/yav.py +169 -0
- dateparser/data/date_translation_data/yi.py +178 -0
- dateparser/data/date_translation_data/yo.py +263 -0
- dateparser/data/date_translation_data/yue.py +203 -0
- dateparser/data/date_translation_data/zgh.py +169 -0
- dateparser/data/date_translation_data/zh-Hans.py +240 -0
- dateparser/data/date_translation_data/zh-Hant.py +402 -0
- dateparser/data/date_translation_data/zh.py +273 -0
- dateparser/data/date_translation_data/zu.py +196 -0
- dateparser/data/languages_info.py +826 -0
- dateparser/date.py +599 -0
- dateparser/date_parser.py +55 -0
- dateparser/freshness_date_parser.py +156 -0
- dateparser/languages/__init__.py +2 -0
- dateparser/languages/dictionary.py +352 -0
- dateparser/languages/loader.py +224 -0
- dateparser/languages/locale.py +625 -0
- dateparser/languages/validation.py +467 -0
- dateparser/parser.py +742 -0
- dateparser/search/__init__.py +71 -0
- dateparser/search/detection.py +78 -0
- dateparser/search/search.py +297 -0
- dateparser/search/text_detection.py +89 -0
- dateparser/timezone_parser.py +91 -0
- dateparser/timezones.py +469 -0
- dateparser/utils/__init__.py +257 -0
- dateparser/utils/strptime.py +108 -0
- dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
- dateparser-1.2.1.dist-info/LICENSE +12 -0
- dateparser-1.2.1.dist-info/METADATA +864 -0
- dateparser-1.2.1.dist-info/RECORD +256 -0
- dateparser-1.2.1.dist-info/WHEEL +5 -0
- dateparser-1.2.1.dist-info/entry_points.txt +2 -0
- dateparser-1.2.1.dist-info/top_level.txt +4 -0
- dateparser_cli/__init__.py +0 -0
- dateparser_cli/cli.py +36 -0
- dateparser_cli/exceptions.py +2 -0
- dateparser_cli/fasttext_manager.py +42 -0
- dateparser_cli/utils.py +27 -0
- dateparser_data/__init__.py +0 -0
- dateparser_data/settings.py +33 -0
- dateparser_scripts/__init__.py +0 -0
- dateparser_scripts/get_cldr_data.py +567 -0
- dateparser_scripts/order_languages.py +217 -0
- dateparser_scripts/update_supported_languages_and_locales.py +48 -0
- dateparser_scripts/utils.py +73 -0
- dateparser_scripts/write_complete_data.py +129 -0
dateparser/conf.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from functools import wraps
|
|
4
|
+
|
|
5
|
+
from dateparser.data.languages_info import language_order
|
|
6
|
+
|
|
7
|
+
from .parser import date_order_chart
|
|
8
|
+
from .utils import registry
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@registry
class Settings:
    """Container for the options that steer dateparser's parsing behavior.

    Every setting is stored as an attribute on the instance. The settings
    currently supported are:

    * `DATE_ORDER`
    * `PREFER_LOCALE_DATE_ORDER`
    * `TIMEZONE`
    * `TO_TIMEZONE`
    * `RETURN_AS_TIMEZONE_AWARE`
    * `PREFER_MONTH_OF_YEAR`
    * `PREFER_DAY_OF_MONTH`
    * `PREFER_DATES_FROM`
    * `RELATIVE_BASE`
    * `STRICT_PARSING`
    * `REQUIRE_PARTS`
    * `SKIP_TOKENS`
    * `NORMALIZE`
    * `RETURN_TIME_AS_PERIOD`
    * `PARSERS`
    * `DEFAULT_LANGUAGES`
    * `LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD`
    * `CACHE_SIZE_LIMIT`
    """

    # True on instances built from the packaged defaults; replace() sets it to False.
    _default = True
    # Class-level cache of the defaults loaded from dateparser_data.settings.
    _pyfile_data = None
    # The user-supplied overrides, as recorded by replace(); empty by default.
    _mod_settings = dict()

    def __init__(self, settings=None):
        """Populate the instance from ``settings`` or, if falsy, from the packaged defaults."""
        source = settings if settings else self._get_settings_from_pyfile()
        self._updateall(source.items())

    @classmethod
    def get_key(cls, settings=None):
        """Return ``"default"`` for falsy ``settings``, else an MD5 hex digest of its items.

        The digest is computed over the sorted ``"<key>-<value>"`` strings joined
        without a separator.
        """
        if not settings:
            return "default"

        parts = ["%s-%s" % (name, str(settings[name])) for name in settings]
        parts.sort()
        digest = hashlib.md5("".join(parts).encode("utf-8"))
        return digest.hexdigest()

    @classmethod
    def _get_settings_from_pyfile(cls):
        """Return the packaged default settings, importing them on first use."""
        if cls._pyfile_data:
            return cls._pyfile_data

        # Imported lazily, inside the method, exactly once per class.
        from dateparser_data import settings as packaged_defaults

        cls._pyfile_data = packaged_defaults.settings
        return cls._pyfile_data

    def _updateall(self, iterable):
        """Set one attribute on ``self`` for every ``(name, value)`` pair in ``iterable``."""
        for name, value in iterable:
            setattr(self, name, value)

    def replace(self, mod_settings=None, **kwds):
        """Return a new instance of the same class with ``kwds`` overriding current values.

        :param mod_settings: the raw overrides; stored on the new instance as
            ``_mod_settings`` when truthy.
        :param kwds: setting names mapped to their new values.
        :raises TypeError: if any value in ``kwds`` is ``None``.
        """
        for name, value in kwds.items():
            if value is None:
                raise TypeError('Invalid {{"{}": {}}}'.format(name, value))

        # Carry over the current value of every known setting that was not overridden.
        for name in self._get_settings_from_pyfile().keys():
            kwds.setdefault(name, getattr(self, name))

        kwds["_default"] = False
        if mod_settings:
            kwds["_mod_settings"] = mod_settings

        return self.__class__(settings=kwds)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Module-level Settings instance built with no overrides; apply_settings falls
# back to it when the caller passes no (or falsy) ``settings``.
settings = Settings()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def apply_settings(f):
    """Decorate ``f`` so that it always receives a `Settings` instance as ``settings``.

    The wrapper inspects the ``settings`` keyword argument only:

    * missing or falsy -> the module-level ``settings`` object is used;
    * a ``dict`` -> it is turned into a `Settings` instance through
      ``settings.replace``, with the dict also passed as ``mod_settings``;
    * a `Settings` instance -> passed through unchanged.

    :raises TypeError: if the resulting value is not a `Settings` instance.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        mod_settings = kwargs.get("settings")
        effective = mod_settings or settings

        if isinstance(effective, dict):
            effective = settings.replace(mod_settings=mod_settings, **effective)

        if not isinstance(effective, Settings):
            raise TypeError(
                "settings can only be either dict or instance of Settings class"
            )

        kwargs["settings"] = effective
        return f(*args, **kwargs)

    return wrapper
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class SettingValidationError(ValueError):
    """Raised when a user-supplied setting fails validation."""
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _check_repeated_values(setting_name, setting_value):
|
|
110
|
+
if len(setting_value) != len(set(setting_value)):
|
|
111
|
+
raise SettingValidationError(
|
|
112
|
+
'There are repeated values in the "{}" setting'.format(setting_name)
|
|
113
|
+
)
|
|
114
|
+
return
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _check_require_part(setting_name, setting_value):
    """Validate a list of required date parts.

    :param setting_name: name of the setting, used in error messages.
    :param setting_value: collection of parts; each must be one of
        ``"day"``, ``"month"`` or ``"year"`` and appear only once.
    :raises SettingValidationError: if an unknown part is present or a part
        is repeated.
    :return: ``None``; the function only raises on invalid input.
    """
    invalid_values = set(setting_value) - {"day", "month", "year"}
    if invalid_values:
        # str() keeps non-string entries from turning the report into a bare
        # TypeError; sorting makes the message independent of set ordering.
        listed = ", ".join(sorted(map(str, invalid_values)))
        raise SettingValidationError(
            '"{}" setting contains invalid values: {}'.format(setting_name, listed)
        )
    _check_repeated_values(setting_name, setting_value)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _check_parsers(setting_name, setting_value):
    """Validate a list of parser names.

    :param setting_name: name of the setting, used in error messages.
    :param setting_value: collection of parser names; each must be a known
        parser and appear only once.
    :raises SettingValidationError: if an unknown parser is present or a
        parser is repeated.
    :return: ``None``; the function only raises on invalid input.
    """
    existing_parsers = [
        "timestamp",
        "relative-time",
        "custom-formats",
        "absolute-time",
        "no-spaces-time",
        "negative-timestamp",
    ]  # FIXME: Extract the list of existing parsers from another place (#798)
    unknown_parsers = set(setting_value) - set(existing_parsers)
    if unknown_parsers:
        # str() keeps non-string entries from turning the report into a bare
        # TypeError; sorting makes the message independent of set ordering.
        listed = ", ".join(sorted(map(str, unknown_parsers)))
        raise SettingValidationError(
            'Found unknown parsers in the "{}" setting: {}'.format(
                setting_name, listed
            )
        )
    _check_repeated_values(setting_name, setting_value)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _check_default_languages(setting_name, setting_value):
    """Validate a list of default language codes.

    :param setting_name: name of the setting, used in error messages.
    :param setting_value: collection of language codes; each must be present
        in ``language_order`` and appear only once.
    :raises SettingValidationError: if an unsupported language is present or
        a language is repeated.
    :return: ``None``; the function only raises on invalid input.
    """
    unsupported_languages = set(setting_value) - set(language_order)
    if unsupported_languages:
        # Sorted so the message does not depend on set iteration order.
        listed = ", ".join(sorted(map(repr, unsupported_languages)))
        raise SettingValidationError(
            "Found invalid languages in the '{}' setting: {}".format(
                setting_name, listed
            )
        )
    _check_repeated_values(setting_name, setting_value)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _check_between_0_and_1(setting_name, setting_value):
|
|
161
|
+
is_valid = 0 <= setting_value <= 1
|
|
162
|
+
if not is_valid:
|
|
163
|
+
raise SettingValidationError(
|
|
164
|
+
"{} is not a valid value for {}. It can take values between 0 and "
|
|
165
|
+
"1.".format(
|
|
166
|
+
setting_value,
|
|
167
|
+
setting_name,
|
|
168
|
+
)
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def check_settings(settings):
    """
    Validate the user-modified settings carried by ``settings``.

    Only the entries of ``settings._mod_settings`` are examined. For each one
    the name must be known, the value must be an instance of the expected
    type, must belong to the allowed values (when a closed set is defined) and
    must pass the setting-specific extra check (when one is defined).

    :raises SettingValidationError: on the first violation found.
    """
    position_choices = ("current", "first", "last")

    schema = {
        "DATE_ORDER": {"values": tuple(date_order_chart.keys()), "type": str},
        # Invalid timezones are not checked here because they raise an error.
        "TIMEZONE": {"type": str},
        # Defaults to None, but None may not be passed explicitly;
        # any string is accepted.
        "TO_TIMEZONE": {"type": str},
        # Defaults to 'default', but that value may not be passed explicitly.
        "RETURN_AS_TIMEZONE_AWARE": {"type": bool},
        "PREFER_MONTH_OF_YEAR": {"values": position_choices, "type": str},
        "PREFER_DAY_OF_MONTH": {"values": position_choices, "type": str},
        "PREFER_DATES_FROM": {
            "values": ("current_period", "past", "future"),
            "type": str,
        },
        # Any datetime is accepted.
        "RELATIVE_BASE": {"type": datetime},
        "STRICT_PARSING": {"type": bool},
        # Allowed values are enforced by the extra check.
        "REQUIRE_PARTS": {"type": list, "extra_check": _check_require_part},
        # Any list is accepted.
        "SKIP_TOKENS": {"type": list},
        "NORMALIZE": {"type": bool},
        "RETURN_TIME_AS_PERIOD": {"type": bool},
        # Allowed values are enforced by the extra check.
        "PARSERS": {"type": list, "extra_check": _check_parsers},
        "FUZZY": {"type": bool},
        "PREFER_LOCALE_DATE_ORDER": {"type": bool},
        "DEFAULT_LANGUAGES": {"type": list, "extra_check": _check_default_languages},
        "LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD": {
            "type": float,
            "extra_check": _check_between_0_and_1,
        },
        "CACHE_SIZE_LIMIT": {"type": int},
    }

    modified = settings._mod_settings  # only user-modified settings are checked

    # First pass: reject unknown names before any value is validated.
    for name in modified:
        if name not in schema:
            raise SettingValidationError('"{}" is not a valid setting'.format(name))

    # Second pass: type, allowed values, then the setting-specific check.
    for name, value in modified.items():
        spec = schema[name]
        expected_type = spec["type"]

        if not isinstance(value, expected_type):
            raise SettingValidationError(
                '"{}" must be "{}", not "{}".'.format(
                    name, expected_type.__name__, type(value).__name__
                )
            )

        allowed = spec.get("values")
        if allowed and value not in allowed:
            raise SettingValidationError(
                '"{}" is not a valid value for "{}", it should be: "{}" or "{}"'.format(
                    value,
                    name,
                    '", "'.join(allowed[:-1]),
                    allowed[-1],
                )
            )

        validator = spec.get("extra_check")
        if validator:
            validator(name, value)
|
|
File without changes
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import fasttext
|
|
4
|
+
|
|
5
|
+
from dateparser_cli.exceptions import FastTextModelNotFoundException
|
|
6
|
+
from dateparser_cli.fasttext_manager import fasttext_downloader
|
|
7
|
+
from dateparser_cli.utils import create_data_model_home, dateparser_model_home
|
|
8
|
+
|
|
9
|
+
# File names that _load_fasttext_model() accepts as fastText model files when
# scanning the model directory.
_supported_models = ["large.bin", "small.bin"]
# Model name handed to fasttext_downloader() when no supported file is present.
_DEFAULT_MODEL = "small"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class _FastTextCache:
    """Module-private holder for the fastText model once it has been loaded."""

    # Assigned by _load_fasttext_model() after a successful load; None before that.
    model = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _find_downloaded_model():
    """Return the first name in ``_supported_models`` present in the model directory, or None."""
    present = set(os.listdir(dateparser_model_home))
    # Walk _supported_models (not the directory listing) so the choice does not
    # depend on the arbitrary order returned by os.listdir().
    for model_name in _supported_models:
        if model_name in present:
            return model_name
    return None


def _load_fasttext_model():
    """Return the fastText model, loading (and if needed downloading) it on first use.

    The loaded model is cached on ``_FastTextCache.model`` and reused by later
    calls. When several supported model files are present, the one listed
    first in ``_supported_models`` is used. When none is present, the default
    model is downloaded once and the directory is checked again.

    :raises FastTextModelNotFoundException: if no supported model file exists
        even after the download attempt, or the selected path is not a file.
    """
    if _FastTextCache.model:
        return _FastTextCache.model

    create_data_model_home()

    model_name = _find_downloaded_model()
    if model_name is None:
        fasttext_downloader(_DEFAULT_MODEL)
        # Look again exactly once; failing here avoids re-downloading forever
        # if the download did not leave a supported file behind.
        model_name = _find_downloaded_model()
        if model_name is None:
            raise FastTextModelNotFoundException("Fasttext model file not found")

    model_path = os.path.join(dateparser_model_home, model_name)
    if not os.path.isfile(model_path):
        raise FastTextModelNotFoundException("Fasttext model file not found")

    _FastTextCache.model = fasttext.load_model(model_path)
    return _FastTextCache.model
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def detect_languages(text, confidence_threshold):
    """Return the language codes fastText predicts for ``text`` above a threshold.

    :param text: the string to classify.
    :param confidence_threshold: a prediction is kept only if its probability
        is strictly greater than this value.
    :return: list of labels with the ``__label__`` prefix removed.
    """
    model = _load_fasttext_model()
    # "\n" becomes a space and "\r" is dropped before prediction
    # (presumably because predict() expects a single line -- confirm).
    single_line = text.replace("\n", " ").replace("\r", "")
    prediction = model.predict(single_line)
    labels, probabilities = prediction[0], prediction[1]
    return [
        labels[position].replace("__label__", "")
        for position, probability in enumerate(probabilities)
        if probability > confidence_threshold
    ]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import langdetect
|
|
2
|
+
|
|
3
|
+
# The below _Factory is set to prevent setting global state of the library
|
|
4
|
+
# but still get consistent results.
|
|
5
|
+
# See: https://github.com/Mimino666/langdetect
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class _Factory:
    """Module-private holder for the langdetect DetectorFactory used by this module."""

    # Assigned by _init_factory() on first use; None before that.
    data = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _init_factory():
    """Create and configure the module's private DetectorFactory on first call.

    The factory loads the profiles from langdetect's ``PROFILES_DIRECTORY``
    and has its ``seed`` set to 0. Later calls are no-ops.
    """
    if _Factory.data is not None:
        return

    factory = langdetect.detector_factory.DetectorFactory()
    factory.load_profile(langdetect.detector_factory.PROFILES_DIRECTORY)
    factory.seed = 0
    # Publish only after configuration succeeded: if load_profile() raises, no
    # half-initialised factory is cached and the next call retries from scratch.
    _Factory.data = factory
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _get_language_probablities(text):
    """Return langdetect's language probabilities for ``text``.

    Makes sure the module's factory is initialised, creates a new detector
    from it, feeds it ``text`` and returns ``detector.get_probabilities()``.
    """
    _init_factory()
    detector = _Factory.data.create()
    detector.append(text)
    return detector.get_probabilities()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def detect_languages(text, confidence_threshold):
    """Return the language codes langdetect assigns to ``text`` above a threshold.

    :param text: the string to classify.
    :param confidence_threshold: a candidate is kept only if its probability
        is strictly greater than this value.
    :return: list of language codes; empty when langdetect cannot process
        the input.
    """
    detected = []
    try:
        detected.extend(
            candidate.lang
            for candidate in _get_language_probablities(text)
            if candidate.prob > confidence_threshold
        )
    except langdetect.lang_detect_exception.LangDetectException:
        # Raised for empty strings or inputs without letters such as
        # `10-10-2021`. These are common, so the error is deliberately ignored.
        pass
    return detected
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from dateparser.data.languages_info import language_map
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def map_languages(language_codes):
    """
    Translate detected language codes into the supported language codes.

    Codes that are not keys of ``language_map`` are dropped; every other code
    contributes all the entries ``language_map`` lists for it, in order.

    :param language_codes:
        A list of language codes, e.g. ['en', 'es'] in ISO 639 Standard.
    :type language_codes: list
    :return: Returns list[str] representing supported languages
    :rtype: list[str]
    """
    supported = []
    for detected_code in language_codes:
        if detected_code in language_map:
            supported.extend(language_map[detected_code])
    return supported
|
|
File without changes
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# Afrikaans ("af") translation data: month/weekday/unit names, literal relative
# expressions and regex-based relative expressions mapped to English forms.
info = {
    "name": "af",
    "date_order": "YMD",
    "january": ["jan", "januarie"],
    "february": ["feb", "februarie"],
    "march": ["maart", "mrt"],
    "april": ["apr", "april"],
    "may": ["mei"],
    "june": ["jun", "junie"],
    "july": ["jul", "julie"],
    "august": ["aug", "augustus"],
    "september": ["sep", "september"],
    "october": ["okt", "oktober"],
    "november": ["nov", "november"],
    "december": ["des", "desember"],
    "monday": ["ma", "maandag"],
    "tuesday": ["di", "dinsdag"],
    "wednesday": ["wo", "woensdag"],
    "thursday": ["do", "donderdag"],
    "friday": ["vr", "vrydag"],
    "saturday": ["sa", "saterdag"],
    "sunday": ["so", "sondag"],
    "am": ["vm"],
    "pm": ["nm"],
    "year": ["j", "jaar"],
    "month": ["maand", "md"],
    "week": ["week", "wk"],
    "day": ["d", "dag"],
    "hour": ["u", "uur"],
    "minute": ["m", "min", "minuut"],
    "second": ["s", "sek", "sekonde"],
    "relative-type": {
        "0 day ago": ["vandag"],
        "0 hour ago": ["hierdie uur"],
        "0 minute ago": ["hierdie minuut"],
        "0 month ago": ["vandeesmaand"],
        "0 second ago": ["nou"],
        "0 week ago": ["vandeesweek"],
        "0 year ago": ["hierdie jaar"],
        "1 day ago": ["gister"],
        "1 month ago": ["verlede maand"],
        "1 week ago": ["verlede week"],
        "1 year ago": ["verlede jaar"],
        "in 1 day": ["môre"],
        "in 1 month": ["volgende maand"],
        "in 1 week": ["volgende week"],
        "in 1 year": ["volgende jaar"],
    },
    "relative-type-regex": {
        "\\1 day ago": [
            "(\\d+[.,]?\\d*) dae gelede",
            "(\\d+[.,]?\\d*) dag gelede",
        ],
        "\\1 hour ago": [
            "(\\d+[.,]?\\d*) uur gelede",
        ],
        "\\1 minute ago": [
            "(\\d+[.,]?\\d*) min gelede",
            "(\\d+[.,]?\\d*) minute gelede",
            "(\\d+[.,]?\\d*) minuut gelede",
        ],
        "\\1 month ago": [
            "(\\d+[.,]?\\d*) maand gelede",
            "(\\d+[.,]?\\d*) maande gelede",
            "(\\d+[.,]?\\d*) md gelede",
        ],
        "\\1 second ago": [
            "(\\d+[.,]?\\d*) sek gelede",
            "(\\d+[.,]?\\d*) sekonde gelede",
            "(\\d+[.,]?\\d*) sekondes gelede",
        ],
        "\\1 week ago": [
            "(\\d+[.,]?\\d*) w gelede",
            "(\\d+[.,]?\\d*) week gelede",
            "(\\d+[.,]?\\d*) weke gelede",
        ],
        "\\1 year ago": [
            "(\\d+[.,]?\\d*) jaar gelede",
        ],
        # Day patterns only: "minuut" means minute, so that pattern belongs
        # solely under the minute key below.
        "in \\1 day": [
            "oor (\\d+[.,]?\\d*) dae",
            "oor (\\d+[.,]?\\d*) dag",
        ],
        "in \\1 hour": [
            "oor (\\d+[.,]?\\d*) uur",
        ],
        "in \\1 minute": [
            "oor (\\d+[.,]?\\d*) min",
            "oor (\\d+[.,]?\\d*) minuut",
        ],
        # Month patterns mirror the nouns used by the "month ago" key above
        # (maand / maande / md); no minute pattern is listed here.
        "in \\1 month": [
            "oor (\\d+[.,]?\\d*) maand",
            "oor (\\d+[.,]?\\d*) maande",
            "oor (\\d+[.,]?\\d*) md",
        ],
        "in \\1 second": [
            "oor (\\d+[.,]?\\d*) sek",
            "oor (\\d+[.,]?\\d*) sekonde",
            "oor (\\d+[.,]?\\d*) sekondes",
        ],
        "in \\1 week": [
            "oor (\\d+[.,]?\\d*) w",
            "oor (\\d+[.,]?\\d*) week",
            "oor (\\d+[.,]?\\d*) weke",
        ],
        "in \\1 year": [
            "oor (\\d+[.,]?\\d*) jaar",
        ],
    },
    "locale_specific": {
        "af-NA": {
            "name": "af-NA",
        },
    },
    "skip": [
        " ",
        "'",
        ",",
        "-",
        ".",
        "/",
        ";",
        "@",
        "[",
        "]",
        "|",
        # NOTE(review): as rendered here this repeats the "," entry above;
        # confirm whether a different comma code point was intended.
        ",",
    ],
}
|