dateparser 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateparser/__init__.py +82 -0
- dateparser/calendars/__init__.py +144 -0
- dateparser/calendars/hijri.py +6 -0
- dateparser/calendars/hijri_parser.py +60 -0
- dateparser/calendars/jalali.py +9 -0
- dateparser/calendars/jalali_parser.py +184 -0
- dateparser/conf.py +267 -0
- dateparser/custom_language_detection/__init__.py +0 -0
- dateparser/custom_language_detection/fasttext.py +43 -0
- dateparser/custom_language_detection/langdetect.py +37 -0
- dateparser/custom_language_detection/language_mapping.py +18 -0
- dateparser/data/__init__.py +2 -0
- dateparser/data/date_translation_data/__init__.py +0 -0
- dateparser/data/date_translation_data/af.py +242 -0
- dateparser/data/date_translation_data/agq.py +169 -0
- dateparser/data/date_translation_data/ak.py +169 -0
- dateparser/data/date_translation_data/am.py +222 -0
- dateparser/data/date_translation_data/ar.py +574 -0
- dateparser/data/date_translation_data/as.py +164 -0
- dateparser/data/date_translation_data/asa.py +168 -0
- dateparser/data/date_translation_data/ast.py +280 -0
- dateparser/data/date_translation_data/az-Cyrl.py +168 -0
- dateparser/data/date_translation_data/az-Latn.py +217 -0
- dateparser/data/date_translation_data/az.py +217 -0
- dateparser/data/date_translation_data/bas.py +169 -0
- dateparser/data/date_translation_data/be.py +340 -0
- dateparser/data/date_translation_data/bem.py +161 -0
- dateparser/data/date_translation_data/bez.py +169 -0
- dateparser/data/date_translation_data/bg.py +345 -0
- dateparser/data/date_translation_data/bm.py +167 -0
- dateparser/data/date_translation_data/bn.py +241 -0
- dateparser/data/date_translation_data/bo.py +185 -0
- dateparser/data/date_translation_data/br.py +226 -0
- dateparser/data/date_translation_data/brx.py +157 -0
- dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
- dateparser/data/date_translation_data/bs-Latn.py +248 -0
- dateparser/data/date_translation_data/bs.py +248 -0
- dateparser/data/date_translation_data/ca.py +313 -0
- dateparser/data/date_translation_data/ce.py +225 -0
- dateparser/data/date_translation_data/cgg.py +169 -0
- dateparser/data/date_translation_data/chr.py +240 -0
- dateparser/data/date_translation_data/ckb.py +154 -0
- dateparser/data/date_translation_data/cs.py +316 -0
- dateparser/data/date_translation_data/cy.py +217 -0
- dateparser/data/date_translation_data/da.py +296 -0
- dateparser/data/date_translation_data/dav.py +169 -0
- dateparser/data/date_translation_data/de.py +357 -0
- dateparser/data/date_translation_data/dje.py +167 -0
- dateparser/data/date_translation_data/dsb.py +270 -0
- dateparser/data/date_translation_data/dua.py +169 -0
- dateparser/data/date_translation_data/dyo.py +168 -0
- dateparser/data/date_translation_data/dz.py +225 -0
- dateparser/data/date_translation_data/ebu.py +169 -0
- dateparser/data/date_translation_data/ee.py +233 -0
- dateparser/data/date_translation_data/el.py +279 -0
- dateparser/data/date_translation_data/en.py +851 -0
- dateparser/data/date_translation_data/eo.py +169 -0
- dateparser/data/date_translation_data/es.py +499 -0
- dateparser/data/date_translation_data/et.py +233 -0
- dateparser/data/date_translation_data/eu.py +219 -0
- dateparser/data/date_translation_data/ewo.py +169 -0
- dateparser/data/date_translation_data/fa.py +270 -0
- dateparser/data/date_translation_data/ff.py +179 -0
- dateparser/data/date_translation_data/fi.py +345 -0
- dateparser/data/date_translation_data/fil.py +223 -0
- dateparser/data/date_translation_data/fo.py +256 -0
- dateparser/data/date_translation_data/fr.py +520 -0
- dateparser/data/date_translation_data/fur.py +223 -0
- dateparser/data/date_translation_data/fy.py +223 -0
- dateparser/data/date_translation_data/ga.py +238 -0
- dateparser/data/date_translation_data/gd.py +277 -0
- dateparser/data/date_translation_data/gl.py +253 -0
- dateparser/data/date_translation_data/gsw.py +179 -0
- dateparser/data/date_translation_data/gu.py +216 -0
- dateparser/data/date_translation_data/guz.py +170 -0
- dateparser/data/date_translation_data/gv.py +166 -0
- dateparser/data/date_translation_data/ha.py +176 -0
- dateparser/data/date_translation_data/haw.py +168 -0
- dateparser/data/date_translation_data/he.py +371 -0
- dateparser/data/date_translation_data/hi.py +261 -0
- dateparser/data/date_translation_data/hr.py +378 -0
- dateparser/data/date_translation_data/hsb.py +271 -0
- dateparser/data/date_translation_data/hu.py +297 -0
- dateparser/data/date_translation_data/hy.py +246 -0
- dateparser/data/date_translation_data/id.py +272 -0
- dateparser/data/date_translation_data/ig.py +168 -0
- dateparser/data/date_translation_data/ii.py +157 -0
- dateparser/data/date_translation_data/is.py +242 -0
- dateparser/data/date_translation_data/it.py +282 -0
- dateparser/data/date_translation_data/ja.py +286 -0
- dateparser/data/date_translation_data/jgo.py +188 -0
- dateparser/data/date_translation_data/jmc.py +168 -0
- dateparser/data/date_translation_data/ka.py +241 -0
- dateparser/data/date_translation_data/kab.py +169 -0
- dateparser/data/date_translation_data/kam.py +169 -0
- dateparser/data/date_translation_data/kde.py +169 -0
- dateparser/data/date_translation_data/kea.py +230 -0
- dateparser/data/date_translation_data/khq.py +167 -0
- dateparser/data/date_translation_data/ki.py +169 -0
- dateparser/data/date_translation_data/kk.py +228 -0
- dateparser/data/date_translation_data/kl.py +213 -0
- dateparser/data/date_translation_data/kln.py +171 -0
- dateparser/data/date_translation_data/km.py +198 -0
- dateparser/data/date_translation_data/kn.py +225 -0
- dateparser/data/date_translation_data/ko.py +207 -0
- dateparser/data/date_translation_data/kok.py +157 -0
- dateparser/data/date_translation_data/ks.py +152 -0
- dateparser/data/date_translation_data/ksb.py +168 -0
- dateparser/data/date_translation_data/ksf.py +169 -0
- dateparser/data/date_translation_data/ksh.py +192 -0
- dateparser/data/date_translation_data/kw.py +169 -0
- dateparser/data/date_translation_data/ky.py +240 -0
- dateparser/data/date_translation_data/lag.py +169 -0
- dateparser/data/date_translation_data/lb.py +233 -0
- dateparser/data/date_translation_data/lg.py +169 -0
- dateparser/data/date_translation_data/lkt.py +194 -0
- dateparser/data/date_translation_data/ln.py +179 -0
- dateparser/data/date_translation_data/lo.py +228 -0
- dateparser/data/date_translation_data/lrc.py +154 -0
- dateparser/data/date_translation_data/lt.py +263 -0
- dateparser/data/date_translation_data/lu.py +169 -0
- dateparser/data/date_translation_data/luo.py +169 -0
- dateparser/data/date_translation_data/luy.py +168 -0
- dateparser/data/date_translation_data/lv.py +257 -0
- dateparser/data/date_translation_data/mas.py +173 -0
- dateparser/data/date_translation_data/mer.py +168 -0
- dateparser/data/date_translation_data/mfe.py +166 -0
- dateparser/data/date_translation_data/mg.py +168 -0
- dateparser/data/date_translation_data/mgh.py +169 -0
- dateparser/data/date_translation_data/mgo.py +151 -0
- dateparser/data/date_translation_data/mk.py +234 -0
- dateparser/data/date_translation_data/ml.py +217 -0
- dateparser/data/date_translation_data/mn.py +224 -0
- dateparser/data/date_translation_data/mr.py +229 -0
- dateparser/data/date_translation_data/ms.py +242 -0
- dateparser/data/date_translation_data/mt.py +175 -0
- dateparser/data/date_translation_data/mua.py +169 -0
- dateparser/data/date_translation_data/my.py +203 -0
- dateparser/data/date_translation_data/mzn.py +199 -0
- dateparser/data/date_translation_data/naq.py +169 -0
- dateparser/data/date_translation_data/nb.py +261 -0
- dateparser/data/date_translation_data/nd.py +169 -0
- dateparser/data/date_translation_data/ne.py +207 -0
- dateparser/data/date_translation_data/nl.py +273 -0
- dateparser/data/date_translation_data/nmg.py +169 -0
- dateparser/data/date_translation_data/nn.py +231 -0
- dateparser/data/date_translation_data/nnh.py +150 -0
- dateparser/data/date_translation_data/nus.py +166 -0
- dateparser/data/date_translation_data/nyn.py +169 -0
- dateparser/data/date_translation_data/om.py +173 -0
- dateparser/data/date_translation_data/or.py +157 -0
- dateparser/data/date_translation_data/os.py +203 -0
- dateparser/data/date_translation_data/pa-Arab.py +150 -0
- dateparser/data/date_translation_data/pa-Guru.py +221 -0
- dateparser/data/date_translation_data/pa.py +221 -0
- dateparser/data/date_translation_data/pl.py +416 -0
- dateparser/data/date_translation_data/ps.py +150 -0
- dateparser/data/date_translation_data/pt.py +981 -0
- dateparser/data/date_translation_data/qu.py +176 -0
- dateparser/data/date_translation_data/rm.py +166 -0
- dateparser/data/date_translation_data/rn.py +169 -0
- dateparser/data/date_translation_data/ro.py +270 -0
- dateparser/data/date_translation_data/rof.py +157 -0
- dateparser/data/date_translation_data/ru.py +442 -0
- dateparser/data/date_translation_data/rw.py +169 -0
- dateparser/data/date_translation_data/rwk.py +168 -0
- dateparser/data/date_translation_data/sah.py +219 -0
- dateparser/data/date_translation_data/saq.py +169 -0
- dateparser/data/date_translation_data/sbp.py +169 -0
- dateparser/data/date_translation_data/se.py +280 -0
- dateparser/data/date_translation_data/seh.py +169 -0
- dateparser/data/date_translation_data/ses.py +167 -0
- dateparser/data/date_translation_data/sg.py +169 -0
- dateparser/data/date_translation_data/shi-Latn.py +169 -0
- dateparser/data/date_translation_data/shi-Tfng.py +169 -0
- dateparser/data/date_translation_data/shi.py +169 -0
- dateparser/data/date_translation_data/si.py +220 -0
- dateparser/data/date_translation_data/sk.py +327 -0
- dateparser/data/date_translation_data/sl.py +244 -0
- dateparser/data/date_translation_data/smn.py +176 -0
- dateparser/data/date_translation_data/sn.py +169 -0
- dateparser/data/date_translation_data/so.py +179 -0
- dateparser/data/date_translation_data/sq.py +237 -0
- dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
- dateparser/data/date_translation_data/sr-Latn.py +306 -0
- dateparser/data/date_translation_data/sr.py +255 -0
- dateparser/data/date_translation_data/sv.py +309 -0
- dateparser/data/date_translation_data/sw.py +231 -0
- dateparser/data/date_translation_data/ta.py +264 -0
- dateparser/data/date_translation_data/te.py +239 -0
- dateparser/data/date_translation_data/teo.py +173 -0
- dateparser/data/date_translation_data/th.py +300 -0
- dateparser/data/date_translation_data/ti.py +173 -0
- dateparser/data/date_translation_data/tl.py +137 -0
- dateparser/data/date_translation_data/to.py +216 -0
- dateparser/data/date_translation_data/tr.py +259 -0
- dateparser/data/date_translation_data/twq.py +167 -0
- dateparser/data/date_translation_data/tzm.py +169 -0
- dateparser/data/date_translation_data/ug.py +203 -0
- dateparser/data/date_translation_data/uk.py +502 -0
- dateparser/data/date_translation_data/ur.py +256 -0
- dateparser/data/date_translation_data/uz-Arab.py +167 -0
- dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
- dateparser/data/date_translation_data/uz-Latn.py +216 -0
- dateparser/data/date_translation_data/uz.py +216 -0
- dateparser/data/date_translation_data/vi.py +260 -0
- dateparser/data/date_translation_data/vun.py +168 -0
- dateparser/data/date_translation_data/wae.py +224 -0
- dateparser/data/date_translation_data/xog.py +169 -0
- dateparser/data/date_translation_data/yav.py +169 -0
- dateparser/data/date_translation_data/yi.py +178 -0
- dateparser/data/date_translation_data/yo.py +263 -0
- dateparser/data/date_translation_data/yue.py +203 -0
- dateparser/data/date_translation_data/zgh.py +169 -0
- dateparser/data/date_translation_data/zh-Hans.py +240 -0
- dateparser/data/date_translation_data/zh-Hant.py +402 -0
- dateparser/data/date_translation_data/zh.py +273 -0
- dateparser/data/date_translation_data/zu.py +196 -0
- dateparser/data/languages_info.py +826 -0
- dateparser/date.py +599 -0
- dateparser/date_parser.py +55 -0
- dateparser/freshness_date_parser.py +156 -0
- dateparser/languages/__init__.py +2 -0
- dateparser/languages/dictionary.py +352 -0
- dateparser/languages/loader.py +224 -0
- dateparser/languages/locale.py +625 -0
- dateparser/languages/validation.py +467 -0
- dateparser/parser.py +742 -0
- dateparser/search/__init__.py +71 -0
- dateparser/search/detection.py +78 -0
- dateparser/search/search.py +297 -0
- dateparser/search/text_detection.py +89 -0
- dateparser/timezone_parser.py +91 -0
- dateparser/timezones.py +469 -0
- dateparser/utils/__init__.py +257 -0
- dateparser/utils/strptime.py +108 -0
- dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
- dateparser-1.2.1.dist-info/LICENSE +12 -0
- dateparser-1.2.1.dist-info/METADATA +864 -0
- dateparser-1.2.1.dist-info/RECORD +256 -0
- dateparser-1.2.1.dist-info/WHEEL +5 -0
- dateparser-1.2.1.dist-info/entry_points.txt +2 -0
- dateparser-1.2.1.dist-info/top_level.txt +4 -0
- dateparser_cli/__init__.py +0 -0
- dateparser_cli/cli.py +36 -0
- dateparser_cli/exceptions.py +2 -0
- dateparser_cli/fasttext_manager.py +42 -0
- dateparser_cli/utils.py +27 -0
- dateparser_data/__init__.py +0 -0
- dateparser_data/settings.py +33 -0
- dateparser_scripts/__init__.py +0 -0
- dateparser_scripts/get_cldr_data.py +567 -0
- dateparser_scripts/order_languages.py +217 -0
- dateparser_scripts/update_supported_languages_and_locales.py +48 -0
- dateparser_scripts/utils.py +73 -0
- dateparser_scripts/write_complete_data.py +129 -0
dateparser/__init__.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
__version__ = "1.2.1"
|
|
2
|
+
|
|
3
|
+
from .conf import apply_settings
|
|
4
|
+
from .date import DateDataParser
|
|
5
|
+
|
|
6
|
+
_default_parser = DateDataParser()
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@apply_settings
def parse(
    date_string,
    date_formats=None,
    languages=None,
    locales=None,
    region=None,
    settings=None,
    detect_languages_function=None,
):
    """Turn a human-readable date/time string into a ``datetime``.

    :param date_string:
        Text holding a date and/or time in some recognizable format.
    :type date_string: str

    :param date_formats:
        Explicit format strings built from the
        `strftime/strptime directives <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`_.
        They are tried one after another, taking the detected
        languages/locales into account.
    :type date_formats: list

    :param languages:
        Language codes such as ['en', 'es', 'zh-Hant']. When ``locales`` is
        not supplied, languages and region together determine the locales
        used for translation.
    :type languages: list

    :param locales:
        Locale codes such as ['fr-PF', 'qu-EC', 'af-NA']. When supplied,
        only these locales are used to translate the date string.
    :type locales: list

    :param region:
        A region code such as 'IN', '001', 'NE'. When ``locales`` is not
        supplied, languages and region together determine the locales used
        for translation.
    :type region: str

    :param settings:
        Behaviour overrides, using the settings defined in
        :mod:`dateparser.conf.Settings`.
    :type settings: dict

    :param detect_languages_function:
        Callable that receives the ``date_string`` and a
        ``confidence_threshold`` and returns a list of detected language
        codes. Only consulted when neither ``languages`` nor ``locales``
        is provided.
    :type detect_languages_function: function

    :return: The parsed :class:`datetime <datetime.datetime>`, or None when
        the string could not be parsed.
    :rtype: :class:`datetime <datetime.datetime>`.
    :raises:
        ``ValueError``: Unknown Language, ``TypeError``: Languages argument must be a list,
        ``SettingValidationError``: A provided setting is not valid.
    """
    # The shared module-level parser is only safe to reuse when the caller
    # customised nothing: no language hints and untouched default settings.
    uses_defaults = not (
        languages
        or locales
        or region
        or detect_languages_function
        or not settings._default
    )

    if uses_defaults:
        parser = _default_parser
    else:
        parser = DateDataParser(
            languages=languages,
            locales=locales,
            region=region,
            settings=settings,
            detect_languages_function=detect_languages_function,
        )

    date_data = parser.get_date_data(date_string, date_formats)
    return date_data["date_obj"] if date_data else None
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
from dateparser.conf import settings
|
|
4
|
+
from dateparser.date import DateData
|
|
5
|
+
from dateparser.parser import _parser
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CalendarBase:
    """Common entry point for parsing dates written in a non-Gregorian calendar.

    Subclasses are expected to replace ``parser`` with a concrete parser
    class exposing ``parse(source, settings)``.

    :param source:
        Date string handed to the calendar parser.
    :type source: str
    """

    # Placeholder; concrete calendars assign their parser class here.
    parser = NotImplemented

    def __init__(self, source):
        self.source = source

    def get_date(self):
        """Parse ``self.source`` and wrap the outcome in a ``DateData``.

        :return: ``DateData`` with the parsed date and period, or None when
            parsing raised ``ValueError``.
        """
        try:
            parsed_date, parsed_period = self.parser.parse(self.source, settings)
            return DateData(date_obj=parsed_date, period=parsed_period)
        except ValueError:
            # Best effort: an unparseable string simply yields no result.
            return None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class non_gregorian_parser(_parser):
    """Parser base for calendars other than the Gregorian one.

    Concrete calendars provide the converter, the default date components
    and (optionally) the transliteration tables; every ``_replace_*`` hook
    defaults to returning its input unchanged.
    """

    # Required configuration, supplied by subclasses.
    calendar_converter = NotImplemented
    default_year = NotImplemented
    default_month = NotImplemented
    default_day = NotImplemented
    non_gregorian_date_cls = NotImplemented

    # Optional lookup tables, supplied by subclasses.
    _digits = None
    _months = None
    _weekdays = None
    _number_letters = None

    @classmethod
    def _replace_time_conventions(cls, source):
        """Hook for rewriting am/pm style markers; no-op by default."""
        return source

    @classmethod
    def _replace_digits(cls, source):
        """Hook for rewriting native digits; no-op by default."""
        return source

    @classmethod
    def _replace_months(cls, source):
        """Hook for rewriting month names; no-op by default."""
        return source

    @classmethod
    def _replace_weekdays(cls, source):
        """Hook for rewriting weekday names; no-op by default."""
        return source

    @classmethod
    def _replace_time(cls, source):
        """Hook for rewriting time expressions; no-op by default."""
        return source

    @classmethod
    def _replace_days(cls, source):
        """Hook for rewriting day numbers/ordinals; no-op by default."""
        return source

    @classmethod
    def to_latin(cls, source):
        """Run ``source`` through every replacement hook and strip the result.

        The hooks run in a fixed order: months, weekdays, digits, days,
        time, time conventions.
        """
        pipeline = (
            cls._replace_months,
            cls._replace_weekdays,
            cls._replace_digits,
            cls._replace_days,
            cls._replace_time,
            cls._replace_time_conventions,
        )
        text = source
        for transform in pipeline:
            text = transform(text)
        return text.strip()

    def handle_two_digit_year(self, year):
        """Expand a two-digit year; the base class rejects it with ValueError."""
        raise ValueError

    def _get_datetime_obj(self, **params):
        """Build a Gregorian ``datetime`` from calendar-specific components.

        ``params`` must contain ``day``, ``year`` and ``month`` (in the
        non-Gregorian calendar); any other entries are passed on to
        ``datetime`` untouched.
        """
        day = params["day"]
        year = params["year"]
        month = params["month"]
        converter = self.calendar_converter

        day_fits_month = 0 < day <= converter.month_length(year, month)
        if not day_fits_month:
            # Only clamp when there is no day token and no ``_token_weekday``
            # attribute on the parser.
            if not (self._token_day or hasattr(self, "_token_weekday")):
                day = converter.month_length(year, month)

        year, month, day = converter.to_gregorian(year=year, month=month, day=day)
        return datetime(**{**params, "year": year, "month": month, "day": day})

    def _get_datetime_obj_params(self):
        """Return default datetime components, anchored on the relative base.

        Missing day/month/year fall back to "now" expressed in the target
        calendar; all time components are zero.
        """
        if not self.now:
            self._set_relative_base()

        base = self.now
        now_year, now_month, now_day = self.calendar_converter.from_gregorian(
            base.year, base.month, base.day
        )
        return {
            "day": self.day or now_day,
            "month": self.month or now_month,
            "year": self.year or now_year,
            "hour": 0,
            "minute": 0,
            "second": 0,
            "microsecond": 0,
        }

    def _get_date_obj(self, token, directive):
        """Interpret ``token`` under ``directive`` as one date component.

        Starts from the class defaults and overrides the single component
        the directive describes.

        :raises ValueError: when the token does not fit the directive.
        """
        year = self.default_year
        month = self.default_month
        day = self.default_day

        width = len(token)
        numeric = token.isdigit()

        if directive == "%A":
            # A weekday only validates; it does not change any component.
            if not (self._weekdays and token.title() in self._weekdays):
                raise ValueError
        elif directive == "%m":
            if not (width <= 2 and numeric and 1 <= int(token) <= 12):
                raise ValueError
            month = int(token)
        elif directive == "%B":
            if not (self._months and token in self._months):
                raise ValueError
            # The month number is the 1-based position in the table.
            month = list(self._months.keys()).index(token) + 1
        elif directive == "%d":
            if not (
                width <= 2
                and numeric
                and 0 < int(token) <= self.calendar_converter.month_length(year, month)
            ):
                raise ValueError
            day = int(token)
        elif directive == "%Y":
            if not numeric:
                raise ValueError
            if width == 4:
                year = int(token)
            elif width == 2:
                year = self.handle_two_digit_year(int(token))
            else:
                raise ValueError
        else:
            raise ValueError

        return self.non_gregorian_date_cls(year, month, day)

    @classmethod
    def parse(cls, datestring, settings):
        """Transliterate ``datestring`` with ``to_latin`` and parse it."""
        return super().parse(cls.to_latin(datestring), settings)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from hijridate import Gregorian, Hijri
|
|
2
|
+
|
|
3
|
+
from dateparser.calendars import non_gregorian_parser
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class hijri:
    """Converter between Hijri and Gregorian dates, backed by ``hijridate``."""

    @classmethod
    def to_gregorian(cls, year=None, month=None, day=None):
        """Return the Gregorian date tuple for the given Hijri date.

        The Hijri date is constructed with ``validate=False``.
        """
        hijri_date = Hijri(year=year, month=month, day=day, validate=False)
        return hijri_date.to_gregorian().datetuple()

    @classmethod
    def from_gregorian(cls, year=None, month=None, day=None):
        """Return the Hijri date tuple for the given Gregorian date."""
        return Gregorian(year, month, day).to_hijri().datetuple()

    @classmethod
    def month_length(cls, year, month):
        """Return the number of days in the given Hijri month."""
        first_of_month = Hijri(year=year, month=month, day=1)
        return first_of_month.month_length()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class HijriDate:
    """Plain year/month/day holder for a date in the Hijri calendar."""

    def __init__(self, year, month, day):
        self.year = year
        self.month = month
        self.day = day

    def weekday(self):
        """Return the day-of-week index of this date, or None if it is invalid.

        The previous implementation iterated ``hijri.monthcalendar(...)``,
        but the ``hijri`` converter in this module defines no such method,
        so every call raised ``AttributeError``. The weekday is now derived
        from the Gregorian equivalent of the date.

        :return: 0 for Sunday through 6 for Saturday; None when the date is
            rejected by ``hijridate`` (invalid or out of supported range).
        """
        try:
            gregorian = Hijri(self.year, self.month, self.day).to_gregorian()
        except (ValueError, OverflowError):
            # Mirrors the old "day not found in the month" outcome.
            return None
        # ``date.weekday()`` counts from Monday; shift to a Sunday-first
        # index. NOTE(review): Sunday-first is assumed to match the column
        # index PersianDate.weekday gets from convertdate's monthcalendar
        # rows -- confirm before relying on the exact numbering.
        return (gregorian.weekday() + 1) % 7
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class hijri_parser(non_gregorian_parser):
    """Non-Gregorian parser configured for the Hijri calendar."""

    calendar_converter = hijri
    default_year = 1389
    default_month = 1
    default_day = 1
    non_gregorian_date_cls = HijriDate

    # Latin am/pm marker -> Arabic spellings that should be rewritten to it.
    _time_conventions = {
        "am": ["صباحاً"],
        "pm": ["مساءً"],
    }

    @classmethod
    def _replace_time_conventions(cls, source):
        """Swap the Arabic am/pm markers in ``source`` for "am"/"pm"."""
        text = source
        for latin_marker, arabic_markers in cls._time_conventions.items():
            for arabic_marker in arabic_markers:
                text = text.replace(arabic_marker, latin_marker)
        return text

    def handle_two_digit_year(self, year):
        """Expand a two-digit Hijri year: 90-99 -> 13xx, otherwise 14xx."""
        century = 1300 if year >= 90 else 1400
        return year + century
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from collections import OrderedDict
|
|
3
|
+
from functools import reduce
|
|
4
|
+
|
|
5
|
+
from convertdate import persian
|
|
6
|
+
|
|
7
|
+
from dateparser.calendars import non_gregorian_parser
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PersianDate:
    """Plain year/month/day holder for a date in the Persian (Jalali) calendar."""

    def __init__(self, year, month, day):
        self.year = year
        self.month = month
        self.day = day

    def weekday(self):
        """Return this day's column index within its ``persian.monthcalendar`` week.

        :return: the index of ``self.day`` inside the first week row that
            contains it, or None when no row contains it.
        """
        for week_row in persian.monthcalendar(self.year, self.month):
            if self.day in week_row:
                return week_row.index(self.day)
        return None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class jalali_parser(non_gregorian_parser):
    """Non-Gregorian parser configured for the Persian (Jalali) calendar."""

    calendar_converter = persian
    default_year = 1348
    default_month = 1
    default_day = 1
    non_gregorian_date_cls = PersianDate

    # Persian digit -> integer value.
    _digits = {
        "۰": 0,
        "۱": 1,
        "۲": 2,
        "۳": 3,
        "۴": 4,
        "۵": 5,
        "۶": 6,
        "۷": 7,
        "۸": 8,
        "۹": 9,
    }

    _months = OrderedDict(
        [
            # pinglish : (month index, number of days, persian literals)
            ("Farvardin", (1, 31, ["فروردین"])),
            ("Ordibehesht", (2, 31, ["اردیبهشت"])),
            ("Khordad", (3, 31, ["خرداد"])),
            ("Tir", (4, 31, ["تیر"])),
            ("Mordad", (5, 31, ["امرداد", "مرداد"])),
            ("Shahrivar", (6, 31, ["شهریور", "شهريور"])),
            ("Mehr", (7, 30, ["مهر"])),
            ("Aban", (8, 30, ["آبان"])),
            ("Azar", (9, 30, ["آذر"])),
            ("Dey", (10, 30, ["دی"])),
            ("Bahman", (11, 30, ["بهمن", "بهن"])),
            ("Esfand", (12, 29, ["اسفند"])),
        ]
    )

    # English weekday name -> Persian spellings.
    _weekdays = OrderedDict(
        [
            ("Sunday", ["یکشنبه"]),
            ("Monday", ["دوشنبه"]),
            ("Tuesday", ["سهشنبه", "سه شنبه"]),
            ("Wednesday", ["چهارشنبه", "چهار شنبه"]),
            ("Thursday", ["پنجشنبه", "پنج شنبه"]),
            ("Friday", ["جمعه"]),
            ("Saturday", ["روز شنبه", "شنبه"]),
        ]
    )

    # Integer -> Persian number words.
    _number_letters = {
        0: ["صفر"],
        1: ["یک", "اول"],
        2: ["دو"],
        3: ["سه", "سو"],
        4: ["چهار"],
        5: ["پنج"],
        6: ["شش"],
        7: ["هفت"],
        8: ["هشت"],
        9: ["نه"],
        10: ["ده"],
        11: ["یازده"],
        12: ["دوازده"],
        13: ["سیزده"],
        14: ["چهارده"],
        15: ["پانزده"],
        16: ["شانزده"],
        17: ["هفده"],
        18: ["هجده"],
        19: ["نوزده"],
        20: ["بیست"],
        21: ["بیست و یک"],
        22: ["بیست و دو", "بیست ثانیه"],
        23: ["بیست و سه", "بیست و سو"],
        24: ["بیست و چهار"],
        25: ["بیست و پنج"],
        26: ["بیست و شش"],
        27: ["بیست و هفت"],
        28: ["بیست و هشت"],
        29: ["بیست و نه"],
        30: ["سی"],
        31: ["سی و یک"],
    }

    @classmethod
    def _replace_digits(cls, source):
        """Rewrite every Persian digit in ``source`` as its ASCII digit."""
        text = source
        for persian_digit, value in cls._digits.items():
            text = text.replace(persian_digit, str(value))
        return text

    @classmethod
    def _replace_months(cls, source):
        """Rewrite Persian month names as their pinglish table keys."""
        text = source
        for pinglish_name, month_info in cls._months.items():
            # The last tuple element holds the Persian spellings.
            for persian_name in month_info[-1]:
                text = text.replace(persian_name, pinglish_name)
        return text

    @classmethod
    def _replace_weekdays(cls, source):
        """Rewrite Persian weekday names as their English table keys."""
        text = source
        for english_name, persian_names in cls._weekdays.items():
            for persian_name in persian_names:
                text = text.replace(persian_name, english_name)
        return text

    @classmethod
    def _replace_time(cls, source):
        """Reduce Persian hour/minute/second phrases to a clock-like form.

        Non-digit characters inside each matched phrase become spaces, a
        whitespace-delimited "و" becomes ":", and any leftover "ساعت" is
        removed.
        """

        def blank_non_digits(match):
            return re.sub(r"\D", " ", match.group())

        unit_patterns = (
            r"ساعت\s+\d{2}",
            r"\d{2}\s+دقیقه",
            r"\d{2}\s+ثانیه",
        )
        text = source
        for unit_pattern in unit_patterns:
            text = re.sub(unit_pattern, blank_non_digits, text)

        text = re.sub(r"\s+و\s+", ":", text)
        return text.replace("ساعت", "")

    @classmethod
    def _replace_days(cls, source):
        """Strip ordinal suffixes and rewrite Persian number words as digits."""
        # removes persian variant of th/first/second/third
        text = re.sub(r"ام|م|ین", "", source)

        # Process numbers from 31 down to 0 ...
        ordered = sorted(
            cls._number_letters.items(), key=lambda pair: pair[0], reverse=True
        )
        # ... except that 30 (index 1) and 13 (index -14) trade places,
        # presumably so the word for 13 is replaced before the shorter
        # word for 30 can match inside it.
        ordered[1], ordered[-14] = ordered[-14], ordered[1]

        for value, persian_words in ordered:
            for persian_word in persian_words:
                text = text.replace(persian_word, str(value))
        return text

    def handle_two_digit_year(self, year):
        """Expand a two-digit Jalali year: above 60 -> 13xx, otherwise 14xx."""
        century = 1300 if year > 60 else 1400
        return year + century
|