dateparser 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateparser/__init__.py +82 -0
- dateparser/calendars/__init__.py +144 -0
- dateparser/calendars/hijri.py +6 -0
- dateparser/calendars/hijri_parser.py +60 -0
- dateparser/calendars/jalali.py +9 -0
- dateparser/calendars/jalali_parser.py +184 -0
- dateparser/conf.py +267 -0
- dateparser/custom_language_detection/__init__.py +0 -0
- dateparser/custom_language_detection/fasttext.py +43 -0
- dateparser/custom_language_detection/langdetect.py +37 -0
- dateparser/custom_language_detection/language_mapping.py +18 -0
- dateparser/data/__init__.py +2 -0
- dateparser/data/date_translation_data/__init__.py +0 -0
- dateparser/data/date_translation_data/af.py +242 -0
- dateparser/data/date_translation_data/agq.py +169 -0
- dateparser/data/date_translation_data/ak.py +169 -0
- dateparser/data/date_translation_data/am.py +222 -0
- dateparser/data/date_translation_data/ar.py +574 -0
- dateparser/data/date_translation_data/as.py +164 -0
- dateparser/data/date_translation_data/asa.py +168 -0
- dateparser/data/date_translation_data/ast.py +280 -0
- dateparser/data/date_translation_data/az-Cyrl.py +168 -0
- dateparser/data/date_translation_data/az-Latn.py +217 -0
- dateparser/data/date_translation_data/az.py +217 -0
- dateparser/data/date_translation_data/bas.py +169 -0
- dateparser/data/date_translation_data/be.py +340 -0
- dateparser/data/date_translation_data/bem.py +161 -0
- dateparser/data/date_translation_data/bez.py +169 -0
- dateparser/data/date_translation_data/bg.py +345 -0
- dateparser/data/date_translation_data/bm.py +167 -0
- dateparser/data/date_translation_data/bn.py +241 -0
- dateparser/data/date_translation_data/bo.py +185 -0
- dateparser/data/date_translation_data/br.py +226 -0
- dateparser/data/date_translation_data/brx.py +157 -0
- dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
- dateparser/data/date_translation_data/bs-Latn.py +248 -0
- dateparser/data/date_translation_data/bs.py +248 -0
- dateparser/data/date_translation_data/ca.py +313 -0
- dateparser/data/date_translation_data/ce.py +225 -0
- dateparser/data/date_translation_data/cgg.py +169 -0
- dateparser/data/date_translation_data/chr.py +240 -0
- dateparser/data/date_translation_data/ckb.py +154 -0
- dateparser/data/date_translation_data/cs.py +316 -0
- dateparser/data/date_translation_data/cy.py +217 -0
- dateparser/data/date_translation_data/da.py +296 -0
- dateparser/data/date_translation_data/dav.py +169 -0
- dateparser/data/date_translation_data/de.py +357 -0
- dateparser/data/date_translation_data/dje.py +167 -0
- dateparser/data/date_translation_data/dsb.py +270 -0
- dateparser/data/date_translation_data/dua.py +169 -0
- dateparser/data/date_translation_data/dyo.py +168 -0
- dateparser/data/date_translation_data/dz.py +225 -0
- dateparser/data/date_translation_data/ebu.py +169 -0
- dateparser/data/date_translation_data/ee.py +233 -0
- dateparser/data/date_translation_data/el.py +279 -0
- dateparser/data/date_translation_data/en.py +851 -0
- dateparser/data/date_translation_data/eo.py +169 -0
- dateparser/data/date_translation_data/es.py +499 -0
- dateparser/data/date_translation_data/et.py +233 -0
- dateparser/data/date_translation_data/eu.py +219 -0
- dateparser/data/date_translation_data/ewo.py +169 -0
- dateparser/data/date_translation_data/fa.py +270 -0
- dateparser/data/date_translation_data/ff.py +179 -0
- dateparser/data/date_translation_data/fi.py +345 -0
- dateparser/data/date_translation_data/fil.py +223 -0
- dateparser/data/date_translation_data/fo.py +256 -0
- dateparser/data/date_translation_data/fr.py +520 -0
- dateparser/data/date_translation_data/fur.py +223 -0
- dateparser/data/date_translation_data/fy.py +223 -0
- dateparser/data/date_translation_data/ga.py +238 -0
- dateparser/data/date_translation_data/gd.py +277 -0
- dateparser/data/date_translation_data/gl.py +253 -0
- dateparser/data/date_translation_data/gsw.py +179 -0
- dateparser/data/date_translation_data/gu.py +216 -0
- dateparser/data/date_translation_data/guz.py +170 -0
- dateparser/data/date_translation_data/gv.py +166 -0
- dateparser/data/date_translation_data/ha.py +176 -0
- dateparser/data/date_translation_data/haw.py +168 -0
- dateparser/data/date_translation_data/he.py +371 -0
- dateparser/data/date_translation_data/hi.py +261 -0
- dateparser/data/date_translation_data/hr.py +378 -0
- dateparser/data/date_translation_data/hsb.py +271 -0
- dateparser/data/date_translation_data/hu.py +297 -0
- dateparser/data/date_translation_data/hy.py +246 -0
- dateparser/data/date_translation_data/id.py +272 -0
- dateparser/data/date_translation_data/ig.py +168 -0
- dateparser/data/date_translation_data/ii.py +157 -0
- dateparser/data/date_translation_data/is.py +242 -0
- dateparser/data/date_translation_data/it.py +282 -0
- dateparser/data/date_translation_data/ja.py +286 -0
- dateparser/data/date_translation_data/jgo.py +188 -0
- dateparser/data/date_translation_data/jmc.py +168 -0
- dateparser/data/date_translation_data/ka.py +241 -0
- dateparser/data/date_translation_data/kab.py +169 -0
- dateparser/data/date_translation_data/kam.py +169 -0
- dateparser/data/date_translation_data/kde.py +169 -0
- dateparser/data/date_translation_data/kea.py +230 -0
- dateparser/data/date_translation_data/khq.py +167 -0
- dateparser/data/date_translation_data/ki.py +169 -0
- dateparser/data/date_translation_data/kk.py +228 -0
- dateparser/data/date_translation_data/kl.py +213 -0
- dateparser/data/date_translation_data/kln.py +171 -0
- dateparser/data/date_translation_data/km.py +198 -0
- dateparser/data/date_translation_data/kn.py +225 -0
- dateparser/data/date_translation_data/ko.py +207 -0
- dateparser/data/date_translation_data/kok.py +157 -0
- dateparser/data/date_translation_data/ks.py +152 -0
- dateparser/data/date_translation_data/ksb.py +168 -0
- dateparser/data/date_translation_data/ksf.py +169 -0
- dateparser/data/date_translation_data/ksh.py +192 -0
- dateparser/data/date_translation_data/kw.py +169 -0
- dateparser/data/date_translation_data/ky.py +240 -0
- dateparser/data/date_translation_data/lag.py +169 -0
- dateparser/data/date_translation_data/lb.py +233 -0
- dateparser/data/date_translation_data/lg.py +169 -0
- dateparser/data/date_translation_data/lkt.py +194 -0
- dateparser/data/date_translation_data/ln.py +179 -0
- dateparser/data/date_translation_data/lo.py +228 -0
- dateparser/data/date_translation_data/lrc.py +154 -0
- dateparser/data/date_translation_data/lt.py +263 -0
- dateparser/data/date_translation_data/lu.py +169 -0
- dateparser/data/date_translation_data/luo.py +169 -0
- dateparser/data/date_translation_data/luy.py +168 -0
- dateparser/data/date_translation_data/lv.py +257 -0
- dateparser/data/date_translation_data/mas.py +173 -0
- dateparser/data/date_translation_data/mer.py +168 -0
- dateparser/data/date_translation_data/mfe.py +166 -0
- dateparser/data/date_translation_data/mg.py +168 -0
- dateparser/data/date_translation_data/mgh.py +169 -0
- dateparser/data/date_translation_data/mgo.py +151 -0
- dateparser/data/date_translation_data/mk.py +234 -0
- dateparser/data/date_translation_data/ml.py +217 -0
- dateparser/data/date_translation_data/mn.py +224 -0
- dateparser/data/date_translation_data/mr.py +229 -0
- dateparser/data/date_translation_data/ms.py +242 -0
- dateparser/data/date_translation_data/mt.py +175 -0
- dateparser/data/date_translation_data/mua.py +169 -0
- dateparser/data/date_translation_data/my.py +203 -0
- dateparser/data/date_translation_data/mzn.py +199 -0
- dateparser/data/date_translation_data/naq.py +169 -0
- dateparser/data/date_translation_data/nb.py +261 -0
- dateparser/data/date_translation_data/nd.py +169 -0
- dateparser/data/date_translation_data/ne.py +207 -0
- dateparser/data/date_translation_data/nl.py +273 -0
- dateparser/data/date_translation_data/nmg.py +169 -0
- dateparser/data/date_translation_data/nn.py +231 -0
- dateparser/data/date_translation_data/nnh.py +150 -0
- dateparser/data/date_translation_data/nus.py +166 -0
- dateparser/data/date_translation_data/nyn.py +169 -0
- dateparser/data/date_translation_data/om.py +173 -0
- dateparser/data/date_translation_data/or.py +157 -0
- dateparser/data/date_translation_data/os.py +203 -0
- dateparser/data/date_translation_data/pa-Arab.py +150 -0
- dateparser/data/date_translation_data/pa-Guru.py +221 -0
- dateparser/data/date_translation_data/pa.py +221 -0
- dateparser/data/date_translation_data/pl.py +416 -0
- dateparser/data/date_translation_data/ps.py +150 -0
- dateparser/data/date_translation_data/pt.py +981 -0
- dateparser/data/date_translation_data/qu.py +176 -0
- dateparser/data/date_translation_data/rm.py +166 -0
- dateparser/data/date_translation_data/rn.py +169 -0
- dateparser/data/date_translation_data/ro.py +270 -0
- dateparser/data/date_translation_data/rof.py +157 -0
- dateparser/data/date_translation_data/ru.py +442 -0
- dateparser/data/date_translation_data/rw.py +169 -0
- dateparser/data/date_translation_data/rwk.py +168 -0
- dateparser/data/date_translation_data/sah.py +219 -0
- dateparser/data/date_translation_data/saq.py +169 -0
- dateparser/data/date_translation_data/sbp.py +169 -0
- dateparser/data/date_translation_data/se.py +280 -0
- dateparser/data/date_translation_data/seh.py +169 -0
- dateparser/data/date_translation_data/ses.py +167 -0
- dateparser/data/date_translation_data/sg.py +169 -0
- dateparser/data/date_translation_data/shi-Latn.py +169 -0
- dateparser/data/date_translation_data/shi-Tfng.py +169 -0
- dateparser/data/date_translation_data/shi.py +169 -0
- dateparser/data/date_translation_data/si.py +220 -0
- dateparser/data/date_translation_data/sk.py +327 -0
- dateparser/data/date_translation_data/sl.py +244 -0
- dateparser/data/date_translation_data/smn.py +176 -0
- dateparser/data/date_translation_data/sn.py +169 -0
- dateparser/data/date_translation_data/so.py +179 -0
- dateparser/data/date_translation_data/sq.py +237 -0
- dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
- dateparser/data/date_translation_data/sr-Latn.py +306 -0
- dateparser/data/date_translation_data/sr.py +255 -0
- dateparser/data/date_translation_data/sv.py +309 -0
- dateparser/data/date_translation_data/sw.py +231 -0
- dateparser/data/date_translation_data/ta.py +264 -0
- dateparser/data/date_translation_data/te.py +239 -0
- dateparser/data/date_translation_data/teo.py +173 -0
- dateparser/data/date_translation_data/th.py +300 -0
- dateparser/data/date_translation_data/ti.py +173 -0
- dateparser/data/date_translation_data/tl.py +137 -0
- dateparser/data/date_translation_data/to.py +216 -0
- dateparser/data/date_translation_data/tr.py +259 -0
- dateparser/data/date_translation_data/twq.py +167 -0
- dateparser/data/date_translation_data/tzm.py +169 -0
- dateparser/data/date_translation_data/ug.py +203 -0
- dateparser/data/date_translation_data/uk.py +502 -0
- dateparser/data/date_translation_data/ur.py +256 -0
- dateparser/data/date_translation_data/uz-Arab.py +167 -0
- dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
- dateparser/data/date_translation_data/uz-Latn.py +216 -0
- dateparser/data/date_translation_data/uz.py +216 -0
- dateparser/data/date_translation_data/vi.py +260 -0
- dateparser/data/date_translation_data/vun.py +168 -0
- dateparser/data/date_translation_data/wae.py +224 -0
- dateparser/data/date_translation_data/xog.py +169 -0
- dateparser/data/date_translation_data/yav.py +169 -0
- dateparser/data/date_translation_data/yi.py +178 -0
- dateparser/data/date_translation_data/yo.py +263 -0
- dateparser/data/date_translation_data/yue.py +203 -0
- dateparser/data/date_translation_data/zgh.py +169 -0
- dateparser/data/date_translation_data/zh-Hans.py +240 -0
- dateparser/data/date_translation_data/zh-Hant.py +402 -0
- dateparser/data/date_translation_data/zh.py +273 -0
- dateparser/data/date_translation_data/zu.py +196 -0
- dateparser/data/languages_info.py +826 -0
- dateparser/date.py +599 -0
- dateparser/date_parser.py +55 -0
- dateparser/freshness_date_parser.py +156 -0
- dateparser/languages/__init__.py +2 -0
- dateparser/languages/dictionary.py +352 -0
- dateparser/languages/loader.py +224 -0
- dateparser/languages/locale.py +625 -0
- dateparser/languages/validation.py +467 -0
- dateparser/parser.py +742 -0
- dateparser/search/__init__.py +71 -0
- dateparser/search/detection.py +78 -0
- dateparser/search/search.py +297 -0
- dateparser/search/text_detection.py +89 -0
- dateparser/timezone_parser.py +91 -0
- dateparser/timezones.py +469 -0
- dateparser/utils/__init__.py +257 -0
- dateparser/utils/strptime.py +108 -0
- dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
- dateparser-1.2.1.dist-info/LICENSE +12 -0
- dateparser-1.2.1.dist-info/METADATA +864 -0
- dateparser-1.2.1.dist-info/RECORD +256 -0
- dateparser-1.2.1.dist-info/WHEEL +5 -0
- dateparser-1.2.1.dist-info/entry_points.txt +2 -0
- dateparser-1.2.1.dist-info/top_level.txt +4 -0
- dateparser_cli/__init__.py +0 -0
- dateparser_cli/cli.py +36 -0
- dateparser_cli/exceptions.py +2 -0
- dateparser_cli/fasttext_manager.py +42 -0
- dateparser_cli/utils.py +27 -0
- dateparser_data/__init__.py +0 -0
- dateparser_data/settings.py +33 -0
- dateparser_scripts/__init__.py +0 -0
- dateparser_scripts/get_cldr_data.py +567 -0
- dateparser_scripts/order_languages.py +217 -0
- dateparser_scripts/update_supported_languages_and_locales.py +48 -0
- dateparser_scripts/utils.py +73 -0
- dateparser_scripts/write_complete_data.py +129 -0
|
@@ -0,0 +1,567 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
from collections import OrderedDict
|
|
5
|
+
|
|
6
|
+
import regex as re
|
|
7
|
+
|
|
8
|
+
from dateparser_scripts.order_languages import _get_language_locale_dict
|
|
9
|
+
from dateparser_scripts.utils import get_dict_difference, get_raw_data
|
|
10
|
+
|
|
11
|
+
# Directory holding one sub-directory of raw CLDR JSON per locale. The path
# is relative, so it is resolved against the current working directory.
cldr_dates_full_dir = "../raw_data/cldr_dates_full/main/"

# --- apostrophe normalisation ----------------------------------------------
# Characters that are replaced by a plain ASCII "'" in _clean_string.
APOSTROPHE_LOOK_ALIKE_CHARS = [
    "\N{RIGHT SINGLE QUOTATION MARK}",  # '\u2019'
    "\N{MODIFIER LETTER APOSTROPHE}",  # '\u02bc'
    "\N{MODIFIER LETTER TURNED COMMA}",  # '\u02bb'
    "\N{ARMENIAN APOSTROPHE}",  # '\u055a'
    "\N{LATIN SMALL LETTER SALTILLO}",  # '\ua78c'
    "\N{PRIME}",  # '\u2032'
    "\N{REVERSED PRIME}",  # '\u2035'
    "\N{MODIFIER LETTER PRIME}",  # '\u02b9'
    "\N{FULLWIDTH APOSTROPHE}",  # '\uff07'
]
# Alternation of the characters above (none of them is a regex metacharacter).
RE_SANITIZE_APOSTROPHE = re.compile("|".join(APOSTROPHE_LOOK_ALIKE_CHARS))

# --- date format / month names ---------------------------------------------
# Three runs of D/M/Y letters, each optionally followed by RIGHT-TO-LEFT MARKs
# (U+200F) and separator characters. Every group keeps the last letter of its
# run, so substituting r"\1\2\3" yields a three-letter order such as "DMY".
DATE_ORDER_PATTERN = re.compile(
    "([DMY])+\u200f*[-/. \t]*([DMY])+\u200f*[-/. \t]*([DMY])+"
)
# A string made only of digits with an optional leading "M".
DEFAULT_MONTH_PATTERN = re.compile(r"^M?\d+$", re.U)

# --- day periods -----------------------------------------------------------
# Whole-string "am" / "pm" in any case, with optional dots and whitespace.
AM_PATTERN = re.compile(r"^\s*[Aa]\s*\.?\s*[Mm]\s*\.?\s*$")
PM_PATTERN = re.compile(r"^\s*[Pp]\s*\.?\s*[Mm]\s*\.?\s*$")

# --- relative time patterns ------------------------------------------------
# A literal "{0}" that is not preceded by "+" or "-" (optionally followed by
# whitespace). The variable-width look-behind relies on the `regex` module
# that this file imports under the name `re`.
RELATIVE_PATTERN = re.compile(r"(?<![\+\-]\s*)\{0\}")
# Any opening or closing parenthesis.
PARENTHESIS_PATTERN = re.compile(r"[\(\)]")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _filter_relative_string(relative_string):
|
|
37
|
+
return (
|
|
38
|
+
isinstance(relative_string, str)
|
|
39
|
+
and RELATIVE_PATTERN.search(relative_string)
|
|
40
|
+
and not PARENTHESIS_PATTERN.search(relative_string)
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _filter_month_name(month_name):
    """Return True unless *month_name* matches DEFAULT_MONTH_PATTERN.

    Used as a ``filter`` predicate to drop month entries that are only a
    number (optionally prefixed with "M") instead of a real name.
    """
    is_placeholder = DEFAULT_MONTH_PATTERN.match(month_name) is not None
    return not is_placeholder
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _retrieve_locale_data(locale):
    """Collect the date-related translation data of one CLDR locale.

    Reads ``ca-gregorian.json`` and ``dateFields.json`` from the *locale*
    sub-directory of ``cldr_dates_full_dir`` and gathers the values into an
    ``OrderedDict`` whose keys are, in order: ``name``, ``date_order``, the
    twelve English month names, the seven English weekday names, ``am``,
    ``pm``, the seven time units (``year`` ... ``second``),
    ``relative-type`` and ``relative-type-regex``.

    :param locale: locale identifier; it is used both as the directory name
        and as the key below ``"main"`` inside the two JSON files.
    :return: ``OrderedDict`` with the raw (not yet cleaned) values.
    :raises OSError: if one of the two JSON files cannot be opened.
    :raises KeyError: if an expected entry is missing from the CLDR data.
    :raises AttributeError: if the short date format contains no match for
        ``DATE_ORDER_PATTERN`` (the search result is ``None``).
    """
    ca_gregorian_file = cldr_dates_full_dir + locale + "/ca-gregorian.json"
    date_fields_file = cldr_dates_full_dir + locale + "/dateFields.json"

    # Decode explicitly as UTF-8: without it, open() falls back to the
    # platform's locale encoding and non-ASCII names may be read incorrectly.
    with open(ca_gregorian_file, encoding="utf-8") as f:
        cldr_gregorian_data = json.load(f, object_pairs_hook=OrderedDict)
    with open(date_fields_file, encoding="utf-8") as f:
        cldr_datefields_data = json.load(f, object_pairs_hook=OrderedDict)

    gregorian_dict = cldr_gregorian_data["main"][locale]["dates"]["calendars"][
        "gregorian"
    ]
    date_fields_dict = cldr_datefields_data["main"][locale]["dates"]["fields"]

    field_keys_1 = ["stand-alone", "format"]
    field_keys_2 = [
        "wide",
        "abbreviated",
    ]  # neglecting "narrow" to avoid problems in translation
    relative_keys = ["relativeTimePattern-count-one", "relativeTimePattern-count-other"]

    month_names = [
        "january",
        "february",
        "march",
        "april",
        "may",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    ]
    day_names = [
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
    ]
    units = ["year", "month", "week", "day", "hour", "minute", "second"]
    # Only these units get the "1 <unit> ago" / "in 1 <unit>" entries.
    units_with_previous_and_next = ["year", "month", "week", "day"]

    def calendar_values(section, item):
        """Return *item* of *section* for every context/width combination."""
        return [
            gregorian_dict[section][key1][key2][item]
            for key1 in field_keys_1
            for key2 in field_keys_2
        ]

    def unit_keys(unit):
        """Return the dateFields keys of *unit*: full, short and narrow."""
        return [unit, unit + "-short", unit + "-narrow"]

    def field_values(unit, field):
        """Return *field* of every dateFields variant of *unit*."""
        return [date_fields_dict[key][field] for key in unit_keys(unit)]

    def relative_patterns(unit, direction):
        """Return the usable count-one/count-other patterns of *unit*."""
        candidates = [
            date_fields_dict[key1][direction].get(key2)
            for key1 in unit_keys(unit)
            for key2 in relative_keys
        ]
        return list(filter(_filter_relative_string, candidates))

    json_dict = OrderedDict()
    json_dict["name"] = locale

    # The short date format is either a plain string or a mapping that keeps
    # the pattern under "_value".
    short_format = gregorian_dict["dateFormats"]["short"]
    if not isinstance(short_format, str):
        short_format = short_format["_value"]
    date_format_string = short_format.upper()

    json_dict["date_order"] = DATE_ORDER_PATTERN.sub(
        r"\1\2\3", DATE_ORDER_PATTERN.search(date_format_string).group()
    )

    # Months are keyed "1" ... "12" in the CLDR data.
    for number, month in enumerate(month_names, start=1):
        json_dict[month] = list(
            filter(_filter_month_name, calendar_values("months", str(number)))
        )

    # Weekdays are keyed by the first three letters of the English name.
    for day in day_names:
        json_dict[day] = calendar_values("days", day[:3])

    json_dict["am"] = [
        AM_PATTERN.sub("am", x) for x in calendar_values("dayPeriods", "am")
    ]
    json_dict["pm"] = [
        PM_PATTERN.sub("pm", x) for x in calendar_values("dayPeriods", "pm")
    ]

    for unit in units:
        json_dict[unit] = field_values(unit, "displayName")

    relative_type = OrderedDict()
    json_dict["relative-type"] = relative_type
    for unit in units_with_previous_and_next:
        relative_type[f"1 {unit} ago"] = field_values(unit, "relative-type--1")
        relative_type[f"0 {unit} ago"] = field_values(unit, "relative-type-0")
        relative_type[f"in 1 {unit}"] = field_values(unit, "relative-type-1")
    for unit in units:
        if unit not in units_with_previous_and_next:
            relative_type[f"0 {unit} ago"] = field_values(unit, "relative-type-0")

    relative_type_regex = OrderedDict()
    json_dict["relative-type-regex"] = relative_type_regex
    for unit in units:
        relative_type_regex[f"in \\1 {unit}"] = relative_patterns(
            unit, "relativeTime-type-future"
        )
        relative_type_regex[f"\\1 {unit} ago"] = relative_patterns(
            unit, "relativeTime-type-past"
        )

    return json_dict
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def _clean_string(given_string):
    """Normalise one translation string.

    Apostrophe look-alikes become "'", dots are removed, the text is
    lower-cased and every run of whitespace is collapsed to a single space
    (leading and trailing whitespace is dropped).
    """
    normalised = (
        RE_SANITIZE_APOSTROPHE.sub("'", given_string).replace(".", "").lower()
    )
    words = normalised.split()
    return " ".join(words)
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def _clean_dict(json_dict):
|
|
524
|
+
"""Remove duplicates and sort"""
|
|
525
|
+
for key, value in json_dict.items():
|
|
526
|
+
if isinstance(value, list):
|
|
527
|
+
json_dict[key] = sorted(OrderedDict.fromkeys(map(_clean_string, value)))
|
|
528
|
+
elif isinstance(value, dict):
|
|
529
|
+
json_dict[key] = OrderedDict(sorted(value.items()))
|
|
530
|
+
json_dict[key] = _clean_dict(json_dict[key])
|
|
531
|
+
return OrderedDict(filter(lambda x: x[1], json_dict.items()))
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def main():
    """Write one cleaned translation JSON file per language.

    Calls ``get_raw_data()`` first, then recreates the output directory from
    scratch. For every language returned by ``_get_language_locale_dict()``
    the cleaned language data is written to ``<language>.json``; the entries
    in which each of its locales differs from the language are stored under
    the ``locale_specific`` key. All paths are relative to the current
    working directory.
    """
    get_raw_data()
    language_locale_dict = _get_language_locale_dict()

    parent_directory = "../dateparser_data/cldr_language_data"
    directory = "../dateparser_data/cldr_language_data/date_translation_data/"

    if not os.path.isdir(parent_directory):
        os.mkdir(parent_directory)
    # Always start with an empty output directory.
    if os.path.isdir(directory):
        shutil.rmtree(directory)
    os.mkdir(directory)

    for language in language_locale_dict:
        language_data = _clean_dict(_retrieve_locale_data(language))

        differences = OrderedDict()
        for locale in language_locale_dict[language]:
            locale_data = _clean_dict(_retrieve_locale_data(locale))
            differences[locale] = _clean_dict(
                get_dict_difference(language_data, locale_data)
            )
        language_data["locale_specific"] = OrderedDict(sorted(differences.items()))

        filename = f"{directory}{language}.json"
        print("writing " + filename)

        serialized = json.dumps(
            language_data, indent=4, separators=(",", ": "), ensure_ascii=False
        )
        payload = serialized.encode("utf-8")
        with open(filename, "wb") as output:
            output.write(payload)
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
if __name__ == "__main__":
|
|
567
|
+
main()
|