dateparser 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dateparser/__init__.py +82 -0
- dateparser/calendars/__init__.py +144 -0
- dateparser/calendars/hijri.py +6 -0
- dateparser/calendars/hijri_parser.py +60 -0
- dateparser/calendars/jalali.py +9 -0
- dateparser/calendars/jalali_parser.py +184 -0
- dateparser/conf.py +267 -0
- dateparser/custom_language_detection/__init__.py +0 -0
- dateparser/custom_language_detection/fasttext.py +43 -0
- dateparser/custom_language_detection/langdetect.py +37 -0
- dateparser/custom_language_detection/language_mapping.py +18 -0
- dateparser/data/__init__.py +2 -0
- dateparser/data/date_translation_data/__init__.py +0 -0
- dateparser/data/date_translation_data/af.py +242 -0
- dateparser/data/date_translation_data/agq.py +169 -0
- dateparser/data/date_translation_data/ak.py +169 -0
- dateparser/data/date_translation_data/am.py +222 -0
- dateparser/data/date_translation_data/ar.py +574 -0
- dateparser/data/date_translation_data/as.py +164 -0
- dateparser/data/date_translation_data/asa.py +168 -0
- dateparser/data/date_translation_data/ast.py +280 -0
- dateparser/data/date_translation_data/az-Cyrl.py +168 -0
- dateparser/data/date_translation_data/az-Latn.py +217 -0
- dateparser/data/date_translation_data/az.py +217 -0
- dateparser/data/date_translation_data/bas.py +169 -0
- dateparser/data/date_translation_data/be.py +340 -0
- dateparser/data/date_translation_data/bem.py +161 -0
- dateparser/data/date_translation_data/bez.py +169 -0
- dateparser/data/date_translation_data/bg.py +345 -0
- dateparser/data/date_translation_data/bm.py +167 -0
- dateparser/data/date_translation_data/bn.py +241 -0
- dateparser/data/date_translation_data/bo.py +185 -0
- dateparser/data/date_translation_data/br.py +226 -0
- dateparser/data/date_translation_data/brx.py +157 -0
- dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
- dateparser/data/date_translation_data/bs-Latn.py +248 -0
- dateparser/data/date_translation_data/bs.py +248 -0
- dateparser/data/date_translation_data/ca.py +313 -0
- dateparser/data/date_translation_data/ce.py +225 -0
- dateparser/data/date_translation_data/cgg.py +169 -0
- dateparser/data/date_translation_data/chr.py +240 -0
- dateparser/data/date_translation_data/ckb.py +154 -0
- dateparser/data/date_translation_data/cs.py +316 -0
- dateparser/data/date_translation_data/cy.py +217 -0
- dateparser/data/date_translation_data/da.py +296 -0
- dateparser/data/date_translation_data/dav.py +169 -0
- dateparser/data/date_translation_data/de.py +357 -0
- dateparser/data/date_translation_data/dje.py +167 -0
- dateparser/data/date_translation_data/dsb.py +270 -0
- dateparser/data/date_translation_data/dua.py +169 -0
- dateparser/data/date_translation_data/dyo.py +168 -0
- dateparser/data/date_translation_data/dz.py +225 -0
- dateparser/data/date_translation_data/ebu.py +169 -0
- dateparser/data/date_translation_data/ee.py +233 -0
- dateparser/data/date_translation_data/el.py +279 -0
- dateparser/data/date_translation_data/en.py +851 -0
- dateparser/data/date_translation_data/eo.py +169 -0
- dateparser/data/date_translation_data/es.py +499 -0
- dateparser/data/date_translation_data/et.py +233 -0
- dateparser/data/date_translation_data/eu.py +219 -0
- dateparser/data/date_translation_data/ewo.py +169 -0
- dateparser/data/date_translation_data/fa.py +270 -0
- dateparser/data/date_translation_data/ff.py +179 -0
- dateparser/data/date_translation_data/fi.py +345 -0
- dateparser/data/date_translation_data/fil.py +223 -0
- dateparser/data/date_translation_data/fo.py +256 -0
- dateparser/data/date_translation_data/fr.py +520 -0
- dateparser/data/date_translation_data/fur.py +223 -0
- dateparser/data/date_translation_data/fy.py +223 -0
- dateparser/data/date_translation_data/ga.py +238 -0
- dateparser/data/date_translation_data/gd.py +277 -0
- dateparser/data/date_translation_data/gl.py +253 -0
- dateparser/data/date_translation_data/gsw.py +179 -0
- dateparser/data/date_translation_data/gu.py +216 -0
- dateparser/data/date_translation_data/guz.py +170 -0
- dateparser/data/date_translation_data/gv.py +166 -0
- dateparser/data/date_translation_data/ha.py +176 -0
- dateparser/data/date_translation_data/haw.py +168 -0
- dateparser/data/date_translation_data/he.py +371 -0
- dateparser/data/date_translation_data/hi.py +261 -0
- dateparser/data/date_translation_data/hr.py +378 -0
- dateparser/data/date_translation_data/hsb.py +271 -0
- dateparser/data/date_translation_data/hu.py +297 -0
- dateparser/data/date_translation_data/hy.py +246 -0
- dateparser/data/date_translation_data/id.py +272 -0
- dateparser/data/date_translation_data/ig.py +168 -0
- dateparser/data/date_translation_data/ii.py +157 -0
- dateparser/data/date_translation_data/is.py +242 -0
- dateparser/data/date_translation_data/it.py +282 -0
- dateparser/data/date_translation_data/ja.py +286 -0
- dateparser/data/date_translation_data/jgo.py +188 -0
- dateparser/data/date_translation_data/jmc.py +168 -0
- dateparser/data/date_translation_data/ka.py +241 -0
- dateparser/data/date_translation_data/kab.py +169 -0
- dateparser/data/date_translation_data/kam.py +169 -0
- dateparser/data/date_translation_data/kde.py +169 -0
- dateparser/data/date_translation_data/kea.py +230 -0
- dateparser/data/date_translation_data/khq.py +167 -0
- dateparser/data/date_translation_data/ki.py +169 -0
- dateparser/data/date_translation_data/kk.py +228 -0
- dateparser/data/date_translation_data/kl.py +213 -0
- dateparser/data/date_translation_data/kln.py +171 -0
- dateparser/data/date_translation_data/km.py +198 -0
- dateparser/data/date_translation_data/kn.py +225 -0
- dateparser/data/date_translation_data/ko.py +207 -0
- dateparser/data/date_translation_data/kok.py +157 -0
- dateparser/data/date_translation_data/ks.py +152 -0
- dateparser/data/date_translation_data/ksb.py +168 -0
- dateparser/data/date_translation_data/ksf.py +169 -0
- dateparser/data/date_translation_data/ksh.py +192 -0
- dateparser/data/date_translation_data/kw.py +169 -0
- dateparser/data/date_translation_data/ky.py +240 -0
- dateparser/data/date_translation_data/lag.py +169 -0
- dateparser/data/date_translation_data/lb.py +233 -0
- dateparser/data/date_translation_data/lg.py +169 -0
- dateparser/data/date_translation_data/lkt.py +194 -0
- dateparser/data/date_translation_data/ln.py +179 -0
- dateparser/data/date_translation_data/lo.py +228 -0
- dateparser/data/date_translation_data/lrc.py +154 -0
- dateparser/data/date_translation_data/lt.py +263 -0
- dateparser/data/date_translation_data/lu.py +169 -0
- dateparser/data/date_translation_data/luo.py +169 -0
- dateparser/data/date_translation_data/luy.py +168 -0
- dateparser/data/date_translation_data/lv.py +257 -0
- dateparser/data/date_translation_data/mas.py +173 -0
- dateparser/data/date_translation_data/mer.py +168 -0
- dateparser/data/date_translation_data/mfe.py +166 -0
- dateparser/data/date_translation_data/mg.py +168 -0
- dateparser/data/date_translation_data/mgh.py +169 -0
- dateparser/data/date_translation_data/mgo.py +151 -0
- dateparser/data/date_translation_data/mk.py +234 -0
- dateparser/data/date_translation_data/ml.py +217 -0
- dateparser/data/date_translation_data/mn.py +224 -0
- dateparser/data/date_translation_data/mr.py +229 -0
- dateparser/data/date_translation_data/ms.py +242 -0
- dateparser/data/date_translation_data/mt.py +175 -0
- dateparser/data/date_translation_data/mua.py +169 -0
- dateparser/data/date_translation_data/my.py +203 -0
- dateparser/data/date_translation_data/mzn.py +199 -0
- dateparser/data/date_translation_data/naq.py +169 -0
- dateparser/data/date_translation_data/nb.py +261 -0
- dateparser/data/date_translation_data/nd.py +169 -0
- dateparser/data/date_translation_data/ne.py +207 -0
- dateparser/data/date_translation_data/nl.py +273 -0
- dateparser/data/date_translation_data/nmg.py +169 -0
- dateparser/data/date_translation_data/nn.py +231 -0
- dateparser/data/date_translation_data/nnh.py +150 -0
- dateparser/data/date_translation_data/nus.py +166 -0
- dateparser/data/date_translation_data/nyn.py +169 -0
- dateparser/data/date_translation_data/om.py +173 -0
- dateparser/data/date_translation_data/or.py +157 -0
- dateparser/data/date_translation_data/os.py +203 -0
- dateparser/data/date_translation_data/pa-Arab.py +150 -0
- dateparser/data/date_translation_data/pa-Guru.py +221 -0
- dateparser/data/date_translation_data/pa.py +221 -0
- dateparser/data/date_translation_data/pl.py +416 -0
- dateparser/data/date_translation_data/ps.py +150 -0
- dateparser/data/date_translation_data/pt.py +981 -0
- dateparser/data/date_translation_data/qu.py +176 -0
- dateparser/data/date_translation_data/rm.py +166 -0
- dateparser/data/date_translation_data/rn.py +169 -0
- dateparser/data/date_translation_data/ro.py +270 -0
- dateparser/data/date_translation_data/rof.py +157 -0
- dateparser/data/date_translation_data/ru.py +442 -0
- dateparser/data/date_translation_data/rw.py +169 -0
- dateparser/data/date_translation_data/rwk.py +168 -0
- dateparser/data/date_translation_data/sah.py +219 -0
- dateparser/data/date_translation_data/saq.py +169 -0
- dateparser/data/date_translation_data/sbp.py +169 -0
- dateparser/data/date_translation_data/se.py +280 -0
- dateparser/data/date_translation_data/seh.py +169 -0
- dateparser/data/date_translation_data/ses.py +167 -0
- dateparser/data/date_translation_data/sg.py +169 -0
- dateparser/data/date_translation_data/shi-Latn.py +169 -0
- dateparser/data/date_translation_data/shi-Tfng.py +169 -0
- dateparser/data/date_translation_data/shi.py +169 -0
- dateparser/data/date_translation_data/si.py +220 -0
- dateparser/data/date_translation_data/sk.py +327 -0
- dateparser/data/date_translation_data/sl.py +244 -0
- dateparser/data/date_translation_data/smn.py +176 -0
- dateparser/data/date_translation_data/sn.py +169 -0
- dateparser/data/date_translation_data/so.py +179 -0
- dateparser/data/date_translation_data/sq.py +237 -0
- dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
- dateparser/data/date_translation_data/sr-Latn.py +306 -0
- dateparser/data/date_translation_data/sr.py +255 -0
- dateparser/data/date_translation_data/sv.py +309 -0
- dateparser/data/date_translation_data/sw.py +231 -0
- dateparser/data/date_translation_data/ta.py +264 -0
- dateparser/data/date_translation_data/te.py +239 -0
- dateparser/data/date_translation_data/teo.py +173 -0
- dateparser/data/date_translation_data/th.py +300 -0
- dateparser/data/date_translation_data/ti.py +173 -0
- dateparser/data/date_translation_data/tl.py +137 -0
- dateparser/data/date_translation_data/to.py +216 -0
- dateparser/data/date_translation_data/tr.py +259 -0
- dateparser/data/date_translation_data/twq.py +167 -0
- dateparser/data/date_translation_data/tzm.py +169 -0
- dateparser/data/date_translation_data/ug.py +203 -0
- dateparser/data/date_translation_data/uk.py +502 -0
- dateparser/data/date_translation_data/ur.py +256 -0
- dateparser/data/date_translation_data/uz-Arab.py +167 -0
- dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
- dateparser/data/date_translation_data/uz-Latn.py +216 -0
- dateparser/data/date_translation_data/uz.py +216 -0
- dateparser/data/date_translation_data/vi.py +260 -0
- dateparser/data/date_translation_data/vun.py +168 -0
- dateparser/data/date_translation_data/wae.py +224 -0
- dateparser/data/date_translation_data/xog.py +169 -0
- dateparser/data/date_translation_data/yav.py +169 -0
- dateparser/data/date_translation_data/yi.py +178 -0
- dateparser/data/date_translation_data/yo.py +263 -0
- dateparser/data/date_translation_data/yue.py +203 -0
- dateparser/data/date_translation_data/zgh.py +169 -0
- dateparser/data/date_translation_data/zh-Hans.py +240 -0
- dateparser/data/date_translation_data/zh-Hant.py +402 -0
- dateparser/data/date_translation_data/zh.py +273 -0
- dateparser/data/date_translation_data/zu.py +196 -0
- dateparser/data/languages_info.py +826 -0
- dateparser/date.py +599 -0
- dateparser/date_parser.py +55 -0
- dateparser/freshness_date_parser.py +156 -0
- dateparser/languages/__init__.py +2 -0
- dateparser/languages/dictionary.py +352 -0
- dateparser/languages/loader.py +224 -0
- dateparser/languages/locale.py +625 -0
- dateparser/languages/validation.py +467 -0
- dateparser/parser.py +742 -0
- dateparser/search/__init__.py +71 -0
- dateparser/search/detection.py +78 -0
- dateparser/search/search.py +297 -0
- dateparser/search/text_detection.py +89 -0
- dateparser/timezone_parser.py +91 -0
- dateparser/timezones.py +469 -0
- dateparser/utils/__init__.py +257 -0
- dateparser/utils/strptime.py +108 -0
- dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
- dateparser-1.2.1.dist-info/LICENSE +12 -0
- dateparser-1.2.1.dist-info/METADATA +864 -0
- dateparser-1.2.1.dist-info/RECORD +256 -0
- dateparser-1.2.1.dist-info/WHEEL +5 -0
- dateparser-1.2.1.dist-info/entry_points.txt +2 -0
- dateparser-1.2.1.dist-info/top_level.txt +4 -0
- dateparser_cli/__init__.py +0 -0
- dateparser_cli/cli.py +36 -0
- dateparser_cli/exceptions.py +2 -0
- dateparser_cli/fasttext_manager.py +42 -0
- dateparser_cli/utils.py +27 -0
- dateparser_data/__init__.py +0 -0
- dateparser_data/settings.py +33 -0
- dateparser_scripts/__init__.py +0 -0
- dateparser_scripts/get_cldr_data.py +567 -0
- dateparser_scripts/order_languages.py +217 -0
- dateparser_scripts/update_supported_languages_and_locales.py +48 -0
- dateparser_scripts/utils.py +73 -0
- dateparser_scripts/write_complete_data.py +129 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
import regex as re
|
|
2
|
+
|
|
3
|
+
from dateparser.utils import get_logger
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LanguageValidator:
    """Structural validator for a language-info mapping.

    ``validate_info`` is the entry point.  Each ``_validate_*`` helper checks
    one aspect of the mapping, logs one error per problem through the logger
    returned by ``get_logger`` and returns ``True`` when that aspect is valid,
    ``False`` otherwise.  Problems are reported through logging rather than
    by raising exceptions.
    """

    # Created lazily by get_logger() and then cached on the class.
    logger = None

    # Every top-level key a language-info mapping may contain; anything else
    # is reported by _validate_extra_keys.
    VALID_KEYS = [
        "name",
        "skip",
        "pertain",
        "simplifications",
        "no_word_spacing",
        "ago",
        "in",
        "monday",
        "tuesday",
        "wednesday",
        "thursday",
        "friday",
        "saturday",
        "sunday",
        "january",
        "february",
        "march",
        "april",
        "may",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
        "year",
        "month",
        "week",
        "day",
        "hour",
        "minute",
        "second",
        "sentence_splitter_group",
    ]

    @classmethod
    def get_logger(cls):
        """Return the logger cached on the class, creating it on first use."""
        if cls.logger is None:
            # Module-level ``get_logger`` imported from dateparser.utils
            # (class-scope names are not visible here, so this is not
            # a recursive call).
            cls.logger = get_logger()
        return cls.logger

    @classmethod
    def validate_info(cls, language_id, info):
        """Validate the whole language-info mapping.

        :param language_id: identifier used only in log messages.
        :param info: the object to validate; expected to be a ``dict``.
        :return: ``True`` when every check passes, ``False`` otherwise.
        """
        # Nothing else can be checked if ``info`` is not a dict at all.
        if not cls._validate_type(language_id, info):
            return False

        validators = (
            cls._validate_name,
            cls._validate_word_spacing,
            # Previously defined but never invoked, even though
            # 'sentence_splitter_group' is listed in VALID_KEYS.
            cls._validate_sentence_splitter_group,
            cls._validate_skip_list,
            cls._validate_pertain_list,
            cls._validate_weekdays,
            cls._validate_months,
            cls._validate_units,
            cls._validate_other_words,
            cls._validate_simplifications,
            cls._validate_extra_keys,
        )
        # Build the full list first (no short-circuit) so that every
        # validator runs and all problems are logged in a single pass.
        outcomes = [validate(language_id, info) for validate in validators]
        return all(outcomes)

    @classmethod
    def _validate_type(cls, language_id, info):
        """Check that ``info`` is a ``dict``."""
        if isinstance(info, dict):
            return True

        cls.get_logger().error(
            "Language '%(id)s' info expected to be dict, but have got %(type)s",
            {"id": language_id, "type": type(info).__name__},
        )
        return False

    @classmethod
    def _validate_name(cls, language_id, info):
        """Check that ``info['name']`` exists and is a non-empty string."""
        name = info.get("name")
        if isinstance(name, str) and name:
            return True

        cls.get_logger().error(
            "Language '%(id)s' does not have a name", {"id": language_id}
        )
        return False

    @classmethod
    def _validate_word_spacing(cls, language_id, info):
        """Check that the optional ``no_word_spacing`` value is a boolean."""
        if "no_word_spacing" not in info:
            return True  # Optional key

        value = info["no_word_spacing"]
        # A membership test against [True, False] would also accept 0, 1 and
        # 1.0 (they compare equal to booleans); require a real bool.
        if isinstance(value, bool):
            return True

        cls.get_logger().error(
            "Invalid 'no_word_spacing' value %(value)r for '%(id)s' language: "
            "expected boolean",
            {"value": value, "id": language_id},
        )
        return False

    @classmethod
    def _validate_sentence_splitter_group(cls, language_id, info):
        """Check that the optional ``sentence_splitter_group`` is an int in 1..6."""
        if "sentence_splitter_group" not in info:
            return True  # Optional key

        group = info["sentence_splitter_group"]
        # Test the type before the range: comparing a falsy non-int such as
        # None or "" with ``<`` would raise TypeError instead of reporting it.
        if not isinstance(group, int):
            cls.get_logger().error(
                "Invalid 'sentence_splitter_group' for '%(id)s' language: "
                "expected int type but have got %(type)s",
                {"id": language_id, "type": type(group).__name__},
            )
            return False

        if not 1 <= group <= 6:
            cls.get_logger().error(
                "Invalid 'sentence_splitter_group' number %(number)r for '%(id)s' language: "
                "expected number from 1 to 6",
                {"number": group, "id": language_id},
            )
            return False

        return True

    @classmethod
    def _validate_skip_list(cls, language_id, info):
        """Check that the optional ``skip`` value is a list of non-empty strings."""
        if "skip" not in info:
            return True  # Optional key

        skip_tokens_list = info["skip"]
        if not isinstance(skip_tokens_list, list):
            cls.get_logger().error(
                "Invalid 'skip' list for '%(id)s' language: "
                "expected list type but have got %(type)s",
                {"id": language_id, "type": type(skip_tokens_list).__name__},
            )
            return False

        result = True
        for token in skip_tokens_list:
            if not isinstance(token, str) or not token:
                cls.get_logger().error(
                    "Invalid 'skip' token %(token)r for '%(id)s' language: "
                    "expected not empty string",
                    {"token": token, "id": language_id},
                )
                result = False
        return result

    @classmethod
    def _validate_pertain_list(cls, language_id, info):
        """Check that the optional ``pertain`` value is a list of non-empty strings."""
        if "pertain" not in info:
            return True  # Optional key

        # Read the 'pertain' entry itself; reading 'skip' here validated the
        # wrong list and raised KeyError when only 'pertain' was present.
        pertain_tokens_list = info["pertain"]
        if not isinstance(pertain_tokens_list, list):
            cls.get_logger().error(
                "Invalid 'pertain' list for '%(id)s' language: "
                "expected list type but have got %(type)s",
                {"id": language_id, "type": type(pertain_tokens_list).__name__},
            )
            return False

        result = True
        for token in pertain_tokens_list:
            if not isinstance(token, str) or not token:
                cls.get_logger().error(
                    "Invalid 'pertain' token %(token)r for '%(id)s' language: "
                    "expected not empty string",
                    {"token": token, "id": language_id},
                )
                result = False
        return result

    @classmethod
    def _validate_weekdays(cls, language_id, info):
        """Check that each weekday has a non-empty list of non-empty strings."""
        result = True

        for weekday in (
            "monday",
            "tuesday",
            "wednesday",
            "thursday",
            "friday",
            "saturday",
            "sunday",
        ):
            if weekday not in info or not info[weekday]:
                cls.get_logger().error(
                    "No translations for '%(weekday)s' provided for '%(id)s' language",
                    {"weekday": weekday, "id": language_id},
                )
                result = False
                continue

            translations_list = info[weekday]
            if not isinstance(translations_list, list):
                cls.get_logger().error(
                    "Invalid '%(weekday)s' translations list for '%(id)s' language: "
                    "expected list type but have got %(type)s",
                    {
                        "weekday": weekday,
                        "id": language_id,
                        "type": type(translations_list).__name__,
                    },
                )
                result = False
                continue

            for token in translations_list:
                if not isinstance(token, str) or not token:
                    cls.get_logger().error(
                        "Invalid '%(weekday)s' translation %(token)r for '%(id)s' language: "
                        "expected not empty string",
                        {"weekday": weekday, "token": token, "id": language_id},
                    )
                    result = False

        return result

    @classmethod
    def _validate_months(cls, language_id, info):
        """Check that each month has a non-empty list of non-empty strings."""
        result = True

        for month in (
            "january",
            "february",
            "march",
            "april",
            "may",
            "june",
            "july",
            "august",
            "september",
            "october",
            "november",
            "december",
        ):
            if month not in info or not info[month]:
                cls.get_logger().error(
                    "No translations for '%(month)s' provided for '%(id)s' language",
                    {"month": month, "id": language_id},
                )
                result = False
                continue

            translations_list = info[month]
            if not isinstance(translations_list, list):
                cls.get_logger().error(
                    "Invalid '%(month)s' translations list for '%(id)s' language: "
                    "expected list type but have got %(type)s",
                    {
                        "month": month,
                        "id": language_id,
                        "type": type(translations_list).__name__,
                    },
                )
                result = False
                continue

            for token in translations_list:
                if not isinstance(token, str) or not token:
                    cls.get_logger().error(
                        "Invalid '%(month)s' translation %(token)r for '%(id)s' language: "
                        "expected not empty string",
                        {"month": month, "token": token, "id": language_id},
                    )
                    result = False

        return result

    @classmethod
    def _validate_units(cls, language_id, info):
        """Check that each time unit has a non-empty list of non-empty strings."""
        result = True

        for unit in "year", "month", "week", "day", "hour", "minute", "second":
            if unit not in info or not info[unit]:
                cls.get_logger().error(
                    "No translations for '%(unit)s' provided for '%(id)s' language",
                    {"unit": unit, "id": language_id},
                )
                result = False
                continue

            translations_list = info[unit]
            if not isinstance(translations_list, list):
                cls.get_logger().error(
                    "Invalid '%(unit)s' translations list for '%(id)s' language: "
                    "expected list type but have got %(type)s",
                    {
                        "unit": unit,
                        "id": language_id,
                        "type": type(translations_list).__name__,
                    },
                )
                result = False
                continue

            for token in translations_list:
                if not isinstance(token, str) or not token:
                    cls.get_logger().error(
                        "Invalid '%(unit)s' translation %(token)r for '%(id)s' language: "
                        "expected not empty string",
                        {"unit": unit, "token": token, "id": language_id},
                    )
                    result = False

        return result

    @classmethod
    def _validate_other_words(cls, language_id, info):
        """Check that ``ago`` has a non-empty list of non-empty strings."""
        result = True

        for word in ("ago",):
            if word not in info or not info[word]:
                cls.get_logger().error(
                    "No translations for '%(word)s' provided for '%(id)s' language",
                    {"word": word, "id": language_id},
                )
                result = False
                continue

            translations_list = info[word]
            if not isinstance(translations_list, list):
                cls.get_logger().error(
                    "Invalid '%(word)s' translations list for '%(id)s' language: "
                    "expected list type but have got %(type)s",
                    {
                        "word": word,
                        "id": language_id,
                        "type": type(translations_list).__name__,
                    },
                )
                result = False
                continue

            for token in translations_list:
                if not isinstance(token, str) or not token:
                    cls.get_logger().error(
                        "Invalid '%(word)s' translation %(token)r for '%(id)s' language: "
                        "expected not empty string",
                        {"word": word, "token": token, "id": language_id},
                    )
                    result = False

        return result

    @classmethod
    def _validate_simplifications(cls, language_id, info):
        """Check the optional ``simplifications`` list.

        Each item must be a single-entry dict mapping a regular-expression
        string to a ``str`` or ``int`` replacement.  Every capture group of
        the pattern must be referenced by the replacement (as ``\\N`` or
        ``\\g<name>``), and the replacement must not reference groups the
        pattern does not define.
        """
        if "simplifications" not in info:
            return True  # Optional key

        simplifications_list = info["simplifications"]
        if not isinstance(simplifications_list, list):
            cls.get_logger().error(
                "Invalid 'simplifications' list for '%(id)s' language: "
                "expected list type but have got %(type)s",
                {"id": language_id, "type": type(simplifications_list).__name__},
            )
            return False

        result = True
        for simplification in simplifications_list:
            if not isinstance(simplification, dict) or len(simplification) != 1:
                cls.get_logger().error(
                    "Invalid simplification %(simplification)r for '%(id)s' language: "
                    "eash simplification suppose to be one-to-one mapping",
                    {"simplification": simplification, "id": language_id},
                )
                result = False
                continue

            key, value = next(iter(simplification.items()))
            if not isinstance(key, str) or not isinstance(value, (str, int)):
                cls.get_logger().error(
                    "Invalid simplification %(simplification)r for '%(id)s' language: "
                    "each simplification suppose to be string-to-string-or-int mapping",
                    {"simplification": simplification, "id": language_id},
                )
                result = False
                continue

            # A malformed pattern is a validation failure, not a reason for
            # the validator itself to crash.
            try:
                compiled_key = re.compile(key)
            except re.error as exc:
                cls.get_logger().error(
                    "Invalid simplification %(simplification)r for '%(id)s' language: "
                    "key is not a valid regular expression (%(error)s)",
                    {
                        "simplification": simplification,
                        "id": language_id,
                        "error": exc,
                    },
                )
                result = False
                continue

            # Collect group references from the replacement: numeric
            # backreferences (\1) and \g<...> references (number or name).
            value = str(value)
            replacements = re.findall(r"\\(\d+)", value)
            replacements.extend(re.findall(r"\\g<(.+?)>", value))

            used_groups = set()
            for group in replacements:
                if group.isdigit():
                    used_groups.add(int(group))
                elif group in compiled_key.groupindex:
                    # Translate a named reference to its group number.
                    used_groups.add(compiled_key.groupindex[group])
                else:
                    cls.get_logger().error(
                        "Invalid simplification %(simplification)r for '%(id)s' language: "
                        "unknown group %(group)s",
                        {
                            "simplification": simplification,
                            "id": language_id,
                            "group": group,
                        },
                    )
                    result = False

            expected_groups = set(range(0, compiled_key.groups + 1))
            extra_groups = used_groups - expected_groups
            not_used_groups = expected_groups - used_groups
            not_used_groups -= {0}  # Entire substring is not required to be used

            if extra_groups:
                cls.get_logger().error(
                    "Invalid simplification %(simplification)r for '%(id)s' language: "
                    "unknown groups %(groups)s",
                    {
                        "simplification": simplification,
                        "id": language_id,
                        "groups": ", ".join(map(str, sorted(extra_groups))),
                    },
                )
                result = False

            if not_used_groups:
                cls.get_logger().error(
                    "Invalid simplification %(simplification)r for '%(id)s' language: "
                    "groups %(groups)s were not used",
                    {
                        "simplification": simplification,
                        "id": language_id,
                        "groups": ", ".join(map(str, sorted(not_used_groups))),
                    },
                )
                result = False

        return result

    @classmethod
    def _validate_extra_keys(cls, language_id, info):
        """Check that ``info`` has no keys outside ``VALID_KEYS``."""
        extra_keys = set(info.keys()) - set(cls.VALID_KEYS)
        if not extra_keys:
            return True

        cls.get_logger().error(
            "Extra keys found for '%(id)s' language: %(keys)s",
            # Sort the reprs so the message is deterministic regardless of
            # set iteration order (and works for keys of mixed types).
            {"id": language_id, "keys": ", ".join(sorted(map(repr, extra_keys)))},
        )
        return False
|