dateparser 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256):
  1. dateparser/__init__.py +82 -0
  2. dateparser/calendars/__init__.py +144 -0
  3. dateparser/calendars/hijri.py +6 -0
  4. dateparser/calendars/hijri_parser.py +60 -0
  5. dateparser/calendars/jalali.py +9 -0
  6. dateparser/calendars/jalali_parser.py +184 -0
  7. dateparser/conf.py +267 -0
  8. dateparser/custom_language_detection/__init__.py +0 -0
  9. dateparser/custom_language_detection/fasttext.py +43 -0
  10. dateparser/custom_language_detection/langdetect.py +37 -0
  11. dateparser/custom_language_detection/language_mapping.py +18 -0
  12. dateparser/data/__init__.py +2 -0
  13. dateparser/data/date_translation_data/__init__.py +0 -0
  14. dateparser/data/date_translation_data/af.py +242 -0
  15. dateparser/data/date_translation_data/agq.py +169 -0
  16. dateparser/data/date_translation_data/ak.py +169 -0
  17. dateparser/data/date_translation_data/am.py +222 -0
  18. dateparser/data/date_translation_data/ar.py +574 -0
  19. dateparser/data/date_translation_data/as.py +164 -0
  20. dateparser/data/date_translation_data/asa.py +168 -0
  21. dateparser/data/date_translation_data/ast.py +280 -0
  22. dateparser/data/date_translation_data/az-Cyrl.py +168 -0
  23. dateparser/data/date_translation_data/az-Latn.py +217 -0
  24. dateparser/data/date_translation_data/az.py +217 -0
  25. dateparser/data/date_translation_data/bas.py +169 -0
  26. dateparser/data/date_translation_data/be.py +340 -0
  27. dateparser/data/date_translation_data/bem.py +161 -0
  28. dateparser/data/date_translation_data/bez.py +169 -0
  29. dateparser/data/date_translation_data/bg.py +345 -0
  30. dateparser/data/date_translation_data/bm.py +167 -0
  31. dateparser/data/date_translation_data/bn.py +241 -0
  32. dateparser/data/date_translation_data/bo.py +185 -0
  33. dateparser/data/date_translation_data/br.py +226 -0
  34. dateparser/data/date_translation_data/brx.py +157 -0
  35. dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
  36. dateparser/data/date_translation_data/bs-Latn.py +248 -0
  37. dateparser/data/date_translation_data/bs.py +248 -0
  38. dateparser/data/date_translation_data/ca.py +313 -0
  39. dateparser/data/date_translation_data/ce.py +225 -0
  40. dateparser/data/date_translation_data/cgg.py +169 -0
  41. dateparser/data/date_translation_data/chr.py +240 -0
  42. dateparser/data/date_translation_data/ckb.py +154 -0
  43. dateparser/data/date_translation_data/cs.py +316 -0
  44. dateparser/data/date_translation_data/cy.py +217 -0
  45. dateparser/data/date_translation_data/da.py +296 -0
  46. dateparser/data/date_translation_data/dav.py +169 -0
  47. dateparser/data/date_translation_data/de.py +357 -0
  48. dateparser/data/date_translation_data/dje.py +167 -0
  49. dateparser/data/date_translation_data/dsb.py +270 -0
  50. dateparser/data/date_translation_data/dua.py +169 -0
  51. dateparser/data/date_translation_data/dyo.py +168 -0
  52. dateparser/data/date_translation_data/dz.py +225 -0
  53. dateparser/data/date_translation_data/ebu.py +169 -0
  54. dateparser/data/date_translation_data/ee.py +233 -0
  55. dateparser/data/date_translation_data/el.py +279 -0
  56. dateparser/data/date_translation_data/en.py +851 -0
  57. dateparser/data/date_translation_data/eo.py +169 -0
  58. dateparser/data/date_translation_data/es.py +499 -0
  59. dateparser/data/date_translation_data/et.py +233 -0
  60. dateparser/data/date_translation_data/eu.py +219 -0
  61. dateparser/data/date_translation_data/ewo.py +169 -0
  62. dateparser/data/date_translation_data/fa.py +270 -0
  63. dateparser/data/date_translation_data/ff.py +179 -0
  64. dateparser/data/date_translation_data/fi.py +345 -0
  65. dateparser/data/date_translation_data/fil.py +223 -0
  66. dateparser/data/date_translation_data/fo.py +256 -0
  67. dateparser/data/date_translation_data/fr.py +520 -0
  68. dateparser/data/date_translation_data/fur.py +223 -0
  69. dateparser/data/date_translation_data/fy.py +223 -0
  70. dateparser/data/date_translation_data/ga.py +238 -0
  71. dateparser/data/date_translation_data/gd.py +277 -0
  72. dateparser/data/date_translation_data/gl.py +253 -0
  73. dateparser/data/date_translation_data/gsw.py +179 -0
  74. dateparser/data/date_translation_data/gu.py +216 -0
  75. dateparser/data/date_translation_data/guz.py +170 -0
  76. dateparser/data/date_translation_data/gv.py +166 -0
  77. dateparser/data/date_translation_data/ha.py +176 -0
  78. dateparser/data/date_translation_data/haw.py +168 -0
  79. dateparser/data/date_translation_data/he.py +371 -0
  80. dateparser/data/date_translation_data/hi.py +261 -0
  81. dateparser/data/date_translation_data/hr.py +378 -0
  82. dateparser/data/date_translation_data/hsb.py +271 -0
  83. dateparser/data/date_translation_data/hu.py +297 -0
  84. dateparser/data/date_translation_data/hy.py +246 -0
  85. dateparser/data/date_translation_data/id.py +272 -0
  86. dateparser/data/date_translation_data/ig.py +168 -0
  87. dateparser/data/date_translation_data/ii.py +157 -0
  88. dateparser/data/date_translation_data/is.py +242 -0
  89. dateparser/data/date_translation_data/it.py +282 -0
  90. dateparser/data/date_translation_data/ja.py +286 -0
  91. dateparser/data/date_translation_data/jgo.py +188 -0
  92. dateparser/data/date_translation_data/jmc.py +168 -0
  93. dateparser/data/date_translation_data/ka.py +241 -0
  94. dateparser/data/date_translation_data/kab.py +169 -0
  95. dateparser/data/date_translation_data/kam.py +169 -0
  96. dateparser/data/date_translation_data/kde.py +169 -0
  97. dateparser/data/date_translation_data/kea.py +230 -0
  98. dateparser/data/date_translation_data/khq.py +167 -0
  99. dateparser/data/date_translation_data/ki.py +169 -0
  100. dateparser/data/date_translation_data/kk.py +228 -0
  101. dateparser/data/date_translation_data/kl.py +213 -0
  102. dateparser/data/date_translation_data/kln.py +171 -0
  103. dateparser/data/date_translation_data/km.py +198 -0
  104. dateparser/data/date_translation_data/kn.py +225 -0
  105. dateparser/data/date_translation_data/ko.py +207 -0
  106. dateparser/data/date_translation_data/kok.py +157 -0
  107. dateparser/data/date_translation_data/ks.py +152 -0
  108. dateparser/data/date_translation_data/ksb.py +168 -0
  109. dateparser/data/date_translation_data/ksf.py +169 -0
  110. dateparser/data/date_translation_data/ksh.py +192 -0
  111. dateparser/data/date_translation_data/kw.py +169 -0
  112. dateparser/data/date_translation_data/ky.py +240 -0
  113. dateparser/data/date_translation_data/lag.py +169 -0
  114. dateparser/data/date_translation_data/lb.py +233 -0
  115. dateparser/data/date_translation_data/lg.py +169 -0
  116. dateparser/data/date_translation_data/lkt.py +194 -0
  117. dateparser/data/date_translation_data/ln.py +179 -0
  118. dateparser/data/date_translation_data/lo.py +228 -0
  119. dateparser/data/date_translation_data/lrc.py +154 -0
  120. dateparser/data/date_translation_data/lt.py +263 -0
  121. dateparser/data/date_translation_data/lu.py +169 -0
  122. dateparser/data/date_translation_data/luo.py +169 -0
  123. dateparser/data/date_translation_data/luy.py +168 -0
  124. dateparser/data/date_translation_data/lv.py +257 -0
  125. dateparser/data/date_translation_data/mas.py +173 -0
  126. dateparser/data/date_translation_data/mer.py +168 -0
  127. dateparser/data/date_translation_data/mfe.py +166 -0
  128. dateparser/data/date_translation_data/mg.py +168 -0
  129. dateparser/data/date_translation_data/mgh.py +169 -0
  130. dateparser/data/date_translation_data/mgo.py +151 -0
  131. dateparser/data/date_translation_data/mk.py +234 -0
  132. dateparser/data/date_translation_data/ml.py +217 -0
  133. dateparser/data/date_translation_data/mn.py +224 -0
  134. dateparser/data/date_translation_data/mr.py +229 -0
  135. dateparser/data/date_translation_data/ms.py +242 -0
  136. dateparser/data/date_translation_data/mt.py +175 -0
  137. dateparser/data/date_translation_data/mua.py +169 -0
  138. dateparser/data/date_translation_data/my.py +203 -0
  139. dateparser/data/date_translation_data/mzn.py +199 -0
  140. dateparser/data/date_translation_data/naq.py +169 -0
  141. dateparser/data/date_translation_data/nb.py +261 -0
  142. dateparser/data/date_translation_data/nd.py +169 -0
  143. dateparser/data/date_translation_data/ne.py +207 -0
  144. dateparser/data/date_translation_data/nl.py +273 -0
  145. dateparser/data/date_translation_data/nmg.py +169 -0
  146. dateparser/data/date_translation_data/nn.py +231 -0
  147. dateparser/data/date_translation_data/nnh.py +150 -0
  148. dateparser/data/date_translation_data/nus.py +166 -0
  149. dateparser/data/date_translation_data/nyn.py +169 -0
  150. dateparser/data/date_translation_data/om.py +173 -0
  151. dateparser/data/date_translation_data/or.py +157 -0
  152. dateparser/data/date_translation_data/os.py +203 -0
  153. dateparser/data/date_translation_data/pa-Arab.py +150 -0
  154. dateparser/data/date_translation_data/pa-Guru.py +221 -0
  155. dateparser/data/date_translation_data/pa.py +221 -0
  156. dateparser/data/date_translation_data/pl.py +416 -0
  157. dateparser/data/date_translation_data/ps.py +150 -0
  158. dateparser/data/date_translation_data/pt.py +981 -0
  159. dateparser/data/date_translation_data/qu.py +176 -0
  160. dateparser/data/date_translation_data/rm.py +166 -0
  161. dateparser/data/date_translation_data/rn.py +169 -0
  162. dateparser/data/date_translation_data/ro.py +270 -0
  163. dateparser/data/date_translation_data/rof.py +157 -0
  164. dateparser/data/date_translation_data/ru.py +442 -0
  165. dateparser/data/date_translation_data/rw.py +169 -0
  166. dateparser/data/date_translation_data/rwk.py +168 -0
  167. dateparser/data/date_translation_data/sah.py +219 -0
  168. dateparser/data/date_translation_data/saq.py +169 -0
  169. dateparser/data/date_translation_data/sbp.py +169 -0
  170. dateparser/data/date_translation_data/se.py +280 -0
  171. dateparser/data/date_translation_data/seh.py +169 -0
  172. dateparser/data/date_translation_data/ses.py +167 -0
  173. dateparser/data/date_translation_data/sg.py +169 -0
  174. dateparser/data/date_translation_data/shi-Latn.py +169 -0
  175. dateparser/data/date_translation_data/shi-Tfng.py +169 -0
  176. dateparser/data/date_translation_data/shi.py +169 -0
  177. dateparser/data/date_translation_data/si.py +220 -0
  178. dateparser/data/date_translation_data/sk.py +327 -0
  179. dateparser/data/date_translation_data/sl.py +244 -0
  180. dateparser/data/date_translation_data/smn.py +176 -0
  181. dateparser/data/date_translation_data/sn.py +169 -0
  182. dateparser/data/date_translation_data/so.py +179 -0
  183. dateparser/data/date_translation_data/sq.py +237 -0
  184. dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
  185. dateparser/data/date_translation_data/sr-Latn.py +306 -0
  186. dateparser/data/date_translation_data/sr.py +255 -0
  187. dateparser/data/date_translation_data/sv.py +309 -0
  188. dateparser/data/date_translation_data/sw.py +231 -0
  189. dateparser/data/date_translation_data/ta.py +264 -0
  190. dateparser/data/date_translation_data/te.py +239 -0
  191. dateparser/data/date_translation_data/teo.py +173 -0
  192. dateparser/data/date_translation_data/th.py +300 -0
  193. dateparser/data/date_translation_data/ti.py +173 -0
  194. dateparser/data/date_translation_data/tl.py +137 -0
  195. dateparser/data/date_translation_data/to.py +216 -0
  196. dateparser/data/date_translation_data/tr.py +259 -0
  197. dateparser/data/date_translation_data/twq.py +167 -0
  198. dateparser/data/date_translation_data/tzm.py +169 -0
  199. dateparser/data/date_translation_data/ug.py +203 -0
  200. dateparser/data/date_translation_data/uk.py +502 -0
  201. dateparser/data/date_translation_data/ur.py +256 -0
  202. dateparser/data/date_translation_data/uz-Arab.py +167 -0
  203. dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
  204. dateparser/data/date_translation_data/uz-Latn.py +216 -0
  205. dateparser/data/date_translation_data/uz.py +216 -0
  206. dateparser/data/date_translation_data/vi.py +260 -0
  207. dateparser/data/date_translation_data/vun.py +168 -0
  208. dateparser/data/date_translation_data/wae.py +224 -0
  209. dateparser/data/date_translation_data/xog.py +169 -0
  210. dateparser/data/date_translation_data/yav.py +169 -0
  211. dateparser/data/date_translation_data/yi.py +178 -0
  212. dateparser/data/date_translation_data/yo.py +263 -0
  213. dateparser/data/date_translation_data/yue.py +203 -0
  214. dateparser/data/date_translation_data/zgh.py +169 -0
  215. dateparser/data/date_translation_data/zh-Hans.py +240 -0
  216. dateparser/data/date_translation_data/zh-Hant.py +402 -0
  217. dateparser/data/date_translation_data/zh.py +273 -0
  218. dateparser/data/date_translation_data/zu.py +196 -0
  219. dateparser/data/languages_info.py +826 -0
  220. dateparser/date.py +599 -0
  221. dateparser/date_parser.py +55 -0
  222. dateparser/freshness_date_parser.py +156 -0
  223. dateparser/languages/__init__.py +2 -0
  224. dateparser/languages/dictionary.py +352 -0
  225. dateparser/languages/loader.py +224 -0
  226. dateparser/languages/locale.py +625 -0
  227. dateparser/languages/validation.py +467 -0
  228. dateparser/parser.py +742 -0
  229. dateparser/search/__init__.py +71 -0
  230. dateparser/search/detection.py +78 -0
  231. dateparser/search/search.py +297 -0
  232. dateparser/search/text_detection.py +89 -0
  233. dateparser/timezone_parser.py +91 -0
  234. dateparser/timezones.py +469 -0
  235. dateparser/utils/__init__.py +257 -0
  236. dateparser/utils/strptime.py +108 -0
  237. dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
  238. dateparser-1.2.1.dist-info/LICENSE +12 -0
  239. dateparser-1.2.1.dist-info/METADATA +864 -0
  240. dateparser-1.2.1.dist-info/RECORD +256 -0
  241. dateparser-1.2.1.dist-info/WHEEL +5 -0
  242. dateparser-1.2.1.dist-info/entry_points.txt +2 -0
  243. dateparser-1.2.1.dist-info/top_level.txt +4 -0
  244. dateparser_cli/__init__.py +0 -0
  245. dateparser_cli/cli.py +36 -0
  246. dateparser_cli/exceptions.py +2 -0
  247. dateparser_cli/fasttext_manager.py +42 -0
  248. dateparser_cli/utils.py +27 -0
  249. dateparser_data/__init__.py +0 -0
  250. dateparser_data/settings.py +33 -0
  251. dateparser_scripts/__init__.py +0 -0
  252. dateparser_scripts/get_cldr_data.py +567 -0
  253. dateparser_scripts/order_languages.py +217 -0
  254. dateparser_scripts/update_supported_languages_and_locales.py +48 -0
  255. dateparser_scripts/utils.py +73 -0
  256. dateparser_scripts/write_complete_data.py +129 -0
@@ -0,0 +1,567 @@
1
+ import json
2
+ import os
3
+ import shutil
4
+ from collections import OrderedDict
5
+
6
+ import regex as re
7
+
8
+ from dateparser_scripts.order_languages import _get_language_locale_dict
9
+ from dateparser_scripts.utils import get_dict_difference, get_raw_data
10
+
11
# Folder containing one sub-folder per CLDR locale. The path is relative, so
# it is resolved against the current working directory.
cldr_dates_full_dir = "../raw_data/cldr_dates_full/main/"

# --- apostrophe normalisation ----------------------------------------------
# Characters that _clean_string replaces with a plain "'".
APOSTROPHE_LOOK_ALIKE_CHARS = [
    "\N{RIGHT SINGLE QUOTATION MARK}",  # '\u2019'
    "\N{MODIFIER LETTER APOSTROPHE}",  # '\u02bc'
    "\N{MODIFIER LETTER TURNED COMMA}",  # '\u02bb'
    "\N{ARMENIAN APOSTROPHE}",  # '\u055a'
    "\N{LATIN SMALL LETTER SALTILLO}",  # '\ua78c'
    "\N{PRIME}",  # '\u2032'
    "\N{REVERSED PRIME}",  # '\u2035'
    "\N{MODIFIER LETTER PRIME}",  # '\u02b9'
    "\N{FULLWIDTH APOSTROPHE}",  # '\uff07'
]
# Alternation matching any single character from the list above.
RE_SANITIZE_APOSTROPHE = re.compile("|".join(APOSTROPHE_LOOK_ALIKE_CHARS))

# --- date format / month names ---------------------------------------------
# Three runs of D/M/Y letters, each optionally followed by RIGHT-TO-LEFT MARKs
# and separated by any mix of "-", "/", ".", space or tab. Every group keeps
# the last letter of its run.
DATE_ORDER_PATTERN = re.compile(
    "([DMY])+\u200f*[-/. \t]*([DMY])+\u200f*[-/. \t]*([DMY])+"
)
# A string made only of digits, optionally prefixed by "M".
DEFAULT_MONTH_PATTERN = re.compile(r"^M?\d+$", re.U)

# --- day periods -------------------------------------------------------------
# Whole-string "am" / "pm" in any letter case, with optional dots and spacing.
AM_PATTERN = re.compile(r"^\s*[Aa]\s*\.?\s*[Mm]\s*\.?\s*$")
PM_PATTERN = re.compile(r"^\s*[Pp]\s*\.?\s*[Mm]\s*\.?\s*$")

# --- relative expressions ----------------------------------------------------
# A "{0}" placeholder that is not preceded by "+" or "-" (optionally followed
# by whitespace). The variable-width look-behind needs the ``regex`` module,
# which this file imports as ``re``.
RELATIVE_PATTERN = re.compile(r"(?<![\+\-]\s*)\{0\}")
# Any opening or closing round bracket.
PARENTHESIS_PATTERN = re.compile(r"[\(\)]")
34
+
35
+
36
def _filter_relative_string(relative_string):
    """Tell whether a CLDR relative-time entry should be kept.

    The result is truthy only for a ``str`` that contains a ``{0}``
    placeholder accepted by ``RELATIVE_PATTERN`` and has no round brackets.
    Intended as a ``filter()`` predicate, so the falsy value may be either
    ``False`` or ``None``.
    """
    if not isinstance(relative_string, str):
        return False

    placeholder = RELATIVE_PATTERN.search(relative_string)
    if not placeholder:
        # No usable "{0}" in the string: hand back the (falsy) search result.
        return placeholder

    return not PARENTHESIS_PATTERN.search(relative_string)
42
+
43
+
44
def _filter_month_name(month_name):
    """Return ``True`` unless ``month_name`` matches ``DEFAULT_MONTH_PATTERN``.

    That pattern accepts strings made only of digits with an optional leading
    "M", so such entries are rejected when used as a ``filter()`` predicate.
    """
    is_placeholder = DEFAULT_MONTH_PATTERN.match(month_name) is not None
    return not is_placeholder
46
+
47
+
48
def _load_cldr_json(path):
    """Parse the JSON file at ``path``, keeping object keys in file order."""
    # JSON interchange text is UTF-8 (RFC 8259). Decode it explicitly rather
    # than with the platform default encoding, which can mis-read or reject
    # non-ASCII month and day names.
    with open(path, encoding="utf-8") as f:
        return json.load(f, object_pairs_hook=OrderedDict)


def _gregorian_names(gregorian_dict, section, entry):
    """List ``entry`` from each context/width table of a gregorian ``section``."""
    return [
        gregorian_dict[section][context][width][entry]
        for context in ("stand-alone", "format")
        # neglecting "narrow" to avoid problems in translation
        for width in ("wide", "abbreviated")
    ]


def _retrieve_locale_data(locale):
    """Build the raw translation dictionary for one CLDR locale.

    Reads ``ca-gregorian.json`` and ``dateFields.json`` from the ``locale``
    folder below ``cldr_dates_full_dir`` and returns an ``OrderedDict`` whose
    keys are, in order:

    * ``"name"`` - the ``locale`` argument itself;
    * ``"date_order"`` - the D/M/Y letter order taken from the short date
      format;
    * ``"january"`` .. ``"december"`` - month names, minus entries rejected by
      ``_filter_month_name``;
    * ``"monday"`` .. ``"sunday"`` - weekday names;
    * ``"am"`` / ``"pm"`` - day-period names, normalised with ``AM_PATTERN`` /
      ``PM_PATTERN``;
    * ``"year"`` .. ``"second"`` - unit display names;
    * ``"relative-type"`` - fixed relative expressions per unit;
    * ``"relative-type-regex"`` - relative patterns kept by
      ``_filter_relative_string``.

    Values are returned as found in the CLDR data; no cleaning is done here.

    Raises:
        OSError: if either JSON file cannot be opened.
        KeyError: if an expected CLDR key is missing.
        AttributeError: if the short date format contains no D/M/Y sequence
            (``DATE_ORDER_PATTERN.search`` then returns ``None``).
    """
    locale_dir = cldr_dates_full_dir + locale
    cldr_gregorian_data = _load_cldr_json(locale_dir + "/ca-gregorian.json")
    cldr_datefields_data = _load_cldr_json(locale_dir + "/dateFields.json")

    gregorian_dict = cldr_gregorian_data["main"][locale]["dates"]["calendars"][
        "gregorian"
    ]
    date_fields_dict = cldr_datefields_data["main"][locale]["dates"]["fields"]

    # Every unit is looked up under three CLDR field names:
    # "<unit>", "<unit>-short" and "<unit>-narrow".
    units = ("year", "month", "week", "day", "hour", "minute", "second")
    unit_keys = OrderedDict(
        (unit, [unit, unit + "-short", unit + "-narrow"]) for unit in units
    )
    relative_keys = [
        "relativeTimePattern-count-one",
        "relativeTimePattern-count-other",
    ]

    json_dict = OrderedDict()
    json_dict["name"] = locale

    try:
        date_format_string = gregorian_dict["dateFormats"]["short"].upper()
    except AttributeError:
        # The short format is not a plain string here; the pattern is stored
        # under "_value".
        date_format_string = gregorian_dict["dateFormats"]["short"]["_value"].upper()

    # Reduce the matched date pattern to one letter per component,
    # e.g. "DD/MM/YYYY" -> "DMY".
    json_dict["date_order"] = DATE_ORDER_PATTERN.sub(
        r"\1\2\3", DATE_ORDER_PATTERN.search(date_format_string).group()
    )

    month_names = (
        "january",
        "february",
        "march",
        "april",
        "may",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    )
    # CLDR indexes months by the strings "1" .. "12".
    for number, month in enumerate(month_names, start=1):
        json_dict[month] = list(
            filter(
                _filter_month_name,
                _gregorian_names(gregorian_dict, "months", str(number)),
            )
        )

    weekdays = (
        ("monday", "mon"),
        ("tuesday", "tue"),
        ("wednesday", "wed"),
        ("thursday", "thu"),
        ("friday", "fri"),
        ("saturday", "sat"),
        ("sunday", "sun"),
    )
    for weekday, cldr_key in weekdays:
        json_dict[weekday] = _gregorian_names(gregorian_dict, "days", cldr_key)

    json_dict["am"] = [
        AM_PATTERN.sub("am", x)
        for x in _gregorian_names(gregorian_dict, "dayPeriods", "am")
    ]
    json_dict["pm"] = [
        PM_PATTERN.sub("pm", x)
        for x in _gregorian_names(gregorian_dict, "dayPeriods", "pm")
    ]

    for unit, keys in unit_keys.items():
        json_dict[unit] = [date_fields_dict[key]["displayName"] for key in keys]

    relative_type = OrderedDict()
    json_dict["relative-type"] = relative_type
    # year/month/week/day have previous, current and next forms ...
    for unit in ("year", "month", "week", "day"):
        for label, field in (
            ("1 " + unit + " ago", "relative-type--1"),
            ("0 " + unit + " ago", "relative-type-0"),
            ("in 1 " + unit, "relative-type-1"),
        ):
            relative_type[label] = [
                date_fields_dict[key][field] for key in unit_keys[unit]
            ]
    # ... while hour/minute/second only take the "current" form.
    for unit in ("hour", "minute", "second"):
        relative_type["0 " + unit + " ago"] = [
            date_fields_dict[key]["relative-type-0"] for key in unit_keys[unit]
        ]

    relative_regex = OrderedDict()
    json_dict["relative-type-regex"] = relative_regex
    for unit, keys in unit_keys.items():
        for label, field in (
            ("in \\1 " + unit, "relativeTime-type-future"),
            ("\\1 " + unit + " ago", "relativeTime-type-past"),
        ):
            # .get() yields None for a missing plural form; the filter drops it.
            relative_regex[label] = list(
                filter(
                    _filter_relative_string,
                    [
                        date_fields_dict[key1][field].get(key2)
                        for key1 in keys
                        for key2 in relative_keys
                    ],
                )
            )

    return json_dict
514
+
515
+
516
def _clean_string(given_string):
    """Normalise one translation string.

    Apostrophe look-alikes become "'", every "." is removed, the text is
    lower-cased, and runs of whitespace collapse to single spaces with no
    leading or trailing whitespace.
    """
    without_dots = RE_SANITIZE_APOSTROPHE.sub("'", given_string).replace(".", "")
    words = without_dots.lower().split()
    return " ".join(words)
521
+
522
+
523
def _clean_dict(json_dict):
    """Clean, de-duplicate and sort the values of ``json_dict``.

    * list values are passed through ``_clean_string``, de-duplicated and
      sorted;
    * dict values are sorted by key and cleaned recursively;
    * other values are left untouched.

    ``json_dict`` is updated in place. The returned ``OrderedDict`` is a new
    mapping that additionally leaves out every entry whose value is falsy.
    """
    for key in list(json_dict):
        value = json_dict[key]
        if isinstance(value, list):
            unique = OrderedDict.fromkeys(_clean_string(item) for item in value)
            json_dict[key] = sorted(unique)
        elif isinstance(value, dict):
            json_dict[key] = _clean_dict(OrderedDict(sorted(value.items())))

    return OrderedDict((key, value) for key, value in json_dict.items() if value)
532
+
533
+
534
def main():
    """Regenerate the per-language CLDR translation JSON files.

    Calls ``get_raw_data()`` first (presumably to make the raw CLDR data
    available - confirm in ``dateparser_scripts.utils``), recreates the output
    directory from scratch, then writes one ``<language>.json`` per key of
    ``_get_language_locale_dict()``. Each file holds the cleaned language data
    plus a ``"locale_specific"`` mapping built by passing the language data
    and each locale's data to ``get_dict_difference``.
    """
    get_raw_data()
    locales_by_language = _get_language_locale_dict()

    parent_directory = "../dateparser_data/cldr_language_data"
    directory = "../dateparser_data/cldr_language_data/date_translation_data/"
    if not os.path.isdir(parent_directory):
        os.mkdir(parent_directory)
    # Start from an empty output folder so stale files never survive a run.
    if os.path.isdir(directory):
        shutil.rmtree(directory)
    os.mkdir(directory)

    for language in locales_by_language:
        language_data = _clean_dict(_retrieve_locale_data(language))

        per_locale = OrderedDict()
        for locale in locales_by_language[language]:
            locale_data = _clean_dict(_retrieve_locale_data(locale))
            difference = get_dict_difference(language_data, locale_data)
            per_locale[locale] = _clean_dict(difference)
        language_data["locale_specific"] = OrderedDict(sorted(per_locale.items()))

        filename = directory + language + ".json"
        print("writing " + filename)
        serialized = json.dumps(
            language_data, indent=4, separators=(",", ": "), ensure_ascii=False
        )
        payload = serialized.encode("utf-8")
        with open(filename, "wb") as out:
            out.write(payload)


if __name__ == "__main__":
    main()