dateparser 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. dateparser/__init__.py +82 -0
  2. dateparser/calendars/__init__.py +144 -0
  3. dateparser/calendars/hijri.py +6 -0
  4. dateparser/calendars/hijri_parser.py +60 -0
  5. dateparser/calendars/jalali.py +9 -0
  6. dateparser/calendars/jalali_parser.py +184 -0
  7. dateparser/conf.py +267 -0
  8. dateparser/custom_language_detection/__init__.py +0 -0
  9. dateparser/custom_language_detection/fasttext.py +43 -0
  10. dateparser/custom_language_detection/langdetect.py +37 -0
  11. dateparser/custom_language_detection/language_mapping.py +18 -0
  12. dateparser/data/__init__.py +2 -0
  13. dateparser/data/date_translation_data/__init__.py +0 -0
  14. dateparser/data/date_translation_data/af.py +242 -0
  15. dateparser/data/date_translation_data/agq.py +169 -0
  16. dateparser/data/date_translation_data/ak.py +169 -0
  17. dateparser/data/date_translation_data/am.py +222 -0
  18. dateparser/data/date_translation_data/ar.py +574 -0
  19. dateparser/data/date_translation_data/as.py +164 -0
  20. dateparser/data/date_translation_data/asa.py +168 -0
  21. dateparser/data/date_translation_data/ast.py +280 -0
  22. dateparser/data/date_translation_data/az-Cyrl.py +168 -0
  23. dateparser/data/date_translation_data/az-Latn.py +217 -0
  24. dateparser/data/date_translation_data/az.py +217 -0
  25. dateparser/data/date_translation_data/bas.py +169 -0
  26. dateparser/data/date_translation_data/be.py +340 -0
  27. dateparser/data/date_translation_data/bem.py +161 -0
  28. dateparser/data/date_translation_data/bez.py +169 -0
  29. dateparser/data/date_translation_data/bg.py +345 -0
  30. dateparser/data/date_translation_data/bm.py +167 -0
  31. dateparser/data/date_translation_data/bn.py +241 -0
  32. dateparser/data/date_translation_data/bo.py +185 -0
  33. dateparser/data/date_translation_data/br.py +226 -0
  34. dateparser/data/date_translation_data/brx.py +157 -0
  35. dateparser/data/date_translation_data/bs-Cyrl.py +226 -0
  36. dateparser/data/date_translation_data/bs-Latn.py +248 -0
  37. dateparser/data/date_translation_data/bs.py +248 -0
  38. dateparser/data/date_translation_data/ca.py +313 -0
  39. dateparser/data/date_translation_data/ce.py +225 -0
  40. dateparser/data/date_translation_data/cgg.py +169 -0
  41. dateparser/data/date_translation_data/chr.py +240 -0
  42. dateparser/data/date_translation_data/ckb.py +154 -0
  43. dateparser/data/date_translation_data/cs.py +316 -0
  44. dateparser/data/date_translation_data/cy.py +217 -0
  45. dateparser/data/date_translation_data/da.py +296 -0
  46. dateparser/data/date_translation_data/dav.py +169 -0
  47. dateparser/data/date_translation_data/de.py +357 -0
  48. dateparser/data/date_translation_data/dje.py +167 -0
  49. dateparser/data/date_translation_data/dsb.py +270 -0
  50. dateparser/data/date_translation_data/dua.py +169 -0
  51. dateparser/data/date_translation_data/dyo.py +168 -0
  52. dateparser/data/date_translation_data/dz.py +225 -0
  53. dateparser/data/date_translation_data/ebu.py +169 -0
  54. dateparser/data/date_translation_data/ee.py +233 -0
  55. dateparser/data/date_translation_data/el.py +279 -0
  56. dateparser/data/date_translation_data/en.py +851 -0
  57. dateparser/data/date_translation_data/eo.py +169 -0
  58. dateparser/data/date_translation_data/es.py +499 -0
  59. dateparser/data/date_translation_data/et.py +233 -0
  60. dateparser/data/date_translation_data/eu.py +219 -0
  61. dateparser/data/date_translation_data/ewo.py +169 -0
  62. dateparser/data/date_translation_data/fa.py +270 -0
  63. dateparser/data/date_translation_data/ff.py +179 -0
  64. dateparser/data/date_translation_data/fi.py +345 -0
  65. dateparser/data/date_translation_data/fil.py +223 -0
  66. dateparser/data/date_translation_data/fo.py +256 -0
  67. dateparser/data/date_translation_data/fr.py +520 -0
  68. dateparser/data/date_translation_data/fur.py +223 -0
  69. dateparser/data/date_translation_data/fy.py +223 -0
  70. dateparser/data/date_translation_data/ga.py +238 -0
  71. dateparser/data/date_translation_data/gd.py +277 -0
  72. dateparser/data/date_translation_data/gl.py +253 -0
  73. dateparser/data/date_translation_data/gsw.py +179 -0
  74. dateparser/data/date_translation_data/gu.py +216 -0
  75. dateparser/data/date_translation_data/guz.py +170 -0
  76. dateparser/data/date_translation_data/gv.py +166 -0
  77. dateparser/data/date_translation_data/ha.py +176 -0
  78. dateparser/data/date_translation_data/haw.py +168 -0
  79. dateparser/data/date_translation_data/he.py +371 -0
  80. dateparser/data/date_translation_data/hi.py +261 -0
  81. dateparser/data/date_translation_data/hr.py +378 -0
  82. dateparser/data/date_translation_data/hsb.py +271 -0
  83. dateparser/data/date_translation_data/hu.py +297 -0
  84. dateparser/data/date_translation_data/hy.py +246 -0
  85. dateparser/data/date_translation_data/id.py +272 -0
  86. dateparser/data/date_translation_data/ig.py +168 -0
  87. dateparser/data/date_translation_data/ii.py +157 -0
  88. dateparser/data/date_translation_data/is.py +242 -0
  89. dateparser/data/date_translation_data/it.py +282 -0
  90. dateparser/data/date_translation_data/ja.py +286 -0
  91. dateparser/data/date_translation_data/jgo.py +188 -0
  92. dateparser/data/date_translation_data/jmc.py +168 -0
  93. dateparser/data/date_translation_data/ka.py +241 -0
  94. dateparser/data/date_translation_data/kab.py +169 -0
  95. dateparser/data/date_translation_data/kam.py +169 -0
  96. dateparser/data/date_translation_data/kde.py +169 -0
  97. dateparser/data/date_translation_data/kea.py +230 -0
  98. dateparser/data/date_translation_data/khq.py +167 -0
  99. dateparser/data/date_translation_data/ki.py +169 -0
  100. dateparser/data/date_translation_data/kk.py +228 -0
  101. dateparser/data/date_translation_data/kl.py +213 -0
  102. dateparser/data/date_translation_data/kln.py +171 -0
  103. dateparser/data/date_translation_data/km.py +198 -0
  104. dateparser/data/date_translation_data/kn.py +225 -0
  105. dateparser/data/date_translation_data/ko.py +207 -0
  106. dateparser/data/date_translation_data/kok.py +157 -0
  107. dateparser/data/date_translation_data/ks.py +152 -0
  108. dateparser/data/date_translation_data/ksb.py +168 -0
  109. dateparser/data/date_translation_data/ksf.py +169 -0
  110. dateparser/data/date_translation_data/ksh.py +192 -0
  111. dateparser/data/date_translation_data/kw.py +169 -0
  112. dateparser/data/date_translation_data/ky.py +240 -0
  113. dateparser/data/date_translation_data/lag.py +169 -0
  114. dateparser/data/date_translation_data/lb.py +233 -0
  115. dateparser/data/date_translation_data/lg.py +169 -0
  116. dateparser/data/date_translation_data/lkt.py +194 -0
  117. dateparser/data/date_translation_data/ln.py +179 -0
  118. dateparser/data/date_translation_data/lo.py +228 -0
  119. dateparser/data/date_translation_data/lrc.py +154 -0
  120. dateparser/data/date_translation_data/lt.py +263 -0
  121. dateparser/data/date_translation_data/lu.py +169 -0
  122. dateparser/data/date_translation_data/luo.py +169 -0
  123. dateparser/data/date_translation_data/luy.py +168 -0
  124. dateparser/data/date_translation_data/lv.py +257 -0
  125. dateparser/data/date_translation_data/mas.py +173 -0
  126. dateparser/data/date_translation_data/mer.py +168 -0
  127. dateparser/data/date_translation_data/mfe.py +166 -0
  128. dateparser/data/date_translation_data/mg.py +168 -0
  129. dateparser/data/date_translation_data/mgh.py +169 -0
  130. dateparser/data/date_translation_data/mgo.py +151 -0
  131. dateparser/data/date_translation_data/mk.py +234 -0
  132. dateparser/data/date_translation_data/ml.py +217 -0
  133. dateparser/data/date_translation_data/mn.py +224 -0
  134. dateparser/data/date_translation_data/mr.py +229 -0
  135. dateparser/data/date_translation_data/ms.py +242 -0
  136. dateparser/data/date_translation_data/mt.py +175 -0
  137. dateparser/data/date_translation_data/mua.py +169 -0
  138. dateparser/data/date_translation_data/my.py +203 -0
  139. dateparser/data/date_translation_data/mzn.py +199 -0
  140. dateparser/data/date_translation_data/naq.py +169 -0
  141. dateparser/data/date_translation_data/nb.py +261 -0
  142. dateparser/data/date_translation_data/nd.py +169 -0
  143. dateparser/data/date_translation_data/ne.py +207 -0
  144. dateparser/data/date_translation_data/nl.py +273 -0
  145. dateparser/data/date_translation_data/nmg.py +169 -0
  146. dateparser/data/date_translation_data/nn.py +231 -0
  147. dateparser/data/date_translation_data/nnh.py +150 -0
  148. dateparser/data/date_translation_data/nus.py +166 -0
  149. dateparser/data/date_translation_data/nyn.py +169 -0
  150. dateparser/data/date_translation_data/om.py +173 -0
  151. dateparser/data/date_translation_data/or.py +157 -0
  152. dateparser/data/date_translation_data/os.py +203 -0
  153. dateparser/data/date_translation_data/pa-Arab.py +150 -0
  154. dateparser/data/date_translation_data/pa-Guru.py +221 -0
  155. dateparser/data/date_translation_data/pa.py +221 -0
  156. dateparser/data/date_translation_data/pl.py +416 -0
  157. dateparser/data/date_translation_data/ps.py +150 -0
  158. dateparser/data/date_translation_data/pt.py +981 -0
  159. dateparser/data/date_translation_data/qu.py +176 -0
  160. dateparser/data/date_translation_data/rm.py +166 -0
  161. dateparser/data/date_translation_data/rn.py +169 -0
  162. dateparser/data/date_translation_data/ro.py +270 -0
  163. dateparser/data/date_translation_data/rof.py +157 -0
  164. dateparser/data/date_translation_data/ru.py +442 -0
  165. dateparser/data/date_translation_data/rw.py +169 -0
  166. dateparser/data/date_translation_data/rwk.py +168 -0
  167. dateparser/data/date_translation_data/sah.py +219 -0
  168. dateparser/data/date_translation_data/saq.py +169 -0
  169. dateparser/data/date_translation_data/sbp.py +169 -0
  170. dateparser/data/date_translation_data/se.py +280 -0
  171. dateparser/data/date_translation_data/seh.py +169 -0
  172. dateparser/data/date_translation_data/ses.py +167 -0
  173. dateparser/data/date_translation_data/sg.py +169 -0
  174. dateparser/data/date_translation_data/shi-Latn.py +169 -0
  175. dateparser/data/date_translation_data/shi-Tfng.py +169 -0
  176. dateparser/data/date_translation_data/shi.py +169 -0
  177. dateparser/data/date_translation_data/si.py +220 -0
  178. dateparser/data/date_translation_data/sk.py +327 -0
  179. dateparser/data/date_translation_data/sl.py +244 -0
  180. dateparser/data/date_translation_data/smn.py +176 -0
  181. dateparser/data/date_translation_data/sn.py +169 -0
  182. dateparser/data/date_translation_data/so.py +179 -0
  183. dateparser/data/date_translation_data/sq.py +237 -0
  184. dateparser/data/date_translation_data/sr-Cyrl.py +306 -0
  185. dateparser/data/date_translation_data/sr-Latn.py +306 -0
  186. dateparser/data/date_translation_data/sr.py +255 -0
  187. dateparser/data/date_translation_data/sv.py +309 -0
  188. dateparser/data/date_translation_data/sw.py +231 -0
  189. dateparser/data/date_translation_data/ta.py +264 -0
  190. dateparser/data/date_translation_data/te.py +239 -0
  191. dateparser/data/date_translation_data/teo.py +173 -0
  192. dateparser/data/date_translation_data/th.py +300 -0
  193. dateparser/data/date_translation_data/ti.py +173 -0
  194. dateparser/data/date_translation_data/tl.py +137 -0
  195. dateparser/data/date_translation_data/to.py +216 -0
  196. dateparser/data/date_translation_data/tr.py +259 -0
  197. dateparser/data/date_translation_data/twq.py +167 -0
  198. dateparser/data/date_translation_data/tzm.py +169 -0
  199. dateparser/data/date_translation_data/ug.py +203 -0
  200. dateparser/data/date_translation_data/uk.py +502 -0
  201. dateparser/data/date_translation_data/ur.py +256 -0
  202. dateparser/data/date_translation_data/uz-Arab.py +167 -0
  203. dateparser/data/date_translation_data/uz-Cyrl.py +210 -0
  204. dateparser/data/date_translation_data/uz-Latn.py +216 -0
  205. dateparser/data/date_translation_data/uz.py +216 -0
  206. dateparser/data/date_translation_data/vi.py +260 -0
  207. dateparser/data/date_translation_data/vun.py +168 -0
  208. dateparser/data/date_translation_data/wae.py +224 -0
  209. dateparser/data/date_translation_data/xog.py +169 -0
  210. dateparser/data/date_translation_data/yav.py +169 -0
  211. dateparser/data/date_translation_data/yi.py +178 -0
  212. dateparser/data/date_translation_data/yo.py +263 -0
  213. dateparser/data/date_translation_data/yue.py +203 -0
  214. dateparser/data/date_translation_data/zgh.py +169 -0
  215. dateparser/data/date_translation_data/zh-Hans.py +240 -0
  216. dateparser/data/date_translation_data/zh-Hant.py +402 -0
  217. dateparser/data/date_translation_data/zh.py +273 -0
  218. dateparser/data/date_translation_data/zu.py +196 -0
  219. dateparser/data/languages_info.py +826 -0
  220. dateparser/date.py +599 -0
  221. dateparser/date_parser.py +55 -0
  222. dateparser/freshness_date_parser.py +156 -0
  223. dateparser/languages/__init__.py +2 -0
  224. dateparser/languages/dictionary.py +352 -0
  225. dateparser/languages/loader.py +224 -0
  226. dateparser/languages/locale.py +625 -0
  227. dateparser/languages/validation.py +467 -0
  228. dateparser/parser.py +742 -0
  229. dateparser/search/__init__.py +71 -0
  230. dateparser/search/detection.py +78 -0
  231. dateparser/search/search.py +297 -0
  232. dateparser/search/text_detection.py +89 -0
  233. dateparser/timezone_parser.py +91 -0
  234. dateparser/timezones.py +469 -0
  235. dateparser/utils/__init__.py +257 -0
  236. dateparser/utils/strptime.py +108 -0
  237. dateparser-1.2.1.dist-info/AUTHORS.rst +17 -0
  238. dateparser-1.2.1.dist-info/LICENSE +12 -0
  239. dateparser-1.2.1.dist-info/METADATA +864 -0
  240. dateparser-1.2.1.dist-info/RECORD +256 -0
  241. dateparser-1.2.1.dist-info/WHEEL +5 -0
  242. dateparser-1.2.1.dist-info/entry_points.txt +2 -0
  243. dateparser-1.2.1.dist-info/top_level.txt +4 -0
  244. dateparser_cli/__init__.py +0 -0
  245. dateparser_cli/cli.py +36 -0
  246. dateparser_cli/exceptions.py +2 -0
  247. dateparser_cli/fasttext_manager.py +42 -0
  248. dateparser_cli/utils.py +27 -0
  249. dateparser_data/__init__.py +0 -0
  250. dateparser_data/settings.py +33 -0
  251. dateparser_scripts/__init__.py +0 -0
  252. dateparser_scripts/get_cldr_data.py +567 -0
  253. dateparser_scripts/order_languages.py +217 -0
  254. dateparser_scripts/update_supported_languages_and_locales.py +48 -0
  255. dateparser_scripts/utils.py +73 -0
  256. dateparser_scripts/write_complete_data.py +129 -0
@@ -0,0 +1,217 @@
1
+ import json
2
+ import os
3
+ from collections import OrderedDict
4
+
5
+ import regex as re
6
+ import requests
7
+ from parsel import Selector
8
+
9
+ from dateparser_scripts.utils import get_raw_data
10
+
11
# Work from this script's own directory so that the "../..." relative paths
# used throughout the module do not depend on the caller's working directory.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Languages with insufficient translation data are excluded
avoid_languages = {"cu", "kkj", "nds", "prg", "tk", "vai", "vai-Latn", "vai-Vaii", "vo"}
15
+
16
+
17
def _get_language_locale_dict():
    """Map each CLDR language to the locale names that extend it.

    The entries of ``../raw_data/cldr_dates_full/main/`` are split in two
    groups: names ending in ``-<UPPERCASE/DIGITS>`` are locales, everything
    else (minus the ``root`` entry) is a language.  Languages listed in
    ``avoid_languages`` are left out of the result.

    Returns:
        dict: language name -> list of locale names of the form
        ``<language>-<UPPERCASE/DIGITS>``.

    Raises:
        ValueError: if the directory has no ``root`` entry.
    """
    entries = os.listdir("../raw_data/cldr_dates_full/main/")

    languages = []
    for entry in entries:
        if re.search(r"-[A-Z0-9]+$", entry) is None:
            languages.append(entry)
    languages.remove("root")

    locales_by_language = {
        language: [
            entry
            for entry in entries
            if re.match(language + "-[A-Z0-9]+$", entry)
        ]
        for language in languages
    }

    for excluded in avoid_languages:
        locales_by_language.pop(excluded, None)
    return locales_by_language
37
+
38
+
39
def _get_language_order(language_locale_dict):
    """Return all available language codes, most widely used first.

    The order is built in three layers:

    1. the web content-language ranking published by w3techs.com (or a
       stored copy of it when the site cannot be used);
    2. the remaining languages by speaker population, computed from the
       CLDR ``territoryInfo.json`` file;
    3. languages that appear in neither source: variants are placed right
       after their parent language, everything else is appended
       alphabetically.

    Only languages present in ``language_locale_dict`` or in the
    supplementary date translation directory are kept.

    Args:
        language_locale_dict: mapping whose keys are the CLDR language codes.

    Returns:
        list[str]: the ordered language codes.
    """

    def get_most_common_locales():
        """Return the w3techs language ranking, or the stored fallback."""
        # Order from https://w3techs.com/technologies/overview/content_language
        # Last updated on 03.10.2022
        old_common_locales = [
            "en",
            "ru",
            "es",
            "de",
            "tr",
            "fr",
            "fa",
            "ja",
            "zh",
            "vi",
            "it",
            "nl",
            "pt",
            "ar",
            "pl",
            "id",
            "ko",
            "uk",
            "th",
            "he",
            "cs",
            "sv",
            "ro",
            "el",
            "da",
            "hu",
            "fi",
            "sr",
            "sk",
            "bg",
            "nb",
            "hr",
            "lt",
            "no",
            "hi",
            "sl",
            "ca",
            "et",
        ]

        try:
            # Without a timeout a stalled connection would hang the script.
            response = requests.get(
                "https://w3techs.com/technologies/overview/content_language",
                timeout=30,
            )
        except requests.RequestException as e:
            # The stored list exists precisely for when the site is unusable.
            print(e)
            print("Could not fetch the language ranking, using the stored one")
            return old_common_locales

        if not response.ok:
            return old_common_locales

        try:
            sel = Selector(text=response.text)
            bars = sel.xpath("//table[@class='bars']//a/@href").getall()
            if not bars:
                raise ValueError("No bars found")
            new_most_common_locales = [
                i.replace("https://w3techs.com/technologies/details/cl", "").strip(
                    "-"
                )
                for i in bars
            ]
            # Sanity check on the scraped data.
            if new_most_common_locales[0] != "en":
                raise ValueError("English is not the first language")
        except Exception as e:
            # Deliberately broad: any scraping problem falls back to the
            # stored ranking instead of aborting the generation.
            print(e)
            print("The website could have changed, please update the code")
            return old_common_locales
        return new_most_common_locales

    territory_info_file = "../raw_data/cldr_core/supplemental/territoryInfo.json"
    with open(territory_info_file, encoding="utf-8") as f:
        territory_content = json.load(f)
    territory_info_data = territory_content["supplemental"]["territoryInfo"]

    # Accumulate, per language, percentage * population over all territories.
    language_population_dict = {}
    for territory in territory_info_data:
        population = int(territory_info_data[territory]["_population"])
        try:
            lang_dict = territory_info_data[territory]["languagePopulation"]
            for language in lang_dict:
                language_population = (
                    float(lang_dict[language]["_populationPercent"]) * population
                )
                language_population_dict[language] = (
                    language_population_dict.get(language, 0) + language_population
                )
        except (KeyError, TypeError, ValueError):
            # Best effort: territories with missing or malformed language
            # data simply do not contribute to the totals.
            continue

    most_common_locales = get_most_common_locales()
    language_order_with_duplicates = most_common_locales + sorted(
        language_population_dict,
        key=lambda x: (language_population_dict[x], x),
        reverse=True,
    )
    # dict.fromkeys keeps the first occurrence of every code, in order.
    language_order = [
        code.replace("_", "-")
        for code in dict.fromkeys(language_order_with_duplicates)
    ]

    cldr_languages = language_locale_dict.keys()
    supplementary_date_directory = (
        "../dateparser_data/supplementary_language_data/date_translation_data"
    )
    # Strip the 5-character file extension to get the language code.
    supplementary_languages = [x[:-5] for x in os.listdir(supplementary_date_directory)]
    available_languages = set(cldr_languages).union(supplementary_languages)
    language_order = [
        shortname for shortname in language_order if shortname in available_languages
    ]

    absent_languages = available_languages - set(language_order)
    remaining_languages = []
    # Iterate in a fixed (reverse alphabetical) order: each variant is
    # inserted directly after its parent, so siblings end up alphabetically
    # sorted and the result no longer depends on set iteration order.
    for language in sorted(absent_languages, reverse=True):
        parent_language = re.sub(r"-\w+", "", language)
        if parent_language in language_order:
            language_order.insert(language_order.index(parent_language) + 1, language)
        else:
            remaining_languages.append(language)
    language_order = language_order + sorted(remaining_languages)
    language_order = list(map(str, language_order))
    return language_order
164
+
165
+
166
def generate_language_map(language_order):
    """Group language codes under their base language.

    Codes without a ``-`` are base languages and start their own group;
    codes such as ``sr-Cyrl`` are appended to the group of the part before
    the first ``-``.  Codes are processed in sorted order, so each group
    lists the base language first, followed by its variants alphabetically.

    Args:
        language_order: iterable of language codes.

    Returns:
        dict: base language code -> list of codes belonging to it.
    """
    data = {}
    for lang in sorted(language_order):
        if "-" not in lang:
            data[lang] = [lang]
        else:
            # setdefault: a variant whose base language is not in the input
            # still gets a group instead of raising KeyError.
            data.setdefault(lang.split("-")[0], []).append(lang)
    return data
174
+
175
+
176
def main():
    """Regenerate ``../dateparser/data/languages_info.py``.

    Downloads the raw CLDR data, computes the language order and the
    language -> locales mapping, and writes three assignments to the output
    module: ``language_order``, ``language_map`` and ``language_locale_dict``.
    """
    get_raw_data()
    locales_by_language = _get_language_locale_dict()
    ordered_languages = _get_language_order(locales_by_language)

    def as_assignment(name, value):
        # Render ``name = <json>`` using the layout shared by all sections.
        return name + " = " + json.dumps(value, separators=(",", ": "), indent=4)

    output_directory = "../dateparser/data/"
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)

    # Every ordered language gets an entry; those unknown to CLDR get [].
    ordered_locales = OrderedDict(
        (code, sorted(locales_by_language.get(code, [])))
        for code in ordered_languages
    )

    sections = [
        as_assignment("language_order", ordered_languages),
        as_assignment("language_map", generate_language_map(ordered_languages)),
        as_assignment("language_locale_dict", ordered_locales),
    ]
    with open("../dateparser/data/languages_info.py", "w") as output_file:
        output_file.write("\n\n".join(sections) + "\n")
214
+
215
+
216
# Regenerate languages_info.py only when this module is run as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+ from tempfile import TemporaryFile
5
+
6
+ from dateparser.data.languages_info import language_locale_dict
7
+
8
+
9
def to_string(data):
    """Render the language -> locales mapping as plain-text table rows.

    One line is produced per language, in sorted order.  A language with
    locales is padded to an 18-character column and followed by its sorted
    locales, each wrapped in single quotes and separated by ", ".  A language
    without locales is emitted on its own.

    Args:
        data: mapping of language code -> iterable of locale names.

    Returns:
        str: the concatenated rows, each terminated by a newline.
    """
    language_column_width = 18
    rows = []
    for language in sorted(data):
        locales = data[language]
        row = language
        if locales:
            quoted = ("'{}'".format(locale) for locale in sorted(locales))
            row = language.ljust(language_column_width) + ", ".join(quoted)
        rows.append(row + "\n")
    return "".join(rows)
20
+
21
+
22
def main():
    """Refresh the locales table in ``docs/supported_locales.rst``.

    The file is copied line by line into a temporary file.  Everything
    between the second and the third table delimiter line is dropped and
    replaced by the freshly rendered rows; the result is then written back
    over the original file.
    """
    readme_path = os.path.join(
        os.path.dirname(__file__), "..", "docs", "supported_locales.rst"
    )
    new_data = to_string(language_locale_dict)
    # NOTE(review): this must match the table border in the .rst file
    # character for character (including the run of spaces) - confirm.
    delimiter = "============ ================================================================\n"

    # The context manager closes the temporary file even if reading or
    # writing the documentation file fails.
    with TemporaryFile("w+") as temporary_file:
        with open(readme_path) as readme_file:
            delimiters_seen = 0
            is_inside_table = False
            for line in readme_file:
                if line == delimiter:
                    delimiters_seen += 1
                    # Only the second delimiter opens the table body.
                    is_inside_table = delimiters_seen == 2
                elif is_inside_table:
                    # Old table rows are discarded.
                    continue
                temporary_file.write(line)
                if is_inside_table:
                    # Reached only for the second delimiter line itself.
                    temporary_file.write(new_data)
        temporary_file.seek(0)
        with open(readme_path, "w") as readme_file:
            readme_file.write(temporary_file.read())
45
+
46
+
47
# Update the documentation table only when this module is run as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,73 @@
1
+ import os
2
+ import shutil
3
+ from collections import OrderedDict
4
+
5
+ from git import Repo
6
+
7
+
8
def get_raw_data():
    """Download a fresh copy of the CLDR data at a pinned version.

    Any existing ``../raw_data`` directory is deleted and recreated, then the
    ``cldr-dates-full``, ``cldr-core`` and ``cldr-rbnf`` repositories are
    cloned into it and switched to ``cldr_version``.
    """
    cldr_version = "31.0.1"
    raw_data_directory = "../raw_data"

    cldr_data = {
        "dates_full": {
            "url": "https://github.com/unicode-cldr/cldr-dates-full.git",
            "dir": "{}/cldr_dates_full/".format(raw_data_directory),
        },
        "core": {
            "url": "https://github.com/unicode-cldr/cldr-core.git",
            "dir": "{}/cldr_core/".format(raw_data_directory),
        },
        "rbnf": {
            "url": "https://github.com/unicode-cldr/cldr-rbnf.git",
            "dir": "{}/cldr_rbnf/".format(raw_data_directory),
        },
    }

    if os.path.isdir(raw_data_directory):
        # remove current raw data
        shutil.rmtree(raw_data_directory)
    os.mkdir(raw_data_directory)

    for name, data in cldr_data.items():
        print('Cloning "{}" from: {}'.format(name, data["url"]))
        repo = Repo.clone_from(data["url"], data["dir"], branch="master")
        # Use the real git command: "co" only exists as a user-defined alias
        # and fails on machines where that alias is not configured.
        repo.git.checkout(cldr_version)
36
+
37
+
38
def get_dict_difference(parent_dict, child_dict):
    """Return the part of ``child_dict`` that is not already in ``parent_dict``.

    For every key of the child:

    * if the parent has no (truthy) value, the child's value is kept as is;
    * lists keep only the items missing from the parent, sorted;
    * dicts are compared recursively;
    * any other value is kept when it differs from the parent's.

    Keys whose resulting value is empty or falsy are omitted.

    Returns:
        OrderedDict: child-specific values, in the child's key order.
    """
    difference_dict = OrderedDict()
    for key in child_dict:
        child_value = child_dict[key]
        parent_value = parent_dict.get(key)

        if not parent_value:
            delta = child_value
        elif isinstance(child_value, list):
            delta = sorted(set(child_value) - set(parent_value))
        elif isinstance(child_value, dict):
            delta = get_dict_difference(parent_value, child_value)
        elif child_value != parent_value:
            delta = child_value
        else:
            delta = None

        if delta:
            difference_dict[key] = delta
    return difference_dict
54
+
55
+
56
def combine_dicts(primary_dict, supplementary_dict):
    """Merge ``supplementary_dict`` into ``primary_dict`` without mutating either.

    Keys of the primary dict come first, in their original order.  When a key
    exists in both, the type of the primary value decides the merge: lists are
    concatenated (primary items first), dicts are merged recursively, and any
    other value is replaced by the supplementary one.  Keys found only in the
    supplementary dict are appended afterwards.

    Returns:
        OrderedDict: the merged mapping.
    """
    combined_dict = OrderedDict()
    for key in primary_dict:
        primary_value = primary_dict[key]
        if key not in supplementary_dict:
            combined_dict[key] = primary_value
            continue

        extra_value = supplementary_dict[key]
        if isinstance(primary_value, list):
            combined_dict[key] = primary_value + extra_value
        elif isinstance(primary_value, dict):
            combined_dict[key] = combine_dicts(primary_value, extra_value)
        else:
            combined_dict[key] = extra_value

    for key in supplementary_dict:
        if key not in primary_dict:
            combined_dict[key] = supplementary_dict[key]
    return combined_dict
@@ -0,0 +1,129 @@
1
+ import json
2
+ import os
3
+ import shutil
4
+ from collections import OrderedDict
5
+
6
+ import regex as re
7
+ from ruamel.yaml import RoundTripLoader
8
+
9
+ from dateparser_scripts.order_languages import avoid_languages
10
+ from dateparser_scripts.utils import combine_dicts
11
+
12
# Input and output locations.  They are relative paths, resolved against this
# script's directory thanks to the os.chdir call below.
cldr_date_directory = "../dateparser_data/cldr_language_data/date_translation_data/"
supplementary_directory = "../dateparser_data/supplementary_language_data/"
supplementary_date_directory = (
    "../dateparser_data/supplementary_language_data/date_translation_data/"
)
translation_data_directory = "../dateparser/data/"
date_translation_directory = "../dateparser/data/date_translation_data/"

# Make the relative paths above independent of the caller's working directory.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Language codes are the file names minus their 5-character extension
# (the files are later opened as "<language>.json" and "<language>.yaml").
cldr_languages = list(
    set(map(lambda x: x[:-5], os.listdir(cldr_date_directory))) - avoid_languages
)
supplementary_languages = [x[:-5] for x in os.listdir(supplementary_date_directory)]
all_languages = set(cldr_languages).union(set(supplementary_languages))

# Matches the literal "{0}" placeholder found in relative-date strings.
RELATIVE_PATTERN = re.compile(r"\{0\}")
29
+
30
+
31
def _modify_relative_data(relative_data):
    """Turn ``{0}`` placeholders into a number-matching regex group.

    Every string of every list in ``relative_data`` has its ``{0}``
    placeholder replaced by the regex text ``(\\d+[.,]?\\d*)``.  The lists
    are updated IN PLACE (callers rely on this) and are also returned in a
    new ordered mapping.

    Args:
        relative_data: mapping of key -> mutable list of strings.

    Returns:
        OrderedDict: the same keys mapped to the same (now updated) lists.
    """
    modified_relative_data = OrderedDict()
    for key in relative_data:
        patterns = relative_data[key]
        for position in range(len(patterns)):
            patterns[position] = RELATIVE_PATTERN.sub(
                r"(\\d+[.,]?\\d*)", patterns[position]
            )
        modified_relative_data[key] = patterns
    return modified_relative_data
39
+
40
+
41
def _modify_data(language_data):
    """Rewrite the relative-date patterns of ``language_data`` in place.

    Applies ``_modify_relative_data`` to the language-level
    ``"relative-type-regex"`` section and to the same section of every entry
    under ``"locale_specific"``.  Missing sections are skipped.  Nothing is
    returned: the helper mutates the pattern lists directly.
    """
    _modify_relative_data(language_data.get("relative-type-regex", {}))
    for info in language_data.get("locale_specific", {}).values():
        _modify_relative_data(info.get("relative-type-regex", {}))
48
+
49
+
50
def _get_complete_date_translation_data(language):
    """Load and merge all translation data available for ``language``.

    Reads the CLDR JSON file and/or the supplementary YAML file of the
    language (whichever exist according to ``cldr_languages`` and
    ``supplementary_languages``) and merges them with ``combine_dicts``,
    CLDR data being the primary source.  If the merged data has no
    ``"name"`` entry, the language code is used.

    Args:
        language: language code, e.g. ``"en"``.

    Returns:
        OrderedDict: the combined translation data.
    """
    cldr_data = {}
    supplementary_data = {}
    # The translation files hold text in many scripts; read them as UTF-8
    # explicitly instead of relying on the platform's default encoding.
    if language in cldr_languages:
        with open(cldr_date_directory + language + ".json", encoding="utf-8") as f:
            cldr_data = json.load(f, object_pairs_hook=OrderedDict)
    if language in supplementary_languages:
        with open(
            supplementary_date_directory + language + ".yaml", encoding="utf-8"
        ) as g:
            supplementary_data = OrderedDict(RoundTripLoader(g).get_data())
    complete_data = combine_dicts(cldr_data, supplementary_data)
    if "name" not in complete_data:
        complete_data["name"] = language
    return complete_data
63
+
64
+
65
+ def _write_file(filename, text, mode, in_memory, in_memory_result):
66
+ if in_memory:
67
+ in_memory_result[filename] = text
68
+ else:
69
+ with open(filename, mode) as out:
70
+ out.write(text)
71
+
72
+
73
def write_complete_data(in_memory=False):
    """
    Generate the per-language py files from the CLDR files (JSON format)
    and the supplementary language data (YAML format).

    Use it with in_memory=True to get a dictionary mapping the language file
    names to their content instead of writing those files (used when
    testing).
    """
    in_memory_result = {}

    if not in_memory:
        if not os.path.isdir(translation_data_directory):
            os.mkdir(translation_data_directory)
        # Start from an empty output directory so stale files disappear.
        if os.path.isdir(date_translation_directory):
            shutil.rmtree(date_translation_directory)
        os.mkdir(date_translation_directory)

    # Read as UTF-8 explicitly rather than with the platform default.
    with open(supplementary_directory + "base_data.yaml", encoding="utf-8") as f:
        base_data = RoundTripLoader(f).get_data()

    # sorted() makes the processing order, and therefore the key order of the
    # returned dictionary, reproducible (all_languages is a set).
    for language in sorted(all_languages):
        date_translation_data = _get_complete_date_translation_data(language)
        date_translation_data = combine_dicts(date_translation_data, base_data)
        _modify_data(date_translation_data)
        translation_data = json.dumps(
            date_translation_data, indent=4, separators=(",", ": "), ensure_ascii=False
        )
        out_text = ("info = " + translation_data + "\n").encode("utf-8")
        _write_file(
            date_translation_directory + language + ".py",
            out_text,
            "wb",
            in_memory,
            in_memory_result,
        )

    init_text = (
        "from dateparser.data import date_translation_data\n"
        "from .languages_info import language_order, language_locale_dict\n"
    )

    # NOTE(review): both __init__.py files are written to disk even when
    # in_memory=True (the flag is hard-coded to False here), which keeps them
    # out of the returned dictionary - confirm this is intentional.
    _write_file(
        translation_data_directory + "__init__.py",
        init_text,
        "w",
        False,
        in_memory_result,
    )
    _write_file(
        date_translation_directory + "__init__.py", "", "w", False, in_memory_result
    )

    return in_memory_result
126
+
127
+
128
# Generate the data files on disk only when this module is run as a script.
if __name__ == "__main__":
    write_complete_data()