phoonnx 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. phoonnx/__init__.py +0 -0
  2. phoonnx/config.py +490 -0
  3. phoonnx/locale/ca/phonetic_spellings.txt +2 -0
  4. phoonnx/locale/en/phonetic_spellings.txt +1 -0
  5. phoonnx/locale/gl/phonetic_spellings.txt +2 -0
  6. phoonnx/locale/pt/phonetic_spellings.txt +2 -0
  7. phoonnx/phoneme_ids.py +453 -0
  8. phoonnx/phonemizers/__init__.py +45 -0
  9. phoonnx/phonemizers/ar.py +42 -0
  10. phoonnx/phonemizers/base.py +216 -0
  11. phoonnx/phonemizers/en.py +250 -0
  12. phoonnx/phonemizers/fa.py +46 -0
  13. phoonnx/phonemizers/gl.py +142 -0
  14. phoonnx/phonemizers/he.py +67 -0
  15. phoonnx/phonemizers/ja.py +119 -0
  16. phoonnx/phonemizers/ko.py +97 -0
  17. phoonnx/phonemizers/mul.py +606 -0
  18. phoonnx/phonemizers/vi.py +44 -0
  19. phoonnx/phonemizers/zh.py +308 -0
  20. phoonnx/thirdparty/__init__.py +0 -0
  21. phoonnx/thirdparty/arpa2ipa.py +249 -0
  22. phoonnx/thirdparty/cotovia/cotovia_aarch64 +0 -0
  23. phoonnx/thirdparty/cotovia/cotovia_x86_64 +0 -0
  24. phoonnx/thirdparty/hangul2ipa.py +783 -0
  25. phoonnx/thirdparty/ko_tables/aspiration.csv +20 -0
  26. phoonnx/thirdparty/ko_tables/assimilation.csv +31 -0
  27. phoonnx/thirdparty/ko_tables/double_coda.csv +17 -0
  28. phoonnx/thirdparty/ko_tables/hanja.tsv +8525 -0
  29. phoonnx/thirdparty/ko_tables/ipa.csv +22 -0
  30. phoonnx/thirdparty/ko_tables/neutralization.csv +11 -0
  31. phoonnx/thirdparty/ko_tables/tensification.csv +56 -0
  32. phoonnx/thirdparty/ko_tables/yale.csv +22 -0
  33. phoonnx/thirdparty/kog2p/__init__.py +385 -0
  34. phoonnx/thirdparty/kog2p/rulebook.txt +212 -0
  35. phoonnx/thirdparty/mantoq/__init__.py +67 -0
  36. phoonnx/thirdparty/mantoq/buck/__init__.py +0 -0
  37. phoonnx/thirdparty/mantoq/buck/phonetise_buckwalter.py +569 -0
  38. phoonnx/thirdparty/mantoq/buck/symbols.py +64 -0
  39. phoonnx/thirdparty/mantoq/buck/tokenization.py +105 -0
  40. phoonnx/thirdparty/mantoq/num2words.py +37 -0
  41. phoonnx/thirdparty/mantoq/pyarabic/__init__.py +12 -0
  42. phoonnx/thirdparty/mantoq/pyarabic/arabrepr.py +64 -0
  43. phoonnx/thirdparty/mantoq/pyarabic/araby.py +1647 -0
  44. phoonnx/thirdparty/mantoq/pyarabic/named_const.py +227 -0
  45. phoonnx/thirdparty/mantoq/pyarabic/normalize.py +161 -0
  46. phoonnx/thirdparty/mantoq/pyarabic/number.py +826 -0
  47. phoonnx/thirdparty/mantoq/pyarabic/number_const.py +1704 -0
  48. phoonnx/thirdparty/mantoq/pyarabic/stack.py +52 -0
  49. phoonnx/thirdparty/mantoq/pyarabic/trans.py +517 -0
  50. phoonnx/thirdparty/mantoq/unicode_symbol2label.py +4173 -0
  51. phoonnx/thirdparty/tashkeel/LICENSE +22 -0
  52. phoonnx/thirdparty/tashkeel/SOURCE +1 -0
  53. phoonnx/thirdparty/tashkeel/__init__.py +212 -0
  54. phoonnx/thirdparty/tashkeel/hint_id_map.json +18 -0
  55. phoonnx/thirdparty/tashkeel/input_id_map.json +56 -0
  56. phoonnx/thirdparty/tashkeel/model.onnx +0 -0
  57. phoonnx/thirdparty/tashkeel/target_id_map.json +17 -0
  58. phoonnx/thirdparty/zh_num.py +238 -0
  59. phoonnx/util.py +705 -0
  60. phoonnx/version.py +6 -0
  61. phoonnx/voice.py +521 -0
  62. phoonnx-0.0.0.dist-info/METADATA +255 -0
  63. phoonnx-0.0.0.dist-info/RECORD +86 -0
  64. phoonnx-0.0.0.dist-info/WHEEL +5 -0
  65. phoonnx-0.0.0.dist-info/top_level.txt +2 -0
  66. phoonnx_train/__main__.py +151 -0
  67. phoonnx_train/export_onnx.py +109 -0
  68. phoonnx_train/norm_audio/__init__.py +92 -0
  69. phoonnx_train/norm_audio/trim.py +54 -0
  70. phoonnx_train/norm_audio/vad.py +54 -0
  71. phoonnx_train/preprocess.py +420 -0
  72. phoonnx_train/vits/__init__.py +0 -0
  73. phoonnx_train/vits/attentions.py +427 -0
  74. phoonnx_train/vits/commons.py +147 -0
  75. phoonnx_train/vits/config.py +330 -0
  76. phoonnx_train/vits/dataset.py +214 -0
  77. phoonnx_train/vits/lightning.py +352 -0
  78. phoonnx_train/vits/losses.py +58 -0
  79. phoonnx_train/vits/mel_processing.py +139 -0
  80. phoonnx_train/vits/models.py +732 -0
  81. phoonnx_train/vits/modules.py +527 -0
  82. phoonnx_train/vits/monotonic_align/__init__.py +20 -0
  83. phoonnx_train/vits/monotonic_align/setup.py +13 -0
  84. phoonnx_train/vits/transforms.py +212 -0
  85. phoonnx_train/vits/utils.py +16 -0
  86. phoonnx_train/vits/wavfile.py +860 -0
@@ -0,0 +1,1704 @@
1
+ #!/usr/bin/python
2
+ # -*- coding=utf-8 -*-
3
+ """
4
+ Constants used for number module
5
+ """
6
+ # at top of module
7
+ from __future__ import (absolute_import, division, print_function,
8
+ unicode_literals)
9
+
10
+ try:
11
+ import araby
12
+ import normalize
13
+ except:
14
+ from . import araby, normalize
15
+
16
+
17
+ THAOUSAND_MULTIPLE = ()
18
+ NUMBER_TEN_MASCULIN_UNITS = (
19
+ "اثني",
20
+ "اثنا",
21
+ "إثني",
22
+ "إثنا",
23
+ "أحد",
24
+ "ثلاثة",
25
+ "أربعة",
26
+ "خمسة",
27
+ "ستة",
28
+ "سبعة",
29
+ "ثمانية",
30
+ "تسعة",
31
+ )
32
+ NUMBER_TEN_FEMININ_UNITS = (
33
+ "إحدى",
34
+ "اثنتا",
35
+ "اثنتي",
36
+ "ثلاث",
37
+ "أربع",
38
+ "خمس",
39
+ "ست",
40
+ "سبع",
41
+ "ثمان",
42
+ "ثماني",
43
+ "تسع",
44
+ )
45
+ NUMBER_WORDS = {
46
+ "صفر": 0,
47
+ "واحد": 1,
48
+ "واحدة": 1,
49
+ "اثنان": 2,
50
+ "ثلاثة": 3,
51
+ "أربعة": 4,
52
+ "خمسة": 5,
53
+ "ستة": 6,
54
+ "سبعة": 7,
55
+ "ثمانية": 8,
56
+ "تسعة": 9,
57
+ "عشرة": 10,
58
+ "عشرون": 20,
59
+ "ثلاثون": 30,
60
+ "أربعون": 40,
61
+ "خمسون": 50,
62
+ "ستون": 60,
63
+ "سبعون": 70,
64
+ "ثمانون": 80,
65
+ "تسعون": 90,
66
+ "مئة": 100,
67
+ "مئتان": 200,
68
+ "ثلاثمئة": 300,
69
+ "أربعمئة": 400,
70
+ "خمسمئة": 500,
71
+ "ستمئة": 600,
72
+ "سبعمئة": 700,
73
+ "ثمانمئة": 800,
74
+ "تسعمئة": 900,
75
+ "ثلاثمائة": 300,
76
+ "أربعمائة": 400,
77
+ "خمسمائة": 500,
78
+ "ستمائة": 600,
79
+ "سبعمائة": 700,
80
+ "ثمانمائة": 800,
81
+ "تسعمائة": 900,
82
+ "ألف": 1000,
83
+ "ألفا": 1000,
84
+ "مليون": 1000000,
85
+ "مليار": 1000000000,
86
+ "ألفان": 2000,
87
+ "ألفين": 2000,
88
+ "مليونان": 2000000,
89
+ "مليونين": 2000000,
90
+ "ملياران": 2000000000,
91
+ "مليارين": 2000000000,
92
+ "أحد": 1,
93
+ "إحدى": 1,
94
+ "اثنين": 2,
95
+ "إثنين": 2,
96
+ "إثنان": 2,
97
+ "اثني": 2,
98
+ "اثنا": 2,
99
+ "إثني": 2,
100
+ "إثنا": 2,
101
+ "ثلاث": 3,
102
+ "أربع": 4,
103
+ "خمس": 5,
104
+ "ست": 6,
105
+ "سبع": 7,
106
+ "ثمان": 8,
107
+ "ثماني": 8,
108
+ "تسع": 9,
109
+ "عشر": 10,
110
+ "ثلاثا": 3,
111
+ "أربعا": 4,
112
+ "خمسا": 5,
113
+ "ستا": 6,
114
+ "سبعا": 7,
115
+ "تسعا": 9,
116
+ "عشرا": 10,
117
+ "عشرين": 20,
118
+ "ثلاثين": 30,
119
+ "أربعين": 40,
120
+ "خمسين": 50,
121
+ "ستين": 60,
122
+ "سبعين": 70,
123
+ "ثمانين": 80,
124
+ "تسعين": 90,
125
+ "مائة": 100,
126
+ "مئتين": 200,
127
+ "آلاف": 1000,
128
+ "ملايين": 1000000,
129
+ "مليارات": 1000000000,
130
+ }
131
+
132
+ VOCALIZED_NUMBER_WORDS = {
133
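+ # i: unvocalized
134
+ # r : marfou3 (nominative) رفع
135
+ # r2 : marfou3 + tanwin (nominative, indefinite)
136
+ # n : mansoub (accusative)
137
+ # n2: mansoub + tanwin (accusative, indefinite)
138
+ # j : majrour (genitive)
139
+ # j2 : majrour + tanwin (genitive, indefinite)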
140
+ "صفر": {
141
+ "i": "صِفْر",
142
+ "r": "صِفْرُ",
143
+ "r2": "صِفْرٌ",
144
+ "n": "صِفْرَ",
145
+ "n2": "صِفْرً",
146
+ "j": "صِفْرِ",
147
+ "j2": "صِفْرٍ",
148
+ "s": "",
149
+ },
150
+ "واحد": {
151
+ "i": "وَاحِد",
152
+ "r": "وَاحِدُ",
153
+ "r2": "وَاحِدٌ",
154
+ "n": "وَاحِدَ",
155
+ "n2": "وَاحِدً",
156
+ "j": "وَاحِدِ",
157
+ "j2": "وَاحِدٍ",
158
+ "s": "",
159
+ },
160
+ "واحدة": {
161
+ "i": "وَاحِدَة",
162
+ "r": "وَاحِدَةُ",
163
+ "r2": "وَاحِدَةٌ",
164
+ "n": "وَاحِدَةَ",
165
+ "n2": "وَاحِدَةً",
166
+ "j": "وَاحِدَةِ",
167
+ "j2": "وَاحِدَةٍ",
168
+ "s": "",
169
+ },
170
+ "اثنان": {
171
+ "i": "اثنان",
172
+ "r": "اثنان",
173
+ "r2": "اثنانٌ",
174
+ "n": "اثنانَ",
175
+ "n2": "اثنانً",
176
+ "j": "اثنانِ",
177
+ "j2": "اثنانٍ",
178
+ "s": "*",
179
+ },
180
+ "ثلاثة": {
181
+ "i": "ثَلاثَة",
182
+ "r": "ثَلاثَةُ",
183
+ "r2": "ثَلاثَةٌ",
184
+ "n": "ثَلاثَةَ",
185
+ "n2": "ثَلاثَةً",
186
+ "j": "ثَلاثَةِ",
187
+ "j2": "ثَلاثَةٍ",
188
+ "s": "",
189
+ },
190
+ "أربعة": {
191
+ "i": "أَرْبَعَة",
192
+ "r": "أَرْبَعَةُ",
193
+ "r2": "أَرْبَعَةٌ",
194
+ "n": "أَرْبَعَةَ",
195
+ "n2": "أَرْبَعَةً",
196
+ "j": "أَرْبَعَةِ",
197
+ "j2": "أَرْبَعَةٍ",
198
+ "s": "",
199
+ },
200
+ "خمسة": {
201
+ "i": "خَمْسَة",
202
+ "r": "خَمْسَةُ",
203
+ "r2": "خَمْسَةٌ",
204
+ "n": "خَمْسَةَ",
205
+ "n2": "خَمْسَةً",
206
+ "j": "خَمْسَةِ",
207
+ "j2": "خَمْسَةٍ",
208
+ "s": "",
209
+ },
210
+ "ستة": {
211
+ "i": "سِتَّة",
212
+ "r": "سِتَّةُ",
213
+ "r2": "سِتَّةٌ",
214
+ "n": "سِتَّةَ",
215
+ "n2": "سِتَّةً",
216
+ "j": "سِتَّةِ",
217
+ "j2": "سِتَّةٍ",
218
+ "s": "",
219
+ },
220
+ "سبعة": {
221
+ "i": "سَبْعَة",
222
+ "r": "سَبْعَةُ",
223
+ "r2": "سَبْعَةٌ",
224
+ "n": "سَبْعَةَ",
225
+ "n2": "سَبْعَةً",
226
+ "j": "سَبْعَةِ",
227
+ "j2": "سَبْعَةٍ",
228
+ "s": "",
229
+ },
230
+ "ثمانية": {
231
+ "i": "ثَمانِيَة",
232
+ "r": "ثَمانِيَةُ",
233
+ "r2": "ثَمانِيَةٌ",
234
+ "n": "ثَمانِيَةَ",
235
+ "n2": "ثَمانِيَةً",
236
+ "j": "ثَمانِيَةِ",
237
+ "j2": "ثَمانِيَةٍ",
238
+ "s": "",
239
+ },
240
+ "تسعة": {
241
+ "i": "تِسْعَة",
242
+ "r": "تِسْعَةُ",
243
+ "r2": "تِسْعَةٌ",
244
+ "n": "تِسْعَةَ",
245
+ "n2": "تِسْعَةً",
246
+ "j": "تِسْعَةِ",
247
+ "j2": "تِسْعَةٍ",
248
+ "s": "",
249
+ },
250
+ "عشرة": {
251
+ "i": "عَشْرَة",
252
+ "r": "عَشْرَةُ",
253
+ "r2": "عَشْرَةٌ",
254
+ "n": "عَشْرَةَ",
255
+ "n2": "عَشْرَةً",
256
+ "j": "عَشْرَةِ",
257
+ "j2": "عَشْرَةٍ",
258
+ "s": "",
259
+ },
260
+ "عشرون": {
261
+ "i": "عِشْرُونَ",
262
+ "r": "عِشْرُونَ",
263
+ "r2": "",
264
+ "n": "",
265
+ "n2": "",
266
+ "j": "",
267
+ "j2": "",
268
+ "s": "*",
269
+ },
270
+ "ثلاثون": {
271
+ "i": "ثَلاثُونَ",
272
+ "r": "ثَلاثُونَ",
273
+ "r2": "",
274
+ "n": "",
275
+ "n2": "",
276
+ "j": "",
277
+ "j2": "",
278
+ "s": "*",
279
+ },
280
+ "أربعون": {
281
+ "i": "أَرْبَعُونَ",
282
+ "r": "أَرْبَعُونَ",
283
+ "r2": "",
284
+ "n": "",
285
+ "n2": "",
286
+ "j": "",
287
+ "j2": "",
288
+ "s": "*",
289
+ },
290
+ "خمسون": {
291
+ "i": "خَمْسُونَ",
292
+ "r": "خَمْسُونَ",
293
+ "r2": "",
294
+ "n": "",
295
+ "n2": "",
296
+ "j": "",
297
+ "j2": "",
298
+ "s": "*",
299
+ },
300
+ "ستون": {
301
+ "i": "سِتُّونَ",
302
+ "r": "سِتُّونَ",
303
+ "r2": "",
304
+ "n": "",
305
+ "n2": "",
306
+ "j": "",
307
+ "j2": "",
308
+ "s": "*",
309
+ },
310
+ "سبعون": {
311
+ "i": "سَبْعُونَ",
312
+ "r": "سَبْعُونَ",
313
+ "r2": "",
314
+ "n": "",
315
+ "n2": "",
316
+ "j": "",
317
+ "j2": "",
318
+ "s": "*",
319
+ },
320
+ "ثمانون": {
321
+ "i": "ثمانون",
322
+ "r": "ثمانون",
323
+ "r2": "",
324
+ "n": "",
325
+ "n2": "",
326
+ "j": "",
327
+ "j2": "",
328
+ "s": "*",
329
+ },
330
+ "تسعون": {
331
+ "i": "تِسْعُونَ",
332
+ "r": "تِسْعُونَ",
333
+ "r2": "",
334
+ "n": "",
335
+ "n2": "",
336
+ "j": "",
337
+ "j2": "",
338
+ "s": "*",
339
+ },
340
+ "مئة": {
341
+ "i": "مِئَة",
342
+ "r": "مِئِةُ",
343
+ "r2": "مِئَةٌ",
344
+ "n": "مِئَةَ",
345
+ "n2": "مِئَةً",
346
+ "j": "مِئَةِ",
347
+ "j2": "مِئَةٍ",
348
+ "s": "",
349
+ },
350
+ "مئتان": {
351
+ "i": "مِئَتَانِ",
352
+ "r": "مِئَتَانِ",
353
+ "r2": "",
354
+ "n": "",
355
+ "n2": "",
356
+ "j": "",
357
+ "j2": "",
358
+ "s": "*",
359
+ },
360
+ "ثلاثمئة": {
361
+ "i": "ثَلَاثمِئَة",
362
+ "r": "ثَلَاثُمِئَةِ",
363
+ "r2": "ثَلَاثُمِئَةٍ",
364
+ "n": "ثَلَاثَمِئَةِ",
365
+ "n2": "ثَلَاثَمِئَةٍ",
366
+ "j": "ثَلَاثِمِئَةِ",
367
+ "j2": "ثَلَاثِمِئَةٍ",
368
+ "s": "",
369
+ },
370
+ "أربعمئة": {
371
+ "i": "أَرْبَعمِئَة",
372
+ "r": "أَرْبَعُمِئَةِ",
373
+ "r2": "أَرْبَعُمِئَةٍ",
374
+ "n": "أَرْبَعَمِئَةِ",
375
+ "n2": "أَرْبَعَمِئَةٍ",
376
+ "j": "أَرْبَعِمِئَةِ",
377
+ "j2": "أَرْبَعِمِئَةٍ",
378
+ "s": "",
379
+ },
380
+ "خمسمئة": {
381
+ "i": "خَمْسمِئَة",
382
+ "r": "خَمْسُمِئَةِ",
383
+ "r2": "خَمْسُمِئَةٍ",
384
+ "n": "خَمْسَمِئَةِ",
385
+ "n2": "خَمْسَمِئَةٍ",
386
+ "j": "خَمْسِمِئَةِ",
387
+ "j2": "خَمْسِمِئَةٍ",
388
+ "s": "",
389
+ },
390
+ "ستمئة": {
391
+ "i": "سِتّمِئَة",
392
+ "r": "سِتُّمِئَةِ",
393
+ "r2": "سِتُّمِئَةٍ",
394
+ "n": "سِتَّمِئَةِ",
395
+ "n2": "سِتَّمِئَةٍ",
396
+ "j": "سِتِّمِئَةِ",
397
+ "j2": "سِتِّمِئَةٍ",
398
+ "s": "",
399
+ },
400
+ "سبعمئة": {
401
+ "i": "سَبْعمِئَة",
402
+ "r": "سَبْعُمِئَةِ",
403
+ "r2": "سَبْعُمِئَةٍ",
404
+ "n": "سَبْعَمِئَةِ",
405
+ "n2": "سَبْعَمِئَةٍ",
406
+ "j": "سَبْعِمِئَةِ",
407
+ "j2": "سَبْعِمِئَةٍ",
408
+ "s": "",
409
+ },
410
+ "ثمانمئة": {
411
+ "i": "ثَمَانمِئَة",
412
+ "r": "ثَمَانُمِئَةِ",
413
+ "r2": "ثَمَانُمِئَةٍ",
414
+ "n": "ثَمَانَمِئَةِ",
415
+ "n2": "ثَمَانَمِئَةٍ",
416
+ "j": "ثَمَانِمِئَةِ",
417
+ "j2": "ثَمَانِمِئَةٍ",
418
+ "s": "",
419
+ },
420
+ "تسعمئة": {
421
+ "i": "تِسْعمِئَة",
422
+ "r": "تِسْعُمِئَةِ",
423
+ "r2": "تِسْعُمِئَةٍ",
424
+ "n": "تِسْعَمِئَةِ",
425
+ "n2": "تِسْعَمِئَةٍ",
426
+ "j": "تِسْعِمِئَةِ",
427
+ "j2": "تِسْعِمِئَةٍ",
428
+ "s": "",
429
+ },
430
+ "ثلاثمائة": {
431
+ "i": "ثَلَاثمِائَة",
432
+ "r": "ثَلَاثُمِائَةِ",
433
+ "r2": "ثَلَاثُمِائَةٍ",
434
+ "n": "ثَلَاثَمِائَةِ",
435
+ "n2": "ثَلَاثَمِائَةٍ",
436
+ "j": "ثَلَاثِمِائَةِ",
437
+ "j2": "ثَلَاثِمِائَةٍ",
438
+ "s": "",
439
+ },
440
+ "أربعمائة": {
441
+ "i": "أَرْبَعمِائَة",
442
+ "r": "أَرْبَعُمِائَةِ",
443
+ "r2": "أَرْبَعُمِائَةٍ",
444
+ "n": "أَرْبَعَمِائَةِ",
445
+ "n2": "أَرْبَعَمِائَةٍ",
446
+ "j": "أَرْبَعِمِائَةِ",
447
+ "j2": "أَرْبَعِمِائَةٍ",
448
+ "s": "",
449
+ },
450
+ "خمسمائة": {
451
+ "i": "خَمْسمِائَة",
452
+ "r": "خَمْسُمِائَةِ",
453
+ "r2": "خَمْسُمِائَةٍ",
454
+ "n": "خَمْسَمِائَةِ",
455
+ "n2": "خَمْسَمِائَةٍ",
456
+ "j": "خَمْسِمِائَةِ",
457
+ "j2": "خَمْسِمِائَةٍ",
458
+ "s": "",
459
+ },
460
+ "ستمائة": {
461
+ "i": "سِتّمِائَة",
462
+ "r": "سِتُّمِائَةِ",
463
+ "r2": "سِتُّمِائَةٍ",
464
+ "n": "سِتَّمِائَةِ",
465
+ "n2": "سِتَّمِائَةٍ",
466
+ "j": "سِتِّمِائَةِ",
467
+ "j2": "سِتِّمِائَةٍ",
468
+ "s": "",
469
+ },
470
+ "سبعمائة": {
471
+ "i": "سَبْعمِائَة",
472
+ "r": "سَبْعُمِائَةِ",
473
+ "r2": "سَبْعُمِائَةٍ",
474
+ "n": "سَبْعَمِائَةِ",
475
+ "n2": "سَبْعَمِائَةٍ",
476
+ "j": "سَبْعِمِائَةِ",
477
+ "j2": "سَبْعِمِائَةٍ",
478
+ "s": "",
479
+ },
480
+ "ثمانمائة": {
481
+ "i": "ثَمَانمِائَة",
482
+ "r": "ثَمَانُمِائَةِ",
483
+ "r2": "ثَمَانُمِائَةٍ",
484
+ "n": "ثَمَانَمِائَةِ",
485
+ "n2": "ثَمَانَمِائَةٍ",
486
+ "j": "ثَمَانِمِائَةِ",
487
+ "j2": "ثَمَانِمِائَةٍ",
488
+ "s": "",
489
+ },
490
+ "تسعمائة": {
491
+ "i": "تِسْعمِائَة",
492
+ "r": "تِسْعُمِائَةِ",
493
+ "r2": "تِسْعُمِائَةٍ",
494
+ "n": "تِسْعَمِائَةِ",
495
+ "n2": "تِسْعَمِائَةٍ",
496
+ "j": "تِسْعِمِائَةِ",
497
+ "j2": "تِسْعِمِائَةٍ",
498
+ "s": "",
499
+ },
500
+ "ألف": {
501
+ "i": "أَلْف",
502
+ "r": "أَلْف",
503
+ "r2": "أَلْفٌ",
504
+ "n": "أَلْفَ",
505
+ "n2": "أَلْفً",
506
+ "j": "أَلْفِ",
507
+ "j2": "أَلْفٍ",
508
+ "s": "",
509
+ },
510
+ "ألفا": {
511
+ "i": "أَلْفًا",
512
+ "r": "أَلْفًا",
513
+ "r2": "أَلْفًا",
514
+ "n": "أَلْفًا",
515
+ "n2": "أَلْفًا",
516
+ "j": "أَلْفًا",
517
+ "j2": "أَلْفًا",
518
+ "s": "أَلْفًا",
519
+ },
520
+ "مليون": {
521
+ "i": "مِلْيُون",
522
+ "r": "مِلْيُونُ",
523
+ "r2": "مِلْيُونٌ",
524
+ "n": "مِلْيُونَ",
525
+ "n2": "مِلْيُونً",
526
+ "j": "مِلْيُونِ",
527
+ "j2": "مِلْيُونٍ",
528
+ "s": "",
529
+ },
530
+ "مليار": {
531
+ "i": "مِلْيَار",
532
+ "r": "مِلْيَارُ",
533
+ "r2": "مِلْيَارٌ",
534
+ "n": "مِلْيَارَ",
535
+ "n2": "مِلْيَارً",
536
+ "j": "مِلْيَارِ",
537
+ "j2": "مِلْيَارٍ",
538
+ "s": "",
539
+ },
540
+ "ألفان": {
541
+ "i": "ألْفَانِ",
542
+ "r": "ألْفَانِ",
543
+ "r2": "",
544
+ "n": "",
545
+ "n2": "",
546
+ "j": "",
547
+ "j2": "",
548
+ "s": "*",
549
+ },
550
+ "ألفين": {
551
+ "i": "ألْفَيْنِ",
552
+ "r": "ألْفَيْنِ",
553
+ "r2": "",
554
+ "n": "",
555
+ "n2": "",
556
+ "j": "",
557
+ "j2": "",
558
+ "s": "*",
559
+ },
560
+ "مليونان": {
561
+ "i": "مِلْيُونَانِ",
562
+ "r": "مِلْيُونَانِ",
563
+ "r2": "",
564
+ "n": "",
565
+ "n2": "",
566
+ "j": "",
567
+ "j2": "",
568
+ "s": "*",
569
+ },
570
+ "مليونين": {
571
+ "i": "مِلْيُونَيْنِ",
572
+ "r": "مِلْيُونَيْنِ",
573
+ "r2": "",
574
+ "n": "",
575
+ "n2": "",
576
+ "j": "",
577
+ "j2": "",
578
+ "s": "*",
579
+ },
580
+ "ملياران": {
581
+ "i": "مِلْيَارَانِ",
582
+ "r": "مِلْيَارَانِ",
583
+ "r2": "",
584
+ "n": "",
585
+ "n2": "",
586
+ "j": "",
587
+ "j2": "",
588
+ "s": "*",
589
+ },
590
+ "مليارين": {
591
+ "i": "مِلْيَارَيْنِ",
592
+ "r": "مِلْيَارَيْنِ",
593
+ "r2": "",
594
+ "n": "",
595
+ "n2": "",
596
+ "j": "",
597
+ "j2": "",
598
+ "s": "*",
599
+ },
600
+ "أحد": {
601
+ "i": "أَحَد",
602
+ "r": "أَحُدُّ",
603
+ "r2": "أَحَدٌ",
604
+ "n": "أَحَدَ",
605
+ "n2": "أَحَدً",
606
+ "j": "أَحَدِ",
607
+ "j2": "أَحَدٍ",
608
+ "s": "",
609
+ },
610
+ "إحدى": {
611
+ "i": "إحْدَى",
612
+ "r": "إحْدَى",
613
+ "r2": "إحْدَىٌ",
614
+ "n": "إحْدَى",
615
+ "n2": "إحْدًى",
616
+ "j": "إحْدَىِ",
617
+ "j2": "إحْدَىٍ",
618
+ "s": "*",
619
+ },
620
+ "اثنين": {
621
+ "i": "اِثْنَينِ",
622
+ "r": "اِثْنَينِ",
623
+ "r2": "",
624
+ "n": "",
625
+ "n2": "",
626
+ "j": "",
627
+ "j2": "",
628
+ "s": "*",
629
+ },
630
+ "إثنين": {
631
+ "i": "إثنين",
632
+ "r": "إثنين",
633
+ "r2": "",
634
+ "n": "",
635
+ "n2": "",
636
+ "j": "",
637
+ "j2": "",
638
+ "s": "*",
639
+ },
640
+ "إثنان": {
641
+ "i": "إثنان",
642
+ "r": "إثنان",
643
+ "r2": "",
644
+ "n": "",
645
+ "n2": "",
646
+ "j": "",
647
+ "j2": "",
648
+ "s": "*",
649
+ },
650
+ "اثني": {
651
+ "i": "اِثْنَيْ",
652
+ "r": "اِثْنَيْ",
653
+ "r2": "",
654
+ "n": "",
655
+ "n2": "",
656
+ "j": "",
657
+ "j2": "",
658
+ "s": "*",
659
+ },
660
+ "اثنا": {
661
+ "i": "اِثْنَا",
662
+ "r": "اثنا",
663
+ "r2": "",
664
+ "n": "",
665
+ "n2": "",
666
+ "j": "",
667
+ "j2": "",
668
+ "s": "*",
669
+ },
670
+ "إثني": {
671
+ "i": "إثني",
672
+ "r": "إثني",
673
+ "r2": "",
674
+ "n": "",
675
+ "n2": "",
676
+ "j": "",
677
+ "j2": "",
678
+ "s": "*",
679
+ },
680
+ "إثنا": {
681
+ "i": "إثنا",
682
+ "r": "إثنا",
683
+ "r2": "",
684
+ "n": "",
685
+ "n2": "",
686
+ "j": "",
687
+ "j2": "",
688
+ "s": "*",
689
+ },
690
+ "ثلاث": {
691
+ "i": "ثَلاث",
692
+ "r": "ثَلاثُ",
693
+ "r2": "ثَلاثٌ",
694
+ "n": "ثَلاثَ",
695
+ "n2": "",
696
+ "j": "ثَلاثِ",
697
+ "j2": "ثَلاثٍ",
698
+ "s": "",
699
+ },
700
+ "أربع": {
701
+ "i": "أَرْبَع",
702
+ "r": "أَرْبَعُ",
703
+ "r2": "أَرْبَعٌ",
704
+ "n": "أَرْبَعَ",
705
+ "n2": "",
706
+ "j": "أَرْبَعِ",
707
+ "j2": "أَرْبَعٍ",
708
+ "s": "",
709
+ },
710
+ "خمس": {
711
+ "i": "خَمْس",
712
+ "r": "خَمْسُ",
713
+ "r2": "خَمْسٌ",
714
+ "n": "خَمْسَ",
715
+ "n2": "",
716
+ "j": "خَمْسِ",
717
+ "j2": "خَمْسٍ",
718
+ "s": "",
719
+ },
720
+ "ست": {
721
+ "i": "سِتّ",
722
+ "r": "سِتُّ",
723
+ "r2": "سِتٌّ",
724
+ "n": "سِتَّ",
725
+ "n2": "",
726
+ "j": "سِتِّ",
727
+ "j2": "سِتٍّ",
728
+ "s": "",
729
+ },
730
+ "سبع": {
731
+ "i": "سَبْع",
732
+ "r": "سَبْعُ",
733
+ "r2": "سَبْعٌ",
734
+ "n": "سَبْعَ",
735
+ "n2": "",
736
+ "j": "سَبْعِ",
737
+ "j2": "سَبْعٍ",
738
+ "s": "",
739
+ },
740
+ "ثمان": {
741
+ "i": "ثَمَان",
742
+ "r": "ثُمانُ",
743
+ "r2": "ثَمَانٌ",
744
+ "n": "ثَمَانَ",
745
+ "n2": "",
746
+ "j": "ثَمَانِ",
747
+ "j2": "ثَمَانٍ",
748
+ "s": "",
749
+ },
750
+ "ثماني": {
751
+ "i": "ثَمانِي",
752
+ "r": "ثَمانِي",
753
+ "r2": "",
754
+ "n": "",
755
+ "n2": "",
756
+ "j": "",
757
+ "j2": "",
758
+ "s": "*",
759
+ },
760
+ "تسع": {
761
+ "i": "تِسْع",
762
+ "r": "تِسْعُ",
763
+ "r2": "تِسْعٌ",
764
+ "n": "تِسْعَ",
765
+ "n2": "",
766
+ "j": "تِسْعِ",
767
+ "j2": "تِسْعٍ",
768
+ "s": "",
769
+ },
770
+ "عشر": {
771
+ "i": "عَشْر",
772
+ "r": "عَشْرُ",
773
+ "r2": "عَشْرٌ",
774
+ "n": "عَشَرَ",
775
+ "n2": "",
776
+ "j": "عَشْرِ",
777
+ "j2": "عَشْرٍ",
778
+ "s": "",
779
+ },
780
+ "ثلاثا": {
781
+ "i": "ثَلَاثًا",
782
+ "r": "",
783
+ "r2": "",
784
+ "n": "",
785
+ "n2": "ثَلَاثًا",
786
+ "j": "",
787
+ "j2": "",
788
+ "s": "*",
789
+ },
790
+ "أربعا": {
791
+ "i": "أَرْبَعًا",
792
+ "r": "",
793
+ "r2": "",
794
+ "n": "",
795
+ "n2": "أَرْبَعًا",
796
+ "j": "",
797
+ "j2": "",
798
+ "s": "*",
799
+ },
800
+ "خمسا": {
801
+ "i": "خَمْسًا",
802
+ "r": "",
803
+ "r2": "",
804
+ "n": "",
805
+ "n2": "خَمْسًا",
806
+ "j": "",
807
+ "j2": "",
808
+ "s": "*",
809
+ },
810
+ "ستا": {
811
+ "i": "سِتًّا",
812
+ "r": "",
813
+ "r2": "",
814
+ "n": "",
815
+ "n2": "سِتًّا",
816
+ "j": "",
817
+ "j2": "",
818
+ "s": "*",
819
+ },
820
+ "سبعا": {
821
+ "i": "سَبْعًا",
822
+ "r": "",
823
+ "r2": "",
824
+ "n": "",
825
+ "n2": "سَبْعًا",
826
+ "j": "",
827
+ "j2": "",
828
+ "s": "*",
829
+ },
830
+ "تسعا": {
831
+ "i": "تِسْعًا",
832
+ "r": "",
833
+ "r2": "",
834
+ "n": "",
835
+ "n2": "تِسْعًا",
836
+ "j": "",
837
+ "j2": "",
838
+ "s": "*",
839
+ },
840
+ "عشرا": {
841
+ "i": "عَشْرًا",
842
+ "r": "",
843
+ "r2": "",
844
+ "n": "",
845
+ "n2": "عَشْرًا",
846
+ "j": "",
847
+ "j2": "",
848
+ "s": "*",
849
+ },
850
+ "عشرين": {
851
+ "i": "عِشْرِينَ",
852
+ "r": "عِشْرِينَ",
853
+ "r2": "عِشْرِينَ",
854
+ "n": "عِشْرِينَ",
855
+ "n2": "عِشْرِينَ",
856
+ "j": "عِشْرِينَ",
857
+ "j2": "عِشْرِينَ",
858
+ "s": "*",
859
+ },
860
+ "ثلاثين": {
861
+ "i": "ثَلَاثِينَ",
862
+ "r": "ثَلَاثِينَ",
863
+ "r2": "ثَلَاثِينَ",
864
+ "n": "ثَلَاثِينَ",
865
+ "n2": "ثَلَاثِينَ",
866
+ "j": "ثَلَاثِينَ",
867
+ "j2": "ثَلَاثِينَ",
868
+ "s": "*",
869
+ },
870
+ "أربعين": {
871
+ "i": "أَرْبَعِينَ",
872
+ "r": "أَرْبَعِينَ",
873
+ "r2": "أَرْبَعِينَ",
874
+ "n": "أَرْبَعِينَ",
875
+ "n2": "أَرْبَعِينَ",
876
+ "j": "أَرْبَعِينَ",
877
+ "j2": "أَرْبَعِينَ",
878
+ "s": "*",
879
+ },
880
+ "خمسين": {
881
+ "i": "خَمْسِينَ",
882
+ "r": "خَمْسِينَ",
883
+ "r2": "خَمْسِينَ",
884
+ "n": "خَمْسِينَ",
885
+ "n2": "خَمْسِينَ",
886
+ "j": "خَمْسِينَ",
887
+ "j2": "خَمْسِينَ",
888
+ "s": "*",
889
+ },
890
+ "ستين": {
891
+ "i": "سِتِّينَ",
892
+ "r": "سِتِّينَ",
893
+ "r2": "سِتِّينَ",
894
+ "n": "سِتِّينَ",
895
+ "n2": "سِتِّينَ",
896
+ "j": "سِتِّينَ",
897
+ "j2": "سِتِّينَ",
898
+ "s": "*",
899
+ },
900
+ "سبعين": {
901
+ "i": "سَبْعِينَ",
902
+ "r": "سَبْعِينَ",
903
+ "r2": "سَبْعِينَ",
904
+ "n": "سَبْعِينَ",
905
+ "n2": "سَبْعِينَ",
906
+ "j": "سَبْعِينَ",
907
+ "j2": "سَبْعِينَ",
908
+ "s": "*",
909
+ },
910
+ "ثمانين": {
911
+ "i": "ثَمانِينَ",
912
+ "r": "ثَمانِينَ",
913
+ "r2": "ثَمانِينَ",
914
+ "n": "ثَمانِينَ",
915
+ "n2": "ثَمانِينَ",
916
+ "j": "ثَمانِينَ",
917
+ "j2": "ثَمانِينَ",
918
+ "s": "*",
919
+ },
920
+ "تسعين": {
921
+ "i": "تِسْعِينَ",
922
+ "r": "تِسْعِينَ",
923
+ "r2": "تِسْعِينَ",
924
+ "n": "تِسْعِينَ",
925
+ "n2": "تِسْعِينَ",
926
+ "j": "تِسْعِينَ",
927
+ "j2": "تِسْعِينَ",
928
+ "s": "*",
929
+ },
930
+ "مائة": {
931
+ "i": "مِائَة",
932
+ "r": "مائة",
933
+ "r2": "مِائَةٌ",
934
+ "n": "مِائَةَ",
935
+ "n2": "مِائَةً",
936
+ "j": "مِائَةِ",
937
+ "j2": "مِائَةٍ",
938
+ "s": "",
939
+ },
940
+ "مئتين": {
941
+ "i": "مئتين",
942
+ "r": "مئتين",
943
+ "r2": "",
944
+ "n": "",
945
+ "n2": "",
946
+ "j": "",
947
+ "j2": "",
948
+ "s": "*",
949
+ },
950
+ "آلاف": {
951
+ "i": "آلاَف",
952
+ "r": "آلاَفُ",
953
+ "r2": "آلاَفٌ",
954
+ "n": "آلاَفَ",
955
+ "n2": "",
956
+ "j": "آلاَفِ",
957
+ "j2": "آلاَفٍ",
958
+ "s": "",
959
+ },
960
+ "ملايين": {
961
+ "i": "مَلاَيِينُ",
962
+ "r": "مَلاَيِينُ",
963
+ "r2": "",
964
+ "n": "",
965
+ "n2": "",
966
+ "j": "",
967
+ "j2": "",
968
+ "s": "*",
969
+ },
970
+ "مليارات": {
971
+ "i": "مِلْيَارَات",
972
+ "r": "مِلْيَارَاتُ",
973
+ "r2": "مِلْيَارَاتٌ",
974
+ "n": "مِلْيَارَاتَ",
975
+ "n2": "مِلْيَارَاتً",
976
+ "j": "مِلْيَارَاتِ",
977
+ "j2": "مِلْيَارَاتٍ",
978
+ "s": "",
979
+ },
980
+ }
981
+
982
+ UNIT_WORDS = {
983
+ # i: invariant vocalization ثابت
984
+ # a: genitive, as second term of an idafa construct مضاف إليه
985
+ # n: mansoub (accusative) منصوب
986
+ # p: plural جمع
987
+ "أذرع": {
988
+ "i": "أّذْرُعٍ",
989
+ "a": "",
990
+ "n": "",
991
+ "p": "أّذْرُعٍ",
992
+ },
993
+ "أرطال": {
994
+ "i": "أَرْطَالٍ",
995
+ "a": "",
996
+ "n": "",
997
+ "p": "أَرْطَالٍ",
998
+ },
999
+ "أسابيع": {
1000
+ "i": "أَسَابِيعَ",
1001
+ "a": "",
1002
+ "n": "",
1003
+ "p": "أَسَابِيعَ",
1004
+ },
1005
+ "أسبوع": {
1006
+ "i": "أُسْبُوع",
1007
+ "a": "أُسْبُوعٍ",
1008
+ "n": "",
1009
+ "p": "أَسَابِيعَ",
1010
+ },
1011
+ "أسبوعا": {
1012
+ "i": "أُسْبُوعًا",
1013
+ "a": "",
1014
+ "n": "",
1015
+ "p": "",
1016
+ },
1017
+ "أشبار": {
1018
+ "i": "أَشْبَارٍ",
1019
+ "a": "",
1020
+ "n": "",
1021
+ "p": "أَشْبَارٍ",
1022
+ },
1023
+ "أشهر": {
1024
+ "i": "أَشْهُرٍ",
1025
+ "a": "",
1026
+ "n": "",
1027
+ "p": "أَشْهُرٍ",
1028
+ },
1029
+ "أعوام": {
1030
+ "i": "أَعْوَامٍ",
1031
+ "a": "",
1032
+ "n": "",
1033
+ "p": "أَعْوَامٍ",
1034
+ },
1035
+ "أميال": {
1036
+ "i": "أَمْيَالٍ",
1037
+ "a": "",
1038
+ "n": "",
1039
+ "p": "أَمْيَالٍ",
1040
+ },
1041
+ "أيام": {
1042
+ "i": "أَيَّامٍ",
1043
+ "a": "",
1044
+ "n": "",
1045
+ "p": "أَيَّامٍ",
1046
+ },
1047
+ "بوصات": {
1048
+ "i": "بُوصَاتٍ",
1049
+ "a": "",
1050
+ "n": "",
1051
+ "p": "بُوصَاتٍ",
1052
+ },
1053
+ "بوصة": {
1054
+ "i": "بُوصَة",
1055
+ "a": "بُوصَةٍ",
1056
+ "n": "بُوصَةً",
1057
+ "p": "بُوصَاتٍ",
1058
+ },
1059
+ "جنيه": {
1060
+ "i": "جُنَيْه",
1061
+ "a": "جُنَيْهٍ",
1062
+ "n": "",
1063
+ "p": "",
1064
+ },
1065
+ "جنيها": {
1066
+ "i": "جُنَيْهًا",
1067
+ "a": "",
1068
+ "n": "جُنَيْهًا",
1069
+ "p": "جُنَيْهَات",
1070
+ },
1071
+ "جنيهات": {
1072
+ "i": "جُنَيْهَات",
1073
+ "a": "",
1074
+ "n": "",
1075
+ "p": "جُنَيْهَات",
1076
+ },
1077
+ "دراهم": {
1078
+ "i": "دَرَاهِمَ",
1079
+ "a": "",
1080
+ "n": "",
1081
+ "p": "دَرَاهِمَ",
1082
+ },
1083
+ "درجات": {
1084
+ "i": "دَرَجَاتٍ",
1085
+ "a": "",
1086
+ "n": "",
1087
+ "p": "دَرَجَاتٍ",
1088
+ },
1089
+ "درجة": {
1090
+ "i": "دَرَجَة",
1091
+ "a": "دَرَجَةٍ",
1092
+ "n": "دَرَجَةً",
1093
+ "p": "دَرَجَاتٍ",
1094
+ },
1095
+ "درهم": {
1096
+ "i": "دِرْهَم",
1097
+ "a": "دِرْهَمٍ",
1098
+ "n": "",
1099
+ "p": "",
1100
+ },
1101
+ "درهما": {
1102
+ "i": "دِرْهَمًا",
1103
+ "a": "",
1104
+ "n": "دِرْهَمًا",
1105
+ "p": "دَرَاهِمَ",
1106
+ },
1107
+ "دنانير": {
1108
+ "i": "دَنَانِيرَ",
1109
+ "a": "",
1110
+ "n": "",
1111
+ "p": "دَنَانِيرَ",
1112
+ },
1113
+ "دولار": {
1114
+ "i": "دُولَار",
1115
+ "a": "دُولَارٍ",
1116
+ "n": "",
1117
+ "p": "",
1118
+ },
1119
+ "دولارا": {
1120
+ "i": "دُولَارًا",
1121
+ "a": "",
1122
+ "n": "دُولَارًا",
1123
+ "p": "دُولَارَاتٍ",
1124
+ },
1125
+ "دولارات": {
1126
+ "i": "دُولَارَاتٍ",
1127
+ "a": "",
1128
+ "n": "",
1129
+ "p": "دُولَارَاتٍ",
1130
+ },
1131
+ "دينار": {
1132
+ "i": "دِينَار",
1133
+ "a": "دِينَارٍ",
1134
+ "n": "",
1135
+ "p": "",
1136
+ },
1137
+ "دينارا": {
1138
+ "i": "دِينَارًا",
1139
+ "a": "",
1140
+ "n": "دِينَارًا",
1141
+ "p": "دَنَانِيرَ",
1142
+ },
1143
+ "ذراع": {
1144
+ "i": "ذِرَاع",
1145
+ "a": "ذِرَاعٍ",
1146
+ "n": "",
1147
+ "p": "",
1148
+ },
1149
+ "ذراعا": {
1150
+ "i": "ذِرَاعًا",
1151
+ "a": "",
1152
+ "n": "ذِرَاعًا",
1153
+ "p": "أّذْرُعٍ",
1154
+ },
1155
+ "رطل": {
1156
+ "i": "رِطْل",
1157
+ "a": "رِطْلٍ",
1158
+ "n": "",
1159
+ "p": "",
1160
+ },
1161
+ "رطلا": {
1162
+ "i": "رِطْلًا",
1163
+ "a": "",
1164
+ "n": "رِطْلًا",
1165
+ "p": "أَرْطَالٍ",
1166
+ },
1167
+ "ريال": {
1168
+ "i": "رِيَال",
1169
+ "a": "رِيَالٍ",
1170
+ "n": "",
1171
+ "p": "",
1172
+ },
1173
+ "ريالا": {
1174
+ "i": "رِيَالًا",
1175
+ "a": "",
1176
+ "n": "رِيَالًا",
1177
+ "p": "رِيَالَاتٍ",
1178
+ },
1179
+ "ريالات": {
1180
+ "i": "رِيَالَاتٍ",
1181
+ "a": "",
1182
+ "n": "",
1183
+ "p": "رِيَالَاتٍ",
1184
+ },
1185
+ "سنة": {
1186
+ "i": "سَنَة",
1187
+ "a": "سَنَةٍ",
1188
+ "n": "سَنَةً",
1189
+ "p": "سَنَوَاتٍ",
1190
+ },
1191
+ "سنتيم": {
1192
+ "i": "سَنْتِيم",
1193
+ "a": "سَنْتِيمٍ",
1194
+ "n": "",
1195
+ "p": "",
1196
+ },
1197
+ "سنتيما": {
1198
+ "i": "سَنْتِيمًا",
1199
+ "a": "",
1200
+ "n": "سَنْتِيمًا",
1201
+ "p": "سَنْتِيماتٍ",
1202
+ },
1203
+ "سنتيمات": {
1204
+ "i": "سَنْتِيماتٍ",
1205
+ "a": "",
1206
+ "n": "",
1207
+ "p": "سَنْتِيماتٍ",
1208
+ },
1209
+ "سنوات": {
1210
+ "i": "سَنَوَاتٍ",
1211
+ "a": "",
1212
+ "n": "",
1213
+ "p": "سَنَوَاتٍ",
1214
+ },
1215
+ "شبر": {
1216
+ "i": "شِبْر",
1217
+ "a": "شِبْرٍ",
1218
+ "n": "",
1219
+ "p": "",
1220
+ },
1221
+ "شبرا": {
1222
+ "i": "شِبْرًا",
1223
+ "a": "",
1224
+ "n": "شِبْرًا",
1225
+ "p": "أَشْبَارٍ",
1226
+ },
1227
+ "شهر": {
1228
+ "i": "شَهْر",
1229
+ "a": "شَهْرٍ",
1230
+ "n": "",
1231
+ "p": "",
1232
+ },
1233
+ "شهرا": {
1234
+ "i": "شَهْرًا",
1235
+ "a": "",
1236
+ "n": "شَهْرًا",
1237
+ "p": "أَشْهُرٍ",
1238
+ },
1239
+ "صفحات": {
1240
+ "i": "صَفْحَاتٍ",
1241
+ "a": "",
1242
+ "n": "",
1243
+ "p": "صَفْحَاتٍ",
1244
+ },
1245
+ "صفحة": {
1246
+ "i": "صَفْحَة",
1247
+ "a": "صَفْحَةٍ",
1248
+ "n": "صَفْحَةً",
1249
+ "p": "صَفْحَاتٍ",
1250
+ },
1251
+ "عام": {
1252
+ "i": "عَام",
1253
+ "a": "عَامٍ",
1254
+ "n": "",
1255
+ "p": "",
1256
+ },
1257
+ "عاما": {
1258
+ "i": "عَامًا",
1259
+ "a": "",
1260
+ "n": "عَامًا",
1261
+ "p": "أَعْوَامٍ",
1262
+ },
1263
+ "فراسخ": {
1264
+ "i": "فَرَاسِخَ",
1265
+ "a": "",
1266
+ "n": "",
1267
+ "p": "فَرَاسِخَ",
1268
+ },
1269
+ "فرسخ": {
1270
+ "i": "فَرْسَخ",
1271
+ "a": "فَرْسَخٍ",
1272
+ "n": "",
1273
+ "p": "",
1274
+ },
1275
+ "فرسخا": {
1276
+ "i": "فَرْسَخًا",
1277
+ "a": "",
1278
+ "n": "فَرْسَخًا",
1279
+ "p": "فَرَاسِخَ",
1280
+ },
1281
+ "فلس": {
1282
+ "i": "فِلْس",
1283
+ "a": "فِلْسٍ",
1284
+ "n": "",
1285
+ "p": "",
1286
+ },
1287
+ "فلسا": {
1288
+ "i": "فِلْسًا",
1289
+ "a": "",
1290
+ "n": "فِلْسًا",
1291
+ "p": "فُلُوسٍ",
1292
+ },
1293
+ "فلوس": {
1294
+ "i": "فُلُوسٍ",
1295
+ "a": "",
1296
+ "n": "",
1297
+ "p": "فُلُوسٍ",
1298
+ },
1299
+ "قرش": {
1300
+ "i": "قِرْش",
1301
+ "a": "قِرْشٍ",
1302
+ "n": "",
1303
+ "p": "",
1304
+ },
1305
+ "قرشا": {
1306
+ "i": "قِرْشًا",
1307
+ "a": "",
1308
+ "n": "قِرْشًا",
1309
+ "p": "قُرُوشٍ",
1310
+ },
1311
+ "قروش": {
1312
+ "i": "قُرُوشٍ",
1313
+ "a": "",
1314
+ "n": "",
1315
+ "p": "قُرُوشٍ",
1316
+ },
1317
+ "كيلوغرام": {
1318
+ "i": "كِيلُوغَرَام",
1319
+ "a": "كِيلُوغَرَامٍ",
1320
+ "n": "",
1321
+ "p": "",
1322
+ },
1323
+ "كيلوغراما": {
1324
+ "i": "كِيلُوغَرَامًا",
1325
+ "a": "",
1326
+ "n": "كِيلُوغَرَامًا",
1327
+ "p": "كِيلُوغَرَامَاتٍ",
1328
+ },
1329
+ "كيلوغرامات": {
1330
+ "i": "كِيلُوغَرَامَاتٍ",
1331
+ "a": "",
1332
+ "n": "",
1333
+ "p": "كِيلُوغَرَامَاتٍ",
1334
+ },
1335
+ "كيلومتر": {
1336
+ "i": "كِيلُومِتْر",
1337
+ "a": "كِيلُومِتْرٍ",
1338
+ "n": "",
1339
+ "p": "",
1340
+ },
1341
+ "كيلومترا": {
1342
+ "i": "كِيلُومِتْرًا",
1343
+ "a": "",
1344
+ "n": "كِيلُومِتْرًا",
1345
+ "p": "كِيلُومِتْرَاتٍ",
1346
+ },
1347
+ "كيلومترات": {
1348
+ "i": "كِيلُومِتْرَاتٍ",
1349
+ "a": "",
1350
+ "n": "",
1351
+ "p": "كِيلُومِتْرَاتٍ",
1352
+ },
1353
+ "لتر": {
1354
+ "i": "لِتْر",
1355
+ "a": "لِتْرٍ",
1356
+ "n": "",
1357
+ "p": "",
1358
+ },
1359
+ "لترا": {
1360
+ "i": "لِتْرًا",
1361
+ "a": "",
1362
+ "n": "لِتْرًا",
1363
+ "p": "لِتْرَاتٍ",
1364
+ },
1365
+ "لترات": {
1366
+ "i": "لِتْرَاتٍ",
1367
+ "a": "",
1368
+ "n": "",
1369
+ "p": "لِتْرَاتٍ",
1370
+ },
1371
+ "ليال": {
1372
+ "i": "لَيَالٍ",
1373
+ "a": "",
1374
+ "n": "",
1375
+ "p": "لَيَالٍ",
1376
+ },
1377
+ "ليرات": {
1378
+ "i": "لِيرَاتٍ",
1379
+ "a": "",
1380
+ "n": "",
1381
+ "p": "لِيرَاتٍ",
1382
+ },
1383
+ "ليرة": {
1384
+ "i": "لِيرَة",
1385
+ "a": "لِيرَةٍ",
1386
+ "n": "لِيرَةً",
1387
+ "p": "لِيرَاتٍ",
1388
+ },
1389
+ "ليلة": {
1390
+ "i": "لَيْلَة",
1391
+ "a": "لَيْلَةٍ",
1392
+ "n": "لَيْلَةً",
1393
+ "p": "لَيَالٍ",
1394
+ },
1395
+ "ميل": {
1396
+ "i": "مِيل",
1397
+ "a": "مِيلٍ",
1398
+ "n": "",
1399
+ "p": "",
1400
+ },
1401
+ "ميلا": {
1402
+ "i": "مِيلًا",
1403
+ "a": "",
1404
+ "n": "مِيلًا",
1405
+ "p": "أَمْيَالٍ",
1406
+ },
1407
+ "نقاط": {
1408
+ "i": "نِقَاطٍ",
1409
+ "a": "",
1410
+ "n": "",
1411
+ "p": "نِقَاطٍ",
1412
+ },
1413
+ "نقطة": {
1414
+ "i": "نُقْطَة",
1415
+ "a": "نُقْطَةٍ",
1416
+ "n": "نُقْطَةً",
1417
+ "p": "نِقَاطٍ",
1418
+ },
1419
+ "هللات": {
1420
+ "i": "هَلَلَاتٍ",
1421
+ "a": "",
1422
+ "n": "",
1423
+ "p": "هَلَلَاتٍ",
1424
+ },
1425
+ "هللة": {
1426
+ "i": "هَلَلَة",
1427
+ "a": "هَلَلَةٍ",
1428
+ "n": "هَلَلَةً",
1429
+ "p": "هَلَلَاتٍ",
1430
+ },
1431
+ "يورو": {
1432
+ "i": "يُورُو",
1433
+ "a": "يُورُو",
1434
+ "n": "يُورُو",
1435
+ "p": "يُورُو",
1436
+ },
1437
+ # ~ u'يورو': {'i':u'يُورُو', 'a':u'', 'n':u'', 'p':u'يُورُو', },
1438
+ "يوم": {
1439
+ "i": "يَوْم",
1440
+ "a": "يَوْمٍ",
1441
+ "n": "",
1442
+ "p": "",
1443
+ },
1444
+ "يوما": {
1445
+ "i": "يَوْمًا",
1446
+ "a": "",
1447
+ "n": "يَوْمًا",
1448
+ "p": "أَيَّامٍ",
1449
+ },
1450
+ }
1451
+
1452
# Spelled-out forms of the individual numbers, keyed by numeric value.
# Most entries map the sub-keys 1 and 2 to two alternative spellings; the
# entries for 2 and 12 carry one more level of 1/2 sub-keys; the hundreds other
# than 200 are plain strings; 1000 and 2000 are left as empty placeholders.
INDIVIDUALS = {
    0: {1: "", 2: ""},
    1: {1: "واحد", 2: "واحدة"},
    2: {
        1: {1: "إثنان", 2: "إثنين"},
        2: {1: "إثنتان", 2: "إثنتين"},
    },
    3: {1: "ثلاث", 2: "ثلاثة"},
    4: {1: "أربع", 2: "أربعة"},
    5: {1: "خمس", 2: "خمسة"},
    6: {1: "ست", 2: "ستة"},
    7: {1: "سبع", 2: "سبعة"},
    8: {1: "ثماني", 2: "ثمانية"},
    9: {1: "تسع", 2: "تسعة"},
    10: {1: "عشر", 2: "عشرة"},
    11: {1: "أحد عشر", 2: "إحدى عشرة"},
    12: {
        1: {1: "إثنا عشر", 2: "إثني عشر"},
        2: {1: "إثنتا عشرة", 2: "إثنتي عشرة"},
    },
    13: {1: "ثلاث عشرة", 2: "ثلاثة عشر"},
    14: {1: "أربع عشرة", 2: "أربعة عشر"},
    15: {1: "خمس عشرة", 2: "خمسة عشر"},
    16: {1: "ست عشرة", 2: "ستة عشر"},
    17: {1: "سبع عشرة", 2: "سبعة عشر"},
    18: {1: "ثماني عشرة", 2: "ثمانية عشر"},
    19: {1: "تسع عشرة", 2: "تسعة عشر"},
    20: {1: "عشرون", 2: "عشرين"},
    30: {1: "ثلاثون", 2: "ثلاثين"},
    40: {1: "أربعون", 2: "أربعين"},
    50: {1: "خمسون", 2: "خمسين"},
    60: {1: "ستون", 2: "ستين"},
    70: {1: "سبعون", 2: "سبعين"},
    80: {1: "ثمانون", 2: "ثمانين"},
    90: {1: "تسعون", 2: "تسعين"},
    100: "مئة",
    200: {1: "مئتان", 2: "مئتين"},
    300: "ثلاثمئة",
    400: "أربعمئة",
    500: "خمسمئة",
    600: "ستمئة",
    700: "سبعمئة",
    800: "ثمانمئة",
    900: "تسعمئة",
    1000: {},
    2000: {},
}
1574
# Scale words: outer key 1 holds the forms built on "ألف", 2 those built on
# "مليون" and 3 those built on "مليار"; each inner dict maps the sub-keys
# 1 to 4 to one spelling of that scale word.
COMPLICATIONS = {
    1: {1: "ألفان", 2: "ألفين", 3: "آلاف", 4: "ألف"},
    2: {1: "مليونان", 2: "مليونين", 3: "ملايين", 4: "مليون"},
    3: {1: "ملياران", 2: "مليارين", 3: "مليارات", 4: "مليار"},
}
1589
+
1590
# Maps a unit number word (several spelling variants per number) to the
# corresponding ordinal word.
UNITS_ORDINAL_WORDS = {
    "ثمان": "ثامن",
    "ست": "سادس",
    "إثني": "ثاني",
    "ثلاث": "ثالث",
    "اثنين": "ثاني",
    "اثني": "ثاني",
    "واحد": "حادي",
    "أربع": "رابع",
    "أحد": "حادي",
    "أربعة": "رابع",
    "سبع": "سابع",
    "ثماني": "ثامن",
    "خمس": "خامس",
    "واحدة": "حادي",
    "ثمانية": "ثامن",
    "ستة": "سادس",
    "تسع": "تاسع",
    "تسعة": "تاسع",
    "سبعة": "سابع",
    "ثلاثة": "ثالث",
    "اثنان": "ثاني",
    "عشرة": "عاشر",
    "خمسة": "خامس",
    "عشر": "عاشر",
    "صفر": "صفر",
    "إحدى": "حادي",
    "اثنا": "ثاني",
    "إثنا": "ثاني",
    "إثنان": "ثاني",
}

# Same keys, in the same order, as UNITS_ORDINAL_WORDS; every value is the
# ordinal above with a trailing teh marbuta ("ة") appended.
UNITS_ORDINAL_WORDS_FEMININ = {
    cardinal: ordinal + "ة" for cardinal, ordinal in UNITS_ORDINAL_WORDS.items()
}
1652
+ # ~ def __build_normalizer():
1653
+ # ~ normalizer = [araby.normalize_ligature,
1654
+ # ~ araby.normalize_alef,
1655
+ # ~ araby.normalize_teh,
1656
+ # ~ araby.strip_tashkeel,
1657
+ # ~ araby.strip_tatweel,
1658
+ # ~ ]
1659
+ # ~ return util.Composer(normalizer)
1660
+
1661
+
1662
def __normalize_composite_dict(le_dict, normalizer):
    """
    Return a copy of *le_dict* with *normalizer* applied to every leaf value.

    Values that are themselves dicts are processed recursively; any other
    value is replaced by ``normalizer(value)``. Keys are kept unchanged and
    the input dict is not modified.
    """
    return {
        key: (
            __normalize_composite_dict(value, normalizer)
            if isinstance(value, dict)
            else normalizer(value)
        )
        for key, value in le_dict.items()
    }
1670
+
1671
+
1672
def normalize_constants():
    """
    Build the search-text normalizer and apply it to the module constants.

    The following module-level names are rebound:

    * ``UNIT_WORDS``, ``VOCALIZED_NUMBER_WORDS`` and ``NUMBER_WORDS``: keys are
      normalized, values are kept as they are;
    * ``NUMBER_TEN_MASCULIN_UNITS`` and ``NUMBER_TEN_FEMININ_UNITS``: replaced
      by tuples of their normalized items;
    * ``COMPLICATIONS`` and ``INDIVIDUALS``: every leaf value is normalized,
      keys are kept as they are.

    Nothing is returned; the result is visible through the rebound globals.
    """
    global COMPLICATIONS, INDIVIDUALS, UNIT_WORDS, VOCALIZED_NUMBER_WORDS
    global NUMBER_TEN_FEMININ_UNITS, NUMBER_TEN_MASCULIN_UNITS, NUMBER_WORDS

    # NOTE(review): normalize_searchtext is defined outside this block; it is
    # called without arguments and its result is used as a one-argument
    # callable below -- confirm that this matches its definition.
    normalizer = normalize_searchtext()

    def normalize_keys(mapping):
        """Return a copy of *mapping* with every key run through the normalizer."""
        return {normalizer(key): value for key, value in mapping.items()}

    # Lookup tables: only the keys are normalized.
    UNIT_WORDS = normalize_keys(UNIT_WORDS)
    VOCALIZED_NUMBER_WORDS = normalize_keys(VOCALIZED_NUMBER_WORDS)
    NUMBER_WORDS = normalize_keys(NUMBER_WORDS)
    NUMBER_TEN_MASCULIN_UNITS = tuple(map(normalizer, NUMBER_TEN_MASCULIN_UNITS))
    NUMBER_TEN_FEMININ_UNITS = tuple(map(normalizer, NUMBER_TEN_FEMININ_UNITS))

    # Nested tables: the leaf values are normalized. COMPLICATIONS used to be
    # rebuilt with a comprehension that only copied its values, so it was the
    # one constant the normalizer never touched.
    COMPLICATIONS = __normalize_composite_dict(COMPLICATIONS, normalizer)
    INDIVIDUALS = __normalize_composite_dict(INDIVIDUALS, normalizer)
1699
+
1700
+
1701
if __name__ == "__main__":
    # Manual check: dump INDIVIDUALS, normalize the constants in place, then
    # dump it again so the effect of the normalizer can be compared by eye.
    print(INDIVIDUALS)
    normalize_constants()
    print(INDIVIDUALS)