@sarmay/kaz-converter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1480 @@
1
+ // src/disambiguation.ts
2
/**
 * Pass-through disambiguator: does no scoring of its own and simply keeps the
 * already-converted form carried by each token.
 */
var NoopDisambiguator = class {
  /**
   * @param {Array<Array>} rawTokens - Token tuples; only the element at index 1 is read.
   * @returns {Array} The index-1 element of every tuple, in input order.
   */
  disambiguate(rawTokens) {
    return rawTokens.map((token) => {
      const [, converted] = token;
      return converted;
    });
  }
};
7
+
8
+ // src/arb2syr.ts
9
/**
 * One-to-one consonant table: Arabic-script letter -> Cyrillic letter.
 * The trailing comment on each row shows the two characters the escapes encode.
 */
var CONSONANTS = {
  "\u0628": "\u0431", // ب -> б
  "\u06C6": "\u0432", // ۆ -> в
  "\u06AF": "\u0433", // گ -> г
  "\u0639": "\u0493", // ع -> ғ
  "\u062F": "\u0434", // د -> д
  "\u062C": "\u0436", // ج -> ж
  "\u0632": "\u0437", // ز -> з
  "\u0643": "\u043A", // ك -> к
  "\u0642": "\u049B", // ق -> қ
  "\u0644": "\u043B", // ل -> л
  "\u0645": "\u043C", // م -> м
  "\u0646": "\u043D", // ن -> н
  "\u06AD": "\u04A3", // ڭ -> ң
  "\u067E": "\u043F", // پ -> п
  "\u0631": "\u0440", // ر -> р
  "\u0633": "\u0441", // س -> с
  "\u062A": "\u0442", // ت -> т
  "\u0641": "\u0444", // ف -> ф
  "\u062D": "\u0445", // ح -> х
  "\u06BE": "\u04BB", // ھ -> һ
  "\u0686": "\u0447", // چ -> ч
  "\u0634": "\u0448"  // ش -> ш
};
33
/**
 * Vowel table: Arabic-script vowel letter -> Cyrillic output.
 *
 * Four letters are ambiguous and map to a `{ b, f }` pair; the other two map
 * straight to a single Cyrillic letter. Every `f` value appears in the
 * converter's `frontVowelsCyr` set and every `b` value in `backVowelsCyr`.
 * NOTE(review): the code that picks between `b` and `f` is outside this chunk.
 */
var VOWEL_MAP = {
  "\u0627": { b: "\u0430", f: "\u04D9" }, // ا -> а / ә
  "\u0649": { b: "\u044B", f: "\u0456" }, // ى -> ы / і
  "\u0648": { b: "\u043E", f: "\u04E9" }, // و -> о / ө
  "\u06C7": { b: "\u04B1", f: "\u04AF" }, // ۇ -> ұ / ү
  "\u06D5": "\u0435",                     // ە -> е
  "\u06CB": "\u0443"                      // ۋ -> у
};
41
/**
 * Fixed-output overrides: Arabic-script spelling -> exact Cyrillic result.
 *
 * `extractRootAndSuffix` looks a leading slice of the word up here (own keys
 * only, via `hasKey`) before trying any other table, so an entry wins over
 * the rule-based path. Some keys contain spaces or hyphens, and several
 * spelling variants map to the same Cyrillic word.
 */
var EXCEPTIONS = {
  "\u0631\u06D5\u0633\u067E\u06CB\u0628\u0644\u064A\u0643\u0627": "\u0440\u0435\u0441\u043F\u0443\u0431\u043B\u0438\u043A\u0430",
  "\u0643\u0648\u0645\u0645\u06CB\u0646\u064A\u0633\u062A\u0649\u0643": "\u043A\u043E\u043C\u043C\u0443\u043D\u0438\u0441\u0442\u0456\u043A",
  "\u06D5\u0643\u0631\u0627\u0646": "\u044D\u043A\u0440\u0430\u043D",
  "\u06D5\u0646\u06D5\u0631\u06AF\u06D5\u062A\u064A\u0643\u0627": "\u044D\u043D\u0435\u0440\u0433\u0435\u0442\u0438\u043A\u0430",
  "\u06D5\u0646\u06D5\u0631\u06AF\u064A\u064A\u0627": "\u044D\u043D\u0435\u0440\u0433\u0438\u044F",
  "\u0643\u0648\u0645\u064A\u062A\u06D5\u062A": "\u043A\u043E\u043C\u0438\u0442\u0435\u0442",
  "\u0643\u0648\u0646\u062A\u0633\u06D5\u0631\u062A": "\u043A\u043E\u043D\u0446\u0435\u0440\u0442",
  "\u0643\u0648\u0633\u0645\u0648\u0633": "\u043A\u043E\u0441\u043C\u043E\u0441",
  "\u0643\u0648\u0644\u0644\u06D5\u0643\u062A\u0649\u064A\u06C6": "\u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432",
  "\u0643\u0648\u0644\u0644\u06D5\u06AF\u0627": "\u043A\u043E\u043B\u043B\u0435\u0433\u0430",
  "\u0643\u0648\u0631\u067E\u06CB\u0633": "\u043A\u043E\u0440\u043F\u0443\u0441",
  "\u0643\u0648\u0646\u0633\u062A\u0649\u064A\u062A\u06CB\u062A\u0633\u064A\u064A\u0627": "\u043A\u043E\u043D\u0441\u0442\u0438\u0442\u0443\u0446\u0438\u044F",
  "\u0643\u0648\u0645\u0645\u06CB\u0646\u064A\u0633\u062A": "\u043A\u043E\u043C\u043C\u0443\u043D\u0438\u0441\u0442",
  "\u0631\u0627\u064A\u0648\u0646": "\u0440\u0430\u0439\u043E\u043D",
  "\u0631\u0627\u062F\u064A\u0648": "\u0440\u0430\u0434\u0438\u043E",
  "\u0643\u0627\u0631\u062A\u0627": "\u043A\u0430\u0440\u0442\u0430",
  "\u0627\u0631\u062D\u0649\u064A\u062A\u06D5\u0643\u062A\u06CB\u0631\u0627": "\u0430\u0440\u0445\u0438\u0442\u0435\u043A\u0442\u0443\u0440\u0430",
  "\u0627\u06C6\u062A\u0648\u0646\u0648\u0645\u064A\u0627": "\u0430\u0432\u0442\u043E\u043D\u043E\u043C\u0438\u044F",
  "\u06C6\u0649\u064A\u062F\u064A\u0648": "\u0432\u0438\u0434\u0435\u043E",
  "\u06C6\u0649\u064A\u062F\u06D5\u0648": "\u0432\u0438\u0434\u0435\u043E",
  "\u0628\u0649\u064A\u0648\u0644\u0648\u06AF\u064A\u0627": "\u0431\u0438\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u0643\u0648\u06D5\u0641\u0641\u064A\u062A\u0633\u0649\u064A\u06D5\u0646\u062A": "\u043A\u043E\u044D\u0444\u0444\u0438\u0446\u0438\u0435\u043D\u0442",
  "\u0643\u0648\u06D5\u0641\u0641\u064A\u062A\u0633\u064A\u06D5\u0646\u062A": "\u043A\u043E\u044D\u0444\u0444\u0438\u0446\u0438\u0435\u043D\u0442",
  "\u067E\u0631\u0648\u062A\u0633\u06D5\u0633": "\u043F\u0440\u043E\u0446\u0435\u0441\u0441",
  "\u062A\u06D5\u0644\u06D5\u06C6\u0649\u064A\u0632\u0648\u0631": "\u0442\u0435\u043B\u0435\u0432\u0438\u0437\u043E\u0440",
  "\u062A\u06D5\u0644\u06D5\u06C6\u064A\u0632\u0648\u0631": "\u0442\u0435\u043B\u0435\u0432\u0438\u0437\u043E\u0440",
  "\u0674\u0627\u0631\u062F\u0627\u064A\u0649\u0645": "\u04D9\u0440\u0434\u0430\u0439\u044B\u043C",
  "\u067E\u0627\u0631\u062A\u064A\u0627": "\u043F\u0430\u0440\u0442\u0438\u044F",
  "\u0643\u0648\u0645\u067E\u064A\u06CB\u062A\u06D5\u0631": "\u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440",
  "\u062A\u06D5\u0644\u06D5\u0641\u0648\u0646": "\u0442\u0435\u043B\u0435\u0444\u043E\u043D",
  "\u064A\u0646\u062A\u06D5\u0631\u0646\u06D5\u062A": "\u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442",
  "\u062F\u06D5\u0645\u0648\u0643\u0631\u0627\u062A\u064A\u064A\u0627": "\u0434\u0435\u043C\u043E\u043A\u0440\u0430\u0442\u0438\u044F",
  "\u062F\u06D5\u0645\u0648\u0643\u0631\u0627\u062A\u064A\u0627": "\u0434\u0435\u043C\u043E\u043A\u0440\u0430\u0442\u0438\u044F",
  "\u06D5\u0643\u0648\u0646\u0648\u0645\u064A\u0643\u0627": "\u044D\u043A\u043E\u043D\u043E\u043C\u0438\u043A\u0430",
  "\u067E\u0648\u0644\u064A\u062A\u064A\u0643\u0627": "\u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0430",
  "\u0643\u0648\u0631\u0649\u062F\u0648\u0631": "\u043A\u043E\u0440\u0438\u0434\u043E\u0440",
  "\u0643\u0648\u0646\u06AF\u0631\u06D5\u0633": "\u043A\u043E\u043D\u0433\u0440\u0435\u0441\u0441",
  "\u06D5\u0644\u06D5\u0645\u06D5\u0646\u062A": "\u044D\u043B\u0435\u043C\u0435\u043D\u0442",
  "\u062A\u06D5\u062D\u0646\u0648\u0644\u0648\u06AF\u0649\u064A\u064A\u0627": "\u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u062A\u06D5\u062D\u0646\u0648\u0644\u0648\u06AF\u0649\u064A\u0649\u0627": "\u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u062A\u06D5\u062D\u0646\u0648\u0644\u0648\u06AF\u0649\u064A\u0627": "\u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u0628\u0649\u064A\u0648\u0644\u0648\u06AF\u0649\u064A\u064A\u0627": "\u0431\u0438\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u0628\u064A\u0648\u0644\u0648\u06AF\u064A\u0627": "\u0431\u0438\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u0649\u064A\u0646\u0633\u062A\u0631\u06CB\u0645\u06D5\u0646\u062A": "\u0438\u043D\u0441\u0442\u0440\u0443\u043C\u0435\u043D\u0442",
  "\u064A\u0646\u0633\u062A\u0631\u06CB\u0645\u06D5\u0646\u062A": "\u0438\u043D\u0441\u0442\u0440\u0443\u043C\u0435\u043D\u0442",
  "\u067E\u0648\u062F\u06D5\u0632\u062F": "\u043F\u043E\u0434\u044A\u0435\u0437\u0434",
  "\u0643\u0648\u0646\u0633\u062A\u064A\u062A\u06CB\u062A\u0633\u0649\u064A\u064A\u0627": "\u043A\u043E\u043D\u0441\u0442\u0438\u0442\u0443\u0446\u0438\u044F",
  "\u0633\u062A\u0627\u0646\u062A\u0633\u0649\u064A\u064A\u0627": "\u0441\u0442\u0430\u043D\u0446\u0438\u044F",
  "\u0628\u0649\u0631": "\u0431\u0456\u0440",
  "\u06C7\u0634": "\u04AF\u0448",
  "\u062A\u0648\u0631\u062A": "\u0442\u04E9\u0440\u0442",
  "\u0649\u0633": "\u0456\u0441",
  "\u062F\u064A\u0633\u0633\u06D5\u0631\u062A\u0627\u062A\u0633\u0649\u064A\u064A\u0627": "\u0434\u0438\u0441\u0441\u0435\u0440\u0442\u0430\u0446\u0438\u044F",
  "\u062F\u064A\u0633\u0633\u06D5\u0631\u062A\u0627\u062A\u0633\u064A\u0627": "\u0434\u0438\u0441\u0441\u0435\u0440\u0442\u0430\u0446\u0438\u044F",
  "\u064A\u0646\u062A\u06D5\u06AF\u0631\u0627\u062A\u0633\u0649\u064A\u064A\u0627": "\u0438\u043D\u0442\u0435\u0433\u0440\u0430\u0446\u0438\u044F",
  "\u064A\u0646\u062A\u06D5\u06AF\u0631\u0627\u062A\u0633\u064A\u0627": "\u0438\u043D\u0442\u0435\u0433\u0440\u0430\u0446\u0438\u044F",
  "\u062A\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062A\u0633\u064A\u0627": "\u0442\u0440\u0430\u043D\u0441\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u044F",
  "\u0645\u0627\u062C\u0649\u0644\u0649\u0633": "\u043C\u04D9\u0436\u0456\u043B\u0456\u0441",
  "\u0674\u062A\u0648\u0631\u0627\u0639\u0627": "\u0442\u04E9\u0440\u0430\u0493\u0430",
  "\u0627\u0643\u0627\u062F\u06D5\u0645\u0649\u064A\u064A\u0627": "\u0430\u043A\u0430\u0434\u0435\u043C\u0438\u044F",
  "\u0634\u0649\u0645\u0643\u06D5\u0646\u062A": "\u0428\u044B\u043C\u043A\u0435\u043D\u0442",
  "\u0627\u0644\u0645\u0627\u062A\u0649": "\u0410\u043B\u043C\u0430\u0442\u044B",
  "\u0627\u0633\u062A\u0627\u0646\u0627": "\u0410\u0441\u0442\u0430\u043D\u0430",
  "\u0642\u0627\u0632\u0627\u0642\u0633\u062A\u0627\u0646": "\u049A\u0430\u0437\u0430\u049B\u0441\u0442\u0430\u043D",
  "\u062C\u06C7\u06AD\u06AF\u0648": "\u0416\u04B1\u04A3\u0433\u043E",
  "\u0634\u064A": "\u0421\u0438",
  "\u062C\u064A\u0646\u067E\u064A\u06AD": "\u0426\u0437\u0438\u043D\u044C\u043F\u0438\u043D",
  "\u0643\u0649\u062A\u0627\u067E": "\u043A\u0456\u0442\u0430\u043F",
  "\u0631\u0627\u062D\u0645\u06D5\u062A": "\u0440\u0430\u0445\u043C\u0435\u0442",
  "\u0627\u06CB\u0649\u0644": "\u0430\u0443\u044B\u043B",
  "\u06AF\u0628": "\u0413\u0411",
  "\u067E\u0631\u0648\u06AF\u0631\u06D5\u0633": "\u043F\u0440\u043E\u0433\u0440\u0435\u0441\u0441",
  "\u0674\u0648\u0632\u0627\u0631\u0627": "\u04E9\u0437\u0430\u0440\u0430",
  "\u062C\u0627\u06CB\u0627\u067E\u0643\u06D5\u0631\u0634\u0649\u0644\u0649\u0643": "\u0436\u0430\u0443\u0430\u043F\u043A\u0435\u0440\u0448\u0456\u043B\u0456\u043A",
  "\u06D5\u0644\u06D5\u0643\u062A\u0631\u0644\u06D5\u0646\u062F\u0649\u0631\u06CB": "\u044D\u043B\u0435\u043A\u0442\u0440\u043B\u0435\u043D\u0434\u0456\u0440\u0443",
  "\u0674\u0649\u0632\u0628\u0627\u0633\u0627\u0631": "\u0456\u0437\u0431\u0430\u0441\u0430\u0440",
  "\u0674\u0627\u062F\u0649\u0633-\u062A\u0627\u0633\u0649\u0644": "\u04D9\u0434\u0456\u0441-\u0442\u04D9\u0441\u0456\u043B",
  "\u0674\u062C\u0648\u0646-\u062C\u0648\u0633\u0649\u0642\u0633\u0649\u0632": "\u0436\u04E9\u043D-\u0436\u043E\u0441\u044B\u049B\u0441\u044B\u0437",
  "\u0643\u0648\u0646\u0633\u06D5\u067E\u062A\u0633\u0649\u064A\u064A\u0627": "\u043A\u043E\u043D\u0446\u0435\u043F\u0446\u0438\u044F",
  "\u0643\u0648\u0646\u0633\u06D5\u067E\u062A\u0633\u064A\u064A\u0627": "\u043A\u043E\u043D\u0446\u0435\u043F\u0446\u0438\u044F",
  "\u0643\u0648\u0646\u0633\u06D5\u067E\u062A\u0633\u064A\u0627": "\u043A\u043E\u043D\u0446\u0435\u043F\u0446\u0438\u044F",
  "\u0633\u0649\u064A\u0641\u0631\u0644\u0649\u0642": "\u0446\u0438\u0444\u0440\u043B\u044B\u049B",
  "\u06D5\u06C6\u0648\u0644\u06CB\u062A\u0633\u0649\u064A\u064A\u0627": "\u044D\u0432\u043E\u043B\u044E\u0446\u0438\u044F",
  "\u06D5\u06C6\u0648\u0644\u064A\u06CB\u062A\u0633\u064A\u0627": "\u044D\u0432\u043E\u043B\u044E\u0446\u0438\u044F",
  "\u0645\u06D5\u0674\u062A\u0649\u0644\u0643\u06D5\u0634\u06D5": "\u043C\u04D9\u0442\u0456\u043B\u043A\u0435\u0448\u0435",
  "\u062F\u0649\u064A\u0627\u06AF\u0646\u0648\u0632": "\u0434\u0438\u0430\u0433\u043D\u043E\u0437",
  "\u0628\u06CB\u062F\u062C\u06D5\u062A": "\u0431\u044E\u0434\u0436\u0435\u0442",
  "\u0641\u0649\u064A\u0644\u0645": "\u0444\u0438\u043B\u044C\u043C",
  "\u0627\u0633\u0641\u0627\u0644\u062A": "\u0430\u0441\u0444\u0430\u043B\u044C\u0442",
  "\u06D5\u06CB\u0631\u0648\u067E\u0627": "\u0415\u0443\u0440\u043E\u043F\u0430",
  "\u062A\u0648\u0643\u0649\u064A\u0648": "\u0422\u043E\u043A\u0438\u043E",
  "\u0646\u0649\u064A\u06CB-\u064A\u0648\u0631\u0643": "\u041D\u044C\u044E-\u0419\u043E\u0440\u043A",
  "\u0628\u0627\u0646\u0643": "\u0431\u0430\u043D\u043A",
  "\u0642\u0649\u064A\u0627\u0631": "\u049B\u0438\u044F\u0440",
  "\u0628\u0649\u0631\u0627\u0642": "\u0431\u0456\u0440\u0430\u049B",
  "\u062A\u0648\u0645\u06D5\u0646": "\u0442\u04E9\u043C\u0435\u043D",
  "\u0645\u06D5\u064A\u0631\u0627\u0645": "\u043C\u0435\u0439\u0440\u0430\u043C",
  "\u0645\u06D5\u064A\u0631\u0627\u0645\u062F\u0627\u0631\u0649\u0646\u0649\u06AD": "\u043C\u0435\u0439\u0440\u0430\u043C\u0434\u0430\u0440\u044B\u043D\u044B\u04A3",
  "\u062F\u0627\u0633\u062A\u06C7\u0631": "\u0434\u04D9\u0441\u0442\u04AF\u0440",
  "\u062F\u0627\u0633\u062A\u06C7\u0631\u0644\u06D5\u0631": "\u0434\u04D9\u0441\u0442\u04AF\u0440\u043B\u0435\u0440",
  "\u062F\u06D5\u0646\u0633\u0627\u06CB\u0644\u0649\u0642": "\u0434\u0435\u043D\u0441\u0430\u0443\u043B\u044B\u049B",
  "\u062A\u0627\u0631\u064A\u062D": "\u0442\u0430\u0440\u0438\u0445",
  "\u06AF\u0631\u0627\u0645\u0645\u0627\u062A\u0649\u0643\u0627": "\u0433\u0440\u0430\u043C\u043C\u0430\u0442\u0438\u043A\u0430",
  "\u06AF\u0631\u0627\u0645\u0645\u0627\u062A\u0649\u0643\u0627\u0644\u0649\u0642": "\u0433\u0440\u0430\u043C\u043C\u0430\u0442\u0438\u043A\u0430\u043B\u044B\u049B",
  "\u062C \u0643 \u067E": "\u0416\u041A\u041F",
  "\u0627\u0633\u0643\u06D5\u0631\u064A": "\u04D9\u0441\u043A\u0435\u0440\u0438",
  "\u0628\u0627\u062A\u0649\u0644": "\u0431\u0430\u0442\u044B\u043B",
  "\u0644\u064A \u0686\u064A\u0627\u06AD": "\u041B\u0438 \u0427\u044F\u04A3",
  "\u062C\u0627\u06CB \u0644\u0649\u062C\u064A": "\u0416\u0430\u0443 \u041B\u044B\u0436\u0438",
  "\u06CB\u0627\u06AD \u062D\u06CB\u0646\u064A\u06AD": "\u0423\u0430\u04A3 \u0425\u0443\u043D\u0438\u04A3",
  "\u0633\u0627\u064A \u0686\u064A": "\u0421\u0430\u0439 \u0427\u0438",
  "\u062F\u064A\u06AD \u0634\u06CB\u06D5\u0634\u064A\u0627\u06AD": "\u0414\u0438\u04A3 \u0428\u0443\u0435\u0448\u044F\u04A3",
  "\u0644\u064A \u0634\u064A": "\u041B\u0438 \u0421\u0438",
  "\u0633\u06CB\u0628\u062A\u0649\u064A\u062A\u0631": "\u0441\u0443\u0431\u0442\u0438\u0442\u0440",
  "\u0633\u06CB\u0628\u062A\u0649\u064A\u062A\u0649\u0631\u0644\u06D5\u0631\u062F\u0649": "\u0441\u0443\u0431\u0442\u0438\u0442\u0440\u043B\u0435\u0440\u0434\u0456"
};
158
/**
 * Arabic-script spellings recognised as loanwords on an exact match.
 * `isLoanword` tests the whole word against this set, and
 * `extractRootAndSuffix` tests leading slices of the word against it.
 * Several entries are spelling variants of one another.
 */
var LOANWORD_EXACT = /* @__PURE__ */ new Set([
  "\u06CB\u0646\u0649\u064A\u06C6\u06D5\u0631\u0633\u0649\u064A\u062A\u06D5\u062A",
  "\u06CB\u0646\u0649\u064A\u06C6\u06D5\u0631\u0633\u0649\u062A\u06D5\u062A",
  "\u06CB\u0646\u0649\u06C6\u06D5\u0631\u0633\u0649\u062A\u06D5\u062A",
  "\u06CB\u0646\u0649\u06C6\u06D5\u0631\u0633\u0649\u064A\u062A\u06D5\u062A",

  "\u0643\u0648\u0646\u0633\u062A\u0649\u064A\u062A\u06CB\u062A\u0633\u0649\u064A\u0649\u0627",
  "\u0643\u0648\u0646\u0633\u062A\u0649\u064A\u062A\u06CB\u062A\u0633\u0649\u064A\u064A\u0627",
  "\u0643\u0648\u0646\u0633\u062A\u0649\u064A\u062A\u06CB\u062A\u0633\u0649\u064A\u0627",

  "\u0633\u062A\u0627\u0646\u062A\u0633\u0649\u064A\u0649\u0627",
  "\u0633\u062A\u0627\u0646\u062A\u0633\u0649\u064A\u0627",

  "\u064A\u0646\u0641\u0648\u0631\u0645\u0627\u062A\u0633\u0649\u064A\u064A\u0627",
  "\u064A\u0646\u0641\u0648\u0631\u0645\u0627\u062A\u0633\u0649\u064A\u0627",

  "\u0645\u0627\u0634\u064A\u0646\u0627",
  "\u0645\u0627\u0634\u0649\u0646\u0627",

  "\u0627\u062A\u0648\u0645",

  "\u0686\u06D5\u0645\u067E\u0649\u064A\u0648\u0646",
  "\u0686\u06D5\u0645\u067E\u0649\u064A\u06C7\u0646",

  "\u0634\u0634\u06D5\u062A\u0643\u0627"
]);
177
/**
 * Word beginnings scanned by `isLoanwordWithEPrefix`.
 * Every entry starts with U+06D5, the letter that method checks for first.
 */
var LOANWORD_E_PREFIXES = [
  "\u06D5\u0643\u0631\u0627\u0646",
  "\u06D5\u0643\u0633\u067E",
  "\u06D5\u0644\u06D5\u0643\u062A\u0631",
  "\u06D5\u0646\u06D5\u0631\u06AF",
  "\u06D5\u0643\u0648\u0644\u0648\u06AF",
  "\u06D5\u062A\u0627\u067E",
  "\u06D5\u0641\u064A\u0631",
  "\u06D5\u0641\u0641\u06D5\u0643\u062A",
  "\u06D5\u0643\u0648\u0646\u0648\u0645",
  "\u06D5\u0644\u06D5\u0645\u06D5\u0646\u062A",
  "\u06D5\u0633\u062A\u0631\u0627\u062F",
  "\u06D5\u067E\u0648\u0633",
  "\u06D5\u067E\u0649\u064A\u0632\u0648\u062F"
];
192
/**
 * Word beginnings that mark a word as a loanword.
 * The converter's constructor inserts every entry into a prefix trie, and
 * `isLoanword` reports true for any word that starts with one of them.
 * Element order is unchanged; rows are only packed for readability.
 */
var LOANWORD_PREFIXES = [
  "\u0627\u0631\u062D\u0649", "\u0627\u0631\u062D\u064A",
  "\u067E\u0631\u0648", "\u067E\u0631\u0648\u06AF", "\u067E\u0631\u0648\u062A\u0633",
  "\u0648\u0632\u0627\u0631\u0627", "\u0648\u0632\u0627\u0631\u06D5",
  "\u062A\u06D5\u062D\u0646\u0648",
  "\u06D5\u0643\u0648\u0646\u0648\u0645", "\u06D5\u0643\u0627\u0646\u0648\u0645",
  "\u06AF\u06CB\u0645\u0627\u0646", "\u06AF\u0648\u0645\u0627\u0646",
  "\u0631\u0627\u064A", "\u0631\u0627\u062F",
  "\u06C6\u0649\u064A\u062F", "\u06C6\u064A\u062F",
  "\u0643\u0648\u06D5\u0641\u0641", "\u0643\u0648\u0626\u0641\u0641",
  "\u0643\u0648\u0646\u0633\u06D5\u067E", "\u0643\u0648\u0646\u062A\u0633\u06D5\u067E",
  "\u0633\u0649\u064A\u0641\u0631", "\u062A\u0633\u0649\u064A\u0641\u0631",
  "\u06AF\u0631\u0627\u0641",
  "\u06D5\u0646\u06D5\u0631\u06AF",
  "\u06D5\u06C6\u0648\u0644",
  "\u0627\u0631\u062D\u0649\u064A\u062A", "\u0627\u0631\u062D\u064A\u062A",
  "\u064A\u0646\u062A\u06D5\u0631",
  "\u0643\u0648\u0646\u0633\u062A",
  "\u0633\u062A\u062A",
  "\u067E\u0648\u062F",
  "\u0643\u0627\u0646\u0633\u062A",
  "\u062F\u06D5\u0645\u0648\u0643\u0631", "\u062F\u06D5\u0645\u0648",
  "\u067E\u0648\u0644\u064A\u062A",
  "\u062A\u06D5\u0644\u06D5\u0641",
  "\u067E\u0648\u0644\u0649\u064A\u062A\u0633", "\u067E\u0648\u0644\u064A\u062A\u0633",
  "\u062F\u06D5\u067E\u0627\u0631\u062A", "\u062F\u0649\u067E\u0627\u0631\u062A",
  "\u0648\u067E\u06D5\u0631\u0627\u062A", "\u0627\u0648\u067E\u06D5\u0631\u0627\u062A",
  "\u0641\u06D5\u062F\u06D5\u0631\u0627\u0644", "\u0641\u0649\u062F\u06D5\u0631\u0627\u0644",
  "\u0643\u0631\u0649\u064A\u0645\u0649\u064A\u0646", "\u0643\u0631\u064A\u0645\u064A\u0646",
  "\u067E\u0633\u0649\u064A\u062D\u0648\u0644", "\u067E\u0633\u064A\u062D\u0648\u0644",
  "\u06C6\u0627\u0634\u0649\u064A\u0646\u06AF", "\u06C6\u0627\u0634\u064A\u0646\u06AF",
  "\u0627\u0645\u06D5\u0631\u0649\u064A\u0643", "\u0627\u0645\u06D5\u0631\u064A\u0643",
  "\u0627\u06C6\u062A\u0648\u0645\u0627\u062A",
  "\u06CB\u0646\u0649\u064A\u06C6\u06D5\u0631\u0633", "\u06CB\u0646\u064A\u06C6\u06D5\u0631\u0633",
  "\u067E\u0631\u06D5\u0632\u0649\u064A\u062F", "\u067E\u0631\u06D5\u0632\u064A\u062F",
  "\u0649\u064A\u0646\u06C6\u06D5\u0633\u062A", "\u064A\u0646\u06C6\u06D5\u0633\u062A",
  "\u0631\u06D5\u0641\u0648\u0631\u0645",
  "\u062F\u06D5\u0641\u0649\u064A\u062A\u0633", "\u062F\u06D5\u0641\u064A\u062A\u0633",
  "\u0643\u0648\u0648\u067E\u06D5\u0631", "\u0643\u0648\u067E\u06D5\u0631",
  "\u0643\u0648\u0646\u0641\u06D5\u0631\u06D5\u0646",
  "\u0649\u064A\u0646\u0641\u0644\u0649\u064A\u0627\u062A", "\u064A\u0646\u0641\u0644\u0649\u064A\u0627\u062A",
  "\u0649\u064A\u0646\u0641\u0644\u064A\u0627\u062A", "\u064A\u0646\u0641\u0644\u064A\u0627\u062A",
  "\u0643\u0644\u064A\u0645\u0627\u062A",
  "\u064A\u0646\u062A\u06D5\u0644\u0644\u06D5\u0643\u062A",
  "\u06AF\u06D5\u0648\u0633\u0627\u064A\u0627\u0633", "\u06AF\u06D5\u0648\u0633\u0627\u064A\u0627",
  "\u0645\u0648\u062F\u06D5\u0644",
  "\u062F\u06D5\u0645\u0648\u0643\u0631\u0627\u062A",
  "\u062A\u0633\u0648\u0646", "\u062A\u0633\u0627\u0646", "\u062A\u0633\u06D5\u0646",
  "\u062A\u0633\u06D5", "\u062A\u0633\u0649\u064A", "\u062A\u0633\u064A",
  "\u0634\u0634\u06D5", "\u0634\u0634\u0649", "\u0634\u0634\u0648",
  "\u0645\u0627\u062A\u0631", "\u0645\u0627\u0634",
  "\u0627\u062A\u0648\u0645",
  "\u0633\u062A\u0627\u062A", "\u0633\u062A\u0627\u0646\u062A",
  "\u067E\u0648\u062F\u06D5",
  "\u06AF\u0631\u0627\u0645",
  "\u0633\u06CB\u0628\u062A"
];
286
/**
 * Arabic-script spelling -> capitalised Cyrillic proper noun.
 * `extractRootAndSuffix` consults this table right after `EXCEPTIONS`.
 * Two different keys map to the same Cyrillic value.
 */
var PROPER_NOUNS = {
  "\u0642\u0627\u0632\u0627\u0642\u0633\u062A\u0627\u0646": "\u049A\u0430\u0437\u0430\u049B\u0441\u0442\u0430\u043D", // Қазақстан
  "\u0627\u0644\u0645\u0627\u062A\u0649": "\u0410\u043B\u043C\u0430\u0442\u044B", // Алматы
  "\u0627\u0633\u062A\u0627\u0646\u0627": "\u0410\u0441\u0442\u0430\u043D\u0430", // Астана
  "\u0627\u0631\u0627\u0633\u062A\u0627\u0646\u0627": "\u0410\u0441\u0442\u0430\u043D\u0430" // Астана
};
292
/**
 * Arabic-script suffix pieces accepted by `isValidSuffixSequence`, which
 * requires a candidate tail to be fully tiled by entries of this set.
 *
 * Fix: the entry that used to read "\u0649\u0645\u044B\u0437" mixed Arabic
 * letters with the Cyrillic letters U+044B / U+0437, so it could never match
 * Arabic-script input. It is now "\u0649\u0645\u0649\u0632", in line with its
 * neighbours "\u0649\u06AD\u0649\u0632" and "\u06D5\u0645\u0649\u0632".
 * Entry order is otherwise unchanged.
 */
var VALID_SUFFIXES = /* @__PURE__ */ new Set([
  "\u0644\u0627\u0631", "\u0644\u06D5\u0631", "\u062F\u0627\u0631", "\u062F\u06D5\u0631", "\u062A\u0627\u0631", "\u062A\u06D5\u0631",
  "\u0646\u0649\u06AD", "\u062F\u0649\u06AD", "\u062A\u0649\u06AD",
  "\u0639\u0627", "\u06AF\u06D5", "\u0642\u0627", "\u0643\u06D5", "\u0646\u0627", "\u0646\u06D5", "\u0627", "\u06D5",
  "\u0646\u0649", "\u062F\u0649", "\u062A\u0649", "\u0646",
  "\u062F\u0627", "\u062F\u06D5", "\u062A\u0627", "\u062A\u06D5", "\u0646\u062F\u0627", "\u0646\u062F\u06D5",
  "\u062F\u0627\u0646", "\u062F\u06D5\u0646", "\u062A\u0627\u0646", "\u062A\u06D5\u0646", "\u0646\u0627\u0646", "\u0646\u06D5\u0646",
  "\u0645\u06D5\u0646", "\u0628\u06D5\u0646", "\u067E\u06D5\u0646",
  "\u0645", "\u0645\u0649\u0632", "\u06AD", "\u06AD\u0649\u0632", "\u0633\u0649", "\u0649",
  "\u0649\u0645\u0649\u0632", "\u0649\u06AD\u0649\u0632", "\u06D5\u0645\u0649\u0632", "\u06D5\u06AD\u0649\u0632",
  "\u0649\u0645", "\u0649\u06AD", "\u06D5\u0645", "\u06D5\u06AD",
  "\u0644\u0649\u0642", "\u0644\u0649\u0643", "\u062F\u0649\u0642", "\u062F\u0649\u0643", "\u062A\u0649\u0642", "\u062A\u0649\u0643",
  "\u0633\u0649\u0632", "\u0634\u0649", "\u0634\u0649\u0644\u062F\u0649\u0642", "\u0634\u0649\u0644\u0649\u0643",
  "\u062F\u0627\u0639\u0649", "\u062F\u06D5\u06AF\u0649", "\u062A\u0627\u0639\u0649", "\u062A\u06D5\u06AF\u0649",
  "\u0646\u062F\u0627\u0639\u0649", "\u0646\u062F\u06D5\u06AF\u0649",
  "\u0639\u0627\u0646", "\u06AF\u06D5\u0646", "\u0642\u0627\u0646", "\u0643\u06D5\u0646",
  "\u0645\u0627", "\u0645\u06D5", "\u0628\u0627", "\u0628\u06D5", "\u067E\u0627", "\u067E\u06D5",
  "\u067E", "\u0649\u067E", "\u06D5\u067E",
  "\u06CB", "\u06CB\u0634\u0649", "\u062A\u06CB", "\u062F\u06CB",
  "\u0627\u0633\u0649\u06AD", "\u06D5\u0633\u0649\u06AD",
  "\u0627\u062F\u0649", "\u06D5\u062F\u0649", "\u064A\u062F\u0649",
  "\u0645\u0649\u0646", "\u0628\u0649\u0646", "\u067E\u0649\u0646",
  "\u0627\u0631", "\u06D5\u0631", "\u0645\u0627\u0633", "\u0645\u06D5\u0633",
  "\u0644\u06D5\u0646\u062F\u0649\u0631\u06CB", "\u0644\u0627\u0646\u062F\u0649\u0631\u06CB",
  "\u0644\u06D5\u0646\u06CB", "\u0644\u0627\u0646\u06CB", "\u0644\u06D5\u0646", "\u0644\u0627\u0646",
  "\u062F\u0649\u0631\u06CB", "\u062F\u0649\u0631"
]);
397
/**
 * Roots at which `segmentCompoundWord` splits a word in two.
 * They are tried in array order and the first one found past position 0 wins,
 * so the order of this list is significant.
 */
var COMPOUND_PIVOT_ROOTS = [
  "\u0633\u0648\u0632", "\u062A\u0649\u0644", "\u0628\u0649\u0644\u0649\u0645", "\u062D\u0627\u0646\u0627",
  "\u0643\u0648\u0632", "\u0648\u0631\u0649\u0646", "\u0643\u06D5\u0631", "\u0642\u0648\u0631",
  "\u067E\u0627\u0632", "\u06AF\u06D5\u0631", "\u0634\u0649\u0644\u0649\u0643", "\u062A\u06C7\u0633\u062A\u0649\u0643",
  "\u0633\u0627\u06CB", "\u0633\u064A\u0627\u0642", "\u0648\u064A\u0649\u0646", "\u062A\u06C7\u0633\u062A\u0649\u06AF"
];
415
/**
 * Word beginnings that `getHarmonyFromArabicRoot` classifies as "soft"
 * outright, provided the word contains neither U+0642 nor U+0639.
 */
var IMPLICIT_SOFT_ROOTS = /* @__PURE__ */ new Set([
  "\u06C7\u0645\u0649\u062A", "\u062A\u06C7\u0628", "\u06C7\u0634\u0649\u0646", "\u0645\u06C7\u0645\u0643\u0649\u0646",
  "\u0628\u0649\u0631", "\u0628\u0649\u0632", "\u0633\u0649\u0632", "\u0643\u0649\u0645", "\u062A\u0649\u0644",
  "\u0643\u06C7\u0646", "\u062A\u06C7\u0646", "\u062C\u06C7\u0631",
  "\u0649\u0644\u06AF\u06D5\u0631\u0649", "\u0628\u06C7\u0644"
]);
431
/**
 * Letters that `hasConsonantCluster` counts as consonants. They are the same
 * 22 letters, in the same order, as the keys of `CONSONANTS`.
 */
var ARAB_CONSONANTS_FOR_CLUSTER =
  "\u0628\u06C6\u06AF\u0639\u062F\u062C\u0632\u0643\u0642\u0644\u0645" +
  "\u0646\u06AD\u067E\u0631\u0633\u062A\u0641\u062D\u06BE\u0686\u0634";
432
/**
 * Two-letter sequences that `hasConsonantCluster` accepts as ordinary: when
 * the current consonant and the letter before it form one of these pairs, the
 * running consonant count restarts at 1 instead of growing.
 * Laid out one row per leading letter; insertion order is unchanged.
 */
var NATIVE_CLUSTERS = /* @__PURE__ */ new Set([
  "\u0642\u062A", "\u0642\u0633", "\u0642\u0628", "\u0642\u062C", "\u0642\u062F", "\u0642\u0632",
  "\u0644\u062F", "\u0644\u062A", "\u0644\u0642", "\u0644\u062D", "\u0644\u0633", "\u0644\u0628", "\u0644\u062C",
  "\u0646\u062F", "\u0646\u062A", "\u0646\u0642", "\u0646\u0633", "\u0646\u062C", "\u0646\u0628", "\u0646\u0632",
  "\u0633\u062A", "\u0633\u0642", "\u0633\u062F", "\u0633\u0628", "\u0633\u062C", "\u0633\u0632",
  "\u0634\u062A", "\u0634\u0642", "\u0634\u062F", "\u0634\u0633",
  "\u0631\u062A", "\u0631\u062F", "\u0631\u0642", "\u0631\u0633", "\u0631\u062C", "\u0631\u0628", "\u0631\u0632",
  "\u0632\u062F", "\u0632\u0642",
  "\u06AD\u062F", "\u06AD\u0642", "\u06AD\u062A",
  "\u0645\u062F", "\u0645\u0628", "\u0645\u0646",
  "\u064A\u062A", "\u064A\u0633", "\u064A\u0642", "\u064A\u062F",
  "\u0649\u0644", "\u0649\u0646", "\u0649\u0631", "\u0649\u0645"
]);
487
/**
 * Own-property test that does not rely on the object's prototype chain.
 *
 * @param {object} map - Object to inspect.
 * @param {string} key - Property name to look for.
 * @returns {boolean} True only when `key` is an own property of `map`.
 */
function hasKey(map, key) {
  const { hasOwnProperty } = Object.prototype;
  return hasOwnProperty.call(map, key);
}
490
/**
 * Single node of the prefix trie.
 * `children` maps the next character to the following node; `isEnd` is set on
 * the node where an inserted word finishes.
 */
var PrefixTrieNode = class {
  constructor() {
    Object.assign(this, {
      children: /* @__PURE__ */ new Map(),
      isEnd: false
    });
  }
};
496
/**
 * Character trie that answers "does any stored entry occur as a leading part
 * of this word?". Words are walked with the string iterator, i.e. one code
 * point at a time.
 */
var PrefixTrie = class {
  constructor() {
    this.root = new PrefixTrieNode();
  }

  /**
   * Stores `word`, creating nodes along its path as needed.
   *
   * @param {string} word - Entry to add.
   */
  insert(word) {
    let cursor = this.root;
    for (const symbol of word) {
      if (!cursor.children.get(symbol)) {
        cursor.children.set(symbol, new PrefixTrieNode());
      }
      cursor = cursor.children.get(symbol);
    }
    cursor.isEnd = true;
  }

  /**
   * @param {string} word - Word to test.
   * @returns {boolean} True as soon as the walk reaches a node that ends a
   *   stored entry; false when the path breaks off or the word runs out
   *   first. The root node itself is never tested.
   */
  hasPrefixOf(word) {
    let cursor = this.root;
    for (const symbol of word) {
      cursor = cursor.children.get(symbol);
      if (!cursor) {
        return false;
      }
      if (cursor.isEnd) {
        return true;
      }
    }
    return false;
  }
};
527
+ var ArabicToCyrillicConverter = class {
528
+ constructor(options = {}) {
529
+ this.HAMZA = "\u0674";
530
+ this.loanwordPrefixTrie = new PrefixTrie();
531
+ this.reZwnjEtc = /[\u200B-\u200F\u202A-\u202E\uFEFF]/gu;
532
+ this.reSpaces = /[ \t]+/gu;
533
+ this.reHyphens = /\s*-\s*/gu;
534
+ this.reRedundantYye1 = /ىييە/gu;
535
+ this.reRedundantYye2 = /ييە/gu;
536
+ this.reRedundantYye3 = /يية/gu;
537
+ this.reUndantYa = /ىييا/gu;
538
+ this.reArabicWords = /[\u0600-\u06FF\uFB50-\uFDFF\uFE70-\uFEFF]+(?:[-\s]+[\u0600-\u06FF\uFB50-\uFDFF\uFE70-\uFEFF]+)*/gu;
539
+ this.reCapAfterPunct = /([.。::??!!])\s*([a-zа-яәіңғүұқөһ])/giu;
540
+ this.reCapAfterQuote = /([«"'"])\s*([a-zа-яәіңғүұқөһ])/giu;
541
+ this.frontVowelsCyr = /* @__PURE__ */ new Set(["\u04D9", "\u0435", "\u0456", "\u04E9", "\u04AF", "\u044D", "\u0438"]);
542
+ this.backVowelsCyr = /* @__PURE__ */ new Set(["\u0430", "\u043E", "\u04B1", "\u044B", "\u044F", "\u044E"]);
543
+ this.arabicVowels = /* @__PURE__ */ new Set(["\u0627", "\u0649", "\u0648", "\u06C7", "\u06D5", "\u06CB", "\u064A"]);
544
+ if (options.useLm && !options.disambiguator) {
545
+ throw new Error(
546
+ "Built-in LM disambiguation is not bundled with the npm package. Provide options.disambiguator in Node.js or use the pure rule-based converter."
547
+ );
548
+ }
549
+ this.disambiguator = options.disambiguator ?? new NoopDisambiguator();
550
+ for (const prefix of LOANWORD_PREFIXES) {
551
+ this.loanwordPrefixTrie.insert(prefix);
552
+ }
553
+ }
554
+ isLoanword(word) {
555
+ return LOANWORD_EXACT.has(word) || this.loanwordPrefixTrie.hasPrefixOf(word) || this.hasConsonantCluster(word);
556
+ }
557
+ hasConsonantCluster(word) {
558
+ let stem = word;
559
+ for (let length = word.length - 1; length > Math.max(2, word.length - 6); length -= 1) {
560
+ const candidateSuffix = word.slice(length);
561
+ if (candidateSuffix && this.isValidSuffixSequence(candidateSuffix)) {
562
+ stem = word.slice(0, length);
563
+ break;
564
+ }
565
+ }
566
+ let consonantCount = 0;
567
+ const chars = [...stem];
568
+ for (let index = 0; index < chars.length; index += 1) {
569
+ const char = chars[index];
570
+ if (ARAB_CONSONANTS_FOR_CLUSTER.includes(char)) {
571
+ if (consonantCount >= 1 && index >= 1) {
572
+ const pair = `${chars[index - 1]}${char}`;
573
+ if (NATIVE_CLUSTERS.has(pair)) {
574
+ consonantCount = 1;
575
+ continue;
576
+ }
577
+ }
578
+ consonantCount += 1;
579
+ if (consonantCount >= 3) {
580
+ return true;
581
+ }
582
+ } else {
583
+ consonantCount = 0;
584
+ }
585
+ }
586
+ return false;
587
+ }
588
+ isLoanwordWithEPrefix(word) {
589
+ return word.startsWith("\u06D5") && LOANWORD_E_PREFIXES.some((prefix) => word.startsWith(prefix));
590
+ }
591
+ getCyrillicVowelState(cyrillicWord) {
592
+ const cyr = cyrillicWord.toLowerCase();
593
+ if (cyr === "\u043A\u0456\u0442\u0430\u043F") {
594
+ return true;
595
+ }
596
+ for (let index = cyr.length - 1; index >= 0; index -= 1) {
597
+ const char = cyr[index];
598
+ if (this.frontVowelsCyr.has(char)) {
599
+ return true;
600
+ }
601
+ if (this.backVowelsCyr.has(char)) {
602
+ return false;
603
+ }
604
+ }
605
+ return false;
606
+ }
607
+ isValidSuffixSequence(suffix) {
608
+ if (!suffix) {
609
+ return true;
610
+ }
611
+ const dp = Array(suffix.length + 1).fill(false);
612
+ dp[0] = true;
613
+ for (let index = 1; index <= suffix.length; index += 1) {
614
+ for (let start = Math.max(0, index - 10); start < index; start += 1) {
615
+ if (dp[start] && VALID_SUFFIXES.has(suffix.slice(start, index))) {
616
+ dp[index] = true;
617
+ break;
618
+ }
619
+ }
620
+ }
621
+ return dp[suffix.length];
622
+ }
623
+ getHarmonyFromArabicRoot(word) {
624
+ for (const root of IMPLICIT_SOFT_ROOTS) {
625
+ if (word.startsWith(root) && ![...word].some((char) => char === "\u0642" || char === "\u0639")) {
626
+ return "soft";
627
+ }
628
+ }
629
+ let softSignals = 0;
630
+ let hardSignals = 0;
631
+ let hasHamza = false;
632
+ for (const char of word) {
633
+ if (char === this.HAMZA) {
634
+ hasHamza = true;
635
+ } else if (char === "\u0643" || char === "\u06AF") {
636
+ softSignals += 5;
637
+ } else if (char === "\u0642" || char === "\u0639") {
638
+ hardSignals += 5;
639
+ } else if (char === "\u06D5") {
640
+ softSignals += 3;
641
+ } else if ("\u0627\u0648\u06C7".includes(char)) {
642
+ hardSignals += 2;
643
+ }
644
+ }
645
+ if (hasHamza || softSignals > hardSignals) {
646
+ return "soft";
647
+ }
648
+ if (hardSignals > softSignals) {
649
+ return "hard";
650
+ }
651
+ return "hard";
652
+ }
653
+ segmentCompoundWord(word) {
654
+ if (word.includes("-")) {
655
+ return word.split("-");
656
+ }
657
+ if (word.startsWith(this.HAMZA)) {
658
+ return [word];
659
+ }
660
+ for (const pivot of COMPOUND_PIVOT_ROOTS) {
661
+ if (word.includes(pivot) && !word.startsWith(pivot)) {
662
+ const pivotIndex = word.indexOf(pivot);
663
+ if (pivotIndex > 0 && word[pivotIndex - 1] !== this.HAMZA) {
664
+ return [word.slice(0, pivotIndex), word.slice(pivotIndex)];
665
+ }
666
+ }
667
+ }
668
+ const suffixPatterns = [/(تاۋلىق(?:تار)?)$/u, /(زار)$/u, /(ستان)$/u];
669
+ for (const pattern of suffixPatterns) {
670
+ const match = word.match(pattern);
671
+ if (match && match.index !== void 0 && match.index > 0) {
672
+ return [word.slice(0, match.index), word.slice(match.index)];
673
+ }
674
+ }
675
+ return [word];
676
+ }
677
+ extractRootAndSuffix(word) {
678
+ if (!word) {
679
+ return { matchType: null, base: null, suffix: word };
680
+ }
681
+ for (let length = word.length; length > 1; length -= 1) {
682
+ const prefix = word.slice(0, length);
683
+ const suffix = word.slice(length);
684
+ if (!this.isValidSuffixSequence(suffix)) {
685
+ continue;
686
+ }
687
+ if (hasKey(EXCEPTIONS, prefix)) {
688
+ return { matchType: "exception", base: EXCEPTIONS[prefix], suffix };
689
+ }
690
+ if (hasKey(PROPER_NOUNS, prefix)) {
691
+ return { matchType: "proper", base: PROPER_NOUNS[prefix], suffix };
692
+ }
693
+ if (LOANWORD_EXACT.has(prefix)) {
694
+ return { matchType: "loanword", base: prefix, suffix };
695
+ }
696
+ }
697
+ for (let length = word.length - 1; length > 1; length -= 1) {
698
+ const prefix = word.slice(0, length);
699
+ const suffix = word.slice(length);
700
+ if (![...prefix].some((char) => this.arabicVowels.has(char))) {
701
+ continue;
702
+ }
703
+ if (this.isValidSuffixSequence(suffix)) {
704
+ return { matchType: "anonymous", base: prefix, suffix };
705
+ }
706
+ }
707
+ return { matchType: null, base: null, suffix: word };
708
+ }
709
+ convertSuffixOnly(suffix, isFront) {
710
+ if (!suffix) {
711
+ return "";
712
+ }
713
+ const result = [];
714
+ let index = 0;
715
+ while (index < suffix.length) {
716
+ const char = suffix[index];
717
+ const pair = suffix.slice(index, index + 2);
718
+ if (pair === "\u0649\u064A") {
719
+ if (suffix[index + 2] === "\u0627") {
720
+ result.push("\u0438\u044F");
721
+ index += 3;
722
+ } else {
723
+ result.push("\u0438");
724
+ index += 2;
725
+ }
726
+ continue;
727
+ }
728
+ if (char === "\u064A" && index + 1 < suffix.length) {
729
+ const nextChar = suffix[index + 1];
730
+ if (nextChar === "\u0627") {
731
+ result.push("\u044F");
732
+ index += 2;
733
+ continue;
734
+ }
735
+ if (nextChar === "\u06CB") {
736
+ result.push("\u044E");
737
+ index += 2;
738
+ continue;
739
+ }
740
+ }
741
+ if (pair === "\u0634\u0634") {
742
+ result.push("\u0449");
743
+ index += 2;
744
+ continue;
745
+ }
746
+ if (pair === "\u062A\u0633") {
747
+ result.push("\u0446");
748
+ index += 2;
749
+ continue;
750
+ }
751
+ if (char === this.HAMZA && index + 1 < suffix.length) {
752
+ const nextChar = suffix[index + 1];
753
+ if (nextChar === "\u0627") {
754
+ result.push("\u04D9");
755
+ index += 2;
756
+ continue;
757
+ }
758
+ if (nextChar === "\u0649") {
759
+ result.push("\u0456");
760
+ index += 2;
761
+ continue;
762
+ }
763
+ if (nextChar === "\u0648") {
764
+ result.push("\u04E9");
765
+ index += 2;
766
+ continue;
767
+ }
768
+ if (nextChar === "\u06C7") {
769
+ result.push("\u04AF");
770
+ index += 2;
771
+ continue;
772
+ }
773
+ index += 1;
774
+ continue;
775
+ }
776
+ if (hasKey(CONSONANTS, char)) {
777
+ result.push(CONSONANTS[char]);
778
+ index += 1;
779
+ continue;
780
+ }
781
+ if (Object.prototype.hasOwnProperty.call(VOWEL_MAP, char)) {
782
+ const vowel = VOWEL_MAP[char];
783
+ result.push(typeof vowel === "string" ? vowel : isFront ? vowel.f : vowel.b);
784
+ index += 1;
785
+ continue;
786
+ }
787
+ if (char === "\u064A") {
788
+ result.push(index === 0 ? "\u0439" : "\u0627\u0649\u0648\u06C7\u06D5\u06CB".includes(suffix[index - 1]) ? "\u0439" : "\u0438");
789
+ index += 1;
790
+ continue;
791
+ }
792
+ result.push(char);
793
+ index += 1;
794
+ }
795
+ return result.join("");
796
+ }
797
+ convertWord(word) {
798
+ if (!word) {
799
+ return word;
800
+ }
801
+ if (hasKey(EXCEPTIONS, word)) {
802
+ return EXCEPTIONS[word];
803
+ }
804
+ if (hasKey(PROPER_NOUNS, word)) {
805
+ return PROPER_NOUNS[word];
806
+ }
807
+ if (LOANWORD_EXACT.has(word)) {
808
+ return this.convertWordInternal(word);
809
+ }
810
+ const wholeWordIsFront = this.getHarmonyFromArabicRoot(word) === "soft";
811
+ const forcedState = wholeWordIsFront ? "soft" : "hard";
812
+ const { matchType, base, suffix } = this.extractRootAndSuffix(word);
813
+ if ((matchType === "exception" || matchType === "proper") && base) {
814
+ return `${base}${this.convertSuffixOnly(suffix, this.getCyrillicVowelState(base))}`;
815
+ }
816
+ if (matchType === "loanword" && base) {
817
+ const baseCyr = this.convertWordInternal(base);
818
+ return `${baseCyr}${this.convertSuffixOnly(suffix, this.getCyrillicVowelState(baseCyr))}`;
819
+ }
820
+ if (matchType === "anonymous" && base) {
821
+ if (this.isLoanword(base)) {
822
+ const baseCyr2 = this.convertWordInternal(base);
823
+ return `${baseCyr2}${this.convertSuffixOnly(suffix, this.getCyrillicVowelState(baseCyr2))}`;
824
+ }
825
+ const baseCyr = this.convertWordInternal(base, forcedState);
826
+ return `${baseCyr}${this.convertSuffixOnly(suffix, wholeWordIsFront)}`;
827
+ }
828
+ return this.convertWordInternal(word, forcedState);
829
+ }
830
+ convertWordInternal(word, forcedState) {
831
+ const segments = this.segmentCompoundWord(word);
832
+ if (segments.length > 1) {
833
+ const convertedSegments = segments.map((segment) => this.convertWord(segment));
834
+ return word.includes("-") ? convertedSegments.join("-") : convertedSegments.join("");
835
+ }
836
+ const isLoanwordE = this.isLoanwordWithEPrefix(word);
837
+ const isLoanword = this.isLoanword(word);
838
+ let currentState;
839
+ if (isLoanword) {
840
+ currentState = word.includes(this.HAMZA) ? "soft" : "hard";
841
+ } else if (forcedState) {
842
+ currentState = forcedState;
843
+ } else {
844
+ currentState = this.getHarmonyFromArabicRoot(word);
845
+ }
846
+ if (word === "\u062A\u0649\u064A\u0649\u0633") {
847
+ currentState = "soft";
848
+ }
849
+ const result = [];
850
+ let index = 0;
851
+ let isFirstChar = true;
852
+ while (index < word.length) {
853
+ const char = word[index];
854
+ if (isFirstChar && char === "\u06D5" && isLoanwordE) {
855
+ result.push("\u044D");
856
+ index += 1;
857
+ isFirstChar = false;
858
+ continue;
859
+ }
860
+ isFirstChar = false;
861
+ if (!isLoanword) {
862
+ if (char === "\u0642" || char === "\u0639") {
863
+ currentState = "hard";
864
+ } else if (char === "\u0643" || char === "\u06AF" || char === this.HAMZA) {
865
+ currentState = "soft";
866
+ }
867
+ } else if (char === this.HAMZA) {
868
+ currentState = "soft";
869
+ }
870
+ const twoChars = word.slice(index, index + 2);
871
+ const threeChars = word.slice(index, index + 3);
872
+ const fourChars = word.slice(index, index + 4);
873
+ if (twoChars === "\u0649\u064A" || twoChars === "\u064A\u064A") {
874
+ if (fourChars === "\u0649\u064A\u064A\u0627" || fourChars === "\u064A\u064A\u064A\u0627") {
875
+ result.push("\u0438\u044F");
876
+ index += 4;
877
+ continue;
878
+ }
879
+ if (threeChars === "\u0649\u064A\u0649\u0627" || threeChars === "\u064A\u064A\u0649\u0627" || word[index + 2] === "\u0627") {
880
+ result.push("\u0438\u044F");
881
+ index += 3;
882
+ continue;
883
+ }
884
+ result.push("\u0438");
885
+ index += 2;
886
+ continue;
887
+ }
888
+ if (char === "\u064A" && index + 1 < word.length) {
889
+ const nextChar = word[index + 1];
890
+ if (nextChar === "\u0627") {
891
+ if (isLoanword && index > 0) {
892
+ const prevChar = word[index - 1];
893
+ if (!`\u0627\u0649\u0648\u06C7\u06D5\u06CB${this.HAMZA}`.includes(prevChar)) {
894
+ result.push("\u0438\u044F");
895
+ index += 2;
896
+ continue;
897
+ }
898
+ }
899
+ result.push("\u044F");
900
+ index += 2;
901
+ continue;
902
+ }
903
+ if (nextChar === "\u06CB") {
904
+ result.push("\u044E");
905
+ index += 2;
906
+ continue;
907
+ }
908
+ if (nextChar === "\u0648" && index > 0 && "\u0627\u0649\u0648\u06C7\u06D5\u06CB".includes(word[index - 1])) {
909
+ result.push("\u0439\u043E");
910
+ index += 2;
911
+ continue;
912
+ }
913
+ }
914
+ if (twoChars === "\u0634\u0634") {
915
+ result.push("\u0449");
916
+ index += 2;
917
+ continue;
918
+ }
919
+ if (twoChars === "\u062A\u0633") {
920
+ if ((isLoanword || isLoanwordE) && index + 2 < word.length) {
921
+ result.push("\u0446");
922
+ index += 2;
923
+ continue;
924
+ }
925
+ }
926
+ if (isLoanword && word.slice(index, index + 6) === "\u067E\u0648\u062F\u06D5\u0632\u062F") {
927
+ result.push("\u043F\u043E\u0434\u044A\u0435\u0437\u0434");
928
+ index += 6;
929
+ continue;
930
+ }
931
+ if (char === this.HAMZA && index + 1 < word.length) {
932
+ const nextChar = word[index + 1];
933
+ if (nextChar === "\u0627") {
934
+ result.push("\u04D9");
935
+ index += 2;
936
+ continue;
937
+ }
938
+ if (nextChar === "\u0649") {
939
+ result.push("\u0456");
940
+ index += 2;
941
+ continue;
942
+ }
943
+ if (nextChar === "\u0648") {
944
+ result.push("\u04E9");
945
+ index += 2;
946
+ continue;
947
+ }
948
+ if (nextChar === "\u06C7") {
949
+ result.push("\u04AF");
950
+ index += 2;
951
+ continue;
952
+ }
953
+ index += 1;
954
+ continue;
955
+ }
956
+ if (hasKey(CONSONANTS, char)) {
957
+ result.push(CONSONANTS[char]);
958
+ index += 1;
959
+ continue;
960
+ }
961
+ if (Object.prototype.hasOwnProperty.call(VOWEL_MAP, char)) {
962
+ const vowel = VOWEL_MAP[char];
963
+ result.push(typeof vowel === "string" ? vowel : currentState === "soft" ? vowel.f : vowel.b);
964
+ index += 1;
965
+ continue;
966
+ }
967
+ if (char === "\u064A") {
968
+ if (index === 0) {
969
+ if (isLoanword) {
970
+ result.push("\u0438");
971
+ } else if (index + 1 < word.length && this.arabicVowels.has(word[index + 1])) {
972
+ result.push("\u0439");
973
+ } else {
974
+ result.push("\u0438");
975
+ }
976
+ } else {
977
+ const prevChar = word[index - 1];
978
+ result.push(["\u0627", "\u0649", "\u0648", "\u06C7", "\u06D5", "\u06CB"].includes(prevChar) ? "\u0439" : "\u0438");
979
+ }
980
+ index += 1;
981
+ continue;
982
+ }
983
+ if (isLoanword && char === "\u067E" && word.slice(index, index + 3) === "\u067E\u0648\u062F") {
984
+ result.push("\u043F\u043E\u0434");
985
+ index += 3;
986
+ if (index < word.length && word[index] === "\u06D5") {
987
+ result.push("\u044A\u0435");
988
+ index += 1;
989
+ }
990
+ continue;
991
+ }
992
+ result.push(char);
993
+ index += 1;
994
+ }
995
+ return result.join("");
996
+ }
997
+ preprocess(text) {
998
+ let next = text.replace(/ـ/gu, "-").replace(/\u0640/gu, "-");
999
+ next = next.replace(this.reZwnjEtc, "");
1000
+ next = next.replace(/ء/gu, this.HAMZA);
1001
+ next = next.replace(/أ/gu, `${this.HAMZA}\u0627`);
1002
+ next = next.replace(/ؤ/gu, `${this.HAMZA}\u0648`);
1003
+ next = next.replace(/ئ/gu, `${this.HAMZA}\u0649`);
1004
+ next = next.replace(/ٵ/gu, `${this.HAMZA}\u0627`);
1005
+ next = next.replace(/ٶ/gu, `${this.HAMZA}\u0648`);
1006
+ next = next.replace(/ٷ/gu, `${this.HAMZA}\u06C7`);
1007
+ next = next.replace(/ٸ/gu, `${this.HAMZA}\u0649`);
1008
+ next = next.replace(/\u06CC/gu, "\u0649");
1009
+ next = next.replace(/،/gu, ",").replace(/؛/gu, ";").replace(/؟/gu, "?").replace(/۔/gu, ".");
1010
+ next = next.replace(this.reSpaces, " ");
1011
+ next = next.replace(this.reHyphens, "-");
1012
+ next = next.replace(this.reRedundantYye1, "\u06D5");
1013
+ next = next.replace(this.reRedundantYye2, "\u06D5");
1014
+ next = next.replace(this.reRedundantYye3, "\u06D5");
1015
+ next = next.replace(this.reUndantYa, "\u064A\u0627");
1016
+ return next;
1017
+ }
1018
+ postProcessContextFix(rawTokens) {
1019
+ return rawTokens.map(([, cyr]) => cyr);
1020
+ }
1021
+ async postProcessContextFixAsync(rawTokens, contextSentence) {
1022
+ return this.disambiguator.disambiguate(rawTokens, contextSentence);
1023
+ }
1024
+ convertPhrase(phrase) {
1025
+ const words = phrase.split(" ");
1026
+ if (words.length <= 1) {
1027
+ return this.convertWord(phrase);
1028
+ }
1029
+ const rawTokens = words.map((word) => [word, hasKey(EXCEPTIONS, word) ? EXCEPTIONS[word] : this.convertWord(word)]);
1030
+ return this.postProcessContextFix(rawTokens).join(" ");
1031
+ }
1032
+ async convertPhraseAsync(phrase) {
1033
+ const words = phrase.split(" ");
1034
+ if (words.length <= 1) {
1035
+ return this.convertWord(phrase);
1036
+ }
1037
+ const rawTokens = words.map((word) => [word, hasKey(EXCEPTIONS, word) ? EXCEPTIONS[word] : this.convertWord(word)]);
1038
+ const fixed = await this.postProcessContextFixAsync(rawTokens, phrase);
1039
+ return fixed.join(" ");
1040
+ }
1041
+ convert(text) {
1042
+ const normalized = this.preprocess(text);
1043
+ const lines = normalized.split("\n");
1044
+ const convertedLines = [];
1045
+ for (const line of lines) {
1046
+ if (!line.trim()) {
1047
+ convertedLines.push("");
1048
+ continue;
1049
+ }
1050
+ let result = line.replace(this.reArabicWords, (phrase) => phrase.includes(" ") ? this.convertPhrase(phrase) : this.convertWord(phrase));
1051
+ if (result.length > 0) {
1052
+ result = result.replace(/[a-zа-яәіңғүұқөһ]/iu, (match) => match.toUpperCase());
1053
+ }
1054
+ result = result.replace(this.reCapAfterPunct, (_match, punctuation, char) => `${punctuation} ${char.toUpperCase()}`);
1055
+ result = result.replace(this.reCapAfterQuote, (_match, quote, char) => `${quote}${char.toUpperCase()}`);
1056
+ convertedLines.push(result);
1057
+ }
1058
+ return convertedLines.join("\n");
1059
+ }
1060
+ async convertAsync(text) {
1061
+ const normalized = this.preprocess(text);
1062
+ const lines = normalized.split("\n");
1063
+ const convertedLines = [];
1064
+ for (const line of lines) {
1065
+ if (!line.trim()) {
1066
+ convertedLines.push("");
1067
+ continue;
1068
+ }
1069
+ const matches = Array.from(line.matchAll(this.reArabicWords));
1070
+ let result = "";
1071
+ let lastIndex = 0;
1072
+ for (const match of matches) {
1073
+ const phrase = match[0];
1074
+ const matchIndex = match.index ?? 0;
1075
+ result += line.slice(lastIndex, matchIndex);
1076
+ result += phrase.includes(" ") ? await this.convertPhraseAsync(phrase) : this.convertWord(phrase);
1077
+ lastIndex = matchIndex + phrase.length;
1078
+ }
1079
+ result += line.slice(lastIndex);
1080
+ if (result.length > 0) {
1081
+ result = result.replace(/[a-zа-яәіңғүұқөһ]/iu, (char) => char.toUpperCase());
1082
+ }
1083
+ result = result.replace(this.reCapAfterPunct, (_match, punctuation, char) => `${punctuation} ${char.toUpperCase()}`);
1084
+ result = result.replace(this.reCapAfterQuote, (_match, quote, char) => `${quote}${char.toUpperCase()}`);
1085
+ convertedLines.push(result);
1086
+ }
1087
+ return convertedLines.join("\n");
1088
+ }
1089
+ };
1090
/**
 * One-shot helper: converts Arabic-script text to Cyrillic with a fresh
 * ArabicToCyrillicConverter.
 *
 * @param {string} text - Input text.
 * @param {object} [options] - Passed unchanged to the converter constructor.
 * @returns {string} Converted text.
 */
function arb2syr(text, options) {
  const converter = new ArabicToCyrillicConverter(options);
  return converter.convert(text);
}
1093
/**
 * One-shot async helper: converts Arabic-script text to Cyrillic with a fresh
 * ArabicToCyrillicConverter, using its asynchronous pipeline.
 *
 * @param {string} text - Input text.
 * @param {object} [options] - Passed unchanged to the converter constructor.
 * @returns {Promise<string>} Converted text.
 */
async function arb2syrAsync(text, options) {
  const converter = new ArabicToCyrillicConverter(options);
  return converter.convertAsync(text);
}
1096
+
1097
+ // src/lexicon.ts
1098
+ var DEFAULT_NATIVE_ROOTS = [ // Built-in Cyrillic roots that the CyrillicToArabicConverter constructor loads into its trie as non-loanwords (before any options.lexicon.nativeRoots).
1099
+ "\u0431\u0430\u0441\u043F\u0430", // баспа
1100
+ "\u0441\u04E9\u0437", // сөз
1101
+ "\u04E9\u043D\u0435\u0440", // өнер
1102
+ "\u043A\u04D9\u0441\u0456\u043F", // кәсіп
1103
+ "\u0435\u043C", // ем
1104
+ "\u0445\u0430\u043D\u0430", // хана
1105
+ "\u0435\u04A3\u0431\u0435\u043A", // еңбек
1106
+ "\u049B\u043E\u0440", // қор
1107
+ "\u0441\u0443\u0440\u0435\u0442", // сурет
1108
+ "\u0448\u0456", // ші
1109
+ "\u043E\u0442\u0430\u043D", // отан
1110
+ "\u0430\u0434\u0430\u043C", // адам
1111
+ "\u04E9\u0437\u0435\u043D", // өзен
1112
+ "\u04E9\u043C\u0456\u0440", // өмір
1113
+ "\u0436\u0430\u0443\u0430\u043F", // жауап
1114
+ "\u0436\u04B1\u043C\u044B\u0441" // жұмыс
1115
+ ];
1116
+ var DEFAULT_LOAN_ROOTS = [ // Built-in Cyrillic roots that the CyrillicToArabicConverter constructor loads into its trie flagged as loanwords (before any options.lexicon.loanRoots).
1117
+ "\u043C\u0430\u0448\u0438\u043D\u0430", // машина
1118
+ "\u0430\u0442\u043E\u043C", // атом
1119
+ "\u0443\u043D\u0438\u0432\u0435\u0440\u0441\u0438\u0442\u0435\u0442", // университет
1120
+ "\u043A\u0430\u0440\u0442\u0430", // карта
1121
+ "\u0431\u044E\u0434\u0436\u0435\u0442", // бюджет
1122
+ "\u0442\u0435\u043B\u0435\u0444\u043E\u043D", // телефон
1123
+ "\u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442", // интернет
1124
+ "\u0447\u0435\u043C\u043F\u0438\u043E\u043D", // чемпион
1125
+ "\u0446\u0435\u043C\u0435\u043D\u0442", // цемент
1126
+ "\u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440", // компьютер
1127
+ "\u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F", // технология
1128
+ "\u0440\u0435\u0441\u0443\u0440\u0441", // ресурс
1129
+ "\u043A\u043E\u043D\u0441\u0442\u0438\u0442\u0443\u0446\u0438\u044F", // конституция
1130
+ "\u0434\u0435\u043C\u043E\u043A\u0440\u0430\u0442\u0438\u044F", // демократия
1131
+ "\u0441\u0442\u0430\u043D\u0446\u0438\u044F", // станция
1132
+ "\u0434\u0438\u0430\u0433\u043D\u043E\u0437", // диагноз
1133
+ "\u0446\u0438\u0440\u043A", // цирк
1134
+ "\u0449\u0435\u0442\u043A\u0430", // щетка
1135
+ "\u043F\u043E\u0434\u044A\u0435\u0437\u0434" // подъезд
1136
+ ];
1137
+
1138
+ // src/cyr2arb.ts
1139
+ var PROPER_NOUNS2 = { // Whole-word overrides (lower-case Cyrillic key → fixed Arabic-script spelling); checked first by convertWord / convertCompoundWord.
1140
+ "\u0436\u0456\u04E9": "\u062C\u0649\u064A\u0649\u0648" // key: жіө
1141
+ };
1142
+ var COMMON_WORDS = { // Second whole-word override table, consulted right after PROPER_NOUNS2.
1143
+ "\u0442\u0438\u0456\u0441": "\u062A\u0649\u064A\u0649\u0633", // key: тиіс
1144
+ "\u0442\u0438\u0456\u0441\u0442\u0456": "\u062A\u0649\u064A\u0649\u0633\u062A\u0649", // key: тиісті
1145
+ "\u0431\u0456\u0440\u0430\u049B": "\u0628\u0649\u0631\u0627\u0642", // key: бірақ
1146
+ "\u049B\u043E\u044F\u043D": "\u0642\u0648\u0649\u064A\u0627\u0646", // key: қоян
1147
+ "\u04AF\u0448\u0456\u043D": "\u06C7\u0634\u0649\u0646" // key: үшін
1148
+ };
1149
+ var CONSONANTS2 = { // One-to-one Cyrillic consonant → Arabic-script letter map used by the per-character loop in convertWord.
1150
+ "\u0431": "\u0628", // б
1151
+ "\u0432": "\u06C6", // в
1152
+ "\u0433": "\u06AF", // г
1153
+ "\u0493": "\u0639", // ғ
1154
+ "\u0434": "\u062F", // д
1155
+ "\u0436": "\u062C", // ж
1156
+ "\u0437": "\u0632", // з
1157
+ "\u0439": "\u064A", // й
1158
+ "\u043A": "\u0643", // к
1159
+ "\u049B": "\u0642", // қ
1160
+ "\u043B": "\u0644", // л
1161
+ "\u043C": "\u0645", // м
1162
+ "\u043D": "\u0646", // н
1163
+ "\u04A3": "\u06AD", // ң
1164
+ "\u043F": "\u067E", // п
1165
+ "\u0440": "\u0631", // р
1166
+ "\u0441": "\u0633", // с
1167
+ "\u0442": "\u062A", // т
1168
+ "\u0444": "\u0641", // ф
1169
+ "\u0445": "\u062D", // х
1170
+ "\u04BB": "\u06BE", // һ
1171
+ "\u0447": "\u0686", // ч
1172
+ "\u0448": "\u0634" // ш
1173
+ };
1174
+ var VOWELS = { // Cyrillic vowel → Arabic-script letter; each front/back pair shares one Arabic letter (the distinction is not kept in this table).
1175
+ "\u0430": "\u0627", // а
1176
+ "\u04D9": "\u0627", // ә
1177
+ "\u0435": "\u06D5", // е
1178
+ "\u043E": "\u0648", // о
1179
+ "\u04E9": "\u0648", // ө
1180
+ "\u04B1": "\u06C7", // ұ
1181
+ "\u04AF": "\u06C7", // ү
1182
+ "\u044B": "\u0649", // ы
1183
+ "\u0456": "\u0649", // і
1184
+ "\u044D": "\u06D5" // э
1185
+ };
1186
+ var COMBINATIONS = { // Cyrillic letters that expand to two Arabic-script letters; checked before CONSONANTS2 and VOWELS.
1187
+ "\u0446": "\u062A\u0633", // ц
1188
+ "\u0449": "\u0634\u0634", // щ
1189
+ "\u0451": "\u064A\u0648" // ё
1190
+ };
1191
+ var FRONT_VOWELS = /* @__PURE__ */ new Set(["\u04D9", "\u0435", "\u0456", "\u04E9", "\u04AF"]); // ә е і ө ү — the first such vowel in a word yields "front" harmony
1192
+ var BACK_VOWELS = /* @__PURE__ */ new Set(["\u0430", "\u043E", "\u04B1", "\u044B", "\u0443"]); // а о ұ ы у — the first such vowel in a word yields "back" harmony
1193
+ var I_INITIAL_NATIVE_WORDS = /* @__PURE__ */ new Set(["\u0438\u0456\u0441", "\u0438\u043D\u0435", "\u0438\u0442", "\u0438\u044E", "\u0438\u0456\u0440", "\u0438\u0456\u043B", "\u0438\u0440\u0456", "\u0438\u044B\u049B", "\u0438\u043D"]); // иіс ине ит ию иір иіл ирі иық ин — treated as front by getInitialHarmony; applyHamzaRule prefixes the hamza marker when the first segment starts with one of them
1194
+ var PUNCTUATION = { // Punctuation substitutions applied to the whole text by CyrillicToArabicConverter.convert before word conversion.
1195
+ ",": "\u060C", // → U+060C
1196
+ ".": ".", // unchanged
1197
+ ":": ":", // unchanged
1198
+ ";": "\u061B", // → U+061B
1199
+ "?": "\u061F", // → U+061F
1200
+ "!": "!" // unchanged
1201
+ };
1202
/**
 * One node of the root dictionary trie.
 *
 * Fields: `children` (Map from character to child node), `isEndOfWord`
 * (a dictionary entry ends here), `isLoanword` (that entry was inserted as a
 * loanword) and `harmony` (null until an entry ends here).
 */
var TrieNode = class {
  constructor() {
    Object.assign(this, {
      children: new Map(),
      isEndOfWord: false,
      isLoanword: false,
      harmony: null
    });
  }
};
1210
/**
 * Prefix tree of lower-cased Cyrillic roots. Every terminal node records
 * whether the root is a loanword and which vowel harmony it has.
 */
var KazakhTrie = class {
  constructor() {
    this.root = new TrieNode();
  }

  /**
   * Classifies a word as "front" or "back".
   *
   * Any U+043A/U+0433 makes it front; otherwise any U+049B/U+0493 makes it
   * back; otherwise the first vowel found in FRONT_VOWELS / BACK_VOWELS
   * decides, with "back" as the fallback.
   *
   * @param {string} word - Cyrillic word (any case).
   * @returns {("front"|"back")} Harmony class.
   */
  determineHarmony(word) {
    const letters = [...word.toLowerCase()];
    if (letters.includes("\u043A") || letters.includes("\u0433")) {
      return "front";
    }
    if (letters.includes("\u049B") || letters.includes("\u0493")) {
      return "back";
    }
    const firstVowel = letters.find((letter) => FRONT_VOWELS.has(letter) || BACK_VOWELS.has(letter));
    return firstVowel !== undefined && FRONT_VOWELS.has(firstVowel) ? "front" : "back";
  }

  /**
   * Adds a root to the trie (lower-cased).
   *
   * @param {string} word - Root to store.
   * @param {boolean} [isLoanword=false] - Whether the root is a loanword.
   */
  insert(word, isLoanword = false) {
    const lowered = word.toLowerCase();
    let cursor = this.root;
    for (const letter of lowered) {
      if (!cursor.children.get(letter)) {
        cursor.children.set(letter, new TrieNode());
      }
      cursor = cursor.children.get(letter);
    }
    cursor.isEndOfWord = true;
    cursor.isLoanword = isLoanword;
    cursor.harmony = this.determineHarmony(lowered);
  }

  /**
   * Inserts all native roots, then all loan roots.
   *
   * @param {Iterable<string>} nativeWords - Roots stored as non-loanwords.
   * @param {Iterable<string>} loanWords - Roots stored as loanwords.
   */
  loadDictionary(nativeWords, loanWords) {
    const batches = [
      [nativeWords, false],
      [loanWords, true]
    ];
    for (const [words, asLoanword] of batches) {
      for (const entry of words) {
        this.insert(entry, asLoanword);
      }
    }
  }
};
1256
/**
 * Greedy dictionary-based splitter: repeatedly takes the longest root known
 * to the trie from the current position.
 */
var CompoundSplitter = class {
  /**
   * @param {{root: object}} trie - Trie whose nodes expose `children` (Map),
   *   `isEndOfWord` and `isLoanword`.
   */
  constructor(trie) {
    this.trie = trie;
  }

  /**
   * Splits a word into `[text, isLoanword]` segments.
   *
   * As soon as no dictionary root matches at the current position, the whole
   * remainder becomes one final segment classified by fallbackIsLoanword.
   *
   * @param {string} word - Cyrillic word (any case).
   * @returns {Array<[string, boolean]>} Lower-cased segments covering the word.
   */
  splitWord(word) {
    const lowered = word.toLowerCase();
    const pieces = [];
    let start = 0;

    while (start < lowered.length) {
      let node = this.trie.root;
      let bestLength = 0;
      let bestIsLoan = false;
      let end = start;

      // Walk as deep as the trie allows, remembering the longest full entry.
      while (end < lowered.length) {
        node = node.children.get(lowered[end]);
        if (!node) {
          break;
        }
        end += 1;
        if (node.isEndOfWord) {
          bestLength = end - start;
          bestIsLoan = node.isLoanword;
        }
      }

      if (bestLength === 0) {
        const rest = lowered.slice(start);
        pieces.push([rest, this.fallbackIsLoanword(rest)]);
        break;
      }

      pieces.push([lowered.slice(start, start + bestLength), bestIsLoan]);
      start += bestLength;
    }

    return pieces;
  }

  /**
   * Heuristic for text not covered by the dictionary: it counts as a loanword
   * when it contains any of U+0444, U+0432, U+0446, U+0447, U+0449.
   *
   * @param {string} word - Cyrillic text (any case).
   * @returns {boolean} True when a marker letter is present.
   */
  fallbackIsLoanword(word) {
    const lowered = word.toLowerCase();
    const markers = ["\u0444", "\u0432", "\u0446", "\u0447", "\u0449"];
    return markers.some((marker) => lowered.includes(marker));
  }
};
1295
/**
 * Own-property test that ignores inherited keys and works for objects
 * without a prototype.
 *
 * @param {object} map - Object to inspect.
 * @param {string} key - Property name.
 * @returns {boolean} True when `key` is an own property of `map`.
 */
function hasKey2(map, key) {
  const { hasOwnProperty } = Object.prototype;
  return hasOwnProperty.call(map, key);
}
1298
/**
 * Transliterates Kazakh Cyrillic text into Arabic script.
 *
 * Words are lower-cased, split into dictionary segments (native vs. loan
 * roots) and mapped character by character; a hamza marker (U+0674) is then
 * prefixed to certain front-harmony native words (see applyHamzaRule).
 */
var CyrillicToArabicConverter = class {
  /**
   * @param {object} [options] - Optional settings; `null` is treated like an
   *   empty object.
   * @param {{nativeRoots?: string[], loanRoots?: string[]}} [options.lexicon]
   *   Extra roots appended to the built-in native / loan root lists.
   */
  constructor(options = {}) {
    this.HAMZA = "\u0674";
    this.trie = new KazakhTrie();
    this.splitter = new CompoundSplitter(this.trie);
    // `options?.` keeps an explicit `null` from throwing; the default
    // parameter only covers `undefined`.
    const lexicon = options?.lexicon;
    const nativeRoots = [...DEFAULT_NATIVE_ROOTS, ...(lexicon?.nativeRoots ?? [])];
    const loanRoots = [...DEFAULT_LOAN_ROOTS, ...(lexicon?.loanRoots ?? [])];
    this.trie.loadDictionary(nativeRoots, loanRoots);
  }

  /**
   * Harmony used for segments and for the hamza rule.
   *
   * Any U+049B/U+0493 makes the word back; otherwise any U+043A/U+0433 makes
   * it front; otherwise membership in I_INITIAL_NATIVE_WORDS makes it front;
   * otherwise the first front/back vowel decides ("back" as fallback).
   *
   * @param {string} word - Cyrillic word (any case).
   * @returns {("front"|"back")} Harmony class.
   */
  getInitialHarmony(word) {
    const wordLower = word.toLowerCase();
    const letters = [...wordLower];
    if (letters.some((char) => char === "\u049B" || char === "\u0493")) {
      return "back";
    }
    if (letters.some((char) => char === "\u043A" || char === "\u0433")) {
      return "front";
    }
    if (I_INITIAL_NATIVE_WORDS.has(wordLower)) {
      return "front";
    }
    for (const char of letters) {
      if (FRONT_VOWELS.has(char)) {
        return "front";
      }
      if (BACK_VOWELS.has(char)) {
        return "back";
      }
    }
    return "back";
  }

  /**
   * Prefixes the hamza marker to a converted word when its first segment is
   * a front-harmony native root that needs it.
   *
   * No marker is added when the result already contains one, when the first
   * segment is empty, a suffix part, or a loanword, when that segment
   * contains U+043A/U+0433, or when it contains U+0435 and is not
   * whitelisted. First segments starting with an I_INITIAL_NATIVE_WORDS entry
   * always get the marker.
   *
   * @param {string} arabicResult - Converted word.
   * @param {string} firstSegText - First dictionary segment of the source.
   * @param {boolean} firstSegIsLoan - Whether that segment is a loanword.
   * @param {boolean} [isSuffix=false] - True for non-initial hyphen parts.
   * @returns {string} `arabicResult`, possibly prefixed with the marker.
   */
  applyHamzaRule(arabicResult, firstSegText, firstSegIsLoan, isSuffix = false) {
    if (arabicResult.includes(this.HAMZA) || !firstSegText || isSuffix) {
      return arabicResult;
    }
    const firstSegLower = firstSegText.toLowerCase();
    if (firstSegIsLoan || this.splitter.fallbackIsLoanword(firstSegLower)) {
      return arabicResult;
    }

    // From here on the result is known to contain no hamza, so prefixing
    // never needs a "does it already start with one" check.
    const withHamza = `${this.HAMZA}${arabicResult}`;

    if ([...I_INITIAL_NATIVE_WORDS].some((word) => firstSegLower.startsWith(word))) {
      return withHamza;
    }
    if ([...firstSegLower].some((char) => char === "\u043A" || char === "\u0433")) {
      return arabicResult;
    }
    const eHamzaWhitelist = new Set(["\u04E9\u0437\u0435\u043D", "\u04E9\u0442\u0435", "\u04E9\u043D\u0435\u0440", "\u0438\u043D\u0435", "\u04D9\u043B\u0435\u043C"]);
    if (firstSegLower.includes("\u0435") && !eHamzaWhitelist.has(firstSegLower)) {
      return arabicResult;
    }
    return this.getInitialHarmony(firstSegLower) === "front" ? withHamza : arabicResult;
  }

  /**
   * Converts one hyphen-free Cyrillic word.
   *
   * @param {string} word - Cyrillic word (any case).
   * @param {boolean} [isSuffix=false] - True for non-initial hyphen parts;
   *   suppresses the hamza rule.
   * @returns {string} Arabic-script rendering (falsy input is returned as is).
   */
  convertWord(word, isSuffix = false) {
    if (!word) {
      return word;
    }
    const wordLower = word.toLowerCase();
    if (hasKey2(PROPER_NOUNS2, wordLower)) {
      return PROPER_NOUNS2[wordLower];
    }
    if (hasKey2(COMMON_WORDS, wordLower)) {
      return COMMON_WORDS[wordLower];
    }

    // Per-character flags derived from the dictionary segmentation.
    const segments = this.splitter.splitWord(wordLower);
    const isLoanFlags = [];
    const isHardLoanFlags = [];
    for (const [segText, isLoan] of segments) {
      let isHardLoan = false;
      if (isLoan) {
        const letters = [...segText];
        // "Hard" loan: has a back vowel, or has no front vowel at all.
        isHardLoan =
          letters.some((char) => "\u0430\u043E\u04B1\u044B".includes(char)) ||
          !letters.some((char) => "\u04D9\u0435\u0456\u04E9\u04AF".includes(char));
      }
      for (let count = 0; count < segText.length; count += 1) {
        isLoanFlags.push(isLoan);
        isHardLoanFlags.push(isHardLoan);
      }
    }

    // Word-level special cases for U+0438, computed once instead of per character.
    const containsMashina = wordLower.includes("\u043C\u0430\u0448\u0438\u043D\u0430");
    const tseIndexInKonstitutsiya = wordLower.includes("\u043A\u043E\u043D\u0441\u0442\u0438\u0442\u0443\u0446\u0438\u044F")
      ? wordLower.indexOf("\u0446")
      : -1;

    const result = [];
    for (let index = 0; index < wordLower.length; index += 1) {
      const char = wordLower[index];
      const prevChar = index > 0 ? wordLower[index - 1] : "";
      const isLoanword = isLoanFlags[index];
      const isHardLoan = isHardLoanFlags[index];

      // Soft and hard signs have no Arabic-script counterpart.
      if (char === "\u044C" || char === "\u044A") {
        continue;
      }

      if (char === "\u0443") {
        result.push("\u06CB");
        continue;
      }

      if (char === "\u0438") {
        if (containsMashina || index < tseIndexInKonstitutsiya) {
          result.push("\u064A");
        } else if (isLoanword) {
          result.push(isHardLoan ? "\u0649\u064A" : "\u064A");
        } else {
          result.push(index === 0 ? "\u064A" : "\u0649\u064A");
        }
        continue;
      }

      if (char === "\u044F") {
        if (prevChar !== "\u0438") {
          result.push("\u064A\u0627");
        } else if (isLoanword) {
          const prevPrev = index >= 2 ? wordLower[index - 2] : "";
          result.push(prevPrev === "\u0433" || prevPrev === "\u043A" ? "\u0627" : "\u064A\u0627");
        } else {
          result.push("\u0627");
        }
        continue;
      }

      if (char === "\u044E") {
        const afterSign = prevChar === "\u044C" || prevChar === "\u044A";
        const bare = !afterSign && (isLoanword || prevChar === "\u0438");
        result.push(bare ? "\u06CB" : "\u064A\u06CB");
        continue;
      }

      if (hasKey2(COMBINATIONS, char)) {
        result.push(COMBINATIONS[char]);
      } else if (hasKey2(CONSONANTS2, char)) {
        result.push(CONSONANTS2[char]);
      } else if (hasKey2(VOWELS, char)) {
        result.push(VOWELS[char]);
      } else {
        result.push(char);
      }
    }

    const converted = result.join("");
    return this.applyHamzaRule(converted, segments[0]?.[0] ?? "", segments[0]?.[1] ?? false, isSuffix);
  }

  /**
   * Converts a possibly hyphenated word; parts after the first are converted
   * as suffix parts (no hamza rule).
   *
   * @param {string} word - Cyrillic word, optionally containing hyphens.
   * @returns {string} Arabic-script rendering with hyphens preserved.
   */
  convertCompoundWord(word) {
    if (!word) {
      return word;
    }
    const wordLower = word.toLowerCase();
    if (hasKey2(PROPER_NOUNS2, wordLower)) {
      return PROPER_NOUNS2[wordLower];
    }
    if (hasKey2(COMMON_WORDS, wordLower)) {
      return COMMON_WORDS[wordLower];
    }
    if (!word.includes("-")) {
      return this.convertWord(wordLower);
    }
    return wordLower
      .split("-")
      .map((part, position) => (position === 0 ? this.convertWord(part) : this.convertWord(part, true)))
      .join("-");
  }

  /**
   * Converts a whole text: punctuation is substituted first, then every run
   * of Cyrillic letters (optionally hyphen-joined) is converted.
   *
   * @param {string} text - Cyrillic input text.
   * @returns {string} Text with Cyrillic words replaced by Arabic script.
   */
  convert(text) {
    let convertedText = text;
    for (const [cyr, arab] of Object.entries(PUNCTUATION)) {
      // Identity mappings would be a no-op; skip the split/join round trip.
      if (cyr !== arab) {
        convertedText = convertedText.split(cyr).join(arab);
      }
    }
    const pattern = /[а-яәіңғүұқөһёэъь]+(?:-[а-яәіңғүұқөһёэъь]+)*/giu;
    return convertedText.replace(pattern, (match) => this.convertCompoundWord(match));
  }
};
1470
/**
 * One-shot helper: converts Cyrillic text to Arabic script with a fresh
 * CyrillicToArabicConverter.
 *
 * @param {string} text - Input text.
 * @param {object} [options] - Passed unchanged to the converter constructor.
 * @returns {string} Converted text.
 */
function syr2arb(text, options) {
  const converter = new CyrillicToArabicConverter(options);
  return converter.convert(text);
}
1473
+ export {
1474
+ ArabicToCyrillicConverter,
1475
+ CyrillicToArabicConverter,
1476
+ NoopDisambiguator,
1477
+ arb2syr,
1478
+ arb2syrAsync,
1479
+ syr2arb
1480
+ };