@sarmay/kaz-converter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,1512 @@
1
+ "use strict";
2
// Bundler interop aliases: short local names for the Object reflection
// primitives that the module-interop helpers below rely on.
var {
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames
} = Object;
var { hasOwnProperty: __hasOwnProp } = Object.prototype;
6
/**
 * Installs one enumerable accessor on `target` for every enumerable key of
 * `all`, using the function stored under that key as the getter.
 *
 * @param {object} target - Object that receives the accessors.
 * @param {object} all - Map of export name to zero-argument getter.
 */
var __export = (target, all) => {
  // for...in is kept on purpose: it also visits inherited enumerable keys.
  for (var exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};
10
/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 *
 * Keys already owned by `to`, and the single key named by `except`, are left
 * alone. Each new accessor is enumerable when the source descriptor is
 * missing or is itself enumerable.
 *
 * @param {object} to - Destination object; also the return value.
 * @param {*} from - Source; anything that is not an object or function is ignored.
 * @param {string} [except] - One key to skip.
 * @param {PropertyDescriptor} [desc] - Scratch slot reused for each descriptor lookup.
 * @returns {object} `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};
18
/**
 * Builds the CommonJS export object: a fresh object carrying a non-enumerable
 * `__esModule: true` marker plus live getters for every own property of `mod`.
 *
 * @param {object} mod - Namespace object whose properties are re-exported.
 * @returns {object} The new export object.
 */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
19
+
20
+ // src/index.ts
21
// Public surface of the package. Every entry is a thunk, so the bindings it
// names are only read when a consumer accesses the export; that is what lets
// this table appear before the definitions it refers to.
var index_exports = {};
__export(index_exports, {
  ArabicToCyrillicConverter: () => ArabicToCyrillicConverter,
  CyrillicToArabicConverter: () => CyrillicToArabicConverter,
  NoopDisambiguator: () => NoopDisambiguator,
  arb2syr: () => arb2syr,
  arb2syrAsync: () => arb2syrAsync,
  syr2arb: () => syr2arb
});

// Publish the table as this module's CommonJS exports.
module.exports = __toCommonJS(index_exports);
31
+
32
+ // src/disambiguation.ts
33
/**
 * Disambiguator that performs no disambiguation: for every token pair it
 * simply keeps the second element.
 */
var NoopDisambiguator = class {
  /**
   * @param {Array} rawTokens - Pairs whose second element is the converted token.
   * @returns {Array} The second element of every pair, in input order.
   */
  disambiguate(rawTokens) {
    const pickConverted = ([, converted]) => converted;
    return rawTokens.map(pickConverted);
  }
};
38
+
39
+ // src/arb2syr.ts
40
// One-to-one consonant table: Arabic-script letter (key) to lowercase
// Cyrillic letter (value). The trailing comment shows the Cyrillic output.
var CONSONANTS = {
  "\u0628": "\u0431", // б
  "\u06C6": "\u0432", // в
  "\u06AF": "\u0433", // г
  "\u0639": "\u0493", // ғ
  "\u062F": "\u0434", // д
  "\u062C": "\u0436", // ж
  "\u0632": "\u0437", // з
  "\u0643": "\u043A", // к
  "\u0642": "\u049B", // қ
  "\u0644": "\u043B", // л
  "\u0645": "\u043C", // м
  "\u0646": "\u043D", // н
  "\u06AD": "\u04A3", // ң
  "\u067E": "\u043F", // п
  "\u0631": "\u0440", // р
  "\u0633": "\u0441", // с
  "\u062A": "\u0442", // т
  "\u0641": "\u0444", // ф
  "\u062D": "\u0445", // х
  "\u06BE": "\u04BB", // һ
  "\u0686": "\u0447", // ч
  "\u0634": "\u0448" // ш
};
64
// Vowel table keyed by Arabic-script letter. A value is either a plain
// Cyrillic string or a pair { b, f } of alternatives.
// NOTE(review): `b` / `f` presumably stand for the back- and front-harmony
// variants; confirm against the code that consumes this table.
var VOWEL_MAP = {
  "\u0627": { b: "\u0430", f: "\u04D9" }, // а / ә
  "\u0649": { b: "\u044B", f: "\u0456" }, // ы / і
  "\u0648": { b: "\u043E", f: "\u04E9" }, // о / ө
  "\u06C7": { b: "\u04B1", f: "\u04AF" }, // ұ / ү
  "\u06D5": "\u0435", // е
  "\u06CB": "\u0443" // у
};
72
// Fixed-spelling overrides: Arabic-script form (key) to the exact Cyrillic
// output (value). Values carry their own capitalisation, and a few keys
// contain spaces or hyphens, i.e. they cover multi-word expressions.
// extractRootAndSuffix consults this table before PROPER_NOUNS and
// LOANWORD_EXACT.
var EXCEPTIONS = {
  "\u0631\u06D5\u0633\u067E\u06CB\u0628\u0644\u064A\u0643\u0627": "\u0440\u0435\u0441\u043F\u0443\u0431\u043B\u0438\u043A\u0430",
  "\u0643\u0648\u0645\u0645\u06CB\u0646\u064A\u0633\u062A\u0649\u0643": "\u043A\u043E\u043C\u043C\u0443\u043D\u0438\u0441\u0442\u0456\u043A",
  "\u06D5\u0643\u0631\u0627\u0646": "\u044D\u043A\u0440\u0430\u043D",
  "\u06D5\u0646\u06D5\u0631\u06AF\u06D5\u062A\u064A\u0643\u0627": "\u044D\u043D\u0435\u0440\u0433\u0435\u0442\u0438\u043A\u0430",
  "\u06D5\u0646\u06D5\u0631\u06AF\u064A\u064A\u0627": "\u044D\u043D\u0435\u0440\u0433\u0438\u044F",
  "\u0643\u0648\u0645\u064A\u062A\u06D5\u062A": "\u043A\u043E\u043C\u0438\u0442\u0435\u0442",
  "\u0643\u0648\u0646\u062A\u0633\u06D5\u0631\u062A": "\u043A\u043E\u043D\u0446\u0435\u0440\u0442",
  "\u0643\u0648\u0633\u0645\u0648\u0633": "\u043A\u043E\u0441\u043C\u043E\u0441",
  "\u0643\u0648\u0644\u0644\u06D5\u0643\u062A\u0649\u064A\u06C6": "\u043A\u043E\u043B\u043B\u0435\u043A\u0442\u0438\u0432",
  "\u0643\u0648\u0644\u0644\u06D5\u06AF\u0627": "\u043A\u043E\u043B\u043B\u0435\u0433\u0430",
  "\u0643\u0648\u0631\u067E\u06CB\u0633": "\u043A\u043E\u0440\u043F\u0443\u0441",
  "\u0643\u0648\u0646\u0633\u062A\u0649\u064A\u062A\u06CB\u062A\u0633\u064A\u064A\u0627": "\u043A\u043E\u043D\u0441\u0442\u0438\u0442\u0443\u0446\u0438\u044F",
  "\u0643\u0648\u0645\u0645\u06CB\u0646\u064A\u0633\u062A": "\u043A\u043E\u043C\u043C\u0443\u043D\u0438\u0441\u0442",
  "\u0631\u0627\u064A\u0648\u0646": "\u0440\u0430\u0439\u043E\u043D",
  "\u0631\u0627\u062F\u064A\u0648": "\u0440\u0430\u0434\u0438\u043E",
  "\u0643\u0627\u0631\u062A\u0627": "\u043A\u0430\u0440\u0442\u0430",
  "\u0627\u0631\u062D\u0649\u064A\u062A\u06D5\u0643\u062A\u06CB\u0631\u0627": "\u0430\u0440\u0445\u0438\u0442\u0435\u043A\u0442\u0443\u0440\u0430",
  "\u0627\u06C6\u062A\u0648\u0646\u0648\u0645\u064A\u0627": "\u0430\u0432\u0442\u043E\u043D\u043E\u043C\u0438\u044F",
  "\u06C6\u0649\u064A\u062F\u064A\u0648": "\u0432\u0438\u0434\u0435\u043E",
  "\u06C6\u0649\u064A\u062F\u06D5\u0648": "\u0432\u0438\u0434\u0435\u043E",
  "\u0628\u0649\u064A\u0648\u0644\u0648\u06AF\u064A\u0627": "\u0431\u0438\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u0643\u0648\u06D5\u0641\u0641\u064A\u062A\u0633\u0649\u064A\u06D5\u0646\u062A": "\u043A\u043E\u044D\u0444\u0444\u0438\u0446\u0438\u0435\u043D\u0442",
  "\u0643\u0648\u06D5\u0641\u0641\u064A\u062A\u0633\u064A\u06D5\u0646\u062A": "\u043A\u043E\u044D\u0444\u0444\u0438\u0446\u0438\u0435\u043D\u0442",
  "\u067E\u0631\u0648\u062A\u0633\u06D5\u0633": "\u043F\u0440\u043E\u0446\u0435\u0441\u0441",
  "\u062A\u06D5\u0644\u06D5\u06C6\u0649\u064A\u0632\u0648\u0631": "\u0442\u0435\u043B\u0435\u0432\u0438\u0437\u043E\u0440",
  "\u062A\u06D5\u0644\u06D5\u06C6\u064A\u0632\u0648\u0631": "\u0442\u0435\u043B\u0435\u0432\u0438\u0437\u043E\u0440",
  "\u0674\u0627\u0631\u062F\u0627\u064A\u0649\u0645": "\u04D9\u0440\u0434\u0430\u0439\u044B\u043C",
  "\u067E\u0627\u0631\u062A\u064A\u0627": "\u043F\u0430\u0440\u0442\u0438\u044F",
  "\u0643\u0648\u0645\u067E\u064A\u06CB\u062A\u06D5\u0631": "\u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440",
  "\u062A\u06D5\u0644\u06D5\u0641\u0648\u0646": "\u0442\u0435\u043B\u0435\u0444\u043E\u043D",
  "\u064A\u0646\u062A\u06D5\u0631\u0646\u06D5\u062A": "\u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442",
  "\u062F\u06D5\u0645\u0648\u0643\u0631\u0627\u062A\u064A\u064A\u0627": "\u0434\u0435\u043C\u043E\u043A\u0440\u0430\u0442\u0438\u044F",
  "\u062F\u06D5\u0645\u0648\u0643\u0631\u0627\u062A\u064A\u0627": "\u0434\u0435\u043C\u043E\u043A\u0440\u0430\u0442\u0438\u044F",
  "\u06D5\u0643\u0648\u0646\u0648\u0645\u064A\u0643\u0627": "\u044D\u043A\u043E\u043D\u043E\u043C\u0438\u043A\u0430",
  "\u067E\u0648\u0644\u064A\u062A\u064A\u0643\u0627": "\u043F\u043E\u043B\u0438\u0442\u0438\u043A\u0430",
  "\u0643\u0648\u0631\u0649\u062F\u0648\u0631": "\u043A\u043E\u0440\u0438\u0434\u043E\u0440",
  "\u0643\u0648\u0646\u06AF\u0631\u06D5\u0633": "\u043A\u043E\u043D\u0433\u0440\u0435\u0441\u0441",
  "\u06D5\u0644\u06D5\u0645\u06D5\u0646\u062A": "\u044D\u043B\u0435\u043C\u0435\u043D\u0442",
  "\u062A\u06D5\u062D\u0646\u0648\u0644\u0648\u06AF\u0649\u064A\u064A\u0627": "\u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u062A\u06D5\u062D\u0646\u0648\u0644\u0648\u06AF\u0649\u064A\u0649\u0627": "\u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u062A\u06D5\u062D\u0646\u0648\u0644\u0648\u06AF\u0649\u064A\u0627": "\u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u0628\u0649\u064A\u0648\u0644\u0648\u06AF\u0649\u064A\u064A\u0627": "\u0431\u0438\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u0628\u064A\u0648\u0644\u0648\u06AF\u064A\u0627": "\u0431\u0438\u043E\u043B\u043E\u0433\u0438\u044F",
  "\u0649\u064A\u0646\u0633\u062A\u0631\u06CB\u0645\u06D5\u0646\u062A": "\u0438\u043D\u0441\u0442\u0440\u0443\u043C\u0435\u043D\u0442",
  "\u064A\u0646\u0633\u062A\u0631\u06CB\u0645\u06D5\u0646\u062A": "\u0438\u043D\u0441\u0442\u0440\u0443\u043C\u0435\u043D\u0442",
  "\u067E\u0648\u062F\u06D5\u0632\u062F": "\u043F\u043E\u0434\u044A\u0435\u0437\u0434",
  "\u0643\u0648\u0646\u0633\u062A\u064A\u062A\u06CB\u062A\u0633\u0649\u064A\u064A\u0627": "\u043A\u043E\u043D\u0441\u0442\u0438\u0442\u0443\u0446\u0438\u044F",
  "\u0633\u062A\u0627\u0646\u062A\u0633\u0649\u064A\u064A\u0627": "\u0441\u0442\u0430\u043D\u0446\u0438\u044F",
  "\u0628\u0649\u0631": "\u0431\u0456\u0440",
  "\u06C7\u0634": "\u04AF\u0448",
  "\u062A\u0648\u0631\u062A": "\u0442\u04E9\u0440\u0442",
  "\u0649\u0633": "\u0456\u0441",
  "\u062F\u064A\u0633\u0633\u06D5\u0631\u062A\u0627\u062A\u0633\u0649\u064A\u064A\u0627": "\u0434\u0438\u0441\u0441\u0435\u0440\u0442\u0430\u0446\u0438\u044F",
  "\u062F\u064A\u0633\u0633\u06D5\u0631\u062A\u0627\u062A\u0633\u064A\u0627": "\u0434\u0438\u0441\u0441\u0435\u0440\u0442\u0430\u0446\u0438\u044F",
  "\u064A\u0646\u062A\u06D5\u06AF\u0631\u0627\u062A\u0633\u0649\u064A\u064A\u0627": "\u0438\u043D\u0442\u0435\u0433\u0440\u0430\u0446\u0438\u044F",
  "\u064A\u0646\u062A\u06D5\u06AF\u0631\u0627\u062A\u0633\u064A\u0627": "\u0438\u043D\u0442\u0435\u0433\u0440\u0430\u0446\u0438\u044F",
  "\u062A\u0631\u0627\u0646\u0633\u0641\u0648\u0631\u0645\u0627\u062A\u0633\u064A\u0627": "\u0442\u0440\u0430\u043D\u0441\u0444\u043E\u0440\u043C\u0430\u0446\u0438\u044F",
  "\u0645\u0627\u062C\u0649\u0644\u0649\u0633": "\u043C\u04D9\u0436\u0456\u043B\u0456\u0441",
  "\u0674\u062A\u0648\u0631\u0627\u0639\u0627": "\u0442\u04E9\u0440\u0430\u0493\u0430",
  "\u0627\u0643\u0627\u062F\u06D5\u0645\u0649\u064A\u064A\u0627": "\u0430\u043A\u0430\u0434\u0435\u043C\u0438\u044F",
  "\u0634\u0649\u0645\u0643\u06D5\u0646\u062A": "\u0428\u044B\u043C\u043A\u0435\u043D\u0442",
  "\u0627\u0644\u0645\u0627\u062A\u0649": "\u0410\u043B\u043C\u0430\u0442\u044B",
  "\u0627\u0633\u062A\u0627\u0646\u0627": "\u0410\u0441\u0442\u0430\u043D\u0430",
  "\u0642\u0627\u0632\u0627\u0642\u0633\u062A\u0627\u0646": "\u049A\u0430\u0437\u0430\u049B\u0441\u0442\u0430\u043D",
  "\u062C\u06C7\u06AD\u06AF\u0648": "\u0416\u04B1\u04A3\u0433\u043E",
  "\u0634\u064A": "\u0421\u0438",
  "\u062C\u064A\u0646\u067E\u064A\u06AD": "\u0426\u0437\u0438\u043D\u044C\u043F\u0438\u043D",
  "\u0643\u0649\u062A\u0627\u067E": "\u043A\u0456\u0442\u0430\u043F",
  "\u0631\u0627\u062D\u0645\u06D5\u062A": "\u0440\u0430\u0445\u043C\u0435\u0442",
  "\u0627\u06CB\u0649\u0644": "\u0430\u0443\u044B\u043B",
  "\u06AF\u0628": "\u0413\u0411",
  "\u067E\u0631\u0648\u06AF\u0631\u06D5\u0633": "\u043F\u0440\u043E\u0433\u0440\u0435\u0441\u0441",
  "\u0674\u0648\u0632\u0627\u0631\u0627": "\u04E9\u0437\u0430\u0440\u0430",
  "\u062C\u0627\u06CB\u0627\u067E\u0643\u06D5\u0631\u0634\u0649\u0644\u0649\u0643": "\u0436\u0430\u0443\u0430\u043F\u043A\u0435\u0440\u0448\u0456\u043B\u0456\u043A",
  "\u06D5\u0644\u06D5\u0643\u062A\u0631\u0644\u06D5\u0646\u062F\u0649\u0631\u06CB": "\u044D\u043B\u0435\u043A\u0442\u0440\u043B\u0435\u043D\u0434\u0456\u0440\u0443",
  "\u0674\u0649\u0632\u0628\u0627\u0633\u0627\u0631": "\u0456\u0437\u0431\u0430\u0441\u0430\u0440",
  "\u0674\u0627\u062F\u0649\u0633-\u062A\u0627\u0633\u0649\u0644": "\u04D9\u0434\u0456\u0441-\u0442\u04D9\u0441\u0456\u043B",
  "\u0674\u062C\u0648\u0646-\u062C\u0648\u0633\u0649\u0642\u0633\u0649\u0632": "\u0436\u04E9\u043D-\u0436\u043E\u0441\u044B\u049B\u0441\u044B\u0437",
  "\u0643\u0648\u0646\u0633\u06D5\u067E\u062A\u0633\u0649\u064A\u064A\u0627": "\u043A\u043E\u043D\u0446\u0435\u043F\u0446\u0438\u044F",
  "\u0643\u0648\u0646\u0633\u06D5\u067E\u062A\u0633\u064A\u064A\u0627": "\u043A\u043E\u043D\u0446\u0435\u043F\u0446\u0438\u044F",
  "\u0643\u0648\u0646\u0633\u06D5\u067E\u062A\u0633\u064A\u0627": "\u043A\u043E\u043D\u0446\u0435\u043F\u0446\u0438\u044F",
  "\u0633\u0649\u064A\u0641\u0631\u0644\u0649\u0642": "\u0446\u0438\u0444\u0440\u043B\u044B\u049B",
  "\u06D5\u06C6\u0648\u0644\u06CB\u062A\u0633\u0649\u064A\u064A\u0627": "\u044D\u0432\u043E\u043B\u044E\u0446\u0438\u044F",
  "\u06D5\u06C6\u0648\u0644\u064A\u06CB\u062A\u0633\u064A\u0627": "\u044D\u0432\u043E\u043B\u044E\u0446\u0438\u044F",
  "\u0645\u06D5\u0674\u062A\u0649\u0644\u0643\u06D5\u0634\u06D5": "\u043C\u04D9\u0442\u0456\u043B\u043A\u0435\u0448\u0435",
  "\u062F\u0649\u064A\u0627\u06AF\u0646\u0648\u0632": "\u0434\u0438\u0430\u0433\u043D\u043E\u0437",
  "\u0628\u06CB\u062F\u062C\u06D5\u062A": "\u0431\u044E\u0434\u0436\u0435\u0442",
  "\u0641\u0649\u064A\u0644\u0645": "\u0444\u0438\u043B\u044C\u043C",
  "\u0627\u0633\u0641\u0627\u0644\u062A": "\u0430\u0441\u0444\u0430\u043B\u044C\u0442",
  "\u06D5\u06CB\u0631\u0648\u067E\u0627": "\u0415\u0443\u0440\u043E\u043F\u0430",
  "\u062A\u0648\u0643\u0649\u064A\u0648": "\u0422\u043E\u043A\u0438\u043E",
  "\u0646\u0649\u064A\u06CB-\u064A\u0648\u0631\u0643": "\u041D\u044C\u044E-\u0419\u043E\u0440\u043A",
  "\u0628\u0627\u0646\u0643": "\u0431\u0430\u043D\u043A",
  "\u0642\u0649\u064A\u0627\u0631": "\u049B\u0438\u044F\u0440",
  "\u0628\u0649\u0631\u0627\u0642": "\u0431\u0456\u0440\u0430\u049B",
  "\u062A\u0648\u0645\u06D5\u0646": "\u0442\u04E9\u043C\u0435\u043D",
  "\u0645\u06D5\u064A\u0631\u0627\u0645": "\u043C\u0435\u0439\u0440\u0430\u043C",
  "\u0645\u06D5\u064A\u0631\u0627\u0645\u062F\u0627\u0631\u0649\u0646\u0649\u06AD": "\u043C\u0435\u0439\u0440\u0430\u043C\u0434\u0430\u0440\u044B\u043D\u044B\u04A3",
  "\u062F\u0627\u0633\u062A\u06C7\u0631": "\u0434\u04D9\u0441\u0442\u04AF\u0440",
  "\u062F\u0627\u0633\u062A\u06C7\u0631\u0644\u06D5\u0631": "\u0434\u04D9\u0441\u0442\u04AF\u0440\u043B\u0435\u0440",
  "\u062F\u06D5\u0646\u0633\u0627\u06CB\u0644\u0649\u0642": "\u0434\u0435\u043D\u0441\u0430\u0443\u043B\u044B\u049B",
  "\u062A\u0627\u0631\u064A\u062D": "\u0442\u0430\u0440\u0438\u0445",
  "\u06AF\u0631\u0627\u0645\u0645\u0627\u062A\u0649\u0643\u0627": "\u0433\u0440\u0430\u043C\u043C\u0430\u0442\u0438\u043A\u0430",
  "\u06AF\u0631\u0627\u0645\u0645\u0627\u062A\u0649\u0643\u0627\u0644\u0649\u0642": "\u0433\u0440\u0430\u043C\u043C\u0430\u0442\u0438\u043A\u0430\u043B\u044B\u049B",
  "\u062C \u0643 \u067E": "\u0416\u041A\u041F",
  "\u0627\u0633\u0643\u06D5\u0631\u064A": "\u04D9\u0441\u043A\u0435\u0440\u0438",
  "\u0628\u0627\u062A\u0649\u0644": "\u0431\u0430\u0442\u044B\u043B",
  "\u0644\u064A \u0686\u064A\u0627\u06AD": "\u041B\u0438 \u0427\u044F\u04A3",
  "\u062C\u0627\u06CB \u0644\u0649\u062C\u064A": "\u0416\u0430\u0443 \u041B\u044B\u0436\u0438",
  "\u06CB\u0627\u06AD \u062D\u06CB\u0646\u064A\u06AD": "\u0423\u0430\u04A3 \u0425\u0443\u043D\u0438\u04A3",
  "\u0633\u0627\u064A \u0686\u064A": "\u0421\u0430\u0439 \u0427\u0438",
  "\u062F\u064A\u06AD \u0634\u06CB\u06D5\u0634\u064A\u0627\u06AD": "\u0414\u0438\u04A3 \u0428\u0443\u0435\u0448\u044F\u04A3",
  "\u0644\u064A \u0634\u064A": "\u041B\u0438 \u0421\u0438",
  "\u0633\u06CB\u0628\u062A\u0649\u064A\u062A\u0631": "\u0441\u0443\u0431\u0442\u0438\u0442\u0440",
  "\u0633\u06CB\u0628\u062A\u0649\u064A\u062A\u0649\u0631\u0644\u06D5\u0631\u062F\u0649": "\u0441\u0443\u0431\u0442\u0438\u0442\u0440\u043B\u0435\u0440\u0434\u0456"
};
189
// Arabic-script words classified as loanwords by exact match. Read by
// isLoanword and, during root extraction, by extractRootAndSuffix.
var LOANWORD_EXACT = /* @__PURE__ */ new Set([
  "\u06CB\u0646\u0649\u064A\u06C6\u06D5\u0631\u0633\u0649\u064A\u062A\u06D5\u062A",
  "\u06CB\u0646\u0649\u064A\u06C6\u06D5\u0631\u0633\u0649\u062A\u06D5\u062A",
  "\u06CB\u0646\u0649\u06C6\u06D5\u0631\u0633\u0649\u062A\u06D5\u062A",
  "\u06CB\u0646\u0649\u06C6\u06D5\u0631\u0633\u0649\u064A\u062A\u06D5\u062A",

  "\u0643\u0648\u0646\u0633\u062A\u0649\u064A\u062A\u06CB\u062A\u0633\u0649\u064A\u0649\u0627",
  "\u0643\u0648\u0646\u0633\u062A\u0649\u064A\u062A\u06CB\u062A\u0633\u0649\u064A\u064A\u0627",
  "\u0643\u0648\u0646\u0633\u062A\u0649\u064A\u062A\u06CB\u062A\u0633\u0649\u064A\u0627",

  "\u0633\u062A\u0627\u0646\u062A\u0633\u0649\u064A\u0649\u0627",
  "\u0633\u062A\u0627\u0646\u062A\u0633\u0649\u064A\u0627",

  "\u064A\u0646\u0641\u0648\u0631\u0645\u0627\u062A\u0633\u0649\u064A\u064A\u0627",
  "\u064A\u0646\u0641\u0648\u0631\u0645\u0627\u062A\u0633\u0649\u064A\u0627",

  "\u0645\u0627\u0634\u064A\u0646\u0627",
  "\u0645\u0627\u0634\u0649\u0646\u0627",

  "\u0627\u062A\u0648\u0645",

  "\u0686\u06D5\u0645\u067E\u0649\u064A\u0648\u0646",
  "\u0686\u06D5\u0645\u067E\u0649\u064A\u06C7\u0646",

  "\u0634\u0634\u06D5\u062A\u0643\u0627"
]);
208
// Word beginnings, each starting with U+06D5, that isLoanwordWithEPrefix
// tests with startsWith.
var LOANWORD_E_PREFIXES = [
  "\u06D5\u0643\u0631\u0627\u0646",
  "\u06D5\u0643\u0633\u067E",
  "\u06D5\u0644\u06D5\u0643\u062A\u0631",
  "\u06D5\u0646\u06D5\u0631\u06AF",
  "\u06D5\u0643\u0648\u0644\u0648\u06AF",
  "\u06D5\u062A\u0627\u067E",
  "\u06D5\u0641\u064A\u0631",
  "\u06D5\u0641\u0641\u06D5\u0643\u062A",
  "\u06D5\u0643\u0648\u0646\u0648\u0645",
  "\u06D5\u0644\u06D5\u0645\u06D5\u0646\u062A",
  "\u06D5\u0633\u062A\u0631\u0627\u062F",
  "\u06D5\u067E\u0648\u0633",
  "\u06D5\u067E\u0649\u064A\u0632\u0648\u062F"
];
223
// Word beginnings that mark a word as a loanword. The converter's
// constructor inserts every entry into a prefix trie, and isLoanword asks
// that trie whether any entry is a prefix of the word being examined.
var LOANWORD_PREFIXES = [
  "\u0627\u0631\u062D\u0649",
  "\u0627\u0631\u062D\u064A",
  "\u067E\u0631\u0648",
  "\u067E\u0631\u0648\u06AF",
  "\u067E\u0631\u0648\u062A\u0633",
  "\u0648\u0632\u0627\u0631\u0627",
  "\u0648\u0632\u0627\u0631\u06D5",
  "\u062A\u06D5\u062D\u0646\u0648",
  "\u06D5\u0643\u0648\u0646\u0648\u0645",
  "\u06D5\u0643\u0627\u0646\u0648\u0645",
  "\u06AF\u06CB\u0645\u0627\u0646",
  "\u06AF\u0648\u0645\u0627\u0646",
  "\u0631\u0627\u064A",
  "\u0631\u0627\u062F",
  "\u06C6\u0649\u064A\u062F",
  "\u06C6\u064A\u062F",
  "\u0643\u0648\u06D5\u0641\u0641",
  "\u0643\u0648\u0626\u0641\u0641",
  "\u0643\u0648\u0646\u0633\u06D5\u067E",
  "\u0643\u0648\u0646\u062A\u0633\u06D5\u067E",
  "\u0633\u0649\u064A\u0641\u0631",
  "\u062A\u0633\u0649\u064A\u0641\u0631",
  "\u06AF\u0631\u0627\u0641",
  "\u06D5\u0646\u06D5\u0631\u06AF",
  "\u06D5\u06C6\u0648\u0644",
  "\u0627\u0631\u062D\u0649\u064A\u062A",
  "\u0627\u0631\u062D\u064A\u062A",
  "\u064A\u0646\u062A\u06D5\u0631",
  "\u0643\u0648\u0646\u0633\u062A",
  "\u0633\u062A\u062A",
  "\u067E\u0648\u062F",
  "\u0643\u0627\u0646\u0633\u062A",
  "\u062F\u06D5\u0645\u0648\u0643\u0631",
  "\u062F\u06D5\u0645\u0648",
  "\u067E\u0648\u0644\u064A\u062A",
  "\u062A\u06D5\u0644\u06D5\u0641",
  "\u067E\u0648\u0644\u0649\u064A\u062A\u0633",
  "\u067E\u0648\u0644\u064A\u062A\u0633",
  "\u062F\u06D5\u067E\u0627\u0631\u062A",
  "\u062F\u0649\u067E\u0627\u0631\u062A",
  "\u0648\u067E\u06D5\u0631\u0627\u062A",
  "\u0627\u0648\u067E\u06D5\u0631\u0627\u062A",
  "\u0641\u06D5\u062F\u06D5\u0631\u0627\u0644",
  "\u0641\u0649\u062F\u06D5\u0631\u0627\u0644",
  "\u0643\u0631\u0649\u064A\u0645\u0649\u064A\u0646",
  "\u0643\u0631\u064A\u0645\u064A\u0646",
  "\u067E\u0633\u0649\u064A\u062D\u0648\u0644",
  "\u067E\u0633\u064A\u062D\u0648\u0644",
  "\u06C6\u0627\u0634\u0649\u064A\u0646\u06AF",
  "\u06C6\u0627\u0634\u064A\u0646\u06AF",
  "\u0627\u0645\u06D5\u0631\u0649\u064A\u0643",
  "\u0627\u0645\u06D5\u0631\u064A\u0643",
  "\u0627\u06C6\u062A\u0648\u0645\u0627\u062A",
  "\u06CB\u0646\u0649\u064A\u06C6\u06D5\u0631\u0633",
  "\u06CB\u0646\u064A\u06C6\u06D5\u0631\u0633",
  "\u067E\u0631\u06D5\u0632\u0649\u064A\u062F",
  "\u067E\u0631\u06D5\u0632\u064A\u062F",
  "\u0649\u064A\u0646\u06C6\u06D5\u0633\u062A",
  "\u064A\u0646\u06C6\u06D5\u0633\u062A",
  "\u0631\u06D5\u0641\u0648\u0631\u0645",
  "\u062F\u06D5\u0641\u0649\u064A\u062A\u0633",
  "\u062F\u06D5\u0641\u064A\u062A\u0633",
  "\u0643\u0648\u0648\u067E\u06D5\u0631",
  "\u0643\u0648\u067E\u06D5\u0631",
  "\u0643\u0648\u0646\u0641\u06D5\u0631\u06D5\u0646",
  "\u0649\u064A\u0646\u0641\u0644\u0649\u064A\u0627\u062A",
  "\u064A\u0646\u0641\u0644\u0649\u064A\u0627\u062A",
  "\u0649\u064A\u0646\u0641\u0644\u064A\u0627\u062A",
  "\u064A\u0646\u0641\u0644\u064A\u0627\u062A",
  "\u0643\u0644\u064A\u0645\u0627\u062A",
  "\u064A\u0646\u062A\u06D5\u0644\u0644\u06D5\u0643\u062A",
  "\u06AF\u06D5\u0648\u0633\u0627\u064A\u0627\u0633",
  "\u06AF\u06D5\u0648\u0633\u0627\u064A\u0627",
  "\u0645\u0648\u062F\u06D5\u0644",
  "\u062F\u06D5\u0645\u0648\u0643\u0631\u0627\u062A",
  "\u062A\u0633\u0648\u0646",
  "\u062A\u0633\u0627\u0646",
  "\u062A\u0633\u06D5\u0646",
  "\u062A\u0633\u06D5",
  "\u062A\u0633\u0649\u064A",
  "\u062A\u0633\u064A",
  "\u0634\u0634\u06D5",
  "\u0634\u0634\u0649",
  "\u0634\u0634\u0648",
  "\u0645\u0627\u062A\u0631",
  "\u0645\u0627\u0634",
  "\u0627\u062A\u0648\u0645",
  "\u0633\u062A\u0627\u062A",
  "\u0633\u062A\u0627\u0646\u062A",
  "\u067E\u0648\u062F\u06D5",
  "\u06AF\u0631\u0627\u0645",
  "\u0633\u06CB\u0628\u062A"
];
317
// Proper nouns: Arabic-script spelling (key) to capitalised Cyrillic form
// (value). extractRootAndSuffix checks this table after EXCEPTIONS.
var PROPER_NOUNS = {
  "\u0642\u0627\u0632\u0627\u0642\u0633\u062A\u0627\u0646": "\u049A\u0430\u0437\u0430\u049B\u0441\u0442\u0430\u043D",
  "\u0627\u0644\u0645\u0627\u062A\u0649": "\u0410\u043B\u043C\u0430\u0442\u044B",
  "\u0627\u0633\u062A\u0627\u0646\u0627": "\u0410\u0441\u0442\u0430\u043D\u0430",
  "\u0627\u0631\u0627\u0633\u062A\u0627\u0646\u0627": "\u0410\u0441\u0442\u0430\u043D\u0430"
};
323
// Arabic-script suffix inventory. isValidSuffixSequence accepts a word tail
// when it can be split, left to right, into pieces that are all members of
// this set.
//
// Every member must consist of Arabic-script code points only. One entry
// used to be "\u0649\u0645\u044B\u0437", whose last two code points are
// Cyrillic (U+044B, U+0437), so it could never match Arabic-script input.
// It is now "\u0649\u0645\u0649\u0632", in line with its neighbours
// "\u0649\u06AD\u0649\u0632" and "\u06D5\u0645\u0649\u0632".
var VALID_SUFFIXES = /* @__PURE__ */ new Set([
  "\u0644\u0627\u0631", "\u0644\u06D5\u0631",
  "\u062F\u0627\u0631", "\u062F\u06D5\u0631",
  "\u062A\u0627\u0631", "\u062A\u06D5\u0631",
  "\u0646\u0649\u06AD", "\u062F\u0649\u06AD", "\u062A\u0649\u06AD",
  "\u0639\u0627", "\u06AF\u06D5", "\u0642\u0627", "\u0643\u06D5",
  "\u0646\u0627", "\u0646\u06D5",
  "\u0627", "\u06D5",
  "\u0646\u0649", "\u062F\u0649", "\u062A\u0649",
  "\u0646",
  "\u062F\u0627", "\u062F\u06D5", "\u062A\u0627", "\u062A\u06D5",
  "\u0646\u062F\u0627", "\u0646\u062F\u06D5",
  "\u062F\u0627\u0646", "\u062F\u06D5\u0646",
  "\u062A\u0627\u0646", "\u062A\u06D5\u0646",
  "\u0646\u0627\u0646", "\u0646\u06D5\u0646",
  "\u0645\u06D5\u0646", "\u0628\u06D5\u0646", "\u067E\u06D5\u0646",
  "\u0645", "\u0645\u0649\u0632",
  "\u06AD", "\u06AD\u0649\u0632",
  "\u0633\u0649", "\u0649",
  "\u0649\u0645\u0649\u0632", // was "\u0649\u0645\u044B\u0437" (Cyrillic tail)
  "\u0649\u06AD\u0649\u0632",
  "\u06D5\u0645\u0649\u0632", "\u06D5\u06AD\u0649\u0632",
  "\u0649\u0645", "\u0649\u06AD",
  "\u06D5\u0645", "\u06D5\u06AD",
  "\u0644\u0649\u0642", "\u0644\u0649\u0643",
  "\u062F\u0649\u0642", "\u062F\u0649\u0643",
  "\u062A\u0649\u0642", "\u062A\u0649\u0643",
  "\u0633\u0649\u0632",
  "\u0634\u0649", "\u0634\u0649\u0644\u062F\u0649\u0642", "\u0634\u0649\u0644\u0649\u0643",
  "\u062F\u0627\u0639\u0649", "\u062F\u06D5\u06AF\u0649",
  "\u062A\u0627\u0639\u0649", "\u062A\u06D5\u06AF\u0649",
  "\u0646\u062F\u0627\u0639\u0649", "\u0646\u062F\u06D5\u06AF\u0649",
  "\u0639\u0627\u0646", "\u06AF\u06D5\u0646", "\u0642\u0627\u0646", "\u0643\u06D5\u0646",
  "\u0645\u0627", "\u0645\u06D5",
  "\u0628\u0627", "\u0628\u06D5",
  "\u067E\u0627", "\u067E\u06D5",
  "\u067E", "\u0649\u067E", "\u06D5\u067E",
  "\u06CB", "\u06CB\u0634\u0649",
  "\u062A\u06CB", "\u062F\u06CB",
  "\u0627\u0633\u0649\u06AD", "\u06D5\u0633\u0649\u06AD",
  "\u0627\u062F\u0649", "\u06D5\u062F\u0649", "\u064A\u062F\u0649",
  "\u0645\u0649\u0646", "\u0628\u0649\u0646", "\u067E\u0649\u0646",
  "\u0627\u0631", "\u06D5\u0631",
  "\u0645\u0627\u0633", "\u0645\u06D5\u0633",
  "\u0644\u06D5\u0646\u062F\u0649\u0631\u06CB", "\u0644\u0627\u0646\u062F\u0649\u0631\u06CB",
  "\u0644\u06D5\u0646\u06CB", "\u0644\u0627\u0646\u06CB",
  "\u0644\u06D5\u0646", "\u0644\u0627\u0646",
  "\u062F\u0649\u0631\u06CB", "\u062F\u0649\u0631"
]);
428
// Roots at which segmentCompoundWord splits a word in two when the root
// occurs somewhere other than at the very start. They are tried in array
// order and the first usable match wins.
var COMPOUND_PIVOT_ROOTS = [
  "\u0633\u0648\u0632",
  "\u062A\u0649\u0644",
  "\u0628\u0649\u0644\u0649\u0645",
  "\u062D\u0627\u0646\u0627",
  "\u0643\u0648\u0632",
  "\u0648\u0631\u0649\u0646",
  "\u0643\u06D5\u0631",
  "\u0642\u0648\u0631",
  "\u067E\u0627\u0632",
  "\u06AF\u06D5\u0631",
  "\u0634\u0649\u0644\u0649\u0643",
  "\u062A\u06C7\u0633\u062A\u0649\u0643",
  "\u0633\u0627\u06CB",
  "\u0633\u064A\u0627\u0642",
  "\u0648\u064A\u0649\u0646",
  "\u062A\u06C7\u0633\u062A\u0649\u06AF"
];
446
// Word beginnings for which getHarmonyFromArabicRoot answers "soft"
// immediately, provided the word contains neither U+0642 nor U+0639.
var IMPLICIT_SOFT_ROOTS = /* @__PURE__ */ new Set([
  "\u06C7\u0645\u0649\u062A",
  "\u062A\u06C7\u0628",
  "\u06C7\u0634\u0649\u0646",
  "\u0645\u06C7\u0645\u0643\u0649\u0646",
  "\u0628\u0649\u0631",
  "\u0628\u0649\u0632",
  "\u0633\u0649\u0632",
  "\u0643\u0649\u0645",
  "\u062A\u0649\u0644",
  "\u0643\u06C7\u0646",
  "\u062A\u06C7\u0646",
  "\u062C\u06C7\u0631",
  "\u0649\u0644\u06AF\u06D5\u0631\u0649",
  "\u0628\u06C7\u0644"
]);
462
// Letters that hasConsonantCluster counts as consonants, packed into one
// string so membership can be tested with String.prototype.includes.
var ARAB_CONSONANTS_FOR_CLUSTER = [
  "\u0628", "\u06C6", "\u06AF", "\u0639", "\u062F", "\u062C",
  "\u0632", "\u0643", "\u0642", "\u0644", "\u0645", "\u0646",
  "\u06AD", "\u067E", "\u0631", "\u0633", "\u062A", "\u0641",
  "\u062D", "\u06BE", "\u0686", "\u0634"
].join("");
463
// Two-letter sequences that hasConsonantCluster does not count towards a
// loanword-style consonant run: when the previous and current letters form
// one of these pairs, the run counter is reset to 1 instead of growing.
// Rows are grouped by the first letter of the pair.
var NATIVE_CLUSTERS = /* @__PURE__ */ new Set([
  "\u0642\u062A", "\u0642\u0633", "\u0642\u0628", "\u0642\u062C", "\u0642\u062F", "\u0642\u0632",
  "\u0644\u062F", "\u0644\u062A", "\u0644\u0642", "\u0644\u062D", "\u0644\u0633", "\u0644\u0628", "\u0644\u062C",
  "\u0646\u062F", "\u0646\u062A", "\u0646\u0642", "\u0646\u0633", "\u0646\u062C", "\u0646\u0628", "\u0646\u0632",
  "\u0633\u062A", "\u0633\u0642", "\u0633\u062F", "\u0633\u0628", "\u0633\u062C", "\u0633\u0632",
  "\u0634\u062A", "\u0634\u0642", "\u0634\u062F", "\u0634\u0633",
  "\u0631\u062A", "\u0631\u062F", "\u0631\u0642", "\u0631\u0633", "\u0631\u062C", "\u0631\u0628", "\u0631\u0632",
  "\u0632\u062F", "\u0632\u0642",
  "\u06AD\u062F", "\u06AD\u0642", "\u06AD\u062A",
  "\u0645\u062F", "\u0645\u0628", "\u0645\u0646",
  "\u064A\u062A", "\u064A\u0633", "\u064A\u0642", "\u064A\u062F",
  "\u0649\u0644", "\u0649\u0646", "\u0649\u0631", "\u0649\u0645"
]);
518
/**
 * Reports whether `key` is an own property of `map`, ignoring anything
 * inherited and any `hasOwnProperty` the object itself may define.
 *
 * @param {object} map - Object to inspect.
 * @param {string} key - Property name to look for.
 * @returns {boolean} True only for own properties.
 */
function hasKey(map, key) {
  const ownPropertyCheck = Object.prototype.hasOwnProperty;
  return Reflect.apply(ownPropertyCheck, map, [key]);
}
521
/**
 * One node of the prefix trie.
 *
 * `children` maps a single character to the next node; `isEnd` is set on the
 * node where an inserted word finishes.
 */
var PrefixTrieNode = class {
  constructor() {
    // Outgoing edges, keyed by character.
    this.children = /* @__PURE__ */ new Map();
    // Flipped to true by PrefixTrie.insert on a word's last node.
    this.isEnd = false;
  }
};
527
/**
 * Character trie that answers "does any stored word begin this string?".
 */
var PrefixTrie = class {
  constructor() {
    this.root = new PrefixTrieNode();
  }

  /**
   * Stores `word`, creating nodes along its path as needed.
   *
   * @param {string} word - Word to add; iterated by code point.
   */
  insert(word) {
    let cursor = this.root;
    for (const symbol of word) {
      if (!cursor.children.has(symbol)) {
        cursor.children.set(symbol, new PrefixTrieNode());
      }
      cursor = cursor.children.get(symbol);
    }
    cursor.isEnd = true;
  }

  /**
   * Tells whether some stored word is a prefix of `word` (or equals it).
   *
   * @param {string} word - String to test; iterated by code point.
   * @returns {boolean} True as soon as the walk reaches the end of a stored
   *   word; false when the path breaks off or `word` runs out first.
   */
  hasPrefixOf(word) {
    let cursor = this.root;
    for (const symbol of word) {
      cursor = cursor.children.get(symbol);
      if (cursor === undefined) {
        return false;
      }
      if (cursor.isEnd) {
        return true;
      }
    }
    return false;
  }
};
558
+ var ArabicToCyrillicConverter = class {
559
+ constructor(options = {}) {
560
+ this.HAMZA = "\u0674";
561
+ this.loanwordPrefixTrie = new PrefixTrie();
562
+ this.reZwnjEtc = /[\u200B-\u200F\u202A-\u202E\uFEFF]/gu;
563
+ this.reSpaces = /[ \t]+/gu;
564
+ this.reHyphens = /\s*-\s*/gu;
565
+ this.reRedundantYye1 = /ىييە/gu;
566
+ this.reRedundantYye2 = /ييە/gu;
567
+ this.reRedundantYye3 = /يية/gu;
568
+ this.reUndantYa = /ىييا/gu;
569
+ this.reArabicWords = /[\u0600-\u06FF\uFB50-\uFDFF\uFE70-\uFEFF]+(?:[-\s]+[\u0600-\u06FF\uFB50-\uFDFF\uFE70-\uFEFF]+)*/gu;
570
+ this.reCapAfterPunct = /([.。::??!!])\s*([a-zа-яәіңғүұқөһ])/giu;
571
+ this.reCapAfterQuote = /([«"'"])\s*([a-zа-яәіңғүұқөһ])/giu;
572
+ this.frontVowelsCyr = /* @__PURE__ */ new Set(["\u04D9", "\u0435", "\u0456", "\u04E9", "\u04AF", "\u044D", "\u0438"]);
573
+ this.backVowelsCyr = /* @__PURE__ */ new Set(["\u0430", "\u043E", "\u04B1", "\u044B", "\u044F", "\u044E"]);
574
+ this.arabicVowels = /* @__PURE__ */ new Set(["\u0627", "\u0649", "\u0648", "\u06C7", "\u06D5", "\u06CB", "\u064A"]);
575
+ if (options.useLm && !options.disambiguator) {
576
+ throw new Error(
577
+ "Built-in LM disambiguation is not bundled with the npm package. Provide options.disambiguator in Node.js or use the pure rule-based converter."
578
+ );
579
+ }
580
+ this.disambiguator = options.disambiguator ?? new NoopDisambiguator();
581
+ for (const prefix of LOANWORD_PREFIXES) {
582
+ this.loanwordPrefixTrie.insert(prefix);
583
+ }
584
+ }
585
+ isLoanword(word) {
586
+ return LOANWORD_EXACT.has(word) || this.loanwordPrefixTrie.hasPrefixOf(word) || this.hasConsonantCluster(word);
587
+ }
588
+ hasConsonantCluster(word) {
589
+ let stem = word;
590
+ for (let length = word.length - 1; length > Math.max(2, word.length - 6); length -= 1) {
591
+ const candidateSuffix = word.slice(length);
592
+ if (candidateSuffix && this.isValidSuffixSequence(candidateSuffix)) {
593
+ stem = word.slice(0, length);
594
+ break;
595
+ }
596
+ }
597
+ let consonantCount = 0;
598
+ const chars = [...stem];
599
+ for (let index = 0; index < chars.length; index += 1) {
600
+ const char = chars[index];
601
+ if (ARAB_CONSONANTS_FOR_CLUSTER.includes(char)) {
602
+ if (consonantCount >= 1 && index >= 1) {
603
+ const pair = `${chars[index - 1]}${char}`;
604
+ if (NATIVE_CLUSTERS.has(pair)) {
605
+ consonantCount = 1;
606
+ continue;
607
+ }
608
+ }
609
+ consonantCount += 1;
610
+ if (consonantCount >= 3) {
611
+ return true;
612
+ }
613
+ } else {
614
+ consonantCount = 0;
615
+ }
616
+ }
617
+ return false;
618
+ }
619
+ isLoanwordWithEPrefix(word) {
620
+ return word.startsWith("\u06D5") && LOANWORD_E_PREFIXES.some((prefix) => word.startsWith(prefix));
621
+ }
622
+ getCyrillicVowelState(cyrillicWord) {
623
+ const cyr = cyrillicWord.toLowerCase();
624
+ if (cyr === "\u043A\u0456\u0442\u0430\u043F") {
625
+ return true;
626
+ }
627
+ for (let index = cyr.length - 1; index >= 0; index -= 1) {
628
+ const char = cyr[index];
629
+ if (this.frontVowelsCyr.has(char)) {
630
+ return true;
631
+ }
632
+ if (this.backVowelsCyr.has(char)) {
633
+ return false;
634
+ }
635
+ }
636
+ return false;
637
+ }
638
+ isValidSuffixSequence(suffix) {
639
+ if (!suffix) {
640
+ return true;
641
+ }
642
+ const dp = Array(suffix.length + 1).fill(false);
643
+ dp[0] = true;
644
+ for (let index = 1; index <= suffix.length; index += 1) {
645
+ for (let start = Math.max(0, index - 10); start < index; start += 1) {
646
+ if (dp[start] && VALID_SUFFIXES.has(suffix.slice(start, index))) {
647
+ dp[index] = true;
648
+ break;
649
+ }
650
+ }
651
+ }
652
+ return dp[suffix.length];
653
+ }
654
+ getHarmonyFromArabicRoot(word) {
655
+ for (const root of IMPLICIT_SOFT_ROOTS) {
656
+ if (word.startsWith(root) && ![...word].some((char) => char === "\u0642" || char === "\u0639")) {
657
+ return "soft";
658
+ }
659
+ }
660
+ let softSignals = 0;
661
+ let hardSignals = 0;
662
+ let hasHamza = false;
663
+ for (const char of word) {
664
+ if (char === this.HAMZA) {
665
+ hasHamza = true;
666
+ } else if (char === "\u0643" || char === "\u06AF") {
667
+ softSignals += 5;
668
+ } else if (char === "\u0642" || char === "\u0639") {
669
+ hardSignals += 5;
670
+ } else if (char === "\u06D5") {
671
+ softSignals += 3;
672
+ } else if ("\u0627\u0648\u06C7".includes(char)) {
673
+ hardSignals += 2;
674
+ }
675
+ }
676
+ if (hasHamza || softSignals > hardSignals) {
677
+ return "soft";
678
+ }
679
+ if (hardSignals > softSignals) {
680
+ return "hard";
681
+ }
682
+ return "hard";
683
+ }
684
+ segmentCompoundWord(word) {
685
+ if (word.includes("-")) {
686
+ return word.split("-");
687
+ }
688
+ if (word.startsWith(this.HAMZA)) {
689
+ return [word];
690
+ }
691
+ for (const pivot of COMPOUND_PIVOT_ROOTS) {
692
+ if (word.includes(pivot) && !word.startsWith(pivot)) {
693
+ const pivotIndex = word.indexOf(pivot);
694
+ if (pivotIndex > 0 && word[pivotIndex - 1] !== this.HAMZA) {
695
+ return [word.slice(0, pivotIndex), word.slice(pivotIndex)];
696
+ }
697
+ }
698
+ }
699
+ const suffixPatterns = [/(تاۋلىق(?:تار)?)$/u, /(زار)$/u, /(ستان)$/u];
700
+ for (const pattern of suffixPatterns) {
701
+ const match = word.match(pattern);
702
+ if (match && match.index !== void 0 && match.index > 0) {
703
+ return [word.slice(0, match.index), word.slice(match.index)];
704
+ }
705
+ }
706
+ return [word];
707
+ }
708
+ extractRootAndSuffix(word) {
709
+ if (!word) {
710
+ return { matchType: null, base: null, suffix: word };
711
+ }
712
+ for (let length = word.length; length > 1; length -= 1) {
713
+ const prefix = word.slice(0, length);
714
+ const suffix = word.slice(length);
715
+ if (!this.isValidSuffixSequence(suffix)) {
716
+ continue;
717
+ }
718
+ if (hasKey(EXCEPTIONS, prefix)) {
719
+ return { matchType: "exception", base: EXCEPTIONS[prefix], suffix };
720
+ }
721
+ if (hasKey(PROPER_NOUNS, prefix)) {
722
+ return { matchType: "proper", base: PROPER_NOUNS[prefix], suffix };
723
+ }
724
+ if (LOANWORD_EXACT.has(prefix)) {
725
+ return { matchType: "loanword", base: prefix, suffix };
726
+ }
727
+ }
728
+ for (let length = word.length - 1; length > 1; length -= 1) {
729
+ const prefix = word.slice(0, length);
730
+ const suffix = word.slice(length);
731
+ if (![...prefix].some((char) => this.arabicVowels.has(char))) {
732
+ continue;
733
+ }
734
+ if (this.isValidSuffixSequence(suffix)) {
735
+ return { matchType: "anonymous", base: prefix, suffix };
736
+ }
737
+ }
738
+ return { matchType: null, base: null, suffix: word };
739
+ }
740
+ convertSuffixOnly(suffix, isFront) {
741
+ if (!suffix) {
742
+ return "";
743
+ }
744
+ const result = [];
745
+ let index = 0;
746
+ while (index < suffix.length) {
747
+ const char = suffix[index];
748
+ const pair = suffix.slice(index, index + 2);
749
+ if (pair === "\u0649\u064A") {
750
+ if (suffix[index + 2] === "\u0627") {
751
+ result.push("\u0438\u044F");
752
+ index += 3;
753
+ } else {
754
+ result.push("\u0438");
755
+ index += 2;
756
+ }
757
+ continue;
758
+ }
759
+ if (char === "\u064A" && index + 1 < suffix.length) {
760
+ const nextChar = suffix[index + 1];
761
+ if (nextChar === "\u0627") {
762
+ result.push("\u044F");
763
+ index += 2;
764
+ continue;
765
+ }
766
+ if (nextChar === "\u06CB") {
767
+ result.push("\u044E");
768
+ index += 2;
769
+ continue;
770
+ }
771
+ }
772
+ if (pair === "\u0634\u0634") {
773
+ result.push("\u0449");
774
+ index += 2;
775
+ continue;
776
+ }
777
+ if (pair === "\u062A\u0633") {
778
+ result.push("\u0446");
779
+ index += 2;
780
+ continue;
781
+ }
782
+ if (char === this.HAMZA && index + 1 < suffix.length) {
783
+ const nextChar = suffix[index + 1];
784
+ if (nextChar === "\u0627") {
785
+ result.push("\u04D9");
786
+ index += 2;
787
+ continue;
788
+ }
789
+ if (nextChar === "\u0649") {
790
+ result.push("\u0456");
791
+ index += 2;
792
+ continue;
793
+ }
794
+ if (nextChar === "\u0648") {
795
+ result.push("\u04E9");
796
+ index += 2;
797
+ continue;
798
+ }
799
+ if (nextChar === "\u06C7") {
800
+ result.push("\u04AF");
801
+ index += 2;
802
+ continue;
803
+ }
804
+ index += 1;
805
+ continue;
806
+ }
807
+ if (hasKey(CONSONANTS, char)) {
808
+ result.push(CONSONANTS[char]);
809
+ index += 1;
810
+ continue;
811
+ }
812
+ if (Object.prototype.hasOwnProperty.call(VOWEL_MAP, char)) {
813
+ const vowel = VOWEL_MAP[char];
814
+ result.push(typeof vowel === "string" ? vowel : isFront ? vowel.f : vowel.b);
815
+ index += 1;
816
+ continue;
817
+ }
818
+ if (char === "\u064A") {
819
+ result.push(index === 0 ? "\u0439" : "\u0627\u0649\u0648\u06C7\u06D5\u06CB".includes(suffix[index - 1]) ? "\u0439" : "\u0438");
820
+ index += 1;
821
+ continue;
822
+ }
823
+ result.push(char);
824
+ index += 1;
825
+ }
826
+ return result.join("");
827
+ }
828
+ convertWord(word) {
829
+ if (!word) {
830
+ return word;
831
+ }
832
+ if (hasKey(EXCEPTIONS, word)) {
833
+ return EXCEPTIONS[word];
834
+ }
835
+ if (hasKey(PROPER_NOUNS, word)) {
836
+ return PROPER_NOUNS[word];
837
+ }
838
+ if (LOANWORD_EXACT.has(word)) {
839
+ return this.convertWordInternal(word);
840
+ }
841
+ const wholeWordIsFront = this.getHarmonyFromArabicRoot(word) === "soft";
842
+ const forcedState = wholeWordIsFront ? "soft" : "hard";
843
+ const { matchType, base, suffix } = this.extractRootAndSuffix(word);
844
+ if ((matchType === "exception" || matchType === "proper") && base) {
845
+ return `${base}${this.convertSuffixOnly(suffix, this.getCyrillicVowelState(base))}`;
846
+ }
847
+ if (matchType === "loanword" && base) {
848
+ const baseCyr = this.convertWordInternal(base);
849
+ return `${baseCyr}${this.convertSuffixOnly(suffix, this.getCyrillicVowelState(baseCyr))}`;
850
+ }
851
+ if (matchType === "anonymous" && base) {
852
+ if (this.isLoanword(base)) {
853
+ const baseCyr2 = this.convertWordInternal(base);
854
+ return `${baseCyr2}${this.convertSuffixOnly(suffix, this.getCyrillicVowelState(baseCyr2))}`;
855
+ }
856
+ const baseCyr = this.convertWordInternal(base, forcedState);
857
+ return `${baseCyr}${this.convertSuffixOnly(suffix, wholeWordIsFront)}`;
858
+ }
859
+ return this.convertWordInternal(word, forcedState);
860
+ }
861
+ convertWordInternal(word, forcedState) {
862
+ const segments = this.segmentCompoundWord(word);
863
+ if (segments.length > 1) {
864
+ const convertedSegments = segments.map((segment) => this.convertWord(segment));
865
+ return word.includes("-") ? convertedSegments.join("-") : convertedSegments.join("");
866
+ }
867
+ const isLoanwordE = this.isLoanwordWithEPrefix(word);
868
+ const isLoanword = this.isLoanword(word);
869
+ let currentState;
870
+ if (isLoanword) {
871
+ currentState = word.includes(this.HAMZA) ? "soft" : "hard";
872
+ } else if (forcedState) {
873
+ currentState = forcedState;
874
+ } else {
875
+ currentState = this.getHarmonyFromArabicRoot(word);
876
+ }
877
+ if (word === "\u062A\u0649\u064A\u0649\u0633") {
878
+ currentState = "soft";
879
+ }
880
+ const result = [];
881
+ let index = 0;
882
+ let isFirstChar = true;
883
+ while (index < word.length) {
884
+ const char = word[index];
885
+ if (isFirstChar && char === "\u06D5" && isLoanwordE) {
886
+ result.push("\u044D");
887
+ index += 1;
888
+ isFirstChar = false;
889
+ continue;
890
+ }
891
+ isFirstChar = false;
892
+ if (!isLoanword) {
893
+ if (char === "\u0642" || char === "\u0639") {
894
+ currentState = "hard";
895
+ } else if (char === "\u0643" || char === "\u06AF" || char === this.HAMZA) {
896
+ currentState = "soft";
897
+ }
898
+ } else if (char === this.HAMZA) {
899
+ currentState = "soft";
900
+ }
901
+ const twoChars = word.slice(index, index + 2);
902
+ const threeChars = word.slice(index, index + 3);
903
+ const fourChars = word.slice(index, index + 4);
904
+ if (twoChars === "\u0649\u064A" || twoChars === "\u064A\u064A") {
905
+ if (fourChars === "\u0649\u064A\u064A\u0627" || fourChars === "\u064A\u064A\u064A\u0627") {
906
+ result.push("\u0438\u044F");
907
+ index += 4;
908
+ continue;
909
+ }
910
+ if (threeChars === "\u0649\u064A\u0649\u0627" || threeChars === "\u064A\u064A\u0649\u0627" || word[index + 2] === "\u0627") {
911
+ result.push("\u0438\u044F");
912
+ index += 3;
913
+ continue;
914
+ }
915
+ result.push("\u0438");
916
+ index += 2;
917
+ continue;
918
+ }
919
+ if (char === "\u064A" && index + 1 < word.length) {
920
+ const nextChar = word[index + 1];
921
+ if (nextChar === "\u0627") {
922
+ if (isLoanword && index > 0) {
923
+ const prevChar = word[index - 1];
924
+ if (!`\u0627\u0649\u0648\u06C7\u06D5\u06CB${this.HAMZA}`.includes(prevChar)) {
925
+ result.push("\u0438\u044F");
926
+ index += 2;
927
+ continue;
928
+ }
929
+ }
930
+ result.push("\u044F");
931
+ index += 2;
932
+ continue;
933
+ }
934
+ if (nextChar === "\u06CB") {
935
+ result.push("\u044E");
936
+ index += 2;
937
+ continue;
938
+ }
939
+ if (nextChar === "\u0648" && index > 0 && "\u0627\u0649\u0648\u06C7\u06D5\u06CB".includes(word[index - 1])) {
940
+ result.push("\u0439\u043E");
941
+ index += 2;
942
+ continue;
943
+ }
944
+ }
945
+ if (twoChars === "\u0634\u0634") {
946
+ result.push("\u0449");
947
+ index += 2;
948
+ continue;
949
+ }
950
+ if (twoChars === "\u062A\u0633") {
951
+ if ((isLoanword || isLoanwordE) && index + 2 < word.length) {
952
+ result.push("\u0446");
953
+ index += 2;
954
+ continue;
955
+ }
956
+ }
957
+ if (isLoanword && word.slice(index, index + 6) === "\u067E\u0648\u062F\u06D5\u0632\u062F") {
958
+ result.push("\u043F\u043E\u0434\u044A\u0435\u0437\u0434");
959
+ index += 6;
960
+ continue;
961
+ }
962
+ if (char === this.HAMZA && index + 1 < word.length) {
963
+ const nextChar = word[index + 1];
964
+ if (nextChar === "\u0627") {
965
+ result.push("\u04D9");
966
+ index += 2;
967
+ continue;
968
+ }
969
+ if (nextChar === "\u0649") {
970
+ result.push("\u0456");
971
+ index += 2;
972
+ continue;
973
+ }
974
+ if (nextChar === "\u0648") {
975
+ result.push("\u04E9");
976
+ index += 2;
977
+ continue;
978
+ }
979
+ if (nextChar === "\u06C7") {
980
+ result.push("\u04AF");
981
+ index += 2;
982
+ continue;
983
+ }
984
+ index += 1;
985
+ continue;
986
+ }
987
+ if (hasKey(CONSONANTS, char)) {
988
+ result.push(CONSONANTS[char]);
989
+ index += 1;
990
+ continue;
991
+ }
992
+ if (Object.prototype.hasOwnProperty.call(VOWEL_MAP, char)) {
993
+ const vowel = VOWEL_MAP[char];
994
+ result.push(typeof vowel === "string" ? vowel : currentState === "soft" ? vowel.f : vowel.b);
995
+ index += 1;
996
+ continue;
997
+ }
998
+ if (char === "\u064A") {
999
+ if (index === 0) {
1000
+ if (isLoanword) {
1001
+ result.push("\u0438");
1002
+ } else if (index + 1 < word.length && this.arabicVowels.has(word[index + 1])) {
1003
+ result.push("\u0439");
1004
+ } else {
1005
+ result.push("\u0438");
1006
+ }
1007
+ } else {
1008
+ const prevChar = word[index - 1];
1009
+ result.push(["\u0627", "\u0649", "\u0648", "\u06C7", "\u06D5", "\u06CB"].includes(prevChar) ? "\u0439" : "\u0438");
1010
+ }
1011
+ index += 1;
1012
+ continue;
1013
+ }
1014
+ if (isLoanword && char === "\u067E" && word.slice(index, index + 3) === "\u067E\u0648\u062F") {
1015
+ result.push("\u043F\u043E\u0434");
1016
+ index += 3;
1017
+ if (index < word.length && word[index] === "\u06D5") {
1018
+ result.push("\u044A\u0435");
1019
+ index += 1;
1020
+ }
1021
+ continue;
1022
+ }
1023
+ result.push(char);
1024
+ index += 1;
1025
+ }
1026
+ return result.join("");
1027
+ }
1028
+ preprocess(text) {
1029
+ let next = text.replace(/ـ/gu, "-").replace(/\u0640/gu, "-");
1030
+ next = next.replace(this.reZwnjEtc, "");
1031
+ next = next.replace(/ء/gu, this.HAMZA);
1032
+ next = next.replace(/أ/gu, `${this.HAMZA}\u0627`);
1033
+ next = next.replace(/ؤ/gu, `${this.HAMZA}\u0648`);
1034
+ next = next.replace(/ئ/gu, `${this.HAMZA}\u0649`);
1035
+ next = next.replace(/ٵ/gu, `${this.HAMZA}\u0627`);
1036
+ next = next.replace(/ٶ/gu, `${this.HAMZA}\u0648`);
1037
+ next = next.replace(/ٷ/gu, `${this.HAMZA}\u06C7`);
1038
+ next = next.replace(/ٸ/gu, `${this.HAMZA}\u0649`);
1039
+ next = next.replace(/\u06CC/gu, "\u0649");
1040
+ next = next.replace(/،/gu, ",").replace(/؛/gu, ";").replace(/؟/gu, "?").replace(/۔/gu, ".");
1041
+ next = next.replace(this.reSpaces, " ");
1042
+ next = next.replace(this.reHyphens, "-");
1043
+ next = next.replace(this.reRedundantYye1, "\u06D5");
1044
+ next = next.replace(this.reRedundantYye2, "\u06D5");
1045
+ next = next.replace(this.reRedundantYye3, "\u06D5");
1046
+ next = next.replace(this.reUndantYa, "\u064A\u0627");
1047
+ return next;
1048
+ }
1049
+ postProcessContextFix(rawTokens) {
1050
+ return rawTokens.map(([, cyr]) => cyr);
1051
+ }
1052
+ async postProcessContextFixAsync(rawTokens, contextSentence) {
1053
+ return this.disambiguator.disambiguate(rawTokens, contextSentence);
1054
+ }
1055
+ convertPhrase(phrase) {
1056
+ const words = phrase.split(" ");
1057
+ if (words.length <= 1) {
1058
+ return this.convertWord(phrase);
1059
+ }
1060
+ const rawTokens = words.map((word) => [word, hasKey(EXCEPTIONS, word) ? EXCEPTIONS[word] : this.convertWord(word)]);
1061
+ return this.postProcessContextFix(rawTokens).join(" ");
1062
+ }
1063
+ async convertPhraseAsync(phrase) {
1064
+ const words = phrase.split(" ");
1065
+ if (words.length <= 1) {
1066
+ return this.convertWord(phrase);
1067
+ }
1068
+ const rawTokens = words.map((word) => [word, hasKey(EXCEPTIONS, word) ? EXCEPTIONS[word] : this.convertWord(word)]);
1069
+ const fixed = await this.postProcessContextFixAsync(rawTokens, phrase);
1070
+ return fixed.join(" ");
1071
+ }
1072
+ convert(text) {
1073
+ const normalized = this.preprocess(text);
1074
+ const lines = normalized.split("\n");
1075
+ const convertedLines = [];
1076
+ for (const line of lines) {
1077
+ if (!line.trim()) {
1078
+ convertedLines.push("");
1079
+ continue;
1080
+ }
1081
+ let result = line.replace(this.reArabicWords, (phrase) => phrase.includes(" ") ? this.convertPhrase(phrase) : this.convertWord(phrase));
1082
+ if (result.length > 0) {
1083
+ result = result.replace(/[a-zа-яәіңғүұқөһ]/iu, (match) => match.toUpperCase());
1084
+ }
1085
+ result = result.replace(this.reCapAfterPunct, (_match, punctuation, char) => `${punctuation} ${char.toUpperCase()}`);
1086
+ result = result.replace(this.reCapAfterQuote, (_match, quote, char) => `${quote}${char.toUpperCase()}`);
1087
+ convertedLines.push(result);
1088
+ }
1089
+ return convertedLines.join("\n");
1090
+ }
1091
+ async convertAsync(text) {
1092
+ const normalized = this.preprocess(text);
1093
+ const lines = normalized.split("\n");
1094
+ const convertedLines = [];
1095
+ for (const line of lines) {
1096
+ if (!line.trim()) {
1097
+ convertedLines.push("");
1098
+ continue;
1099
+ }
1100
+ const matches = Array.from(line.matchAll(this.reArabicWords));
1101
+ let result = "";
1102
+ let lastIndex = 0;
1103
+ for (const match of matches) {
1104
+ const phrase = match[0];
1105
+ const matchIndex = match.index ?? 0;
1106
+ result += line.slice(lastIndex, matchIndex);
1107
+ result += phrase.includes(" ") ? await this.convertPhraseAsync(phrase) : this.convertWord(phrase);
1108
+ lastIndex = matchIndex + phrase.length;
1109
+ }
1110
+ result += line.slice(lastIndex);
1111
+ if (result.length > 0) {
1112
+ result = result.replace(/[a-zа-яәіңғүұқөһ]/iu, (char) => char.toUpperCase());
1113
+ }
1114
+ result = result.replace(this.reCapAfterPunct, (_match, punctuation, char) => `${punctuation} ${char.toUpperCase()}`);
1115
+ result = result.replace(this.reCapAfterQuote, (_match, quote, char) => `${quote}${char.toUpperCase()}`);
1116
+ convertedLines.push(result);
1117
+ }
1118
+ return convertedLines.join("\n");
1119
+ }
1120
+ };
1121
/**
 * One-shot helper: builds an ArabicToCyrillicConverter with `options` and
 * converts `text` synchronously.
 *
 * @param {string} text - Arabic-script text.
 * @param {object} [options] - Passed unchanged to the converter constructor.
 * @returns {string} Converted text.
 */
function arb2syr(text, options) {
  const converter = new ArabicToCyrillicConverter(options);
  return converter.convert(text);
}
1124
/**
 * One-shot async helper: builds an ArabicToCyrillicConverter with `options`
 * and converts `text` through its async path.
 *
 * @param {string} text - Arabic-script text.
 * @param {object} [options] - Passed unchanged to the converter constructor.
 * @returns {Promise<string>} Converted text.
 */
async function arb2syrAsync(text, options) {
  const converter = new ArabicToCyrillicConverter(options);
  return converter.convertAsync(text);
}
1127
+
1128
+ // src/lexicon.ts
1129
// Built-in roots that are inserted into the trie as non-loanwords.
// The trailing comment on each line is the decoded form of the escaped literal.
var DEFAULT_NATIVE_ROOTS = [
  "\u0431\u0430\u0441\u043F\u0430", // баспа
  "\u0441\u04E9\u0437", // сөз
  "\u04E9\u043D\u0435\u0440", // өнер
  "\u043A\u04D9\u0441\u0456\u043F", // кәсіп
  "\u0435\u043C", // ем
  "\u0445\u0430\u043D\u0430", // хана
  "\u0435\u04A3\u0431\u0435\u043A", // еңбек
  "\u049B\u043E\u0440", // қор
  "\u0441\u0443\u0440\u0435\u0442", // сурет
  "\u0448\u0456", // ші
  "\u043E\u0442\u0430\u043D", // отан
  "\u0430\u0434\u0430\u043C", // адам
  "\u04E9\u0437\u0435\u043D", // өзен
  "\u04E9\u043C\u0456\u0440", // өмір
  "\u0436\u0430\u0443\u0430\u043F", // жауап
  "\u0436\u04B1\u043C\u044B\u0441" // жұмыс
];
1147
// Built-in roots that are inserted into the trie flagged as loanwords.
// The trailing comment on each line is the decoded form of the escaped literal.
var DEFAULT_LOAN_ROOTS = [
  "\u043C\u0430\u0448\u0438\u043D\u0430", // машина
  "\u0430\u0442\u043E\u043C", // атом
  "\u0443\u043D\u0438\u0432\u0435\u0440\u0441\u0438\u0442\u0435\u0442", // университет
  "\u043A\u0430\u0440\u0442\u0430", // карта
  "\u0431\u044E\u0434\u0436\u0435\u0442", // бюджет
  "\u0442\u0435\u043B\u0435\u0444\u043E\u043D", // телефон
  "\u0438\u043D\u0442\u0435\u0440\u043D\u0435\u0442", // интернет
  "\u0447\u0435\u043C\u043F\u0438\u043E\u043D", // чемпион
  "\u0446\u0435\u043C\u0435\u043D\u0442", // цемент
  "\u043A\u043E\u043C\u043F\u044C\u044E\u0442\u0435\u0440", // компьютер
  "\u0442\u0435\u0445\u043D\u043E\u043B\u043E\u0433\u0438\u044F", // технология
  "\u0440\u0435\u0441\u0443\u0440\u0441", // ресурс
  "\u043A\u043E\u043D\u0441\u0442\u0438\u0442\u0443\u0446\u0438\u044F", // конституция
  "\u0434\u0435\u043C\u043E\u043A\u0440\u0430\u0442\u0438\u044F", // демократия
  "\u0441\u0442\u0430\u043D\u0446\u0438\u044F", // станция
  "\u0434\u0438\u0430\u0433\u043D\u043E\u0437", // диагноз
  "\u0446\u0438\u0440\u043A", // цирк
  "\u0449\u0435\u0442\u043A\u0430", // щетка
  "\u043F\u043E\u0434\u044A\u0435\u0437\u0434" // подъезд
];
1168
+
1169
+ // src/cyr2arb.ts
1170
// Whole-word overrides (lower-case Cyrillic → Arabic script), checked first.
var PROPER_NOUNS2 = {
  "\u0436\u0456\u04E9": "\u062C\u0649\u064A\u0649\u0648"
};
// Whole-word overrides for common words, checked after PROPER_NOUNS2.
var COMMON_WORDS = {
  "\u0442\u0438\u0456\u0441": "\u062A\u0649\u064A\u0649\u0633",
  "\u0442\u0438\u0456\u0441\u0442\u0456": "\u062A\u0649\u064A\u0649\u0633\u062A\u0649",
  "\u0431\u0456\u0440\u0430\u049B": "\u0628\u0649\u0631\u0627\u0642",
  "\u049B\u043E\u044F\u043D": "\u0642\u0648\u0649\u064A\u0627\u0646",
  "\u04AF\u0448\u0456\u043D": "\u06C7\u0634\u0649\u0646"
};
// Single Cyrillic consonant → single Arabic-script letter.
var CONSONANTS2 = {
  "\u0431": "\u0628", // б
  "\u0432": "\u06C6", // в
  "\u0433": "\u06AF", // г
  "\u0493": "\u0639", // ғ
  "\u0434": "\u062F", // д
  "\u0436": "\u062C", // ж
  "\u0437": "\u0632", // з
  "\u0439": "\u064A", // й
  "\u043A": "\u0643", // к
  "\u049B": "\u0642", // қ
  "\u043B": "\u0644", // л
  "\u043C": "\u0645", // м
  "\u043D": "\u0646", // н
  "\u04A3": "\u06AD", // ң
  "\u043F": "\u067E", // п
  "\u0440": "\u0631", // р
  "\u0441": "\u0633", // с
  "\u0442": "\u062A", // т
  "\u0444": "\u0641", // ф
  "\u0445": "\u062D", // х
  "\u04BB": "\u06BE", // һ
  "\u0447": "\u0686", // ч
  "\u0448": "\u0634" // ш
};
// Single Cyrillic vowel → Arabic-script letter; front/back pairs share a target.
var VOWELS = {
  "\u0430": "\u0627", // а
  "\u04D9": "\u0627", // ә
  "\u0435": "\u06D5", // е
  "\u043E": "\u0648", // о
  "\u04E9": "\u0648", // ө
  "\u04B1": "\u06C7", // ұ
  "\u04AF": "\u06C7", // ү
  "\u044B": "\u0649", // ы
  "\u0456": "\u0649", // і
  "\u044D": "\u06D5" // э
};
// Cyrillic letters that expand to two Arabic-script letters.
var COMBINATIONS = {
  "\u0446": "\u062A\u0633", // ц
  "\u0449": "\u0634\u0634", // щ
  "\u0451": "\u064A\u0648" // ё
};
// Vowel classes used for harmony detection (a Set built from a string holds its characters).
var FRONT_VOWELS = /* @__PURE__ */ new Set("\u04D9\u0435\u0456\u04E9\u04AF");
var BACK_VOWELS = /* @__PURE__ */ new Set("\u0430\u043E\u04B1\u044B\u0443");
// Words beginning with "и" that are treated as front-harmony native words.
var I_INITIAL_NATIVE_WORDS = /* @__PURE__ */ new Set([
  "\u0438\u0456\u0441",
  "\u0438\u043D\u0435",
  "\u0438\u0442",
  "\u0438\u044E",
  "\u0438\u0456\u0440",
  "\u0438\u0456\u043B",
  "\u0438\u0440\u0456",
  "\u0438\u044B\u049B",
  "\u0438\u043D"
]);
// Punctuation substitutions; ".", ":" and "!" map to themselves.
var PUNCTUATION = {
  ",": "\u060C",
  ".": ".",
  ":": ":",
  ";": "\u061B",
  "?": "\u061F",
  "!": "!"
};
1233
/**
 * One node of the word trie: child links keyed by character plus the
 * end-of-word metadata written by `KazakhTrie.insert`.
 */
var TrieNode = class {
  constructor() {
    Object.assign(this, {
      children: /* @__PURE__ */ new Map(),
      isEndOfWord: false,
      isLoanword: false,
      harmony: null
    });
  }
};
1241
/**
 * Character trie over lower-cased words. Each terminal node records whether
 * the word was registered as a loanword and its harmony class.
 */
var KazakhTrie = class {
  constructor() {
    this.root = new TrieNode();
  }
  /**
   * Classifies a word as "front" or "back".
   * Priority: any к/г → "front"; any қ/ғ → "back"; otherwise the first
   * vowel found in FRONT_VOWELS / BACK_VOWELS decides; default "back".
   *
   * @param {string} word - Cyrillic word (any case).
   * @returns {"front"|"back"} Harmony class.
   */
  determineHarmony(word) {
    const lowered = word.toLowerCase();
    if (lowered.includes("\u043A") || lowered.includes("\u0433")) {
      return "front";
    }
    if (lowered.includes("\u049B") || lowered.includes("\u0493")) {
      return "back";
    }
    for (const letter of lowered) {
      if (FRONT_VOWELS.has(letter)) {
        return "front";
      }
      if (BACK_VOWELS.has(letter)) {
        return "back";
      }
    }
    return "back";
  }
  /**
   * Adds a word (lower-cased) to the trie, creating nodes as needed, and
   * stamps the terminal node with its loanword flag and harmony.
   *
   * @param {string} word - Word to insert.
   * @param {boolean} [isLoanword=false] - Loanword flag stored on the terminal node.
   */
  insert(word, isLoanword = false) {
    const lowered = word.toLowerCase();
    let cursor = this.root;
    for (const letter of lowered) {
      if (!cursor.children.get(letter)) {
        cursor.children.set(letter, new TrieNode());
      }
      cursor = cursor.children.get(letter);
    }
    cursor.isEndOfWord = true;
    cursor.isLoanword = isLoanword;
    cursor.harmony = this.determineHarmony(lowered);
  }
  /**
   * Bulk-loads native words first, then loanwords.
   *
   * @param {Iterable<string>} nativeWords - Inserted with the loanword flag off.
   * @param {Iterable<string>} loanWords - Inserted with the loanword flag on.
   */
  loadDictionary(nativeWords, loanWords) {
    for (const entry of nativeWords) {
      this.insert(entry, false);
    }
    for (const entry of loanWords) {
      this.insert(entry, true);
    }
  }
};
1287
/**
 * Greedy splitter that cuts a word into the longest dictionary entries
 * found in a trie, left to right.
 */
var CompoundSplitter = class {
  constructor(trie) {
    this.trie = trie;
  }
  /**
   * Splits a word (lower-cased) into `[segment, isLoanword]` pairs.
   * At each position the longest trie entry wins. When no entry starts at
   * the current position, the whole remainder becomes the final segment and
   * its loanword flag comes from `fallbackIsLoanword`.
   *
   * @param {string} word - Word to split.
   * @returns {Array<[string, boolean]>} Segments in order; empty for "".
   */
  splitWord(word) {
    const lowered = word.toLowerCase();
    const pieces = [];
    let offset = 0;
    while (offset < lowered.length) {
      let bestLength = 0;
      let bestIsLoan = false;
      let node = this.trie.root;
      let cursor = offset;
      while (cursor < lowered.length) {
        node = node.children.get(lowered[cursor]);
        if (!node) {
          break;
        }
        cursor += 1;
        if (node.isEndOfWord) {
          bestLength = cursor - offset;
          bestIsLoan = node.isLoanword;
        }
      }
      if (bestLength === 0) {
        const tail = lowered.slice(offset);
        pieces.push([tail, this.fallbackIsLoanword(tail)]);
        return pieces;
      }
      pieces.push([lowered.slice(offset, offset + bestLength), bestIsLoan]);
      offset += bestLength;
    }
    return pieces;
  }
  /**
   * Heuristic for words outside the dictionary: true when the word contains
   * any of ф, в, ц, ч, щ (case-insensitive).
   *
   * @param {string} word - Word to inspect.
   * @returns {boolean} True when one of those letters occurs.
   */
  fallbackIsLoanword(word) {
    return /[\u0444\u0432\u0446\u0447\u0449]/.test(word.toLowerCase());
  }
};
1326
/**
 * Own-property test that ignores inherited keys such as `toString`.
 *
 * @param {object} map - Lookup table.
 * @param {string} key - Candidate key.
 * @returns {boolean} True when `key` is an own property of `map`.
 */
function hasKey2(map, key) {
  const { hasOwnProperty } = Object.prototype;
  return hasOwnProperty.call(map, key);
}
1329
+ var CyrillicToArabicConverter = class {
1330
+ constructor(options = {}) {
1331
+ this.HAMZA = "\u0674";
1332
+ this.trie = new KazakhTrie();
1333
+ this.splitter = new CompoundSplitter(this.trie);
1334
+ const nativeRoots = [...DEFAULT_NATIVE_ROOTS, ...options.lexicon?.nativeRoots ?? []];
1335
+ const loanRoots = [...DEFAULT_LOAN_ROOTS, ...options.lexicon?.loanRoots ?? []];
1336
+ this.trie.loadDictionary(nativeRoots, loanRoots);
1337
+ }
1338
+ getInitialHarmony(word) {
1339
+ const wordLower = word.toLowerCase();
1340
+ if ([...wordLower].some((char) => char === "\u049B" || char === "\u0493")) {
1341
+ return "back";
1342
+ }
1343
+ if ([...wordLower].some((char) => char === "\u043A" || char === "\u0433")) {
1344
+ return "front";
1345
+ }
1346
+ if (I_INITIAL_NATIVE_WORDS.has(wordLower)) {
1347
+ return "front";
1348
+ }
1349
+ for (const char of wordLower) {
1350
+ if (FRONT_VOWELS.has(char)) {
1351
+ return "front";
1352
+ }
1353
+ if (BACK_VOWELS.has(char)) {
1354
+ return "back";
1355
+ }
1356
+ }
1357
+ return "back";
1358
+ }
1359
+ applyHamzaRule(arabicResult, firstSegText, firstSegIsLoan, isSuffix = false) {
1360
+ if (arabicResult.includes(this.HAMZA) || !firstSegText || isSuffix) {
1361
+ return arabicResult;
1362
+ }
1363
+ const firstSegLower = firstSegText.toLowerCase();
1364
+ if (firstSegIsLoan || this.splitter.fallbackIsLoanword(firstSegLower)) {
1365
+ return arabicResult;
1366
+ }
1367
+ if ([...I_INITIAL_NATIVE_WORDS].some((word) => firstSegLower.startsWith(word))) {
1368
+ return arabicResult.startsWith(this.HAMZA) ? arabicResult : `${this.HAMZA}${arabicResult}`;
1369
+ }
1370
+ if ([...firstSegLower].some((char) => char === "\u043A" || char === "\u0433")) {
1371
+ return arabicResult;
1372
+ }
1373
+ const eHamzaWhitelist = /* @__PURE__ */ new Set(["\u04E9\u0437\u0435\u043D", "\u04E9\u0442\u0435", "\u04E9\u043D\u0435\u0440", "\u0438\u043D\u0435", "\u04D9\u043B\u0435\u043C"]);
1374
+ if (firstSegLower.includes("\u0435") && !eHamzaWhitelist.has(firstSegLower)) {
1375
+ return arabicResult;
1376
+ }
1377
+ if (this.getInitialHarmony(firstSegLower) === "front") {
1378
+ return arabicResult.startsWith(this.HAMZA) ? arabicResult : `${this.HAMZA}${arabicResult}`;
1379
+ }
1380
+ return arabicResult;
1381
+ }
1382
+ convertWord(word, isSuffix = false) {
1383
+ if (!word) {
1384
+ return word;
1385
+ }
1386
+ const wordLower = word.toLowerCase();
1387
+ if (hasKey2(PROPER_NOUNS2, wordLower)) {
1388
+ return PROPER_NOUNS2[wordLower];
1389
+ }
1390
+ if (hasKey2(COMMON_WORDS, wordLower)) {
1391
+ return COMMON_WORDS[wordLower];
1392
+ }
1393
+ const segments = this.splitter.splitWord(wordLower);
1394
+ const isLoanFlags = [];
1395
+ const isFrontFlags = [];
1396
+ const isHardLoanFlags = [];
1397
+ for (const [segText, isLoan] of segments) {
1398
+ let isHardLoan = false;
1399
+ let segFront = false;
1400
+ if (isLoan) {
1401
+ isHardLoan = [...segText].some((char) => "\u0430\u043E\u04B1\u044B".includes(char)) || ![...segText].some((char) => "\u04D9\u0435\u0456\u04E9\u04AF".includes(char));
1402
+ segFront = !isHardLoan;
1403
+ } else {
1404
+ segFront = this.getInitialHarmony(segText) === "front";
1405
+ }
1406
+ isLoanFlags.push(...Array(segText.length).fill(isLoan));
1407
+ isFrontFlags.push(...Array(segText.length).fill(segFront));
1408
+ isHardLoanFlags.push(...Array(segText.length).fill(isHardLoan));
1409
+ }
1410
+ const result = [];
1411
+ for (let index = 0; index < wordLower.length; index += 1) {
1412
+ const char = wordLower[index];
1413
+ const prevChar = index > 0 ? wordLower[index - 1] : "";
1414
+ const isLoanword = isLoanFlags[index];
1415
+ const isHardLoan = isHardLoanFlags[index];
1416
+ if (char === "\u044C" || char === "\u044A") {
1417
+ continue;
1418
+ }
1419
+ if (char === "\u0443") {
1420
+ result.push("\u06CB");
1421
+ continue;
1422
+ }
1423
+ if (char === "\u0438") {
1424
+ if (wordLower.includes("\u043C\u0430\u0448\u0438\u043D\u0430")) {
1425
+ result.push("\u064A");
1426
+ } else if (wordLower.includes("\u043A\u043E\u043D\u0441\u0442\u0438\u0442\u0443\u0446\u0438\u044F") && index < wordLower.indexOf("\u0446")) {
1427
+ result.push("\u064A");
1428
+ } else if (isLoanword) {
1429
+ result.push(isHardLoan ? "\u0649\u064A" : "\u064A");
1430
+ } else if (index === 0) {
1431
+ result.push("\u064A");
1432
+ } else {
1433
+ result.push("\u0649\u064A");
1434
+ }
1435
+ continue;
1436
+ }
1437
+ if (char === "\u044F") {
1438
+ if (prevChar === "\u0438") {
1439
+ if (isLoanword) {
1440
+ const prevPrev = index >= 2 ? wordLower[index - 2] : "";
1441
+ result.push(prevPrev === "\u0433" || prevPrev === "\u043A" ? "\u0627" : "\u064A\u0627");
1442
+ } else {
1443
+ result.push("\u0627");
1444
+ }
1445
+ continue;
1446
+ }
1447
+ result.push("\u064A\u0627");
1448
+ continue;
1449
+ }
1450
+ if (char === "\u044E") {
1451
+ if (prevChar === "\u044C" || prevChar === "\u044A") {
1452
+ result.push("\u064A\u06CB");
1453
+ } else {
1454
+ result.push(isLoanword || prevChar === "\u0438" ? "\u06CB" : "\u064A\u06CB");
1455
+ }
1456
+ continue;
1457
+ }
1458
+ if (hasKey2(COMBINATIONS, char)) {
1459
+ result.push(COMBINATIONS[char]);
1460
+ } else if (hasKey2(CONSONANTS2, char)) {
1461
+ result.push(CONSONANTS2[char]);
1462
+ } else if (hasKey2(VOWELS, char)) {
1463
+ result.push(VOWELS[char]);
1464
+ } else {
1465
+ result.push(char);
1466
+ }
1467
+ }
1468
+ const converted = result.join("");
1469
+ return this.applyHamzaRule(converted, segments[0]?.[0] ?? "", segments[0]?.[1] ?? false, isSuffix);
1470
+ }
1471
+ convertCompoundWord(word) {
1472
+ if (!word) {
1473
+ return word;
1474
+ }
1475
+ const wordLower = word.toLowerCase();
1476
+ if (hasKey2(PROPER_NOUNS2, wordLower)) {
1477
+ return PROPER_NOUNS2[wordLower];
1478
+ }
1479
+ if (hasKey2(COMMON_WORDS, wordLower)) {
1480
+ return COMMON_WORDS[wordLower];
1481
+ }
1482
+ if (!word.includes("-")) {
1483
+ return this.convertWord(wordLower);
1484
+ }
1485
+ const parts = wordLower.split("-");
1486
+ const converted = [this.convertWord(parts[0])];
1487
+ for (const part of parts.slice(1)) {
1488
+ converted.push(this.convertWord(part, true));
1489
+ }
1490
+ return converted.join("-");
1491
+ }
1492
+ convert(text) {
1493
+ let convertedText = text;
1494
+ for (const [cyr, arab] of Object.entries(PUNCTUATION)) {
1495
+ convertedText = convertedText.split(cyr).join(arab);
1496
+ }
1497
+ const pattern = /[а-яәіңғүұқөһёэъь]+(?:-[а-яәіңғүұқөһёэъь]+)*/giu;
1498
+ return convertedText.replace(pattern, (match) => this.convertCompoundWord(match));
1499
+ }
1500
+ };
1501
/**
 * One-shot helper: builds a CyrillicToArabicConverter with `options` and
 * converts `text`.
 *
 * @param {string} text - Cyrillic text.
 * @param {object} [options] - Passed unchanged to the converter constructor.
 * @returns {string} Converted text.
 */
function syr2arb(text, options) {
  const converter = new CyrillicToArabicConverter(options);
  return converter.convert(text);
}
1504
+ // Annotate the CommonJS export names for ESM import in node:
1505
+ 0 && (module.exports = {
1506
+ ArabicToCyrillicConverter,
1507
+ CyrillicToArabicConverter,
1508
+ NoopDisambiguator,
1509
+ arb2syr,
1510
+ arb2syrAsync,
1511
+ syr2arb
1512
+ });