taraskevizer 5.1.7 → 5.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +224 -93
- package/dist/index.d.ts +5 -4
- package/dist/index.js +224 -92
- package/package.json +4 -4
package/dist/index.cjs
CHANGED
|
@@ -25,11 +25,70 @@ __export(src_exports, {
|
|
|
25
25
|
Taraskevizer: () => Taraskevizer,
|
|
26
26
|
VARIATION: () => VARIATION,
|
|
27
27
|
__tarask__: () => __tarask__,
|
|
28
|
-
convertAlphabet: () => convertAlphabet,
|
|
29
28
|
gobj: () => gobj
|
|
30
29
|
});
|
|
31
30
|
module.exports = __toCommonJS(src_exports);
|
|
32
31
|
|
|
32
|
+
// src/dict/iwords.ts
|
|
33
|
+
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
34
|
+
var iwords = toOneLine(`біс
|
|
35
|
+
бсэн
|
|
36
|
+
в[аеоы]
|
|
37
|
+
верс
|
|
38
|
+
вал[гз]
|
|
39
|
+
гар
|
|
40
|
+
грышч
|
|
41
|
+
грэк
|
|
42
|
+
дал
|
|
43
|
+
дыш
|
|
44
|
+
жыц
|
|
45
|
+
канапіс
|
|
46
|
+
кань?н
|
|
47
|
+
ка[цўл]
|
|
48
|
+
каў[кц]
|
|
49
|
+
ксі
|
|
50
|
+
леус
|
|
51
|
+
л(іст| )
|
|
52
|
+
лістас
|
|
53
|
+
льк
|
|
54
|
+
мант
|
|
55
|
+
мась?ц
|
|
56
|
+
мбры[кч]
|
|
57
|
+
менна
|
|
58
|
+
мідж
|
|
59
|
+
мпар[тц]
|
|
60
|
+
мпульс[аеуы]
|
|
61
|
+
нахадз
|
|
62
|
+
нды([ійюя] |ев)
|
|
63
|
+
ндэкс(а(ў|мі?)? |[еуыі])
|
|
64
|
+
н[еі][ейяю]
|
|
65
|
+
нк([аіу])
|
|
66
|
+
нтэрым
|
|
67
|
+
нфікс
|
|
68
|
+
нфімум
|
|
69
|
+
ншась?ц
|
|
70
|
+
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
71
|
+
псілан
|
|
72
|
+
р([аыу]|а[мхйў]|амі|)
|
|
73
|
+
рад
|
|
74
|
+
рбіс
|
|
75
|
+
рмас
|
|
76
|
+
рха
|
|
77
|
+
рыс
|
|
78
|
+
скарк
|
|
79
|
+
скарак
|
|
80
|
+
скра
|
|
81
|
+
скравец
|
|
82
|
+
скрачк
|
|
83
|
+
ста
|
|
84
|
+
с[нт]ась?ц
|
|
85
|
+
сь?ці[нк]
|
|
86
|
+
та[р ]
|
|
87
|
+
тры
|
|
88
|
+
х(ны[хя]?|ную|на[яей])?
|
|
89
|
+
ць?він
|
|
90
|
+
шыяс`);
|
|
91
|
+
|
|
33
92
|
// src/dict/latin.ts
|
|
34
93
|
var latinLetters = [
|
|
35
94
|
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
@@ -79,14 +138,123 @@ var latinLetters = [
|
|
|
79
138
|
[/łi/, "li"]
|
|
80
139
|
];
|
|
81
140
|
var latinLettersUpperCase = [
|
|
82
|
-
[new RegExp(" Е(?= *\\p{Ll})", "u"), " Je"],
|
|
83
|
-
[new RegExp(" Ё(?= *\\p{Ll})", "u"), " Jo"],
|
|
84
|
-
[new RegExp(" Ю(?= *\\p{Ll})", "u"), " Ju"],
|
|
85
|
-
[new RegExp(" Я(?= *\\p{Ll})", "u"), " Ja"],
|
|
86
|
-
[/(?<=[
|
|
87
|
-
[/(?<=[
|
|
88
|
-
[/(?<=[
|
|
89
|
-
[/(?<=[
|
|
141
|
+
[new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
|
|
142
|
+
[new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
|
|
143
|
+
[new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
|
|
144
|
+
[new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
|
|
145
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
|
|
146
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
|
|
147
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
|
|
148
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
|
|
149
|
+
[/Е/, "IE"],
|
|
150
|
+
[/Ё/, "IO"],
|
|
151
|
+
[/Ю/, "IU"],
|
|
152
|
+
[/Я/, "IA"],
|
|
153
|
+
[/Ц[Ьь]/, "Ć"],
|
|
154
|
+
[/З[Ьь]/, "Ź"],
|
|
155
|
+
[/С[Ьь]/, "Ś"],
|
|
156
|
+
[/Н[Ьь]/, "Ń"],
|
|
157
|
+
[/Л[Ьь]/, "L"],
|
|
158
|
+
[/А/, "A"],
|
|
159
|
+
[/Б/, "B"],
|
|
160
|
+
[/В/, "V"],
|
|
161
|
+
[/Г/, "H"],
|
|
162
|
+
[/Ґ/, "G"],
|
|
163
|
+
[/Д/, "D"],
|
|
164
|
+
[/Ж/, "Ž"],
|
|
165
|
+
[/З/, "Z"],
|
|
166
|
+
[/І/, "I"],
|
|
167
|
+
[/Й/, "J"],
|
|
168
|
+
[/К/, "K"],
|
|
169
|
+
[/Л/, "Ł"],
|
|
170
|
+
[/М/, "M"],
|
|
171
|
+
[/Н/, "N"],
|
|
172
|
+
[/О/, "O"],
|
|
173
|
+
[/П/, "P"],
|
|
174
|
+
[/Р/, "R"],
|
|
175
|
+
[/С/, "S"],
|
|
176
|
+
[/Т/, "T"],
|
|
177
|
+
[/У/, "U"],
|
|
178
|
+
[/Ў/, "Ŭ"],
|
|
179
|
+
[/Ф/, "F"],
|
|
180
|
+
[/ Х(?=[\p{Ll} ])/u, " Ch"],
|
|
181
|
+
[/Х/, "CH"],
|
|
182
|
+
[/Ц/, "C"],
|
|
183
|
+
[/Ч/, "Č"],
|
|
184
|
+
[/Ш/, "Š"],
|
|
185
|
+
[/Ы/, "Y"],
|
|
186
|
+
[/Э/, "E"],
|
|
187
|
+
[/[ŁL][Ii]([AEOUaeou])/, "L$1"],
|
|
188
|
+
[/Łi/, "Li"],
|
|
189
|
+
[/ŁI/, "LI"]
|
|
190
|
+
];
|
|
191
|
+
var rawLatinLettersJi = [
|
|
192
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і Ў/, "j U"],
|
|
193
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і ў/, "j u"],
|
|
194
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і /, "j "],
|
|
195
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І Ў/, "J U"],
|
|
196
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І ў/, "J u"],
|
|
197
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І /, "J "],
|
|
198
|
+
[`і(?=${iwords})`, "ji"],
|
|
199
|
+
[`І(?=${iwords})`, "Ji"],
|
|
200
|
+
[`І(?=${iwords.toUpperCase()})`, "JI"],
|
|
201
|
+
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
202
|
+
[/(?<=[eаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ё/, "jo"],
|
|
203
|
+
[/(?<=[eoаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ю/, "ju"],
|
|
204
|
+
[/(?<=[eouаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)я/, "ja"],
|
|
205
|
+
[/(?<=[eouaаеёіоуўыэюяьʼАЕЁІОУЎЫЭЮЯЬ] *)і/, "ji"],
|
|
206
|
+
[/ʼ/, ""],
|
|
207
|
+
[/ць/, "ć"],
|
|
208
|
+
[/зь/, "ź"],
|
|
209
|
+
[/сь/, "ś"],
|
|
210
|
+
[/нь/, "ń"],
|
|
211
|
+
[/ль/, "l"],
|
|
212
|
+
[/а/, "a"],
|
|
213
|
+
[/б/, "b"],
|
|
214
|
+
[/в/, "v"],
|
|
215
|
+
[/г/, "h"],
|
|
216
|
+
[/ґ/, "g"],
|
|
217
|
+
[/д/, "d"],
|
|
218
|
+
[/е/, "ie"],
|
|
219
|
+
[/ё/, "io"],
|
|
220
|
+
[/ж/, "ž"],
|
|
221
|
+
[/з/, "z"],
|
|
222
|
+
[/і/, "i"],
|
|
223
|
+
[/й/, "j"],
|
|
224
|
+
[/к/, "k"],
|
|
225
|
+
[/л/, "ł"],
|
|
226
|
+
[/м/, "m"],
|
|
227
|
+
[/н/, "n"],
|
|
228
|
+
[/о/, "o"],
|
|
229
|
+
[/п/, "p"],
|
|
230
|
+
[/р/, "r"],
|
|
231
|
+
[/с/, "s"],
|
|
232
|
+
[/т/, "t"],
|
|
233
|
+
[/у/, "u"],
|
|
234
|
+
[/ў/, "ŭ"],
|
|
235
|
+
[/ф/, "f"],
|
|
236
|
+
[/х/, "ch"],
|
|
237
|
+
[/ц/, "c"],
|
|
238
|
+
[/ч/, "č"],
|
|
239
|
+
[/ш/, "š"],
|
|
240
|
+
[/ы/, "y"],
|
|
241
|
+
[/э/, "e"],
|
|
242
|
+
[/ю/, "iu"],
|
|
243
|
+
[/я/, "ia"],
|
|
244
|
+
[/[łl]i([eoua])/, "l$1"],
|
|
245
|
+
[/łi/, "li"]
|
|
246
|
+
];
|
|
247
|
+
var rawLatinLettersUpperCaseJi = [
|
|
248
|
+
[new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
|
|
249
|
+
[new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
|
|
250
|
+
[new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
|
|
251
|
+
[new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
|
|
252
|
+
[new RegExp("(?<=[eoua] *)І(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), "Ji"],
|
|
253
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
|
|
254
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
|
|
255
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
|
|
256
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
|
|
257
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ]\(?)І/, "JI"],
|
|
90
258
|
[/Е/, "IE"],
|
|
91
259
|
[/Ё/, "IO"],
|
|
92
260
|
[/Ю/, "IU"],
|
|
@@ -202,64 +370,7 @@ var chemicalElements2 = [
|
|
|
202
370
|
];
|
|
203
371
|
chemicalElements2[1] = chemicalElements2[0] + "|айнштайн|мендзялев|сыборг|гас|флеров";
|
|
204
372
|
var chemicalElements3 = " гал|бэрыл|тул|бэркл|набэл";
|
|
205
|
-
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
206
373
|
var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}|i(?:${iwords})))`;
|
|
207
|
-
var iwords = toOneLine(`
|
|
208
|
-
біс
|
|
209
|
-
бсэн
|
|
210
|
-
в[аеоы]
|
|
211
|
-
верс
|
|
212
|
-
вал[гз]
|
|
213
|
-
гар
|
|
214
|
-
грышч
|
|
215
|
-
грэк
|
|
216
|
-
дал
|
|
217
|
-
дыш
|
|
218
|
-
жыц
|
|
219
|
-
канапіс
|
|
220
|
-
кань?н
|
|
221
|
-
ка[цўл]
|
|
222
|
-
каў[кц]
|
|
223
|
-
ксі
|
|
224
|
-
леус
|
|
225
|
-
л(іст| )
|
|
226
|
-
лістас
|
|
227
|
-
льк
|
|
228
|
-
мант
|
|
229
|
-
мась?ц
|
|
230
|
-
мбры[кч]
|
|
231
|
-
менна
|
|
232
|
-
мідж
|
|
233
|
-
мпар[тц]
|
|
234
|
-
мпульс[аеуы]
|
|
235
|
-
нахадз
|
|
236
|
-
нды([ійюя] |ев)
|
|
237
|
-
ндэкс(а(ў|мі?)? |[еуыі])
|
|
238
|
-
н[еі][ейяю]
|
|
239
|
-
нк([аіу])
|
|
240
|
-
нтэрым
|
|
241
|
-
нфікс
|
|
242
|
-
нфімум
|
|
243
|
-
ншась?ц
|
|
244
|
-
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
245
|
-
псілан
|
|
246
|
-
рад
|
|
247
|
-
рбіс
|
|
248
|
-
рмас
|
|
249
|
-
рха
|
|
250
|
-
рыс
|
|
251
|
-
скарк
|
|
252
|
-
скарак
|
|
253
|
-
скра
|
|
254
|
-
скравец
|
|
255
|
-
скрачк
|
|
256
|
-
с[нт]ась?ц
|
|
257
|
-
сь?цік
|
|
258
|
-
сь?цін
|
|
259
|
-
тар
|
|
260
|
-
тры
|
|
261
|
-
ць?він
|
|
262
|
-
шыяс`);
|
|
263
374
|
var rawWordlist = [
|
|
264
375
|
/* А */
|
|
265
376
|
[/аахен/, "аахэн"],
|
|
@@ -1744,8 +1855,7 @@ var rawWordlist = [
|
|
|
1744
1855
|
[/ г[еэ]рцагс/, " гэрцагс"],
|
|
1745
1856
|
[/ (дву|тро|чатыро)хс/, " $1хс"],
|
|
1746
1857
|
[/[гґзжхш]с(?=(?:к|тв)\S)/, "с"],
|
|
1747
|
-
[/ адск/, " адск"],
|
|
1748
|
-
[/ падск/, " падск"],
|
|
1858
|
+
[/ ([нп]?)адск/, " $1адск"],
|
|
1749
1859
|
[/дс(?=к|тв)/, "дз"],
|
|
1750
1860
|
// [/([аеёіоуыэюя])вс/, '$1ўс'],
|
|
1751
1861
|
[/ (б|кнд|нот)р /, " $1р "],
|
|
@@ -1945,14 +2055,18 @@ var gobj = {
|
|
|
1945
2055
|
var wordlist = [];
|
|
1946
2056
|
var softeners = [];
|
|
1947
2057
|
var arabLetters = [];
|
|
2058
|
+
var latinLettersJi = [];
|
|
2059
|
+
var latinLettersUpperCaseJi = [];
|
|
1948
2060
|
var arr = [
|
|
1949
2061
|
[rawWordlist, wordlist],
|
|
1950
2062
|
[rawsofteners, softeners],
|
|
1951
|
-
[rawArabLetters, arabLetters]
|
|
2063
|
+
[rawArabLetters, arabLetters],
|
|
2064
|
+
[rawLatinLettersJi, latinLettersJi],
|
|
2065
|
+
[rawLatinLettersUpperCaseJi, latinLettersUpperCaseJi]
|
|
1952
2066
|
];
|
|
1953
2067
|
for (const [raw, obj] of arr)
|
|
1954
2068
|
for (const [pattern, result] of raw)
|
|
1955
|
-
obj.push([RegExp(pattern, "g"), result]);
|
|
2069
|
+
obj.push([RegExp(pattern, pattern instanceof RegExp ? pattern.flags + "g" : "g"), result]);
|
|
1956
2070
|
for (const obj of [latinLetters, latinLettersUpperCase])
|
|
1957
2071
|
for (const item of obj)
|
|
1958
2072
|
item[0] = RegExp(item[0], "g" + item[0].flags);
|
|
@@ -1975,7 +2089,7 @@ var ALPHABET = {
|
|
|
1975
2089
|
CYRILLIC: 0,
|
|
1976
2090
|
LATIN: 1,
|
|
1977
2091
|
ARABIC: 2,
|
|
1978
|
-
LATIN_JI:
|
|
2092
|
+
LATIN_JI: 3
|
|
1979
2093
|
};
|
|
1980
2094
|
var REPLACE_J = {
|
|
1981
2095
|
NEVER: 0,
|
|
@@ -1989,10 +2103,12 @@ var VARIATION = {
|
|
|
1989
2103
|
};
|
|
1990
2104
|
var letters = {
|
|
1991
2105
|
[ALPHABET.LATIN]: latinLetters,
|
|
1992
|
-
[ALPHABET.ARABIC]: arabLetters
|
|
2106
|
+
[ALPHABET.ARABIC]: arabLetters,
|
|
2107
|
+
[ALPHABET.LATIN_JI]: latinLettersJi
|
|
1993
2108
|
};
|
|
1994
2109
|
var lettersUpperCase = {
|
|
1995
|
-
[ALPHABET.LATIN]: latinLettersUpperCase
|
|
2110
|
+
[ALPHABET.LATIN]: latinLettersUpperCase,
|
|
2111
|
+
[ALPHABET.LATIN_JI]: latinLettersUpperCaseJi
|
|
1996
2112
|
};
|
|
1997
2113
|
var wrappers = {
|
|
1998
2114
|
html: {
|
|
@@ -2018,7 +2134,7 @@ var afterTarask = [
|
|
|
2018
2134
|
];
|
|
2019
2135
|
var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
|
|
2020
2136
|
var join = (textArr) => textArr.join(" ").replace(/ /g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|() ", "gu"), "$1");
|
|
2021
|
-
var
|
|
2137
|
+
var finalize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
|
|
2022
2138
|
var replaceG = (text, replacer) => text.replace(
|
|
2023
2139
|
G_REGEX,
|
|
2024
2140
|
// @ts-ignore
|
|
@@ -2110,10 +2226,10 @@ var replaceWithDict = (text, dict = []) => {
|
|
|
2110
2226
|
);
|
|
2111
2227
|
return text;
|
|
2112
2228
|
};
|
|
2113
|
-
var toJ = (
|
|
2229
|
+
var toJ = (shortU) => "й " + (shortU ? "у" : "");
|
|
2114
2230
|
var replaceIbyJ = (text, always = false) => text.replace(
|
|
2115
|
-
/([аеёіоуыэюя] )і (ў?)/g,
|
|
2116
|
-
always ? ($0, $1
|
|
2231
|
+
/(?<=[аеёіоуыэюя] )і (ў?)/g,
|
|
2232
|
+
always ? ($0, $1) => toJ($1) : ($0, $1) => Math.random() >= 0.5 ? toJ($1) : $0
|
|
2117
2233
|
);
|
|
2118
2234
|
var __tarask__ = {
|
|
2119
2235
|
wordlist,
|
|
@@ -2155,7 +2271,10 @@ var Taraskevizer = class {
|
|
|
2155
2271
|
convert(text) {
|
|
2156
2272
|
const wrapInColorOf = wrappers.ansiColors;
|
|
2157
2273
|
const isCyrillic = this.abc === ALPHABET.CYRILLIC;
|
|
2158
|
-
const
|
|
2274
|
+
const noFixArr = [];
|
|
2275
|
+
const { splitted, splittedOrig } = this.process(
|
|
2276
|
+
this.prepare(text, noFixArr, "<")
|
|
2277
|
+
);
|
|
2159
2278
|
if (this.nonHtml.ansiColors)
|
|
2160
2279
|
highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
|
|
2161
2280
|
text = join(splitted);
|
|
@@ -2172,12 +2291,15 @@ var Taraskevizer = class {
|
|
|
2172
2291
|
this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
|
|
2173
2292
|
);
|
|
2174
2293
|
}
|
|
2175
|
-
return
|
|
2294
|
+
return finalize(applyNoFix(noFixArr, text).replace(/(/g, "("), "\n");
|
|
2176
2295
|
}
|
|
2177
2296
|
convertToHtml(text) {
|
|
2178
2297
|
const wrapInTag = wrappers.html;
|
|
2179
2298
|
const isCyrillic = this.abc === ALPHABET.CYRILLIC;
|
|
2180
|
-
const
|
|
2299
|
+
const noFixArr = [];
|
|
2300
|
+
const { splitted, splittedOrig } = this.process(
|
|
2301
|
+
this.prepare(text, noFixArr, "<")
|
|
2302
|
+
);
|
|
2181
2303
|
highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
|
|
2182
2304
|
text = join(splitted);
|
|
2183
2305
|
if (isCyrillic)
|
|
@@ -2185,7 +2307,7 @@ var Taraskevizer = class {
|
|
|
2185
2307
|
text,
|
|
2186
2308
|
this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
|
|
2187
2309
|
);
|
|
2188
|
-
return
|
|
2310
|
+
return finalize(
|
|
2189
2311
|
applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
|
|
2190
2312
|
const options = $0.slice(1, -1).split("|");
|
|
2191
2313
|
const main = options.shift();
|
|
@@ -2194,33 +2316,43 @@ var Taraskevizer = class {
|
|
|
2194
2316
|
"<br>"
|
|
2195
2317
|
);
|
|
2196
2318
|
}
|
|
2197
|
-
|
|
2198
|
-
const { abc, j } = this;
|
|
2199
|
-
const noFixArr = [];
|
|
2319
|
+
prepare(text, noFixArr, LEFT_ANGLE_BRACKET, doEscapeCapitalized = this.doEscapeCapitalized) {
|
|
2200
2320
|
text = ` ${text.trim()} `.replace(//g, "");
|
|
2201
|
-
if (
|
|
2321
|
+
if (doEscapeCapitalized)
|
|
2202
2322
|
text = text.replace(new RegExp("(?!<=\\p{Lu} )(\\p{Lu}{2,})(?!= \\p{Lu})", "gu"), "<*.$1>");
|
|
2203
|
-
|
|
2323
|
+
return text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
|
|
2204
2324
|
if ($2 === ",")
|
|
2205
2325
|
return LEFT_ANGLE_BRACKET + $3 + ">";
|
|
2206
2326
|
if ($1)
|
|
2207
2327
|
$3 = restoreCase(
|
|
2208
|
-
[replaceWithDict($3.toLowerCase(), letters[abc])],
|
|
2328
|
+
[replaceWithDict($3.toLowerCase(), letters[this.abc])],
|
|
2209
2329
|
[$3]
|
|
2210
2330
|
);
|
|
2211
2331
|
noFixArr.push($2 === "." ? $3 : LEFT_ANGLE_BRACKET + $3 + ">");
|
|
2212
2332
|
return NOFIX_CHAR;
|
|
2213
2333
|
}).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "(");
|
|
2214
|
-
|
|
2215
|
-
|
|
2334
|
+
}
|
|
2335
|
+
convertAlphabetOnly(text) {
|
|
2336
|
+
const noFixArr = [];
|
|
2337
|
+
return finalize(
|
|
2338
|
+
applyNoFix(
|
|
2339
|
+
noFixArr,
|
|
2340
|
+
convertAlphabet(this.prepare(text, noFixArr, "<", false), this.abc)
|
|
2341
|
+
).replace(/ /g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|() ", "gu"), "$1"),
|
|
2342
|
+
"\n"
|
|
2343
|
+
);
|
|
2344
|
+
}
|
|
2345
|
+
process(text) {
|
|
2346
|
+
const { abc, j } = this;
|
|
2347
|
+
const splittedOrig = convertAlphabet(text, abc).split(" ");
|
|
2216
2348
|
text = this.taraskevize(text.toLowerCase());
|
|
2217
|
-
if (j)
|
|
2349
|
+
if (j && abc !== ALPHABET.LATIN_JI)
|
|
2218
2350
|
text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
|
|
2219
2351
|
text = replaceWithDict(text, letters[abc]);
|
|
2220
|
-
splitted = text.split(" ");
|
|
2352
|
+
let splitted = text.split(" ");
|
|
2221
2353
|
if (abc !== ALPHABET.ARABIC)
|
|
2222
2354
|
splitted = restoreCase(splitted, splittedOrig);
|
|
2223
|
-
return { splittedOrig, splitted
|
|
2355
|
+
return { splittedOrig, splitted };
|
|
2224
2356
|
}
|
|
2225
2357
|
taraskevize(text) {
|
|
2226
2358
|
text = replaceWithDict(text, wordlist);
|
|
@@ -2242,6 +2374,5 @@ var Taraskevizer = class {
|
|
|
2242
2374
|
Taraskevizer,
|
|
2243
2375
|
VARIATION,
|
|
2244
2376
|
__tarask__,
|
|
2245
|
-
convertAlphabet,
|
|
2246
2377
|
gobj
|
|
2247
2378
|
});
|
package/dist/index.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ type ModifyObjectType<T, TResultObj> = T extends object ? T extends (...args: an
|
|
|
2
2
|
type DeepPartialReadonly<T> = ModifyObjectType<T, {
|
|
3
3
|
readonly [P in keyof T]?: DeepPartialReadonly<T[P]>;
|
|
4
4
|
}>;
|
|
5
|
-
type Alphabet = 0 | 1 | 2;
|
|
5
|
+
type Alphabet = 0 | 1 | 2 | 3;
|
|
6
6
|
type OptionJ = 0 | 1 | 2;
|
|
7
7
|
type Variation = 0 | 1 | 2;
|
|
8
8
|
type TaraskOptions = {
|
|
@@ -28,7 +28,7 @@ declare const ALPHABET: {
|
|
|
28
28
|
readonly CYRILLIC: 0;
|
|
29
29
|
readonly LATIN: 1;
|
|
30
30
|
readonly ARABIC: 2;
|
|
31
|
-
readonly LATIN_JI:
|
|
31
|
+
readonly LATIN_JI: 3;
|
|
32
32
|
};
|
|
33
33
|
declare const REPLACE_J: {
|
|
34
34
|
readonly NEVER: 0;
|
|
@@ -46,7 +46,6 @@ declare const __tarask__: {
|
|
|
46
46
|
readonly replaceWithDict: (text: string, dict?: ExtendedDict) => string;
|
|
47
47
|
readonly afterTarask: ExtendedDict;
|
|
48
48
|
};
|
|
49
|
-
declare const convertAlphabet: (text: string, abc: Alphabet) => string;
|
|
50
49
|
declare class Taraskevizer {
|
|
51
50
|
abc: Alphabet;
|
|
52
51
|
j: OptionJ;
|
|
@@ -67,6 +66,8 @@ declare class Taraskevizer {
|
|
|
67
66
|
}>);
|
|
68
67
|
convert(text: string): string;
|
|
69
68
|
convertToHtml(text: string): string;
|
|
69
|
+
private prepare;
|
|
70
|
+
convertAlphabetOnly(text: string): string;
|
|
70
71
|
private process;
|
|
71
72
|
protected taraskevize(text: string): string;
|
|
72
73
|
}
|
|
@@ -78,4 +79,4 @@ declare const gobj: {
|
|
|
78
79
|
readonly Ґ: "Г";
|
|
79
80
|
};
|
|
80
81
|
|
|
81
|
-
export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__,
|
|
82
|
+
export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__, gobj };
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,63 @@
|
|
|
1
|
+
// src/dict/iwords.ts
|
|
2
|
+
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
3
|
+
var iwords = toOneLine(`біс
|
|
4
|
+
бсэн
|
|
5
|
+
в[аеоы]
|
|
6
|
+
верс
|
|
7
|
+
вал[гз]
|
|
8
|
+
гар
|
|
9
|
+
грышч
|
|
10
|
+
грэк
|
|
11
|
+
дал
|
|
12
|
+
дыш
|
|
13
|
+
жыц
|
|
14
|
+
канапіс
|
|
15
|
+
кань?н
|
|
16
|
+
ка[цўл]
|
|
17
|
+
каў[кц]
|
|
18
|
+
ксі
|
|
19
|
+
леус
|
|
20
|
+
л(іст| )
|
|
21
|
+
лістас
|
|
22
|
+
льк
|
|
23
|
+
мант
|
|
24
|
+
мась?ц
|
|
25
|
+
мбры[кч]
|
|
26
|
+
менна
|
|
27
|
+
мідж
|
|
28
|
+
мпар[тц]
|
|
29
|
+
мпульс[аеуы]
|
|
30
|
+
нахадз
|
|
31
|
+
нды([ійюя] |ев)
|
|
32
|
+
ндэкс(а(ў|мі?)? |[еуыі])
|
|
33
|
+
н[еі][ейяю]
|
|
34
|
+
нк([аіу])
|
|
35
|
+
нтэрым
|
|
36
|
+
нфікс
|
|
37
|
+
нфімум
|
|
38
|
+
ншась?ц
|
|
39
|
+
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
40
|
+
псілан
|
|
41
|
+
р([аыу]|а[мхйў]|амі|)
|
|
42
|
+
рад
|
|
43
|
+
рбіс
|
|
44
|
+
рмас
|
|
45
|
+
рха
|
|
46
|
+
рыс
|
|
47
|
+
скарк
|
|
48
|
+
скарак
|
|
49
|
+
скра
|
|
50
|
+
скравец
|
|
51
|
+
скрачк
|
|
52
|
+
ста
|
|
53
|
+
с[нт]ась?ц
|
|
54
|
+
сь?ці[нк]
|
|
55
|
+
та[р ]
|
|
56
|
+
тры
|
|
57
|
+
х(ны[хя]?|ную|на[яей])?
|
|
58
|
+
ць?він
|
|
59
|
+
шыяс`);
|
|
60
|
+
|
|
1
61
|
// src/dict/latin.ts
|
|
2
62
|
var latinLetters = [
|
|
3
63
|
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
@@ -47,14 +107,123 @@ var latinLetters = [
|
|
|
47
107
|
[/łi/, "li"]
|
|
48
108
|
];
|
|
49
109
|
var latinLettersUpperCase = [
|
|
50
|
-
[new RegExp(" Е(?= *\\p{Ll})", "u"), " Je"],
|
|
51
|
-
[new RegExp(" Ё(?= *\\p{Ll})", "u"), " Jo"],
|
|
52
|
-
[new RegExp(" Ю(?= *\\p{Ll})", "u"), " Ju"],
|
|
53
|
-
[new RegExp(" Я(?= *\\p{Ll})", "u"), " Ja"],
|
|
54
|
-
[/(?<=[
|
|
55
|
-
[/(?<=[
|
|
56
|
-
[/(?<=[
|
|
57
|
-
[/(?<=[
|
|
110
|
+
[new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
|
|
111
|
+
[new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
|
|
112
|
+
[new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
|
|
113
|
+
[new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
|
|
114
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
|
|
115
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
|
|
116
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
|
|
117
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
|
|
118
|
+
[/Е/, "IE"],
|
|
119
|
+
[/Ё/, "IO"],
|
|
120
|
+
[/Ю/, "IU"],
|
|
121
|
+
[/Я/, "IA"],
|
|
122
|
+
[/Ц[Ьь]/, "Ć"],
|
|
123
|
+
[/З[Ьь]/, "Ź"],
|
|
124
|
+
[/С[Ьь]/, "Ś"],
|
|
125
|
+
[/Н[Ьь]/, "Ń"],
|
|
126
|
+
[/Л[Ьь]/, "L"],
|
|
127
|
+
[/А/, "A"],
|
|
128
|
+
[/Б/, "B"],
|
|
129
|
+
[/В/, "V"],
|
|
130
|
+
[/Г/, "H"],
|
|
131
|
+
[/Ґ/, "G"],
|
|
132
|
+
[/Д/, "D"],
|
|
133
|
+
[/Ж/, "Ž"],
|
|
134
|
+
[/З/, "Z"],
|
|
135
|
+
[/І/, "I"],
|
|
136
|
+
[/Й/, "J"],
|
|
137
|
+
[/К/, "K"],
|
|
138
|
+
[/Л/, "Ł"],
|
|
139
|
+
[/М/, "M"],
|
|
140
|
+
[/Н/, "N"],
|
|
141
|
+
[/О/, "O"],
|
|
142
|
+
[/П/, "P"],
|
|
143
|
+
[/Р/, "R"],
|
|
144
|
+
[/С/, "S"],
|
|
145
|
+
[/Т/, "T"],
|
|
146
|
+
[/У/, "U"],
|
|
147
|
+
[/Ў/, "Ŭ"],
|
|
148
|
+
[/Ф/, "F"],
|
|
149
|
+
[/ Х(?=[\p{Ll} ])/u, " Ch"],
|
|
150
|
+
[/Х/, "CH"],
|
|
151
|
+
[/Ц/, "C"],
|
|
152
|
+
[/Ч/, "Č"],
|
|
153
|
+
[/Ш/, "Š"],
|
|
154
|
+
[/Ы/, "Y"],
|
|
155
|
+
[/Э/, "E"],
|
|
156
|
+
[/[ŁL][Ii]([AEOUaeou])/, "L$1"],
|
|
157
|
+
[/Łi/, "Li"],
|
|
158
|
+
[/ŁI/, "LI"]
|
|
159
|
+
];
|
|
160
|
+
var rawLatinLettersJi = [
|
|
161
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і Ў/, "j U"],
|
|
162
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і ў/, "j u"],
|
|
163
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і /, "j "],
|
|
164
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І Ў/, "J U"],
|
|
165
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І ў/, "J u"],
|
|
166
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І /, "J "],
|
|
167
|
+
[`і(?=${iwords})`, "ji"],
|
|
168
|
+
[`І(?=${iwords})`, "Ji"],
|
|
169
|
+
[`І(?=${iwords.toUpperCase()})`, "JI"],
|
|
170
|
+
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
171
|
+
[/(?<=[eаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ё/, "jo"],
|
|
172
|
+
[/(?<=[eoаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ю/, "ju"],
|
|
173
|
+
[/(?<=[eouаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)я/, "ja"],
|
|
174
|
+
[/(?<=[eouaаеёіоуўыэюяьʼАЕЁІОУЎЫЭЮЯЬ] *)і/, "ji"],
|
|
175
|
+
[/ʼ/, ""],
|
|
176
|
+
[/ць/, "ć"],
|
|
177
|
+
[/зь/, "ź"],
|
|
178
|
+
[/сь/, "ś"],
|
|
179
|
+
[/нь/, "ń"],
|
|
180
|
+
[/ль/, "l"],
|
|
181
|
+
[/а/, "a"],
|
|
182
|
+
[/б/, "b"],
|
|
183
|
+
[/в/, "v"],
|
|
184
|
+
[/г/, "h"],
|
|
185
|
+
[/ґ/, "g"],
|
|
186
|
+
[/д/, "d"],
|
|
187
|
+
[/е/, "ie"],
|
|
188
|
+
[/ё/, "io"],
|
|
189
|
+
[/ж/, "ž"],
|
|
190
|
+
[/з/, "z"],
|
|
191
|
+
[/і/, "i"],
|
|
192
|
+
[/й/, "j"],
|
|
193
|
+
[/к/, "k"],
|
|
194
|
+
[/л/, "ł"],
|
|
195
|
+
[/м/, "m"],
|
|
196
|
+
[/н/, "n"],
|
|
197
|
+
[/о/, "o"],
|
|
198
|
+
[/п/, "p"],
|
|
199
|
+
[/р/, "r"],
|
|
200
|
+
[/с/, "s"],
|
|
201
|
+
[/т/, "t"],
|
|
202
|
+
[/у/, "u"],
|
|
203
|
+
[/ў/, "ŭ"],
|
|
204
|
+
[/ф/, "f"],
|
|
205
|
+
[/х/, "ch"],
|
|
206
|
+
[/ц/, "c"],
|
|
207
|
+
[/ч/, "č"],
|
|
208
|
+
[/ш/, "š"],
|
|
209
|
+
[/ы/, "y"],
|
|
210
|
+
[/э/, "e"],
|
|
211
|
+
[/ю/, "iu"],
|
|
212
|
+
[/я/, "ia"],
|
|
213
|
+
[/[łl]i([eoua])/, "l$1"],
|
|
214
|
+
[/łi/, "li"]
|
|
215
|
+
];
|
|
216
|
+
var rawLatinLettersUpperCaseJi = [
|
|
217
|
+
[new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
|
|
218
|
+
[new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
|
|
219
|
+
[new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
|
|
220
|
+
[new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
|
|
221
|
+
[new RegExp("(?<=[eoua] *)І(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), "Ji"],
|
|
222
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
|
|
223
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
|
|
224
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
|
|
225
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
|
|
226
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ]\(?)І/, "JI"],
|
|
58
227
|
[/Е/, "IE"],
|
|
59
228
|
[/Ё/, "IO"],
|
|
60
229
|
[/Ю/, "IU"],
|
|
@@ -170,64 +339,7 @@ var chemicalElements2 = [
|
|
|
170
339
|
];
|
|
171
340
|
chemicalElements2[1] = chemicalElements2[0] + "|айнштайн|мендзялев|сыборг|гас|флеров";
|
|
172
341
|
var chemicalElements3 = " гал|бэрыл|тул|бэркл|набэл";
|
|
173
|
-
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
174
342
|
var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}|i(?:${iwords})))`;
|
|
175
|
-
var iwords = toOneLine(`
|
|
176
|
-
біс
|
|
177
|
-
бсэн
|
|
178
|
-
в[аеоы]
|
|
179
|
-
верс
|
|
180
|
-
вал[гз]
|
|
181
|
-
гар
|
|
182
|
-
грышч
|
|
183
|
-
грэк
|
|
184
|
-
дал
|
|
185
|
-
дыш
|
|
186
|
-
жыц
|
|
187
|
-
канапіс
|
|
188
|
-
кань?н
|
|
189
|
-
ка[цўл]
|
|
190
|
-
каў[кц]
|
|
191
|
-
ксі
|
|
192
|
-
леус
|
|
193
|
-
л(іст| )
|
|
194
|
-
лістас
|
|
195
|
-
льк
|
|
196
|
-
мант
|
|
197
|
-
мась?ц
|
|
198
|
-
мбры[кч]
|
|
199
|
-
менна
|
|
200
|
-
мідж
|
|
201
|
-
мпар[тц]
|
|
202
|
-
мпульс[аеуы]
|
|
203
|
-
нахадз
|
|
204
|
-
нды([ійюя] |ев)
|
|
205
|
-
ндэкс(а(ў|мі?)? |[еуыі])
|
|
206
|
-
н[еі][ейяю]
|
|
207
|
-
нк([аіу])
|
|
208
|
-
нтэрым
|
|
209
|
-
нфікс
|
|
210
|
-
нфімум
|
|
211
|
-
ншась?ц
|
|
212
|
-
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
213
|
-
псілан
|
|
214
|
-
рад
|
|
215
|
-
рбіс
|
|
216
|
-
рмас
|
|
217
|
-
рха
|
|
218
|
-
рыс
|
|
219
|
-
скарк
|
|
220
|
-
скарак
|
|
221
|
-
скра
|
|
222
|
-
скравец
|
|
223
|
-
скрачк
|
|
224
|
-
с[нт]ась?ц
|
|
225
|
-
сь?цік
|
|
226
|
-
сь?цін
|
|
227
|
-
тар
|
|
228
|
-
тры
|
|
229
|
-
ць?він
|
|
230
|
-
шыяс`);
|
|
231
343
|
var rawWordlist = [
|
|
232
344
|
/* А */
|
|
233
345
|
[/аахен/, "аахэн"],
|
|
@@ -1712,8 +1824,7 @@ var rawWordlist = [
|
|
|
1712
1824
|
[/ г[еэ]рцагс/, " гэрцагс"],
|
|
1713
1825
|
[/ (дву|тро|чатыро)хс/, " $1хс"],
|
|
1714
1826
|
[/[гґзжхш]с(?=(?:к|тв)\S)/, "с"],
|
|
1715
|
-
[/ адск/, " адск"],
|
|
1716
|
-
[/ падск/, " падск"],
|
|
1827
|
+
[/ ([нп]?)адск/, " $1адск"],
|
|
1717
1828
|
[/дс(?=к|тв)/, "дз"],
|
|
1718
1829
|
// [/([аеёіоуыэюя])вс/, '$1ўс'],
|
|
1719
1830
|
[/ (б|кнд|нот)р /, " $1р "],
|
|
@@ -1913,14 +2024,18 @@ var gobj = {
|
|
|
1913
2024
|
var wordlist = [];
|
|
1914
2025
|
var softeners = [];
|
|
1915
2026
|
var arabLetters = [];
|
|
2027
|
+
var latinLettersJi = [];
|
|
2028
|
+
var latinLettersUpperCaseJi = [];
|
|
1916
2029
|
var arr = [
|
|
1917
2030
|
[rawWordlist, wordlist],
|
|
1918
2031
|
[rawsofteners, softeners],
|
|
1919
|
-
[rawArabLetters, arabLetters]
|
|
2032
|
+
[rawArabLetters, arabLetters],
|
|
2033
|
+
[rawLatinLettersJi, latinLettersJi],
|
|
2034
|
+
[rawLatinLettersUpperCaseJi, latinLettersUpperCaseJi]
|
|
1920
2035
|
];
|
|
1921
2036
|
for (const [raw, obj] of arr)
|
|
1922
2037
|
for (const [pattern, result] of raw)
|
|
1923
|
-
obj.push([RegExp(pattern, "g"), result]);
|
|
2038
|
+
obj.push([RegExp(pattern, pattern instanceof RegExp ? pattern.flags + "g" : "g"), result]);
|
|
1924
2039
|
for (const obj of [latinLetters, latinLettersUpperCase])
|
|
1925
2040
|
for (const item of obj)
|
|
1926
2041
|
item[0] = RegExp(item[0], "g" + item[0].flags);
|
|
@@ -1943,7 +2058,7 @@ var ALPHABET = {
|
|
|
1943
2058
|
CYRILLIC: 0,
|
|
1944
2059
|
LATIN: 1,
|
|
1945
2060
|
ARABIC: 2,
|
|
1946
|
-
LATIN_JI:
|
|
2061
|
+
LATIN_JI: 3
|
|
1947
2062
|
};
|
|
1948
2063
|
var REPLACE_J = {
|
|
1949
2064
|
NEVER: 0,
|
|
@@ -1957,10 +2072,12 @@ var VARIATION = {
|
|
|
1957
2072
|
};
|
|
1958
2073
|
var letters = {
|
|
1959
2074
|
[ALPHABET.LATIN]: latinLetters,
|
|
1960
|
-
[ALPHABET.ARABIC]: arabLetters
|
|
2075
|
+
[ALPHABET.ARABIC]: arabLetters,
|
|
2076
|
+
[ALPHABET.LATIN_JI]: latinLettersJi
|
|
1961
2077
|
};
|
|
1962
2078
|
var lettersUpperCase = {
|
|
1963
|
-
[ALPHABET.LATIN]: latinLettersUpperCase
|
|
2079
|
+
[ALPHABET.LATIN]: latinLettersUpperCase,
|
|
2080
|
+
[ALPHABET.LATIN_JI]: latinLettersUpperCaseJi
|
|
1964
2081
|
};
|
|
1965
2082
|
var wrappers = {
|
|
1966
2083
|
html: {
|
|
@@ -1986,7 +2103,7 @@ var afterTarask = [
|
|
|
1986
2103
|
];
|
|
1987
2104
|
var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
|
|
1988
2105
|
var join = (textArr) => textArr.join(" ").replace(/ /g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|() ", "gu"), "$1");
|
|
1989
|
-
var
|
|
2106
|
+
var finalize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
|
|
1990
2107
|
var replaceG = (text, replacer) => text.replace(
|
|
1991
2108
|
G_REGEX,
|
|
1992
2109
|
// @ts-ignore
|
|
@@ -2078,10 +2195,10 @@ var replaceWithDict = (text, dict = []) => {
|
|
|
2078
2195
|
);
|
|
2079
2196
|
return text;
|
|
2080
2197
|
};
|
|
2081
|
-
var toJ = (
|
|
2198
|
+
var toJ = (shortU) => "й " + (shortU ? "у" : "");
|
|
2082
2199
|
var replaceIbyJ = (text, always = false) => text.replace(
|
|
2083
|
-
/([аеёіоуыэюя] )і (ў?)/g,
|
|
2084
|
-
always ? ($0, $1
|
|
2200
|
+
/(?<=[аеёіоуыэюя] )і (ў?)/g,
|
|
2201
|
+
always ? ($0, $1) => toJ($1) : ($0, $1) => Math.random() >= 0.5 ? toJ($1) : $0
|
|
2085
2202
|
);
|
|
2086
2203
|
var __tarask__ = {
|
|
2087
2204
|
wordlist,
|
|
@@ -2123,7 +2240,10 @@ var Taraskevizer = class {
|
|
|
2123
2240
|
convert(text) {
|
|
2124
2241
|
const wrapInColorOf = wrappers.ansiColors;
|
|
2125
2242
|
const isCyrillic = this.abc === ALPHABET.CYRILLIC;
|
|
2126
|
-
const
|
|
2243
|
+
const noFixArr = [];
|
|
2244
|
+
const { splitted, splittedOrig } = this.process(
|
|
2245
|
+
this.prepare(text, noFixArr, "<")
|
|
2246
|
+
);
|
|
2127
2247
|
if (this.nonHtml.ansiColors)
|
|
2128
2248
|
highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
|
|
2129
2249
|
text = join(splitted);
|
|
@@ -2140,12 +2260,15 @@ var Taraskevizer = class {
|
|
|
2140
2260
|
this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
|
|
2141
2261
|
);
|
|
2142
2262
|
}
|
|
2143
|
-
return
|
|
2263
|
+
return finalize(applyNoFix(noFixArr, text).replace(/(/g, "("), "\n");
|
|
2144
2264
|
}
|
|
2145
2265
|
convertToHtml(text) {
|
|
2146
2266
|
const wrapInTag = wrappers.html;
|
|
2147
2267
|
const isCyrillic = this.abc === ALPHABET.CYRILLIC;
|
|
2148
|
-
const
|
|
2268
|
+
const noFixArr = [];
|
|
2269
|
+
const { splitted, splittedOrig } = this.process(
|
|
2270
|
+
this.prepare(text, noFixArr, "<")
|
|
2271
|
+
);
|
|
2149
2272
|
highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
|
|
2150
2273
|
text = join(splitted);
|
|
2151
2274
|
if (isCyrillic)
|
|
@@ -2153,7 +2276,7 @@ var Taraskevizer = class {
|
|
|
2153
2276
|
text,
|
|
2154
2277
|
this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
|
|
2155
2278
|
);
|
|
2156
|
-
return
|
|
2279
|
+
return finalize(
|
|
2157
2280
|
applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
|
|
2158
2281
|
const options = $0.slice(1, -1).split("|");
|
|
2159
2282
|
const main = options.shift();
|
|
@@ -2162,33 +2285,43 @@ var Taraskevizer = class {
|
|
|
2162
2285
|
"<br>"
|
|
2163
2286
|
);
|
|
2164
2287
|
}
|
|
2165
|
-
|
|
2166
|
-
const { abc, j } = this;
|
|
2167
|
-
const noFixArr = [];
|
|
2288
|
+
prepare(text, noFixArr, LEFT_ANGLE_BRACKET, doEscapeCapitalized = this.doEscapeCapitalized) {
|
|
2168
2289
|
text = ` ${text.trim()} `.replace(//g, "");
|
|
2169
|
-
if (
|
|
2290
|
+
if (doEscapeCapitalized)
|
|
2170
2291
|
text = text.replace(new RegExp("(?!<=\\p{Lu} )(\\p{Lu}{2,})(?!= \\p{Lu})", "gu"), "<*.$1>");
|
|
2171
|
-
|
|
2292
|
+
return text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
|
|
2172
2293
|
if ($2 === ",")
|
|
2173
2294
|
return LEFT_ANGLE_BRACKET + $3 + ">";
|
|
2174
2295
|
if ($1)
|
|
2175
2296
|
$3 = restoreCase(
|
|
2176
|
-
[replaceWithDict($3.toLowerCase(), letters[abc])],
|
|
2297
|
+
[replaceWithDict($3.toLowerCase(), letters[this.abc])],
|
|
2177
2298
|
[$3]
|
|
2178
2299
|
);
|
|
2179
2300
|
noFixArr.push($2 === "." ? $3 : LEFT_ANGLE_BRACKET + $3 + ">");
|
|
2180
2301
|
return NOFIX_CHAR;
|
|
2181
2302
|
}).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "(");
|
|
2182
|
-
|
|
2183
|
-
|
|
2303
|
+
}
|
|
2304
|
+
convertAlphabetOnly(text) {
|
|
2305
|
+
const noFixArr = [];
|
|
2306
|
+
return finalize(
|
|
2307
|
+
applyNoFix(
|
|
2308
|
+
noFixArr,
|
|
2309
|
+
convertAlphabet(this.prepare(text, noFixArr, "<", false), this.abc)
|
|
2310
|
+
).replace(/ /g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|() ", "gu"), "$1"),
|
|
2311
|
+
"\n"
|
|
2312
|
+
);
|
|
2313
|
+
}
|
|
2314
|
+
process(text) {
|
|
2315
|
+
const { abc, j } = this;
|
|
2316
|
+
const splittedOrig = convertAlphabet(text, abc).split(" ");
|
|
2184
2317
|
text = this.taraskevize(text.toLowerCase());
|
|
2185
|
-
if (j)
|
|
2318
|
+
if (j && abc !== ALPHABET.LATIN_JI)
|
|
2186
2319
|
text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
|
|
2187
2320
|
text = replaceWithDict(text, letters[abc]);
|
|
2188
|
-
splitted = text.split(" ");
|
|
2321
|
+
let splitted = text.split(" ");
|
|
2189
2322
|
if (abc !== ALPHABET.ARABIC)
|
|
2190
2323
|
splitted = restoreCase(splitted, splittedOrig);
|
|
2191
|
-
return { splittedOrig, splitted
|
|
2324
|
+
return { splittedOrig, splitted };
|
|
2192
2325
|
}
|
|
2193
2326
|
taraskevize(text) {
|
|
2194
2327
|
text = replaceWithDict(text, wordlist);
|
|
@@ -2209,6 +2342,5 @@ export {
|
|
|
2209
2342
|
Taraskevizer,
|
|
2210
2343
|
VARIATION,
|
|
2211
2344
|
__tarask__,
|
|
2212
|
-
convertAlphabet,
|
|
2213
2345
|
gobj
|
|
2214
2346
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "taraskevizer",
|
|
3
|
-
"version": "5.1.
|
|
3
|
+
"version": "5.1.9",
|
|
4
4
|
"author": "GooseOb",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -41,9 +41,9 @@
|
|
|
41
41
|
"scripts": {
|
|
42
42
|
"build": "tsup --config build-config/index.ts",
|
|
43
43
|
"build:bun_EXPERIMENTAL": "bun ./build-config/bun.ts",
|
|
44
|
-
"dev": "esrun --watch=src/*,test/*,bin/* --send-code-mode=temporaryFile test",
|
|
45
|
-
"dev:bun": "bun ./test/bun-watch.ts",
|
|
46
|
-
"dev-bun": "bun test --watch",
|
|
44
|
+
"dev": "NOCLI=true esrun --watch=src/*,test/*,bin/* --send-code-mode=temporaryFile test",
|
|
45
|
+
"dev:bun": "NOCLI=true bun ./test/bun-watch.ts",
|
|
46
|
+
"dev-bun": "NOCLI=true bun test --watch",
|
|
47
47
|
"test": "esrun --send-code-mode=temporaryFile test",
|
|
48
48
|
"prepare": "husky install",
|
|
49
49
|
"typecheck": "tsc --project src/tsconfig.json"
|