taraskevizer 5.1.8 → 5.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +223 -91
- package/dist/index.d.ts +5 -4
- package/dist/index.js +223 -90
- package/package.json +4 -4
package/dist/index.cjs
CHANGED
|
@@ -25,11 +25,70 @@ __export(src_exports, {
|
|
|
25
25
|
Taraskevizer: () => Taraskevizer,
|
|
26
26
|
VARIATION: () => VARIATION,
|
|
27
27
|
__tarask__: () => __tarask__,
|
|
28
|
-
convertAlphabet: () => convertAlphabet,
|
|
29
28
|
gobj: () => gobj
|
|
30
29
|
});
|
|
31
30
|
module.exports = __toCommonJS(src_exports);
|
|
32
31
|
|
|
32
|
+
// src/dict/iwords.ts
|
|
33
|
+
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
34
|
+
var iwords = toOneLine(`біс
|
|
35
|
+
бсэн
|
|
36
|
+
в[аеоы]
|
|
37
|
+
верс
|
|
38
|
+
вал[гз]
|
|
39
|
+
гар
|
|
40
|
+
грышч
|
|
41
|
+
грэк
|
|
42
|
+
дал
|
|
43
|
+
дыш
|
|
44
|
+
жыц
|
|
45
|
+
канапіс
|
|
46
|
+
кань?н
|
|
47
|
+
ка[цўл]
|
|
48
|
+
каў[кц]
|
|
49
|
+
ксі
|
|
50
|
+
леус
|
|
51
|
+
л(іст| )
|
|
52
|
+
лістас
|
|
53
|
+
льк
|
|
54
|
+
мант
|
|
55
|
+
мась?ц
|
|
56
|
+
мбры[кч]
|
|
57
|
+
менна
|
|
58
|
+
мідж
|
|
59
|
+
мпар[тц]
|
|
60
|
+
мпульс[аеуы]
|
|
61
|
+
нахадз
|
|
62
|
+
нды([ійюя] |ев)
|
|
63
|
+
ндэкс(а(ў|мі?)? |[еуыі])
|
|
64
|
+
н[еі][ейяю]
|
|
65
|
+
нк([аіу])
|
|
66
|
+
нтэрым
|
|
67
|
+
нфікс
|
|
68
|
+
нфімум
|
|
69
|
+
ншась?ц
|
|
70
|
+
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
71
|
+
псілан
|
|
72
|
+
р([аыу]|а[мхйў]|амі|)
|
|
73
|
+
рад
|
|
74
|
+
рбіс
|
|
75
|
+
рмас
|
|
76
|
+
рха
|
|
77
|
+
рыс
|
|
78
|
+
скарк
|
|
79
|
+
скарак
|
|
80
|
+
скра
|
|
81
|
+
скравец
|
|
82
|
+
скрачк
|
|
83
|
+
ста
|
|
84
|
+
с[нт]ась?ц
|
|
85
|
+
сь?ці[нк]
|
|
86
|
+
та[р ]
|
|
87
|
+
тры
|
|
88
|
+
х(ны[хя]?|ную|на[яей])?
|
|
89
|
+
ць?він
|
|
90
|
+
шыяс`);
|
|
91
|
+
|
|
33
92
|
// src/dict/latin.ts
|
|
34
93
|
var latinLetters = [
|
|
35
94
|
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
@@ -79,14 +138,123 @@ var latinLetters = [
|
|
|
79
138
|
[/łi/, "li"]
|
|
80
139
|
];
|
|
81
140
|
var latinLettersUpperCase = [
|
|
82
|
-
[new RegExp(" Е(?= *\\p{Ll})", "u"), " Je"],
|
|
83
|
-
[new RegExp(" Ё(?= *\\p{Ll})", "u"), " Jo"],
|
|
84
|
-
[new RegExp(" Ю(?= *\\p{Ll})", "u"), " Ju"],
|
|
85
|
-
[new RegExp(" Я(?= *\\p{Ll})", "u"), " Ja"],
|
|
86
|
-
[/(?<=[
|
|
87
|
-
[/(?<=[
|
|
88
|
-
[/(?<=[
|
|
89
|
-
[/(?<=[
|
|
141
|
+
[new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
|
|
142
|
+
[new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
|
|
143
|
+
[new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
|
|
144
|
+
[new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
|
|
145
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
|
|
146
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
|
|
147
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
|
|
148
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
|
|
149
|
+
[/Е/, "IE"],
|
|
150
|
+
[/Ё/, "IO"],
|
|
151
|
+
[/Ю/, "IU"],
|
|
152
|
+
[/Я/, "IA"],
|
|
153
|
+
[/Ц[Ьь]/, "Ć"],
|
|
154
|
+
[/З[Ьь]/, "Ź"],
|
|
155
|
+
[/С[Ьь]/, "Ś"],
|
|
156
|
+
[/Н[Ьь]/, "Ń"],
|
|
157
|
+
[/Л[Ьь]/, "L"],
|
|
158
|
+
[/А/, "A"],
|
|
159
|
+
[/Б/, "B"],
|
|
160
|
+
[/В/, "V"],
|
|
161
|
+
[/Г/, "H"],
|
|
162
|
+
[/Ґ/, "G"],
|
|
163
|
+
[/Д/, "D"],
|
|
164
|
+
[/Ж/, "Ž"],
|
|
165
|
+
[/З/, "Z"],
|
|
166
|
+
[/І/, "I"],
|
|
167
|
+
[/Й/, "J"],
|
|
168
|
+
[/К/, "K"],
|
|
169
|
+
[/Л/, "Ł"],
|
|
170
|
+
[/М/, "M"],
|
|
171
|
+
[/Н/, "N"],
|
|
172
|
+
[/О/, "O"],
|
|
173
|
+
[/П/, "P"],
|
|
174
|
+
[/Р/, "R"],
|
|
175
|
+
[/С/, "S"],
|
|
176
|
+
[/Т/, "T"],
|
|
177
|
+
[/У/, "U"],
|
|
178
|
+
[/Ў/, "Ŭ"],
|
|
179
|
+
[/Ф/, "F"],
|
|
180
|
+
[/ Х(?=[\p{Ll} ])/u, " Ch"],
|
|
181
|
+
[/Х/, "CH"],
|
|
182
|
+
[/Ц/, "C"],
|
|
183
|
+
[/Ч/, "Č"],
|
|
184
|
+
[/Ш/, "Š"],
|
|
185
|
+
[/Ы/, "Y"],
|
|
186
|
+
[/Э/, "E"],
|
|
187
|
+
[/[ŁL][Ii]([AEOUaeou])/, "L$1"],
|
|
188
|
+
[/Łi/, "Li"],
|
|
189
|
+
[/ŁI/, "LI"]
|
|
190
|
+
];
|
|
191
|
+
var rawLatinLettersJi = [
|
|
192
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і Ў/, "j U"],
|
|
193
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і ў/, "j u"],
|
|
194
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і /, "j "],
|
|
195
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І Ў/, "J U"],
|
|
196
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І ў/, "J u"],
|
|
197
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І /, "J "],
|
|
198
|
+
[`і(?=${iwords})`, "ji"],
|
|
199
|
+
[`І(?=${iwords})`, "Ji"],
|
|
200
|
+
[`І(?=${iwords.toUpperCase()})`, "JI"],
|
|
201
|
+
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
202
|
+
[/(?<=[eаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ё/, "jo"],
|
|
203
|
+
[/(?<=[eoаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ю/, "ju"],
|
|
204
|
+
[/(?<=[eouаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)я/, "ja"],
|
|
205
|
+
[/(?<=[eouaаеёіоуўыэюяьʼАЕЁІОУЎЫЭЮЯЬ] *)і/, "ji"],
|
|
206
|
+
[/ʼ/, ""],
|
|
207
|
+
[/ць/, "ć"],
|
|
208
|
+
[/зь/, "ź"],
|
|
209
|
+
[/сь/, "ś"],
|
|
210
|
+
[/нь/, "ń"],
|
|
211
|
+
[/ль/, "l"],
|
|
212
|
+
[/а/, "a"],
|
|
213
|
+
[/б/, "b"],
|
|
214
|
+
[/в/, "v"],
|
|
215
|
+
[/г/, "h"],
|
|
216
|
+
[/ґ/, "g"],
|
|
217
|
+
[/д/, "d"],
|
|
218
|
+
[/е/, "ie"],
|
|
219
|
+
[/ё/, "io"],
|
|
220
|
+
[/ж/, "ž"],
|
|
221
|
+
[/з/, "z"],
|
|
222
|
+
[/і/, "i"],
|
|
223
|
+
[/й/, "j"],
|
|
224
|
+
[/к/, "k"],
|
|
225
|
+
[/л/, "ł"],
|
|
226
|
+
[/м/, "m"],
|
|
227
|
+
[/н/, "n"],
|
|
228
|
+
[/о/, "o"],
|
|
229
|
+
[/п/, "p"],
|
|
230
|
+
[/р/, "r"],
|
|
231
|
+
[/с/, "s"],
|
|
232
|
+
[/т/, "t"],
|
|
233
|
+
[/у/, "u"],
|
|
234
|
+
[/ў/, "ŭ"],
|
|
235
|
+
[/ф/, "f"],
|
|
236
|
+
[/х/, "ch"],
|
|
237
|
+
[/ц/, "c"],
|
|
238
|
+
[/ч/, "č"],
|
|
239
|
+
[/ш/, "š"],
|
|
240
|
+
[/ы/, "y"],
|
|
241
|
+
[/э/, "e"],
|
|
242
|
+
[/ю/, "iu"],
|
|
243
|
+
[/я/, "ia"],
|
|
244
|
+
[/[łl]i([eoua])/, "l$1"],
|
|
245
|
+
[/łi/, "li"]
|
|
246
|
+
];
|
|
247
|
+
var rawLatinLettersUpperCaseJi = [
|
|
248
|
+
[new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
|
|
249
|
+
[new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
|
|
250
|
+
[new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
|
|
251
|
+
[new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
|
|
252
|
+
[new RegExp("(?<=[eoua] *)І(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), "Ji"],
|
|
253
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
|
|
254
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
|
|
255
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
|
|
256
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
|
|
257
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ]\(?)І/, "JI"],
|
|
90
258
|
[/Е/, "IE"],
|
|
91
259
|
[/Ё/, "IO"],
|
|
92
260
|
[/Ю/, "IU"],
|
|
@@ -202,64 +370,7 @@ var chemicalElements2 = [
|
|
|
202
370
|
];
|
|
203
371
|
chemicalElements2[1] = chemicalElements2[0] + "|айнштайн|мендзялев|сыборг|гас|флеров";
|
|
204
372
|
var chemicalElements3 = " гал|бэрыл|тул|бэркл|набэл";
|
|
205
|
-
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
206
373
|
var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}|i(?:${iwords})))`;
|
|
207
|
-
var iwords = toOneLine(`
|
|
208
|
-
біс
|
|
209
|
-
бсэн
|
|
210
|
-
в[аеоы]
|
|
211
|
-
верс
|
|
212
|
-
вал[гз]
|
|
213
|
-
гар
|
|
214
|
-
грышч
|
|
215
|
-
грэк
|
|
216
|
-
дал
|
|
217
|
-
дыш
|
|
218
|
-
жыц
|
|
219
|
-
канапіс
|
|
220
|
-
кань?н
|
|
221
|
-
ка[цўл]
|
|
222
|
-
каў[кц]
|
|
223
|
-
ксі
|
|
224
|
-
леус
|
|
225
|
-
л(іст| )
|
|
226
|
-
лістас
|
|
227
|
-
льк
|
|
228
|
-
мант
|
|
229
|
-
мась?ц
|
|
230
|
-
мбры[кч]
|
|
231
|
-
менна
|
|
232
|
-
мідж
|
|
233
|
-
мпар[тц]
|
|
234
|
-
мпульс[аеуы]
|
|
235
|
-
нахадз
|
|
236
|
-
нды([ійюя] |ев)
|
|
237
|
-
ндэкс(а(ў|мі?)? |[еуыі])
|
|
238
|
-
н[еі][ейяю]
|
|
239
|
-
нк([аіу])
|
|
240
|
-
нтэрым
|
|
241
|
-
нфікс
|
|
242
|
-
нфімум
|
|
243
|
-
ншась?ц
|
|
244
|
-
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
245
|
-
псілан
|
|
246
|
-
рад
|
|
247
|
-
рбіс
|
|
248
|
-
рмас
|
|
249
|
-
рха
|
|
250
|
-
рыс
|
|
251
|
-
скарк
|
|
252
|
-
скарак
|
|
253
|
-
скра
|
|
254
|
-
скравец
|
|
255
|
-
скрачк
|
|
256
|
-
с[нт]ась?ц
|
|
257
|
-
сь?цік
|
|
258
|
-
сь?цін
|
|
259
|
-
тар
|
|
260
|
-
тры
|
|
261
|
-
ць?він
|
|
262
|
-
шыяс`);
|
|
263
374
|
var rawWordlist = [
|
|
264
375
|
/* А */
|
|
265
376
|
[/аахен/, "аахэн"],
|
|
@@ -1944,14 +2055,18 @@ var gobj = {
|
|
|
1944
2055
|
var wordlist = [];
|
|
1945
2056
|
var softeners = [];
|
|
1946
2057
|
var arabLetters = [];
|
|
2058
|
+
var latinLettersJi = [];
|
|
2059
|
+
var latinLettersUpperCaseJi = [];
|
|
1947
2060
|
var arr = [
|
|
1948
2061
|
[rawWordlist, wordlist],
|
|
1949
2062
|
[rawsofteners, softeners],
|
|
1950
|
-
[rawArabLetters, arabLetters]
|
|
2063
|
+
[rawArabLetters, arabLetters],
|
|
2064
|
+
[rawLatinLettersJi, latinLettersJi],
|
|
2065
|
+
[rawLatinLettersUpperCaseJi, latinLettersUpperCaseJi]
|
|
1951
2066
|
];
|
|
1952
2067
|
for (const [raw, obj] of arr)
|
|
1953
2068
|
for (const [pattern, result] of raw)
|
|
1954
|
-
obj.push([RegExp(pattern, "g"), result]);
|
|
2069
|
+
obj.push([RegExp(pattern, pattern instanceof RegExp ? pattern.flags + "g" : "g"), result]);
|
|
1955
2070
|
for (const obj of [latinLetters, latinLettersUpperCase])
|
|
1956
2071
|
for (const item of obj)
|
|
1957
2072
|
item[0] = RegExp(item[0], "g" + item[0].flags);
|
|
@@ -1974,7 +2089,7 @@ var ALPHABET = {
|
|
|
1974
2089
|
CYRILLIC: 0,
|
|
1975
2090
|
LATIN: 1,
|
|
1976
2091
|
ARABIC: 2,
|
|
1977
|
-
LATIN_JI:
|
|
2092
|
+
LATIN_JI: 3
|
|
1978
2093
|
};
|
|
1979
2094
|
var REPLACE_J = {
|
|
1980
2095
|
NEVER: 0,
|
|
@@ -1988,10 +2103,12 @@ var VARIATION = {
|
|
|
1988
2103
|
};
|
|
1989
2104
|
var letters = {
|
|
1990
2105
|
[ALPHABET.LATIN]: latinLetters,
|
|
1991
|
-
[ALPHABET.ARABIC]: arabLetters
|
|
2106
|
+
[ALPHABET.ARABIC]: arabLetters,
|
|
2107
|
+
[ALPHABET.LATIN_JI]: latinLettersJi
|
|
1992
2108
|
};
|
|
1993
2109
|
var lettersUpperCase = {
|
|
1994
|
-
[ALPHABET.LATIN]: latinLettersUpperCase
|
|
2110
|
+
[ALPHABET.LATIN]: latinLettersUpperCase,
|
|
2111
|
+
[ALPHABET.LATIN_JI]: latinLettersUpperCaseJi
|
|
1995
2112
|
};
|
|
1996
2113
|
var wrappers = {
|
|
1997
2114
|
html: {
|
|
@@ -2017,7 +2134,7 @@ var afterTarask = [
|
|
|
2017
2134
|
];
|
|
2018
2135
|
var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
|
|
2019
2136
|
var join = (textArr) => textArr.join(" ").replace(/ /g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|() ", "gu"), "$1");
|
|
2020
|
-
var
|
|
2137
|
+
var finalize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
|
|
2021
2138
|
var replaceG = (text, replacer) => text.replace(
|
|
2022
2139
|
G_REGEX,
|
|
2023
2140
|
// @ts-ignore
|
|
@@ -2109,10 +2226,10 @@ var replaceWithDict = (text, dict = []) => {
|
|
|
2109
2226
|
);
|
|
2110
2227
|
return text;
|
|
2111
2228
|
};
|
|
2112
|
-
var toJ = (
|
|
2229
|
+
var toJ = (shortU) => "й " + (shortU ? "у" : "");
|
|
2113
2230
|
var replaceIbyJ = (text, always = false) => text.replace(
|
|
2114
|
-
/([аеёіоуыэюя] )і (ў?)/g,
|
|
2115
|
-
always ? ($0, $1
|
|
2231
|
+
/(?<=[аеёіоуыэюя] )і (ў?)/g,
|
|
2232
|
+
always ? ($0, $1) => toJ($1) : ($0, $1) => Math.random() >= 0.5 ? toJ($1) : $0
|
|
2116
2233
|
);
|
|
2117
2234
|
var __tarask__ = {
|
|
2118
2235
|
wordlist,
|
|
@@ -2154,7 +2271,10 @@ var Taraskevizer = class {
|
|
|
2154
2271
|
convert(text) {
|
|
2155
2272
|
const wrapInColorOf = wrappers.ansiColors;
|
|
2156
2273
|
const isCyrillic = this.abc === ALPHABET.CYRILLIC;
|
|
2157
|
-
const
|
|
2274
|
+
const noFixArr = [];
|
|
2275
|
+
const { splitted, splittedOrig } = this.process(
|
|
2276
|
+
this.prepare(text, noFixArr, "<")
|
|
2277
|
+
);
|
|
2158
2278
|
if (this.nonHtml.ansiColors)
|
|
2159
2279
|
highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
|
|
2160
2280
|
text = join(splitted);
|
|
@@ -2171,12 +2291,15 @@ var Taraskevizer = class {
|
|
|
2171
2291
|
this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
|
|
2172
2292
|
);
|
|
2173
2293
|
}
|
|
2174
|
-
return
|
|
2294
|
+
return finalize(applyNoFix(noFixArr, text).replace(/(/g, "("), "\n");
|
|
2175
2295
|
}
|
|
2176
2296
|
convertToHtml(text) {
|
|
2177
2297
|
const wrapInTag = wrappers.html;
|
|
2178
2298
|
const isCyrillic = this.abc === ALPHABET.CYRILLIC;
|
|
2179
|
-
const
|
|
2299
|
+
const noFixArr = [];
|
|
2300
|
+
const { splitted, splittedOrig } = this.process(
|
|
2301
|
+
this.prepare(text, noFixArr, "<")
|
|
2302
|
+
);
|
|
2180
2303
|
highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
|
|
2181
2304
|
text = join(splitted);
|
|
2182
2305
|
if (isCyrillic)
|
|
@@ -2184,7 +2307,7 @@ var Taraskevizer = class {
|
|
|
2184
2307
|
text,
|
|
2185
2308
|
this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
|
|
2186
2309
|
);
|
|
2187
|
-
return
|
|
2310
|
+
return finalize(
|
|
2188
2311
|
applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
|
|
2189
2312
|
const options = $0.slice(1, -1).split("|");
|
|
2190
2313
|
const main = options.shift();
|
|
@@ -2193,33 +2316,43 @@ var Taraskevizer = class {
|
|
|
2193
2316
|
"<br>"
|
|
2194
2317
|
);
|
|
2195
2318
|
}
|
|
2196
|
-
|
|
2197
|
-
const { abc, j } = this;
|
|
2198
|
-
const noFixArr = [];
|
|
2319
|
+
prepare(text, noFixArr, LEFT_ANGLE_BRACKET, doEscapeCapitalized = this.doEscapeCapitalized) {
|
|
2199
2320
|
text = ` ${text.trim()} `.replace(//g, "");
|
|
2200
|
-
if (
|
|
2321
|
+
if (doEscapeCapitalized)
|
|
2201
2322
|
text = text.replace(new RegExp("(?!<=\\p{Lu} )(\\p{Lu}{2,})(?!= \\p{Lu})", "gu"), "<*.$1>");
|
|
2202
|
-
|
|
2323
|
+
return text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
|
|
2203
2324
|
if ($2 === ",")
|
|
2204
2325
|
return LEFT_ANGLE_BRACKET + $3 + ">";
|
|
2205
2326
|
if ($1)
|
|
2206
2327
|
$3 = restoreCase(
|
|
2207
|
-
[replaceWithDict($3.toLowerCase(), letters[abc])],
|
|
2328
|
+
[replaceWithDict($3.toLowerCase(), letters[this.abc])],
|
|
2208
2329
|
[$3]
|
|
2209
2330
|
);
|
|
2210
2331
|
noFixArr.push($2 === "." ? $3 : LEFT_ANGLE_BRACKET + $3 + ">");
|
|
2211
2332
|
return NOFIX_CHAR;
|
|
2212
2333
|
}).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "(");
|
|
2213
|
-
|
|
2214
|
-
|
|
2334
|
+
}
|
|
2335
|
+
convertAlphabetOnly(text) {
|
|
2336
|
+
const noFixArr = [];
|
|
2337
|
+
return finalize(
|
|
2338
|
+
applyNoFix(
|
|
2339
|
+
noFixArr,
|
|
2340
|
+
convertAlphabet(this.prepare(text, noFixArr, "<", false), this.abc)
|
|
2341
|
+
).replace(/ /g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|() ", "gu"), "$1"),
|
|
2342
|
+
"\n"
|
|
2343
|
+
);
|
|
2344
|
+
}
|
|
2345
|
+
process(text) {
|
|
2346
|
+
const { abc, j } = this;
|
|
2347
|
+
const splittedOrig = convertAlphabet(text, abc).split(" ");
|
|
2215
2348
|
text = this.taraskevize(text.toLowerCase());
|
|
2216
|
-
if (j)
|
|
2349
|
+
if (j && abc !== ALPHABET.LATIN_JI)
|
|
2217
2350
|
text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
|
|
2218
2351
|
text = replaceWithDict(text, letters[abc]);
|
|
2219
|
-
splitted = text.split(" ");
|
|
2352
|
+
let splitted = text.split(" ");
|
|
2220
2353
|
if (abc !== ALPHABET.ARABIC)
|
|
2221
2354
|
splitted = restoreCase(splitted, splittedOrig);
|
|
2222
|
-
return { splittedOrig, splitted
|
|
2355
|
+
return { splittedOrig, splitted };
|
|
2223
2356
|
}
|
|
2224
2357
|
taraskevize(text) {
|
|
2225
2358
|
text = replaceWithDict(text, wordlist);
|
|
@@ -2241,6 +2374,5 @@ var Taraskevizer = class {
|
|
|
2241
2374
|
Taraskevizer,
|
|
2242
2375
|
VARIATION,
|
|
2243
2376
|
__tarask__,
|
|
2244
|
-
convertAlphabet,
|
|
2245
2377
|
gobj
|
|
2246
2378
|
});
|
package/dist/index.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ type ModifyObjectType<T, TResultObj> = T extends object ? T extends (...args: an
|
|
|
2
2
|
type DeepPartialReadonly<T> = ModifyObjectType<T, {
|
|
3
3
|
readonly [P in keyof T]?: DeepPartialReadonly<T[P]>;
|
|
4
4
|
}>;
|
|
5
|
-
type Alphabet = 0 | 1 | 2;
|
|
5
|
+
type Alphabet = 0 | 1 | 2 | 3;
|
|
6
6
|
type OptionJ = 0 | 1 | 2;
|
|
7
7
|
type Variation = 0 | 1 | 2;
|
|
8
8
|
type TaraskOptions = {
|
|
@@ -28,7 +28,7 @@ declare const ALPHABET: {
|
|
|
28
28
|
readonly CYRILLIC: 0;
|
|
29
29
|
readonly LATIN: 1;
|
|
30
30
|
readonly ARABIC: 2;
|
|
31
|
-
readonly LATIN_JI:
|
|
31
|
+
readonly LATIN_JI: 3;
|
|
32
32
|
};
|
|
33
33
|
declare const REPLACE_J: {
|
|
34
34
|
readonly NEVER: 0;
|
|
@@ -46,7 +46,6 @@ declare const __tarask__: {
|
|
|
46
46
|
readonly replaceWithDict: (text: string, dict?: ExtendedDict) => string;
|
|
47
47
|
readonly afterTarask: ExtendedDict;
|
|
48
48
|
};
|
|
49
|
-
declare const convertAlphabet: (text: string, abc: Alphabet) => string;
|
|
50
49
|
declare class Taraskevizer {
|
|
51
50
|
abc: Alphabet;
|
|
52
51
|
j: OptionJ;
|
|
@@ -67,6 +66,8 @@ declare class Taraskevizer {
|
|
|
67
66
|
}>);
|
|
68
67
|
convert(text: string): string;
|
|
69
68
|
convertToHtml(text: string): string;
|
|
69
|
+
private prepare;
|
|
70
|
+
convertAlphabetOnly(text: string): string;
|
|
70
71
|
private process;
|
|
71
72
|
protected taraskevize(text: string): string;
|
|
72
73
|
}
|
|
@@ -78,4 +79,4 @@ declare const gobj: {
|
|
|
78
79
|
readonly Ґ: "Г";
|
|
79
80
|
};
|
|
80
81
|
|
|
81
|
-
export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__,
|
|
82
|
+
export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__, gobj };
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,63 @@
|
|
|
1
|
+
// src/dict/iwords.ts
|
|
2
|
+
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
3
|
+
var iwords = toOneLine(`біс
|
|
4
|
+
бсэн
|
|
5
|
+
в[аеоы]
|
|
6
|
+
верс
|
|
7
|
+
вал[гз]
|
|
8
|
+
гар
|
|
9
|
+
грышч
|
|
10
|
+
грэк
|
|
11
|
+
дал
|
|
12
|
+
дыш
|
|
13
|
+
жыц
|
|
14
|
+
канапіс
|
|
15
|
+
кань?н
|
|
16
|
+
ка[цўл]
|
|
17
|
+
каў[кц]
|
|
18
|
+
ксі
|
|
19
|
+
леус
|
|
20
|
+
л(іст| )
|
|
21
|
+
лістас
|
|
22
|
+
льк
|
|
23
|
+
мант
|
|
24
|
+
мась?ц
|
|
25
|
+
мбры[кч]
|
|
26
|
+
менна
|
|
27
|
+
мідж
|
|
28
|
+
мпар[тц]
|
|
29
|
+
мпульс[аеуы]
|
|
30
|
+
нахадз
|
|
31
|
+
нды([ійюя] |ев)
|
|
32
|
+
ндэкс(а(ў|мі?)? |[еуыі])
|
|
33
|
+
н[еі][ейяю]
|
|
34
|
+
нк([аіу])
|
|
35
|
+
нтэрым
|
|
36
|
+
нфікс
|
|
37
|
+
нфімум
|
|
38
|
+
ншась?ц
|
|
39
|
+
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
40
|
+
псілан
|
|
41
|
+
р([аыу]|а[мхйў]|амі|)
|
|
42
|
+
рад
|
|
43
|
+
рбіс
|
|
44
|
+
рмас
|
|
45
|
+
рха
|
|
46
|
+
рыс
|
|
47
|
+
скарк
|
|
48
|
+
скарак
|
|
49
|
+
скра
|
|
50
|
+
скравец
|
|
51
|
+
скрачк
|
|
52
|
+
ста
|
|
53
|
+
с[нт]ась?ц
|
|
54
|
+
сь?ці[нк]
|
|
55
|
+
та[р ]
|
|
56
|
+
тры
|
|
57
|
+
х(ны[хя]?|ную|на[яей])?
|
|
58
|
+
ць?він
|
|
59
|
+
шыяс`);
|
|
60
|
+
|
|
1
61
|
// src/dict/latin.ts
|
|
2
62
|
var latinLetters = [
|
|
3
63
|
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
@@ -47,14 +107,123 @@ var latinLetters = [
|
|
|
47
107
|
[/łi/, "li"]
|
|
48
108
|
];
|
|
49
109
|
var latinLettersUpperCase = [
|
|
50
|
-
[new RegExp(" Е(?= *\\p{Ll})", "u"), " Je"],
|
|
51
|
-
[new RegExp(" Ё(?= *\\p{Ll})", "u"), " Jo"],
|
|
52
|
-
[new RegExp(" Ю(?= *\\p{Ll})", "u"), " Ju"],
|
|
53
|
-
[new RegExp(" Я(?= *\\p{Ll})", "u"), " Ja"],
|
|
54
|
-
[/(?<=[
|
|
55
|
-
[/(?<=[
|
|
56
|
-
[/(?<=[
|
|
57
|
-
[/(?<=[
|
|
110
|
+
[new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
|
|
111
|
+
[new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
|
|
112
|
+
[new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
|
|
113
|
+
[new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
|
|
114
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
|
|
115
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
|
|
116
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
|
|
117
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
|
|
118
|
+
[/Е/, "IE"],
|
|
119
|
+
[/Ё/, "IO"],
|
|
120
|
+
[/Ю/, "IU"],
|
|
121
|
+
[/Я/, "IA"],
|
|
122
|
+
[/Ц[Ьь]/, "Ć"],
|
|
123
|
+
[/З[Ьь]/, "Ź"],
|
|
124
|
+
[/С[Ьь]/, "Ś"],
|
|
125
|
+
[/Н[Ьь]/, "Ń"],
|
|
126
|
+
[/Л[Ьь]/, "L"],
|
|
127
|
+
[/А/, "A"],
|
|
128
|
+
[/Б/, "B"],
|
|
129
|
+
[/В/, "V"],
|
|
130
|
+
[/Г/, "H"],
|
|
131
|
+
[/Ґ/, "G"],
|
|
132
|
+
[/Д/, "D"],
|
|
133
|
+
[/Ж/, "Ž"],
|
|
134
|
+
[/З/, "Z"],
|
|
135
|
+
[/І/, "I"],
|
|
136
|
+
[/Й/, "J"],
|
|
137
|
+
[/К/, "K"],
|
|
138
|
+
[/Л/, "Ł"],
|
|
139
|
+
[/М/, "M"],
|
|
140
|
+
[/Н/, "N"],
|
|
141
|
+
[/О/, "O"],
|
|
142
|
+
[/П/, "P"],
|
|
143
|
+
[/Р/, "R"],
|
|
144
|
+
[/С/, "S"],
|
|
145
|
+
[/Т/, "T"],
|
|
146
|
+
[/У/, "U"],
|
|
147
|
+
[/Ў/, "Ŭ"],
|
|
148
|
+
[/Ф/, "F"],
|
|
149
|
+
[/ Х(?=[\p{Ll} ])/u, " Ch"],
|
|
150
|
+
[/Х/, "CH"],
|
|
151
|
+
[/Ц/, "C"],
|
|
152
|
+
[/Ч/, "Č"],
|
|
153
|
+
[/Ш/, "Š"],
|
|
154
|
+
[/Ы/, "Y"],
|
|
155
|
+
[/Э/, "E"],
|
|
156
|
+
[/[ŁL][Ii]([AEOUaeou])/, "L$1"],
|
|
157
|
+
[/Łi/, "Li"],
|
|
158
|
+
[/ŁI/, "LI"]
|
|
159
|
+
];
|
|
160
|
+
var rawLatinLettersJi = [
|
|
161
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і Ў/, "j U"],
|
|
162
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і ў/, "j u"],
|
|
163
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і /, "j "],
|
|
164
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І Ў/, "J U"],
|
|
165
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І ў/, "J u"],
|
|
166
|
+
[/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І /, "J "],
|
|
167
|
+
[`і(?=${iwords})`, "ji"],
|
|
168
|
+
[`І(?=${iwords})`, "Ji"],
|
|
169
|
+
[`І(?=${iwords.toUpperCase()})`, "JI"],
|
|
170
|
+
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
171
|
+
[/(?<=[eаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ё/, "jo"],
|
|
172
|
+
[/(?<=[eoаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ю/, "ju"],
|
|
173
|
+
[/(?<=[eouаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)я/, "ja"],
|
|
174
|
+
[/(?<=[eouaаеёіоуўыэюяьʼАЕЁІОУЎЫЭЮЯЬ] *)і/, "ji"],
|
|
175
|
+
[/ʼ/, ""],
|
|
176
|
+
[/ць/, "ć"],
|
|
177
|
+
[/зь/, "ź"],
|
|
178
|
+
[/сь/, "ś"],
|
|
179
|
+
[/нь/, "ń"],
|
|
180
|
+
[/ль/, "l"],
|
|
181
|
+
[/а/, "a"],
|
|
182
|
+
[/б/, "b"],
|
|
183
|
+
[/в/, "v"],
|
|
184
|
+
[/г/, "h"],
|
|
185
|
+
[/ґ/, "g"],
|
|
186
|
+
[/д/, "d"],
|
|
187
|
+
[/е/, "ie"],
|
|
188
|
+
[/ё/, "io"],
|
|
189
|
+
[/ж/, "ž"],
|
|
190
|
+
[/з/, "z"],
|
|
191
|
+
[/і/, "i"],
|
|
192
|
+
[/й/, "j"],
|
|
193
|
+
[/к/, "k"],
|
|
194
|
+
[/л/, "ł"],
|
|
195
|
+
[/м/, "m"],
|
|
196
|
+
[/н/, "n"],
|
|
197
|
+
[/о/, "o"],
|
|
198
|
+
[/п/, "p"],
|
|
199
|
+
[/р/, "r"],
|
|
200
|
+
[/с/, "s"],
|
|
201
|
+
[/т/, "t"],
|
|
202
|
+
[/у/, "u"],
|
|
203
|
+
[/ў/, "ŭ"],
|
|
204
|
+
[/ф/, "f"],
|
|
205
|
+
[/х/, "ch"],
|
|
206
|
+
[/ц/, "c"],
|
|
207
|
+
[/ч/, "č"],
|
|
208
|
+
[/ш/, "š"],
|
|
209
|
+
[/ы/, "y"],
|
|
210
|
+
[/э/, "e"],
|
|
211
|
+
[/ю/, "iu"],
|
|
212
|
+
[/я/, "ia"],
|
|
213
|
+
[/[łl]i([eoua])/, "l$1"],
|
|
214
|
+
[/łi/, "li"]
|
|
215
|
+
];
|
|
216
|
+
var rawLatinLettersUpperCaseJi = [
|
|
217
|
+
[new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
|
|
218
|
+
[new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
|
|
219
|
+
[new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
|
|
220
|
+
[new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
|
|
221
|
+
[new RegExp("(?<=[eoua] *)І(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), "Ji"],
|
|
222
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
|
|
223
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
|
|
224
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
|
|
225
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
|
|
226
|
+
[/(?<=[АЕЁІОУЎЫЭЮЯЬ]\(?)І/, "JI"],
|
|
58
227
|
[/Е/, "IE"],
|
|
59
228
|
[/Ё/, "IO"],
|
|
60
229
|
[/Ю/, "IU"],
|
|
@@ -170,64 +339,7 @@ var chemicalElements2 = [
|
|
|
170
339
|
];
|
|
171
340
|
chemicalElements2[1] = chemicalElements2[0] + "|айнштайн|мендзялев|сыборг|гас|флеров";
|
|
172
341
|
var chemicalElements3 = " гал|бэрыл|тул|бэркл|набэл";
|
|
173
|
-
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
174
342
|
var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}|i(?:${iwords})))`;
|
|
175
|
-
var iwords = toOneLine(`
|
|
176
|
-
біс
|
|
177
|
-
бсэн
|
|
178
|
-
в[аеоы]
|
|
179
|
-
верс
|
|
180
|
-
вал[гз]
|
|
181
|
-
гар
|
|
182
|
-
грышч
|
|
183
|
-
грэк
|
|
184
|
-
дал
|
|
185
|
-
дыш
|
|
186
|
-
жыц
|
|
187
|
-
канапіс
|
|
188
|
-
кань?н
|
|
189
|
-
ка[цўл]
|
|
190
|
-
каў[кц]
|
|
191
|
-
ксі
|
|
192
|
-
леус
|
|
193
|
-
л(іст| )
|
|
194
|
-
лістас
|
|
195
|
-
льк
|
|
196
|
-
мант
|
|
197
|
-
мась?ц
|
|
198
|
-
мбры[кч]
|
|
199
|
-
менна
|
|
200
|
-
мідж
|
|
201
|
-
мпар[тц]
|
|
202
|
-
мпульс[аеуы]
|
|
203
|
-
нахадз
|
|
204
|
-
нды([ійюя] |ев)
|
|
205
|
-
ндэкс(а(ў|мі?)? |[еуыі])
|
|
206
|
-
н[еі][ейяю]
|
|
207
|
-
нк([аіу])
|
|
208
|
-
нтэрым
|
|
209
|
-
нфікс
|
|
210
|
-
нфімум
|
|
211
|
-
ншась?ц
|
|
212
|
-
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
213
|
-
псілан
|
|
214
|
-
рад
|
|
215
|
-
рбіс
|
|
216
|
-
рмас
|
|
217
|
-
рха
|
|
218
|
-
рыс
|
|
219
|
-
скарк
|
|
220
|
-
скарак
|
|
221
|
-
скра
|
|
222
|
-
скравец
|
|
223
|
-
скрачк
|
|
224
|
-
с[нт]ась?ц
|
|
225
|
-
сь?цік
|
|
226
|
-
сь?цін
|
|
227
|
-
тар
|
|
228
|
-
тры
|
|
229
|
-
ць?він
|
|
230
|
-
шыяс`);
|
|
231
343
|
var rawWordlist = [
|
|
232
344
|
/* А */
|
|
233
345
|
[/аахен/, "аахэн"],
|
|
@@ -1912,14 +2024,18 @@ var gobj = {
|
|
|
1912
2024
|
var wordlist = [];
|
|
1913
2025
|
var softeners = [];
|
|
1914
2026
|
var arabLetters = [];
|
|
2027
|
+
var latinLettersJi = [];
|
|
2028
|
+
var latinLettersUpperCaseJi = [];
|
|
1915
2029
|
var arr = [
|
|
1916
2030
|
[rawWordlist, wordlist],
|
|
1917
2031
|
[rawsofteners, softeners],
|
|
1918
|
-
[rawArabLetters, arabLetters]
|
|
2032
|
+
[rawArabLetters, arabLetters],
|
|
2033
|
+
[rawLatinLettersJi, latinLettersJi],
|
|
2034
|
+
[rawLatinLettersUpperCaseJi, latinLettersUpperCaseJi]
|
|
1919
2035
|
];
|
|
1920
2036
|
for (const [raw, obj] of arr)
|
|
1921
2037
|
for (const [pattern, result] of raw)
|
|
1922
|
-
obj.push([RegExp(pattern, "g"), result]);
|
|
2038
|
+
obj.push([RegExp(pattern, pattern instanceof RegExp ? pattern.flags + "g" : "g"), result]);
|
|
1923
2039
|
for (const obj of [latinLetters, latinLettersUpperCase])
|
|
1924
2040
|
for (const item of obj)
|
|
1925
2041
|
item[0] = RegExp(item[0], "g" + item[0].flags);
|
|
@@ -1942,7 +2058,7 @@ var ALPHABET = {
|
|
|
1942
2058
|
CYRILLIC: 0,
|
|
1943
2059
|
LATIN: 1,
|
|
1944
2060
|
ARABIC: 2,
|
|
1945
|
-
LATIN_JI:
|
|
2061
|
+
LATIN_JI: 3
|
|
1946
2062
|
};
|
|
1947
2063
|
var REPLACE_J = {
|
|
1948
2064
|
NEVER: 0,
|
|
@@ -1956,10 +2072,12 @@ var VARIATION = {
|
|
|
1956
2072
|
};
|
|
1957
2073
|
var letters = {
|
|
1958
2074
|
[ALPHABET.LATIN]: latinLetters,
|
|
1959
|
-
[ALPHABET.ARABIC]: arabLetters
|
|
2075
|
+
[ALPHABET.ARABIC]: arabLetters,
|
|
2076
|
+
[ALPHABET.LATIN_JI]: latinLettersJi
|
|
1960
2077
|
};
|
|
1961
2078
|
var lettersUpperCase = {
|
|
1962
|
-
[ALPHABET.LATIN]: latinLettersUpperCase
|
|
2079
|
+
[ALPHABET.LATIN]: latinLettersUpperCase,
|
|
2080
|
+
[ALPHABET.LATIN_JI]: latinLettersUpperCaseJi
|
|
1963
2081
|
};
|
|
1964
2082
|
var wrappers = {
|
|
1965
2083
|
html: {
|
|
@@ -1985,7 +2103,7 @@ var afterTarask = [
|
|
|
1985
2103
|
];
|
|
1986
2104
|
var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
|
|
1987
2105
|
var join = (textArr) => textArr.join(" ").replace(/ /g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|() ", "gu"), "$1");
|
|
1988
|
-
var
|
|
2106
|
+
var finalize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
|
|
1989
2107
|
var replaceG = (text, replacer) => text.replace(
|
|
1990
2108
|
G_REGEX,
|
|
1991
2109
|
// @ts-ignore
|
|
@@ -2077,10 +2195,10 @@ var replaceWithDict = (text, dict = []) => {
|
|
|
2077
2195
|
);
|
|
2078
2196
|
return text;
|
|
2079
2197
|
};
|
|
2080
|
-
var toJ = (
|
|
2198
|
+
var toJ = (shortU) => "й " + (shortU ? "у" : "");
|
|
2081
2199
|
var replaceIbyJ = (text, always = false) => text.replace(
|
|
2082
|
-
/([аеёіоуыэюя] )і (ў?)/g,
|
|
2083
|
-
always ? ($0, $1
|
|
2200
|
+
/(?<=[аеёіоуыэюя] )і (ў?)/g,
|
|
2201
|
+
always ? ($0, $1) => toJ($1) : ($0, $1) => Math.random() >= 0.5 ? toJ($1) : $0
|
|
2084
2202
|
);
|
|
2085
2203
|
var __tarask__ = {
|
|
2086
2204
|
wordlist,
|
|
@@ -2122,7 +2240,10 @@ var Taraskevizer = class {
|
|
|
2122
2240
|
convert(text) {
|
|
2123
2241
|
const wrapInColorOf = wrappers.ansiColors;
|
|
2124
2242
|
const isCyrillic = this.abc === ALPHABET.CYRILLIC;
|
|
2125
|
-
const
|
|
2243
|
+
const noFixArr = [];
|
|
2244
|
+
const { splitted, splittedOrig } = this.process(
|
|
2245
|
+
this.prepare(text, noFixArr, "<")
|
|
2246
|
+
);
|
|
2126
2247
|
if (this.nonHtml.ansiColors)
|
|
2127
2248
|
highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
|
|
2128
2249
|
text = join(splitted);
|
|
@@ -2139,12 +2260,15 @@ var Taraskevizer = class {
|
|
|
2139
2260
|
this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
|
|
2140
2261
|
);
|
|
2141
2262
|
}
|
|
2142
|
-
return
|
|
2263
|
+
return finalize(applyNoFix(noFixArr, text).replace(/(/g, "("), "\n");
|
|
2143
2264
|
}
|
|
2144
2265
|
convertToHtml(text) {
|
|
2145
2266
|
const wrapInTag = wrappers.html;
|
|
2146
2267
|
const isCyrillic = this.abc === ALPHABET.CYRILLIC;
|
|
2147
|
-
const
|
|
2268
|
+
const noFixArr = [];
|
|
2269
|
+
const { splitted, splittedOrig } = this.process(
|
|
2270
|
+
this.prepare(text, noFixArr, "<")
|
|
2271
|
+
);
|
|
2148
2272
|
highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
|
|
2149
2273
|
text = join(splitted);
|
|
2150
2274
|
if (isCyrillic)
|
|
@@ -2152,7 +2276,7 @@ var Taraskevizer = class {
|
|
|
2152
2276
|
text,
|
|
2153
2277
|
this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
|
|
2154
2278
|
);
|
|
2155
|
-
return
|
|
2279
|
+
return finalize(
|
|
2156
2280
|
applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
|
|
2157
2281
|
const options = $0.slice(1, -1).split("|");
|
|
2158
2282
|
const main = options.shift();
|
|
@@ -2161,33 +2285,43 @@ var Taraskevizer = class {
|
|
|
2161
2285
|
"<br>"
|
|
2162
2286
|
);
|
|
2163
2287
|
}
|
|
2164
|
-
|
|
2165
|
-
const { abc, j } = this;
|
|
2166
|
-
const noFixArr = [];
|
|
2288
|
+
prepare(text, noFixArr, LEFT_ANGLE_BRACKET, doEscapeCapitalized = this.doEscapeCapitalized) {
|
|
2167
2289
|
text = ` ${text.trim()} `.replace(//g, "");
|
|
2168
|
-
if (
|
|
2290
|
+
if (doEscapeCapitalized)
|
|
2169
2291
|
text = text.replace(new RegExp("(?!<=\\p{Lu} )(\\p{Lu}{2,})(?!= \\p{Lu})", "gu"), "<*.$1>");
|
|
2170
|
-
|
|
2292
|
+
return text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
|
|
2171
2293
|
if ($2 === ",")
|
|
2172
2294
|
return LEFT_ANGLE_BRACKET + $3 + ">";
|
|
2173
2295
|
if ($1)
|
|
2174
2296
|
$3 = restoreCase(
|
|
2175
|
-
[replaceWithDict($3.toLowerCase(), letters[abc])],
|
|
2297
|
+
[replaceWithDict($3.toLowerCase(), letters[this.abc])],
|
|
2176
2298
|
[$3]
|
|
2177
2299
|
);
|
|
2178
2300
|
noFixArr.push($2 === "." ? $3 : LEFT_ANGLE_BRACKET + $3 + ">");
|
|
2179
2301
|
return NOFIX_CHAR;
|
|
2180
2302
|
}).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "(");
|
|
2181
|
-
|
|
2182
|
-
|
|
2303
|
+
}
|
|
2304
|
+
convertAlphabetOnly(text) {
|
|
2305
|
+
const noFixArr = [];
|
|
2306
|
+
return finalize(
|
|
2307
|
+
applyNoFix(
|
|
2308
|
+
noFixArr,
|
|
2309
|
+
convertAlphabet(this.prepare(text, noFixArr, "<", false), this.abc)
|
|
2310
|
+
).replace(/ /g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|() ", "gu"), "$1"),
|
|
2311
|
+
"\n"
|
|
2312
|
+
);
|
|
2313
|
+
}
|
|
2314
|
+
process(text) {
|
|
2315
|
+
const { abc, j } = this;
|
|
2316
|
+
const splittedOrig = convertAlphabet(text, abc).split(" ");
|
|
2183
2317
|
text = this.taraskevize(text.toLowerCase());
|
|
2184
|
-
if (j)
|
|
2318
|
+
if (j && abc !== ALPHABET.LATIN_JI)
|
|
2185
2319
|
text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
|
|
2186
2320
|
text = replaceWithDict(text, letters[abc]);
|
|
2187
|
-
splitted = text.split(" ");
|
|
2321
|
+
let splitted = text.split(" ");
|
|
2188
2322
|
if (abc !== ALPHABET.ARABIC)
|
|
2189
2323
|
splitted = restoreCase(splitted, splittedOrig);
|
|
2190
|
-
return { splittedOrig, splitted
|
|
2324
|
+
return { splittedOrig, splitted };
|
|
2191
2325
|
}
|
|
2192
2326
|
taraskevize(text) {
|
|
2193
2327
|
text = replaceWithDict(text, wordlist);
|
|
@@ -2208,6 +2342,5 @@ export {
|
|
|
2208
2342
|
Taraskevizer,
|
|
2209
2343
|
VARIATION,
|
|
2210
2344
|
__tarask__,
|
|
2211
|
-
convertAlphabet,
|
|
2212
2345
|
gobj
|
|
2213
2346
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "taraskevizer",
|
|
3
|
-
"version": "5.1.
|
|
3
|
+
"version": "5.1.9",
|
|
4
4
|
"author": "GooseOb",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -41,9 +41,9 @@
|
|
|
41
41
|
"scripts": {
|
|
42
42
|
"build": "tsup --config build-config/index.ts",
|
|
43
43
|
"build:bun_EXPERIMENTAL": "bun ./build-config/bun.ts",
|
|
44
|
-
"dev": "esrun --watch=src/*,test/*,bin/* --send-code-mode=temporaryFile test",
|
|
45
|
-
"dev:bun": "bun ./test/bun-watch.ts",
|
|
46
|
-
"dev-bun": "bun test --watch",
|
|
44
|
+
"dev": "NOCLI=true esrun --watch=src/*,test/*,bin/* --send-code-mode=temporaryFile test",
|
|
45
|
+
"dev:bun": "NOCLI=true bun ./test/bun-watch.ts",
|
|
46
|
+
"dev-bun": "NOCLI=true bun test --watch",
|
|
47
47
|
"test": "esrun --send-code-mode=temporaryFile test",
|
|
48
48
|
"prepare": "husky install",
|
|
49
49
|
"typecheck": "tsc --project src/tsconfig.json"
|