taraskevizer 5.1.8 → 5.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -25,11 +25,70 @@ __export(src_exports, {
25
25
  Taraskevizer: () => Taraskevizer,
26
26
  VARIATION: () => VARIATION,
27
27
  __tarask__: () => __tarask__,
28
- convertAlphabet: () => convertAlphabet,
29
28
  gobj: () => gobj
30
29
  });
31
30
  module.exports = __toCommonJS(src_exports);
32
31
 
32
+ // src/dict/iwords.ts
33
+ var toOneLine = (str) => str.replace(/\n/g, "|");
34
+ var iwords = toOneLine(`біс
35
+ бсэн
36
+ в[аеоы]
37
+ верс
38
+ вал[гз]
39
+ гар
40
+ грышч
41
+ грэк
42
+ дал
43
+ дыш
44
+ жыц
45
+ канапіс
46
+ кань?н
47
+ ка[цўл]
48
+ каў[кц]
49
+ ксі
50
+ леус
51
+ л(іст| )
52
+ лістас
53
+ льк
54
+ мант
55
+ мась?ц
56
+ мбры[кч]
57
+ менна
58
+ мідж
59
+ мпар[тц]
60
+ мпульс[аеуы]
61
+ нахадз
62
+ нды([ійюя] |ев)
63
+ ндэкс(а(ў|мі?)? |[еуыі])
64
+ н[еі][ейяю]
65
+ нк([аіу])
66
+ нтэрым
67
+ нфікс
68
+ нфімум
69
+ ншась?ц
70
+ нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
71
+ псілан
72
+ р([аыу]|а[мхйў]|амі|)
73
+ рад
74
+ рбіс
75
+ рмас
76
+ рха
77
+ рыс
78
+ скарк
79
+ скарак
80
+ скра
81
+ скравец
82
+ скрачк
83
+ ста
84
+ с[нт]ась?ц
85
+ сь?ці[нк]
86
+ та[р ]
87
+ тры
88
+ х(ны[хя]?|ную|на[яей])?
89
+ ць?він
90
+ шыяс`);
91
+
33
92
  // src/dict/latin.ts
34
93
  var latinLetters = [
35
94
  [/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
@@ -79,14 +138,123 @@ var latinLetters = [
79
138
  [/łi/, "li"]
80
139
  ];
81
140
  var latinLettersUpperCase = [
82
- [new RegExp(" Е(?= *\\p{Ll})", "u"), " Je"],
83
- [new RegExp(" Ё(?= *\\p{Ll})", "u"), " Jo"],
84
- [new RegExp(" Ю(?= *\\p{Ll})", "u"), " Ju"],
85
- [new RegExp(" Я(?= *\\p{Ll})", "u"), " Ja"],
86
- [/(?<=[АЕЁІОУЎЫЭЮЯЬʼ| ]\(?)Е/, "JE"],
87
- [/(?<=[АЕЁІОУЎЫЭЮЯЬʼ| ]\(?)Ё/, "JO"],
88
- [/(?<=[АЕЁІОУЎЫЭЮЯЬʼ| ]\(?)Ю/, "JU"],
89
- [/(?<=[АЕЁІОУЎЫЭЮЯЬʼ| ]\(?)Я/, "JA"],
141
+ [new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
142
+ [new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
143
+ [new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
144
+ [new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
145
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
146
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
147
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
148
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
149
+ [/Е/, "IE"],
150
+ [/Ё/, "IO"],
151
+ [/Ю/, "IU"],
152
+ [/Я/, "IA"],
153
+ [/Ц[Ьь]/, "Ć"],
154
+ [/З[Ьь]/, "Ź"],
155
+ [/С[Ьь]/, "Ś"],
156
+ [/Н[Ьь]/, "Ń"],
157
+ [/Л[Ьь]/, "L"],
158
+ [/А/, "A"],
159
+ [/Б/, "B"],
160
+ [/В/, "V"],
161
+ [/Г/, "H"],
162
+ [/Ґ/, "G"],
163
+ [/Д/, "D"],
164
+ [/Ж/, "Ž"],
165
+ [/З/, "Z"],
166
+ [/І/, "I"],
167
+ [/Й/, "J"],
168
+ [/К/, "K"],
169
+ [/Л/, "Ł"],
170
+ [/М/, "M"],
171
+ [/Н/, "N"],
172
+ [/О/, "O"],
173
+ [/П/, "P"],
174
+ [/Р/, "R"],
175
+ [/С/, "S"],
176
+ [/Т/, "T"],
177
+ [/У/, "U"],
178
+ [/Ў/, "Ŭ"],
179
+ [/Ф/, "F"],
180
+ [/ Х(?=[\p{Ll} ])/u, " Ch"],
181
+ [/Х/, "CH"],
182
+ [/Ц/, "C"],
183
+ [/Ч/, "Č"],
184
+ [/Ш/, "Š"],
185
+ [/Ы/, "Y"],
186
+ [/Э/, "E"],
187
+ [/[ŁL][Ii]([AEOUaeou])/, "L$1"],
188
+ [/Łi/, "Li"],
189
+ [/ŁI/, "LI"]
190
+ ];
191
+ var rawLatinLettersJi = [
192
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і Ў/, "j U"],
193
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і ў/, "j u"],
194
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і /, "j "],
195
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І Ў/, "J U"],
196
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І ў/, "J u"],
197
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І /, "J "],
198
+ [`і(?=${iwords})`, "ji"],
199
+ [`І(?=${iwords})`, "Ji"],
200
+ [`І(?=${iwords.toUpperCase()})`, "JI"],
201
+ [/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
202
+ [/(?<=[eаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ё/, "jo"],
203
+ [/(?<=[eoаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ю/, "ju"],
204
+ [/(?<=[eouаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)я/, "ja"],
205
+ [/(?<=[eouaаеёіоуўыэюяьʼАЕЁІОУЎЫЭЮЯЬ] *)і/, "ji"],
206
+ [/ʼ/, ""],
207
+ [/ць/, "ć"],
208
+ [/зь/, "ź"],
209
+ [/сь/, "ś"],
210
+ [/нь/, "ń"],
211
+ [/ль/, "l"],
212
+ [/а/, "a"],
213
+ [/б/, "b"],
214
+ [/в/, "v"],
215
+ [/г/, "h"],
216
+ [/ґ/, "g"],
217
+ [/д/, "d"],
218
+ [/е/, "ie"],
219
+ [/ё/, "io"],
220
+ [/ж/, "ž"],
221
+ [/з/, "z"],
222
+ [/і/, "i"],
223
+ [/й/, "j"],
224
+ [/к/, "k"],
225
+ [/л/, "ł"],
226
+ [/м/, "m"],
227
+ [/н/, "n"],
228
+ [/о/, "o"],
229
+ [/п/, "p"],
230
+ [/р/, "r"],
231
+ [/с/, "s"],
232
+ [/т/, "t"],
233
+ [/у/, "u"],
234
+ [/ў/, "ŭ"],
235
+ [/ф/, "f"],
236
+ [/х/, "ch"],
237
+ [/ц/, "c"],
238
+ [/ч/, "č"],
239
+ [/ш/, "š"],
240
+ [/ы/, "y"],
241
+ [/э/, "e"],
242
+ [/ю/, "iu"],
243
+ [/я/, "ia"],
244
+ [/[łl]i([eoua])/, "l$1"],
245
+ [/łi/, "li"]
246
+ ];
247
+ var rawLatinLettersUpperCaseJi = [
248
+ [new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
249
+ [new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
250
+ [new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
251
+ [new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
252
+ [new RegExp("(?<=[eoua] *)І(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), "Ji"],
253
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
254
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
255
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
256
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
257
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ]\(?)І/, "JI"],
90
258
  [/Е/, "IE"],
91
259
  [/Ё/, "IO"],
92
260
  [/Ю/, "IU"],
@@ -202,64 +370,7 @@ var chemicalElements2 = [
202
370
  ];
203
371
  chemicalElements2[1] = chemicalElements2[0] + "|айнштайн|мендзялев|сыборг|гас|флеров";
204
372
  var chemicalElements3 = " гал|бэрыл|тул|бэркл|набэл";
205
- var toOneLine = (str) => str.replace(/\n/g, "|");
206
373
  var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}|i(?:${iwords})))`;
207
- var iwords = toOneLine(`
208
- біс
209
- бсэн
210
- в[аеоы]
211
- верс
212
- вал[гз]
213
- гар
214
- грышч
215
- грэк
216
- дал
217
- дыш
218
- жыц
219
- канапіс
220
- кань?н
221
- ка[цўл]
222
- каў[кц]
223
- ксі
224
- леус
225
- л(іст| )
226
- лістас
227
- льк
228
- мант
229
- мась?ц
230
- мбры[кч]
231
- менна
232
- мідж
233
- мпар[тц]
234
- мпульс[аеуы]
235
- нахадз
236
- нды([ійюя] |ев)
237
- ндэкс(а(ў|мі?)? |[еуыі])
238
- н[еі][ейяю]
239
- нк([аіу])
240
- нтэрым
241
- нфікс
242
- нфімум
243
- ншась?ц
244
- нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
245
- псілан
246
- рад
247
- рбіс
248
- рмас
249
- рха
250
- рыс
251
- скарк
252
- скарак
253
- скра
254
- скравец
255
- скрачк
256
- с[нт]ась?ц
257
- сь?цік
258
- сь?цін
259
- тар
260
- тры
261
- ць?він
262
- шыяс`);
263
374
  var rawWordlist = [
264
375
  /* А */
265
376
  [/аахен/, "аахэн"],
@@ -1944,14 +2055,18 @@ var gobj = {
1944
2055
  var wordlist = [];
1945
2056
  var softeners = [];
1946
2057
  var arabLetters = [];
2058
+ var latinLettersJi = [];
2059
+ var latinLettersUpperCaseJi = [];
1947
2060
  var arr = [
1948
2061
  [rawWordlist, wordlist],
1949
2062
  [rawsofteners, softeners],
1950
- [rawArabLetters, arabLetters]
2063
+ [rawArabLetters, arabLetters],
2064
+ [rawLatinLettersJi, latinLettersJi],
2065
+ [rawLatinLettersUpperCaseJi, latinLettersUpperCaseJi]
1951
2066
  ];
1952
2067
  for (const [raw, obj] of arr)
1953
2068
  for (const [pattern, result] of raw)
1954
- obj.push([RegExp(pattern, "g"), result]);
2069
+ obj.push([RegExp(pattern, pattern instanceof RegExp ? pattern.flags + "g" : "g"), result]);
1955
2070
  for (const obj of [latinLetters, latinLettersUpperCase])
1956
2071
  for (const item of obj)
1957
2072
  item[0] = RegExp(item[0], "g" + item[0].flags);
@@ -1974,7 +2089,7 @@ var ALPHABET = {
1974
2089
  CYRILLIC: 0,
1975
2090
  LATIN: 1,
1976
2091
  ARABIC: 2,
1977
- LATIN_JI: 1
2092
+ LATIN_JI: 3
1978
2093
  };
1979
2094
  var REPLACE_J = {
1980
2095
  NEVER: 0,
@@ -1988,10 +2103,12 @@ var VARIATION = {
1988
2103
  };
1989
2104
  var letters = {
1990
2105
  [ALPHABET.LATIN]: latinLetters,
1991
- [ALPHABET.ARABIC]: arabLetters
2106
+ [ALPHABET.ARABIC]: arabLetters,
2107
+ [ALPHABET.LATIN_JI]: latinLettersJi
1992
2108
  };
1993
2109
  var lettersUpperCase = {
1994
- [ALPHABET.LATIN]: latinLettersUpperCase
2110
+ [ALPHABET.LATIN]: latinLettersUpperCase,
2111
+ [ALPHABET.LATIN_JI]: latinLettersUpperCaseJi
1995
2112
  };
1996
2113
  var wrappers = {
1997
2114
  html: {
@@ -2017,7 +2134,7 @@ var afterTarask = [
2017
2134
  ];
2018
2135
  var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
2019
2136
  var join = (textArr) => textArr.join(" ").replace(/&nbsp;/g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|&#40) ", "gu"), "$1");
2020
- var finilize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
2137
+ var finalize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
2021
2138
  var replaceG = (text, replacer) => text.replace(
2022
2139
  G_REGEX,
2023
2140
  // @ts-ignore
@@ -2109,10 +2226,10 @@ var replaceWithDict = (text, dict = []) => {
2109
2226
  );
2110
2227
  return text;
2111
2228
  };
2112
- var toJ = (vow, shortU) => vow + "й " + (shortU ? "у" : "");
2229
+ var toJ = (shortU) => "й " + (shortU ? "у" : "");
2113
2230
  var replaceIbyJ = (text, always = false) => text.replace(
2114
- /([аеёіоуыэюя] )і (ў?)/g,
2115
- always ? ($0, $1, $2) => toJ($1, $2) : ($0, $1, $2) => Math.random() >= 0.5 ? toJ($1, $2) : $0
2231
+ /(?<=[аеёіоуыэюя] )і (ў?)/g,
2232
+ always ? ($0, $1) => toJ($1) : ($0, $1) => Math.random() >= 0.5 ? toJ($1) : $0
2116
2233
  );
2117
2234
  var __tarask__ = {
2118
2235
  wordlist,
@@ -2154,7 +2271,10 @@ var Taraskevizer = class {
2154
2271
  convert(text) {
2155
2272
  const wrapInColorOf = wrappers.ansiColors;
2156
2273
  const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2157
- const { splitted, splittedOrig, noFixArr } = this.process(text, "<");
2274
+ const noFixArr = [];
2275
+ const { splitted, splittedOrig } = this.process(
2276
+ this.prepare(text, noFixArr, "<")
2277
+ );
2158
2278
  if (this.nonHtml.ansiColors)
2159
2279
  highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2160
2280
  text = join(splitted);
@@ -2171,12 +2291,15 @@ var Taraskevizer = class {
2171
2291
  this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2172
2292
  );
2173
2293
  }
2174
- return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2294
+ return finalize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2175
2295
  }
2176
2296
  convertToHtml(text) {
2177
2297
  const wrapInTag = wrappers.html;
2178
2298
  const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2179
- const { splitted, splittedOrig, noFixArr } = this.process(text, "&lt;");
2299
+ const noFixArr = [];
2300
+ const { splitted, splittedOrig } = this.process(
2301
+ this.prepare(text, noFixArr, "&lt;")
2302
+ );
2180
2303
  highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2181
2304
  text = join(splitted);
2182
2305
  if (isCyrillic)
@@ -2184,7 +2307,7 @@ var Taraskevizer = class {
2184
2307
  text,
2185
2308
  this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2186
2309
  );
2187
- return finilize(
2310
+ return finalize(
2188
2311
  applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2189
2312
  const options = $0.slice(1, -1).split("|");
2190
2313
  const main = options.shift();
@@ -2193,33 +2316,43 @@ var Taraskevizer = class {
2193
2316
  "<br>"
2194
2317
  );
2195
2318
  }
2196
- process(text, LEFT_ANGLE_BRACKET) {
2197
- const { abc, j } = this;
2198
- const noFixArr = [];
2319
+ prepare(text, noFixArr, LEFT_ANGLE_BRACKET, doEscapeCapitalized = this.doEscapeCapitalized) {
2199
2320
  text = ` ${text.trim()} `.replace(//g, "");
2200
- if (this.doEscapeCapitalized)
2321
+ if (doEscapeCapitalized)
2201
2322
  text = text.replace(new RegExp("(?!<=\\p{Lu} )(\\p{Lu}{2,})(?!= \\p{Lu})", "gu"), "<*.$1>");
2202
- text = text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
2323
+ return text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
2203
2324
  if ($2 === ",")
2204
2325
  return LEFT_ANGLE_BRACKET + $3 + ">";
2205
2326
  if ($1)
2206
2327
  $3 = restoreCase(
2207
- [replaceWithDict($3.toLowerCase(), letters[abc])],
2328
+ [replaceWithDict($3.toLowerCase(), letters[this.abc])],
2208
2329
  [$3]
2209
2330
  );
2210
2331
  noFixArr.push($2 === "." ? $3 : LEFT_ANGLE_BRACKET + $3 + ">");
2211
2332
  return NOFIX_CHAR;
2212
2333
  }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2213
- let splittedOrig, splitted;
2214
- splittedOrig = convertAlphabet(text, abc).split(" ");
2334
+ }
2335
+ convertAlphabetOnly(text) {
2336
+ const noFixArr = [];
2337
+ return finalize(
2338
+ applyNoFix(
2339
+ noFixArr,
2340
+ convertAlphabet(this.prepare(text, noFixArr, "<", false), this.abc)
2341
+ ).replace(/&nbsp;/g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|&#40) ", "gu"), "$1"),
2342
+ "\n"
2343
+ );
2344
+ }
2345
+ process(text) {
2346
+ const { abc, j } = this;
2347
+ const splittedOrig = convertAlphabet(text, abc).split(" ");
2215
2348
  text = this.taraskevize(text.toLowerCase());
2216
- if (j)
2349
+ if (j && abc !== ALPHABET.LATIN_JI)
2217
2350
  text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
2218
2351
  text = replaceWithDict(text, letters[abc]);
2219
- splitted = text.split(" ");
2352
+ let splitted = text.split(" ");
2220
2353
  if (abc !== ALPHABET.ARABIC)
2221
2354
  splitted = restoreCase(splitted, splittedOrig);
2222
- return { splittedOrig, splitted, noFixArr };
2355
+ return { splittedOrig, splitted };
2223
2356
  }
2224
2357
  taraskevize(text) {
2225
2358
  text = replaceWithDict(text, wordlist);
@@ -2241,6 +2374,5 @@ var Taraskevizer = class {
2241
2374
  Taraskevizer,
2242
2375
  VARIATION,
2243
2376
  __tarask__,
2244
- convertAlphabet,
2245
2377
  gobj
2246
2378
  });
package/dist/index.d.ts CHANGED
@@ -2,7 +2,7 @@ type ModifyObjectType<T, TResultObj> = T extends object ? T extends (...args: an
2
2
  type DeepPartialReadonly<T> = ModifyObjectType<T, {
3
3
  readonly [P in keyof T]?: DeepPartialReadonly<T[P]>;
4
4
  }>;
5
- type Alphabet = 0 | 1 | 2;
5
+ type Alphabet = 0 | 1 | 2 | 3;
6
6
  type OptionJ = 0 | 1 | 2;
7
7
  type Variation = 0 | 1 | 2;
8
8
  type TaraskOptions = {
@@ -28,7 +28,7 @@ declare const ALPHABET: {
28
28
  readonly CYRILLIC: 0;
29
29
  readonly LATIN: 1;
30
30
  readonly ARABIC: 2;
31
- readonly LATIN_JI: 1;
31
+ readonly LATIN_JI: 3;
32
32
  };
33
33
  declare const REPLACE_J: {
34
34
  readonly NEVER: 0;
@@ -46,7 +46,6 @@ declare const __tarask__: {
46
46
  readonly replaceWithDict: (text: string, dict?: ExtendedDict) => string;
47
47
  readonly afterTarask: ExtendedDict;
48
48
  };
49
- declare const convertAlphabet: (text: string, abc: Alphabet) => string;
50
49
  declare class Taraskevizer {
51
50
  abc: Alphabet;
52
51
  j: OptionJ;
@@ -67,6 +66,8 @@ declare class Taraskevizer {
67
66
  }>);
68
67
  convert(text: string): string;
69
68
  convertToHtml(text: string): string;
69
+ private prepare;
70
+ convertAlphabetOnly(text: string): string;
70
71
  private process;
71
72
  protected taraskevize(text: string): string;
72
73
  }
@@ -78,4 +79,4 @@ declare const gobj: {
78
79
  readonly Ґ: "Г";
79
80
  };
80
81
 
81
- export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__, convertAlphabet, gobj };
82
+ export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__, gobj };
package/dist/index.js CHANGED
@@ -1,3 +1,63 @@
1
+ // src/dict/iwords.ts
2
+ var toOneLine = (str) => str.replace(/\n/g, "|");
3
+ var iwords = toOneLine(`біс
4
+ бсэн
5
+ в[аеоы]
6
+ верс
7
+ вал[гз]
8
+ гар
9
+ грышч
10
+ грэк
11
+ дал
12
+ дыш
13
+ жыц
14
+ канапіс
15
+ кань?н
16
+ ка[цўл]
17
+ каў[кц]
18
+ ксі
19
+ леус
20
+ л(іст| )
21
+ лістас
22
+ льк
23
+ мант
24
+ мась?ц
25
+ мбры[кч]
26
+ менна
27
+ мідж
28
+ мпар[тц]
29
+ мпульс[аеуы]
30
+ нахадз
31
+ нды([ійюя] |ев)
32
+ ндэкс(а(ў|мі?)? |[еуыі])
33
+ н[еі][ейяю]
34
+ нк([аіу])
35
+ нтэрым
36
+ нфікс
37
+ нфімум
38
+ ншась?ц
39
+ нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
40
+ псілан
41
+ р([аыу]|а[мхйў]|амі|)
42
+ рад
43
+ рбіс
44
+ рмас
45
+ рха
46
+ рыс
47
+ скарк
48
+ скарак
49
+ скра
50
+ скравец
51
+ скрачк
52
+ ста
53
+ с[нт]ась?ц
54
+ сь?ці[нк]
55
+ та[р ]
56
+ тры
57
+ х(ны[хя]?|ную|на[яей])?
58
+ ць?він
59
+ шыяс`);
60
+
1
61
  // src/dict/latin.ts
2
62
  var latinLetters = [
3
63
  [/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
@@ -47,14 +107,123 @@ var latinLetters = [
47
107
  [/łi/, "li"]
48
108
  ];
49
109
  var latinLettersUpperCase = [
50
- [new RegExp(" Е(?= *\\p{Ll})", "u"), " Je"],
51
- [new RegExp(" Ё(?= *\\p{Ll})", "u"), " Jo"],
52
- [new RegExp(" Ю(?= *\\p{Ll})", "u"), " Ju"],
53
- [new RegExp(" Я(?= *\\p{Ll})", "u"), " Ja"],
54
- [/(?<=[АЕЁІОУЎЫЭЮЯЬʼ| ]\(?)Е/, "JE"],
55
- [/(?<=[АЕЁІОУЎЫЭЮЯЬʼ| ]\(?)Ё/, "JO"],
56
- [/(?<=[АЕЁІОУЎЫЭЮЯЬʼ| ]\(?)Ю/, "JU"],
57
- [/(?<=[АЕЁІОУЎЫЭЮЯЬʼ| ]\(?)Я/, "JA"],
110
+ [new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
111
+ [new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
112
+ [new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
113
+ [new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
114
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
115
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
116
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
117
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
118
+ [/Е/, "IE"],
119
+ [/Ё/, "IO"],
120
+ [/Ю/, "IU"],
121
+ [/Я/, "IA"],
122
+ [/Ц[Ьь]/, "Ć"],
123
+ [/З[Ьь]/, "Ź"],
124
+ [/С[Ьь]/, "Ś"],
125
+ [/Н[Ьь]/, "Ń"],
126
+ [/Л[Ьь]/, "L"],
127
+ [/А/, "A"],
128
+ [/Б/, "B"],
129
+ [/В/, "V"],
130
+ [/Г/, "H"],
131
+ [/Ґ/, "G"],
132
+ [/Д/, "D"],
133
+ [/Ж/, "Ž"],
134
+ [/З/, "Z"],
135
+ [/І/, "I"],
136
+ [/Й/, "J"],
137
+ [/К/, "K"],
138
+ [/Л/, "Ł"],
139
+ [/М/, "M"],
140
+ [/Н/, "N"],
141
+ [/О/, "O"],
142
+ [/П/, "P"],
143
+ [/Р/, "R"],
144
+ [/С/, "S"],
145
+ [/Т/, "T"],
146
+ [/У/, "U"],
147
+ [/Ў/, "Ŭ"],
148
+ [/Ф/, "F"],
149
+ [/ Х(?=[\p{Ll} ])/u, " Ch"],
150
+ [/Х/, "CH"],
151
+ [/Ц/, "C"],
152
+ [/Ч/, "Č"],
153
+ [/Ш/, "Š"],
154
+ [/Ы/, "Y"],
155
+ [/Э/, "E"],
156
+ [/[ŁL][Ii]([AEOUaeou])/, "L$1"],
157
+ [/Łi/, "Li"],
158
+ [/ŁI/, "LI"]
159
+ ];
160
+ var rawLatinLettersJi = [
161
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і Ў/, "j U"],
162
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і ў/, "j u"],
163
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )і /, "j "],
164
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І Ў/, "J U"],
165
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І ў/, "J u"],
166
+ [/(?<=[аеёіоуыэюяАЕЁІОУЫЭЮЯ] )І /, "J "],
167
+ [`і(?=${iwords})`, "ji"],
168
+ [`І(?=${iwords})`, "Ji"],
169
+ [`І(?=${iwords.toUpperCase()})`, "JI"],
170
+ [/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
171
+ [/(?<=[eаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ё/, "jo"],
172
+ [/(?<=[eoаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ю/, "ju"],
173
+ [/(?<=[eouаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)я/, "ja"],
174
+ [/(?<=[eouaаеёіоуўыэюяьʼАЕЁІОУЎЫЭЮЯЬ] *)і/, "ji"],
175
+ [/ʼ/, ""],
176
+ [/ць/, "ć"],
177
+ [/зь/, "ź"],
178
+ [/сь/, "ś"],
179
+ [/нь/, "ń"],
180
+ [/ль/, "l"],
181
+ [/а/, "a"],
182
+ [/б/, "b"],
183
+ [/в/, "v"],
184
+ [/г/, "h"],
185
+ [/ґ/, "g"],
186
+ [/д/, "d"],
187
+ [/е/, "ie"],
188
+ [/ё/, "io"],
189
+ [/ж/, "ž"],
190
+ [/з/, "z"],
191
+ [/і/, "i"],
192
+ [/й/, "j"],
193
+ [/к/, "k"],
194
+ [/л/, "ł"],
195
+ [/м/, "m"],
196
+ [/н/, "n"],
197
+ [/о/, "o"],
198
+ [/п/, "p"],
199
+ [/р/, "r"],
200
+ [/с/, "s"],
201
+ [/т/, "t"],
202
+ [/у/, "u"],
203
+ [/ў/, "ŭ"],
204
+ [/ф/, "f"],
205
+ [/х/, "ch"],
206
+ [/ц/, "c"],
207
+ [/ч/, "č"],
208
+ [/ш/, "š"],
209
+ [/ы/, "y"],
210
+ [/э/, "e"],
211
+ [/ю/, "iu"],
212
+ [/я/, "ia"],
213
+ [/[łl]i([eoua])/, "l$1"],
214
+ [/łi/, "li"]
215
+ ];
216
+ var rawLatinLettersUpperCaseJi = [
217
+ [new RegExp(" Е(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Je"],
218
+ [new RegExp(" Ё(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Jo"],
219
+ [new RegExp(" Ю(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ju"],
220
+ [new RegExp(" Я(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), " Ja"],
221
+ [new RegExp("(?<=[eoua] *)І(?=[ \\p{P}\\d]*\\p{Lu}?\\p{Ll})", "u"), "Ji"],
222
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Е/, "JE"],
223
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ё/, "JO"],
224
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Ю/, "JU"],
225
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ| ]\(?)Я/, "JA"],
226
+ [/(?<=[АЕЁІОУЎЫЭЮЯЬ]\(?)І/, "JI"],
58
227
  [/Е/, "IE"],
59
228
  [/Ё/, "IO"],
60
229
  [/Ю/, "IU"],
@@ -170,64 +339,7 @@ var chemicalElements2 = [
170
339
  ];
171
340
  chemicalElements2[1] = chemicalElements2[0] + "|айнштайн|мендзялев|сыборг|гас|флеров";
172
341
  var chemicalElements3 = " гал|бэрыл|тул|бэркл|набэл";
173
- var toOneLine = (str) => str.replace(/\n/g, "|");
174
342
  var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}|i(?:${iwords})))`;
175
- var iwords = toOneLine(`
176
- біс
177
- бсэн
178
- в[аеоы]
179
- верс
180
- вал[гз]
181
- гар
182
- грышч
183
- грэк
184
- дал
185
- дыш
186
- жыц
187
- канапіс
188
- кань?н
189
- ка[цўл]
190
- каў[кц]
191
- ксі
192
- леус
193
- л(іст| )
194
- лістас
195
- льк
196
- мант
197
- мась?ц
198
- мбры[кч]
199
- менна
200
- мідж
201
- мпар[тц]
202
- мпульс[аеуы]
203
- нахадз
204
- нды([ійюя] |ев)
205
- ндэкс(а(ў|мі?)? |[еуыі])
206
- н[еі][ейяю]
207
- нк([аіу])
208
- нтэрым
209
- нфікс
210
- нфімум
211
- ншась?ц
212
- нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
213
- псілан
214
- рад
215
- рбіс
216
- рмас
217
- рха
218
- рыс
219
- скарк
220
- скарак
221
- скра
222
- скравец
223
- скрачк
224
- с[нт]ась?ц
225
- сь?цік
226
- сь?цін
227
- тар
228
- тры
229
- ць?він
230
- шыяс`);
231
343
  var rawWordlist = [
232
344
  /* А */
233
345
  [/аахен/, "аахэн"],
@@ -1912,14 +2024,18 @@ var gobj = {
1912
2024
  var wordlist = [];
1913
2025
  var softeners = [];
1914
2026
  var arabLetters = [];
2027
+ var latinLettersJi = [];
2028
+ var latinLettersUpperCaseJi = [];
1915
2029
  var arr = [
1916
2030
  [rawWordlist, wordlist],
1917
2031
  [rawsofteners, softeners],
1918
- [rawArabLetters, arabLetters]
2032
+ [rawArabLetters, arabLetters],
2033
+ [rawLatinLettersJi, latinLettersJi],
2034
+ [rawLatinLettersUpperCaseJi, latinLettersUpperCaseJi]
1919
2035
  ];
1920
2036
  for (const [raw, obj] of arr)
1921
2037
  for (const [pattern, result] of raw)
1922
- obj.push([RegExp(pattern, "g"), result]);
2038
+ obj.push([RegExp(pattern, pattern instanceof RegExp ? pattern.flags + "g" : "g"), result]);
1923
2039
  for (const obj of [latinLetters, latinLettersUpperCase])
1924
2040
  for (const item of obj)
1925
2041
  item[0] = RegExp(item[0], "g" + item[0].flags);
@@ -1942,7 +2058,7 @@ var ALPHABET = {
1942
2058
  CYRILLIC: 0,
1943
2059
  LATIN: 1,
1944
2060
  ARABIC: 2,
1945
- LATIN_JI: 1
2061
+ LATIN_JI: 3
1946
2062
  };
1947
2063
  var REPLACE_J = {
1948
2064
  NEVER: 0,
@@ -1956,10 +2072,12 @@ var VARIATION = {
1956
2072
  };
1957
2073
  var letters = {
1958
2074
  [ALPHABET.LATIN]: latinLetters,
1959
- [ALPHABET.ARABIC]: arabLetters
2075
+ [ALPHABET.ARABIC]: arabLetters,
2076
+ [ALPHABET.LATIN_JI]: latinLettersJi
1960
2077
  };
1961
2078
  var lettersUpperCase = {
1962
- [ALPHABET.LATIN]: latinLettersUpperCase
2079
+ [ALPHABET.LATIN]: latinLettersUpperCase,
2080
+ [ALPHABET.LATIN_JI]: latinLettersUpperCaseJi
1963
2081
  };
1964
2082
  var wrappers = {
1965
2083
  html: {
@@ -1985,7 +2103,7 @@ var afterTarask = [
1985
2103
  ];
1986
2104
  var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
1987
2105
  var join = (textArr) => textArr.join(" ").replace(/&nbsp;/g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|&#40) ", "gu"), "$1");
1988
- var finilize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
2106
+ var finalize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
1989
2107
  var replaceG = (text, replacer) => text.replace(
1990
2108
  G_REGEX,
1991
2109
  // @ts-ignore
@@ -2077,10 +2195,10 @@ var replaceWithDict = (text, dict = []) => {
2077
2195
  );
2078
2196
  return text;
2079
2197
  };
2080
- var toJ = (vow, shortU) => vow + "й " + (shortU ? "у" : "");
2198
+ var toJ = (shortU) => "й " + (shortU ? "у" : "");
2081
2199
  var replaceIbyJ = (text, always = false) => text.replace(
2082
- /([аеёіоуыэюя] )і (ў?)/g,
2083
- always ? ($0, $1, $2) => toJ($1, $2) : ($0, $1, $2) => Math.random() >= 0.5 ? toJ($1, $2) : $0
2200
+ /(?<=[аеёіоуыэюя] )і (ў?)/g,
2201
+ always ? ($0, $1) => toJ($1) : ($0, $1) => Math.random() >= 0.5 ? toJ($1) : $0
2084
2202
  );
2085
2203
  var __tarask__ = {
2086
2204
  wordlist,
@@ -2122,7 +2240,10 @@ var Taraskevizer = class {
2122
2240
  convert(text) {
2123
2241
  const wrapInColorOf = wrappers.ansiColors;
2124
2242
  const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2125
- const { splitted, splittedOrig, noFixArr } = this.process(text, "<");
2243
+ const noFixArr = [];
2244
+ const { splitted, splittedOrig } = this.process(
2245
+ this.prepare(text, noFixArr, "<")
2246
+ );
2126
2247
  if (this.nonHtml.ansiColors)
2127
2248
  highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2128
2249
  text = join(splitted);
@@ -2139,12 +2260,15 @@ var Taraskevizer = class {
2139
2260
  this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2140
2261
  );
2141
2262
  }
2142
- return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2263
+ return finalize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2143
2264
  }
2144
2265
  convertToHtml(text) {
2145
2266
  const wrapInTag = wrappers.html;
2146
2267
  const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2147
- const { splitted, splittedOrig, noFixArr } = this.process(text, "&lt;");
2268
+ const noFixArr = [];
2269
+ const { splitted, splittedOrig } = this.process(
2270
+ this.prepare(text, noFixArr, "&lt;")
2271
+ );
2148
2272
  highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2149
2273
  text = join(splitted);
2150
2274
  if (isCyrillic)
@@ -2152,7 +2276,7 @@ var Taraskevizer = class {
2152
2276
  text,
2153
2277
  this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2154
2278
  );
2155
- return finilize(
2279
+ return finalize(
2156
2280
  applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2157
2281
  const options = $0.slice(1, -1).split("|");
2158
2282
  const main = options.shift();
@@ -2161,33 +2285,43 @@ var Taraskevizer = class {
2161
2285
  "<br>"
2162
2286
  );
2163
2287
  }
2164
- process(text, LEFT_ANGLE_BRACKET) {
2165
- const { abc, j } = this;
2166
- const noFixArr = [];
2288
+ prepare(text, noFixArr, LEFT_ANGLE_BRACKET, doEscapeCapitalized = this.doEscapeCapitalized) {
2167
2289
  text = ` ${text.trim()} `.replace(//g, "");
2168
- if (this.doEscapeCapitalized)
2290
+ if (doEscapeCapitalized)
2169
2291
  text = text.replace(new RegExp("(?!<=\\p{Lu} )(\\p{Lu}{2,})(?!= \\p{Lu})", "gu"), "<*.$1>");
2170
- text = text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
2292
+ return text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
2171
2293
  if ($2 === ",")
2172
2294
  return LEFT_ANGLE_BRACKET + $3 + ">";
2173
2295
  if ($1)
2174
2296
  $3 = restoreCase(
2175
- [replaceWithDict($3.toLowerCase(), letters[abc])],
2297
+ [replaceWithDict($3.toLowerCase(), letters[this.abc])],
2176
2298
  [$3]
2177
2299
  );
2178
2300
  noFixArr.push($2 === "." ? $3 : LEFT_ANGLE_BRACKET + $3 + ">");
2179
2301
  return NOFIX_CHAR;
2180
2302
  }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2181
- let splittedOrig, splitted;
2182
- splittedOrig = convertAlphabet(text, abc).split(" ");
2303
+ }
2304
+ convertAlphabetOnly(text) {
2305
+ const noFixArr = [];
2306
+ return finalize(
2307
+ applyNoFix(
2308
+ noFixArr,
2309
+ convertAlphabet(this.prepare(text, noFixArr, "<", false), this.abc)
2310
+ ).replace(/&nbsp;/g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|&#40) ", "gu"), "$1"),
2311
+ "\n"
2312
+ );
2313
+ }
2314
+ process(text) {
2315
+ const { abc, j } = this;
2316
+ const splittedOrig = convertAlphabet(text, abc).split(" ");
2183
2317
  text = this.taraskevize(text.toLowerCase());
2184
- if (j)
2318
+ if (j && abc !== ALPHABET.LATIN_JI)
2185
2319
  text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
2186
2320
  text = replaceWithDict(text, letters[abc]);
2187
- splitted = text.split(" ");
2321
+ let splitted = text.split(" ");
2188
2322
  if (abc !== ALPHABET.ARABIC)
2189
2323
  splitted = restoreCase(splitted, splittedOrig);
2190
- return { splittedOrig, splitted, noFixArr };
2324
+ return { splittedOrig, splitted };
2191
2325
  }
2192
2326
  taraskevize(text) {
2193
2327
  text = replaceWithDict(text, wordlist);
@@ -2208,6 +2342,5 @@ export {
2208
2342
  Taraskevizer,
2209
2343
  VARIATION,
2210
2344
  __tarask__,
2211
- convertAlphabet,
2212
2345
  gobj
2213
2346
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "taraskevizer",
3
- "version": "5.1.8",
3
+ "version": "5.1.9",
4
4
  "author": "GooseOb",
5
5
  "repository": {
6
6
  "type": "git",
@@ -41,9 +41,9 @@
41
41
  "scripts": {
42
42
  "build": "tsup --config build-config/index.ts",
43
43
  "build:bun_EXPERIMENTAL": "bun ./build-config/bun.ts",
44
- "dev": "esrun --watch=src/*,test/*,bin/* --send-code-mode=temporaryFile test",
45
- "dev:bun": "bun ./test/bun-watch.ts",
46
- "dev-bun": "bun test --watch",
44
+ "dev": "NOCLI=true esrun --watch=src/*,test/*,bin/* --send-code-mode=temporaryFile test",
45
+ "dev:bun": "NOCLI=true bun ./test/bun-watch.ts",
46
+ "dev-bun": "NOCLI=true bun test --watch",
47
47
  "test": "esrun --send-code-mode=temporaryFile test",
48
48
  "prepare": "husky install",
49
49
  "typecheck": "tsc --project src/tsconfig.json"