taraskevizer 5.1.4 → 5.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +26 -24
- package/dist/index.d.ts +4 -2
- package/dist/index.js +25 -24
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -25,16 +25,17 @@ __export(src_exports, {
|
|
|
25
25
|
Taraskevizer: () => Taraskevizer,
|
|
26
26
|
VARIATION: () => VARIATION,
|
|
27
27
|
__tarask__: () => __tarask__,
|
|
28
|
+
convertAlphabet: () => convertAlphabet,
|
|
28
29
|
gobj: () => gobj
|
|
29
30
|
});
|
|
30
31
|
module.exports = __toCommonJS(src_exports);
|
|
31
32
|
|
|
32
33
|
// src/dict/latin.ts
|
|
33
34
|
var latinLetters = [
|
|
34
|
-
[/(?<=[аеёіоуўыэюяьʼ| ]
|
|
35
|
-
[/(?<=[аеёіоуўыэюяьʼ| ]
|
|
36
|
-
[/(?<=[аеёіоуўыэюяьʼ| ]
|
|
37
|
-
[/(?<=[аеёіоуўыэюяьʼ| ]
|
|
35
|
+
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
36
|
+
[/(?<=[eаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ё/, "jo"],
|
|
37
|
+
[/(?<=[eoаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ю/, "ju"],
|
|
38
|
+
[/(?<=[eouаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)я/, "ja"],
|
|
38
39
|
[/ʼі/, "ji"],
|
|
39
40
|
[/ʼ/, ""],
|
|
40
41
|
[/ць/, "ć"],
|
|
@@ -202,8 +203,8 @@ var chemicalElements2 = [
|
|
|
202
203
|
chemicalElements2[1] = chemicalElements2[0] + "|айнштайн|мендзялев|сыборг|гас|флеров";
|
|
203
204
|
var chemicalElements3 = " гал|бэрыл|тул|бэркл|набэл";
|
|
204
205
|
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
205
|
-
var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}
|
|
206
|
-
var iwords = toOneLine(
|
|
206
|
+
var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}|i(?:${iwords})))`;
|
|
207
|
+
var iwords = toOneLine(`
|
|
207
208
|
біс
|
|
208
209
|
бсэн
|
|
209
210
|
в[аеоы]
|
|
@@ -240,7 +241,7 @@ var iwords = toOneLine(`і(
|
|
|
240
241
|
нфікс
|
|
241
242
|
нфімум
|
|
242
243
|
ншась?ц
|
|
243
|
-
нш(а[
|
|
244
|
+
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
244
245
|
псілан
|
|
245
246
|
рад
|
|
246
247
|
рбіс
|
|
@@ -258,7 +259,7 @@ var iwords = toOneLine(`і(
|
|
|
258
259
|
тар
|
|
259
260
|
тры
|
|
260
261
|
ць?він
|
|
261
|
-
|
|
262
|
+
шыяс`);
|
|
262
263
|
var rawWordlist = [
|
|
263
264
|
/* А */
|
|
264
265
|
[/аахен/, "аахэн"],
|
|
@@ -1846,7 +1847,7 @@ var rawsofteners = [
|
|
|
1846
1847
|
[/з(?=([бвзлмнц]|дз)[еёіюяь])/, "зь"],
|
|
1847
1848
|
[/с(?=[бвлмнпсфц][еёіюяь])/, "сь"],
|
|
1848
1849
|
[/ц(?=[вм][еёіюяь])/, "ць"],
|
|
1849
|
-
[`( (?:б[ея]|пра|цера)?з) (
|
|
1850
|
+
[`( (?:б[ея]|пра|цера)?з) (?=\\(?)(?=[еёюяь]|([бвзйлмнпстфц]|дз)[еёіюяь]|імі? |іх(?:ні)?|і(?:${iwords}))`, "$1ь "],
|
|
1850
1851
|
[/([сз])ʼ(?=[яюеё])/, "$1ь"]
|
|
1851
1852
|
];
|
|
1852
1853
|
var gwords = [
|
|
@@ -1968,12 +1969,13 @@ var getLastLetter = (word, i) => {
|
|
|
1968
1969
|
};
|
|
1969
1970
|
var NOFIX_CHAR = " ";
|
|
1970
1971
|
var NOFIX_REGEX = new RegExp(NOFIX_CHAR, "g");
|
|
1971
|
-
var OPTIONAL_WORDS_REGEX = /\(
|
|
1972
|
+
var OPTIONAL_WORDS_REGEX = /\([^\)]*?\)/g;
|
|
1972
1973
|
var G_REGEX = /[Ґґ]/g;
|
|
1973
1974
|
var ALPHABET = {
|
|
1974
1975
|
CYRILLIC: 0,
|
|
1975
1976
|
LATIN: 1,
|
|
1976
|
-
ARABIC: 2
|
|
1977
|
+
ARABIC: 2,
|
|
1978
|
+
LATIN_JI: 1
|
|
1977
1979
|
};
|
|
1978
1980
|
var REPLACE_J = {
|
|
1979
1981
|
NEVER: 0,
|
|
@@ -2040,7 +2042,7 @@ var restoreCase = (text, orig) => {
|
|
|
2040
2042
|
text[i] = word.toUpperCase();
|
|
2041
2043
|
} else {
|
|
2042
2044
|
text[i] = word[0] === "(" ? word.replace(
|
|
2043
|
-
|
|
2045
|
+
/[^)]*?(?=\))/,
|
|
2044
2046
|
($0) => $0.replace(/[(|]./g, ($02) => $02.toUpperCase())
|
|
2045
2047
|
) : word[0].toUpperCase() + word.slice(1);
|
|
2046
2048
|
}
|
|
@@ -2119,13 +2121,14 @@ var __tarask__ = {
|
|
|
2119
2121
|
replaceWithDict,
|
|
2120
2122
|
afterTarask
|
|
2121
2123
|
};
|
|
2124
|
+
var convertAlphabet = (text, abc) => replaceWithDict(replaceWithDict(text, letters[abc]), lettersUpperCase[abc]);
|
|
2122
2125
|
var Taraskevizer = class {
|
|
2123
2126
|
abc = ALPHABET.CYRILLIC;
|
|
2124
2127
|
j = REPLACE_J.NEVER;
|
|
2128
|
+
doEscapeCapitalized = true;
|
|
2125
2129
|
html = {
|
|
2126
2130
|
g: false
|
|
2127
2131
|
};
|
|
2128
|
-
doEscapeCapitalized = true;
|
|
2129
2132
|
nonHtml = {
|
|
2130
2133
|
h: false,
|
|
2131
2134
|
ansiColors: false,
|
|
@@ -2136,12 +2139,13 @@ var Taraskevizer = class {
|
|
|
2136
2139
|
return;
|
|
2137
2140
|
const general = options.general;
|
|
2138
2141
|
if (general) {
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
|
|
2142
|
+
for (const prop of [
|
|
2143
|
+
"abc",
|
|
2144
|
+
"j",
|
|
2145
|
+
"doEscapeCapitalized"
|
|
2146
|
+
])
|
|
2147
|
+
if (prop in general)
|
|
2148
|
+
this[prop] = general[prop];
|
|
2145
2149
|
}
|
|
2146
2150
|
if (options.OVERRIDE_taraskevize)
|
|
2147
2151
|
this.taraskevize = options.OVERRIDE_taraskevize;
|
|
@@ -2196,7 +2200,7 @@ var Taraskevizer = class {
|
|
|
2196
2200
|
text = ` ${text.trim()} `.replace(//g, "");
|
|
2197
2201
|
if (this.doEscapeCapitalized)
|
|
2198
2202
|
text = text.replace(new RegExp("(?!<=\\p{Lu} )(\\p{Lu}{2,})(?!= \\p{Lu})", "gu"), "<*.$1>");
|
|
2199
|
-
text = text.replace(/<(\*?)([,.]?)(
|
|
2203
|
+
text = text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
|
|
2200
2204
|
if ($2 === ",")
|
|
2201
2205
|
return LEFT_ANGLE_BRACKET + $3 + ">";
|
|
2202
2206
|
if ($1)
|
|
@@ -2208,10 +2212,7 @@ var Taraskevizer = class {
|
|
|
2208
2212
|
return NOFIX_CHAR;
|
|
2209
2213
|
}).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "(");
|
|
2210
2214
|
let splittedOrig, splitted;
|
|
2211
|
-
splittedOrig =
|
|
2212
|
-
replaceWithDict(text, letters[abc]),
|
|
2213
|
-
lettersUpperCase[abc]
|
|
2214
|
-
).split(" ");
|
|
2215
|
+
splittedOrig = convertAlphabet(text, abc).split(" ");
|
|
2215
2216
|
text = this.taraskevize(text.toLowerCase());
|
|
2216
2217
|
if (j)
|
|
2217
2218
|
text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
|
|
@@ -2241,5 +2242,6 @@ var Taraskevizer = class {
|
|
|
2241
2242
|
Taraskevizer,
|
|
2242
2243
|
VARIATION,
|
|
2243
2244
|
__tarask__,
|
|
2245
|
+
convertAlphabet,
|
|
2244
2246
|
gobj
|
|
2245
2247
|
});
|
package/dist/index.d.ts
CHANGED
|
@@ -28,6 +28,7 @@ declare const ALPHABET: {
|
|
|
28
28
|
readonly CYRILLIC: 0;
|
|
29
29
|
readonly LATIN: 1;
|
|
30
30
|
readonly ARABIC: 2;
|
|
31
|
+
readonly LATIN_JI: 1;
|
|
31
32
|
};
|
|
32
33
|
declare const REPLACE_J: {
|
|
33
34
|
readonly NEVER: 0;
|
|
@@ -45,13 +46,14 @@ declare const __tarask__: {
|
|
|
45
46
|
readonly replaceWithDict: (text: string, dict?: ExtendedDict) => string;
|
|
46
47
|
readonly afterTarask: ExtendedDict;
|
|
47
48
|
};
|
|
49
|
+
declare const convertAlphabet: (text: string, abc: Alphabet) => string;
|
|
48
50
|
declare class Taraskevizer {
|
|
49
51
|
abc: Alphabet;
|
|
50
52
|
j: OptionJ;
|
|
53
|
+
doEscapeCapitalized: boolean;
|
|
51
54
|
html: {
|
|
52
55
|
g: boolean;
|
|
53
56
|
};
|
|
54
|
-
doEscapeCapitalized: boolean;
|
|
55
57
|
nonHtml: {
|
|
56
58
|
h: boolean;
|
|
57
59
|
ansiColors: boolean;
|
|
@@ -76,4 +78,4 @@ declare const gobj: {
|
|
|
76
78
|
readonly Ґ: "Г";
|
|
77
79
|
};
|
|
78
80
|
|
|
79
|
-
export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__, gobj };
|
|
81
|
+
export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__, convertAlphabet, gobj };
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
// src/dict/latin.ts
|
|
2
2
|
var latinLetters = [
|
|
3
|
-
[/(?<=[аеёіоуўыэюяьʼ| ]
|
|
4
|
-
[/(?<=[аеёіоуўыэюяьʼ| ]
|
|
5
|
-
[/(?<=[аеёіоуўыэюяьʼ| ]
|
|
6
|
-
[/(?<=[аеёіоуўыэюяьʼ| ]
|
|
3
|
+
[/(?<=[аеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)е/, "je"],
|
|
4
|
+
[/(?<=[eаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ё/, "jo"],
|
|
5
|
+
[/(?<=[eoаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)ю/, "ju"],
|
|
6
|
+
[/(?<=[eouаеёіоуўыэюяьʼ| >АЕЁІОУЎЫЭЮЯЬ]|^)я/, "ja"],
|
|
7
7
|
[/ʼі/, "ji"],
|
|
8
8
|
[/ʼ/, ""],
|
|
9
9
|
[/ць/, "ć"],
|
|
@@ -171,8 +171,8 @@ var chemicalElements2 = [
|
|
|
171
171
|
chemicalElements2[1] = chemicalElements2[0] + "|айнштайн|мендзялев|сыборг|гас|флеров";
|
|
172
172
|
var chemicalElements3 = " гал|бэрыл|тул|бэркл|набэл";
|
|
173
173
|
var toOneLine = (str) => str.replace(/\n/g, "|");
|
|
174
|
-
var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}
|
|
175
|
-
var iwords = toOneLine(
|
|
174
|
+
var ia = (word, words) => ` ${word} (?=\\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, "(?:")}|i(?:${iwords})))`;
|
|
175
|
+
var iwords = toOneLine(`
|
|
176
176
|
біс
|
|
177
177
|
бсэн
|
|
178
178
|
в[аеоы]
|
|
@@ -209,7 +209,7 @@ var iwords = toOneLine(`і(
|
|
|
209
209
|
нфікс
|
|
210
210
|
нфімум
|
|
211
211
|
ншась?ц
|
|
212
|
-
нш(а[
|
|
212
|
+
нш(а[ейя]?|ась?ц|ую|ы(мі|х|я)?)
|
|
213
213
|
псілан
|
|
214
214
|
рад
|
|
215
215
|
рбіс
|
|
@@ -227,7 +227,7 @@ var iwords = toOneLine(`і(
|
|
|
227
227
|
тар
|
|
228
228
|
тры
|
|
229
229
|
ць?він
|
|
230
|
-
|
|
230
|
+
шыяс`);
|
|
231
231
|
var rawWordlist = [
|
|
232
232
|
/* А */
|
|
233
233
|
[/аахен/, "аахэн"],
|
|
@@ -1815,7 +1815,7 @@ var rawsofteners = [
|
|
|
1815
1815
|
[/з(?=([бвзлмнц]|дз)[еёіюяь])/, "зь"],
|
|
1816
1816
|
[/с(?=[бвлмнпсфц][еёіюяь])/, "сь"],
|
|
1817
1817
|
[/ц(?=[вм][еёіюяь])/, "ць"],
|
|
1818
|
-
[`( (?:б[ея]|пра|цера)?з) (
|
|
1818
|
+
[`( (?:б[ея]|пра|цера)?з) (?=\\(?)(?=[еёюяь]|([бвзйлмнпстфц]|дз)[еёіюяь]|імі? |іх(?:ні)?|і(?:${iwords}))`, "$1ь "],
|
|
1819
1819
|
[/([сз])ʼ(?=[яюеё])/, "$1ь"]
|
|
1820
1820
|
];
|
|
1821
1821
|
var gwords = [
|
|
@@ -1937,12 +1937,13 @@ var getLastLetter = (word, i) => {
|
|
|
1937
1937
|
};
|
|
1938
1938
|
var NOFIX_CHAR = " ";
|
|
1939
1939
|
var NOFIX_REGEX = new RegExp(NOFIX_CHAR, "g");
|
|
1940
|
-
var OPTIONAL_WORDS_REGEX = /\(
|
|
1940
|
+
var OPTIONAL_WORDS_REGEX = /\([^\)]*?\)/g;
|
|
1941
1941
|
var G_REGEX = /[Ґґ]/g;
|
|
1942
1942
|
var ALPHABET = {
|
|
1943
1943
|
CYRILLIC: 0,
|
|
1944
1944
|
LATIN: 1,
|
|
1945
|
-
ARABIC: 2
|
|
1945
|
+
ARABIC: 2,
|
|
1946
|
+
LATIN_JI: 1
|
|
1946
1947
|
};
|
|
1947
1948
|
var REPLACE_J = {
|
|
1948
1949
|
NEVER: 0,
|
|
@@ -2009,7 +2010,7 @@ var restoreCase = (text, orig) => {
|
|
|
2009
2010
|
text[i] = word.toUpperCase();
|
|
2010
2011
|
} else {
|
|
2011
2012
|
text[i] = word[0] === "(" ? word.replace(
|
|
2012
|
-
|
|
2013
|
+
/[^)]*?(?=\))/,
|
|
2013
2014
|
($0) => $0.replace(/[(|]./g, ($02) => $02.toUpperCase())
|
|
2014
2015
|
) : word[0].toUpperCase() + word.slice(1);
|
|
2015
2016
|
}
|
|
@@ -2088,13 +2089,14 @@ var __tarask__ = {
|
|
|
2088
2089
|
replaceWithDict,
|
|
2089
2090
|
afterTarask
|
|
2090
2091
|
};
|
|
2092
|
+
var convertAlphabet = (text, abc) => replaceWithDict(replaceWithDict(text, letters[abc]), lettersUpperCase[abc]);
|
|
2091
2093
|
var Taraskevizer = class {
|
|
2092
2094
|
abc = ALPHABET.CYRILLIC;
|
|
2093
2095
|
j = REPLACE_J.NEVER;
|
|
2096
|
+
doEscapeCapitalized = true;
|
|
2094
2097
|
html = {
|
|
2095
2098
|
g: false
|
|
2096
2099
|
};
|
|
2097
|
-
doEscapeCapitalized = true;
|
|
2098
2100
|
nonHtml = {
|
|
2099
2101
|
h: false,
|
|
2100
2102
|
ansiColors: false,
|
|
@@ -2105,12 +2107,13 @@ var Taraskevizer = class {
|
|
|
2105
2107
|
return;
|
|
2106
2108
|
const general = options.general;
|
|
2107
2109
|
if (general) {
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2110
|
+
for (const prop of [
|
|
2111
|
+
"abc",
|
|
2112
|
+
"j",
|
|
2113
|
+
"doEscapeCapitalized"
|
|
2114
|
+
])
|
|
2115
|
+
if (prop in general)
|
|
2116
|
+
this[prop] = general[prop];
|
|
2114
2117
|
}
|
|
2115
2118
|
if (options.OVERRIDE_taraskevize)
|
|
2116
2119
|
this.taraskevize = options.OVERRIDE_taraskevize;
|
|
@@ -2165,7 +2168,7 @@ var Taraskevizer = class {
|
|
|
2165
2168
|
text = ` ${text.trim()} `.replace(//g, "");
|
|
2166
2169
|
if (this.doEscapeCapitalized)
|
|
2167
2170
|
text = text.replace(new RegExp("(?!<=\\p{Lu} )(\\p{Lu}{2,})(?!= \\p{Lu})", "gu"), "<*.$1>");
|
|
2168
|
-
text = text.replace(/<(\*?)([,.]?)(
|
|
2171
|
+
text = text.replace(/<(\*?)([,.]?)([^>]*?)>/gs, ($0, $1, $2, $3) => {
|
|
2169
2172
|
if ($2 === ",")
|
|
2170
2173
|
return LEFT_ANGLE_BRACKET + $3 + ">";
|
|
2171
2174
|
if ($1)
|
|
@@ -2177,10 +2180,7 @@ var Taraskevizer = class {
|
|
|
2177
2180
|
return NOFIX_CHAR;
|
|
2178
2181
|
}).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "(");
|
|
2179
2182
|
let splittedOrig, splitted;
|
|
2180
|
-
splittedOrig =
|
|
2181
|
-
replaceWithDict(text, letters[abc]),
|
|
2182
|
-
lettersUpperCase[abc]
|
|
2183
|
-
).split(" ");
|
|
2183
|
+
splittedOrig = convertAlphabet(text, abc).split(" ");
|
|
2184
2184
|
text = this.taraskevize(text.toLowerCase());
|
|
2185
2185
|
if (j)
|
|
2186
2186
|
text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
|
|
@@ -2209,5 +2209,6 @@ export {
|
|
|
2209
2209
|
Taraskevizer,
|
|
2210
2210
|
VARIATION,
|
|
2211
2211
|
__tarask__,
|
|
2212
|
+
convertAlphabet,
|
|
2212
2213
|
gobj
|
|
2213
2214
|
};
|