taraskevizer 3.1.3 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,37 +7,42 @@ $ npm i taraskevizer
7
7
  ## Usage
8
8
 
9
9
  ```js
10
- import { tarask, taraskToHtml, ALPHABET, J, VARIATION } from 'taraskevizer';
10
+ import { Taraskevizer, ALPHABET, J, VARIATION } from 'taraskevizer';
11
11
 
12
- tarask('планета'); // "плянэта"
12
+ const taraskevizer = new Taraskevizer();
13
+ taraskevizer.convert('планета'); // "плянэта"
13
14
 
14
- tarask(
15
- 'планета і Гродна',
16
- {
15
+ const taraskevizer = new Taraskevizer({
16
+ general: {
17
17
  abc: ALPHABET.CYRILLIC,
18
18
  j: J.ALWAYS,
19
19
  },
20
- {
20
+ nonHtml: {
21
21
  ansiColors: true,
22
22
  variations: VARIATION.FIRST,
23
23
  h: false,
24
- }
25
- ); // "пл\x1b[32mя\x1b[0mн\x1b[32mэ\x1b[0mта \x1b[32mй\x1b[0m \x1b[35mГорадня\x1b[0m"
24
+ },
25
+ });
26
+ taraskevizer.convert('планета і Гродна'); // "пл\x1b[32mя\x1b[0mн\x1b[32mэ\x1b[0mта \x1b[32mй\x1b[0m \x1b[35mГорадня\x1b[0m"
26
27
 
27
- taraskToHtml(
28
- 'энергія планеты',
29
- {
28
+ const taraskevizer = new Taraskevizer({
29
+ general: {
30
30
  abc: ALPHABET.LATIN,
31
31
  },
32
- {
32
+ html: {
33
33
  g: false, // ignored, because alphabet is set to latin
34
- }
35
- ); // "en<tarF>erg</tarF>ija p<tarF>lan</tarF>ety"
34
+ },
35
+ });
36
+ taraskevizer.convertToHtml('энергія планеты'); // "en<tarF>erg</tarF>ija p<tarF>lan</tarF>ety"
37
+
38
+ // properties can be reassigned after the object has been created
39
+ taraskevizer.abc = ALPHABET.ARABIC;
40
+ taraskevizer.html.g = true;
36
41
  ```
37
42
 
38
43
  ### Function signatures are in [this file](./dist/index.d.ts) (not available if the project has not been built)
39
44
 
40
- ## TaraskOptions
45
+ ## general
41
46
 
42
47
  Type: `object`
43
48
 
@@ -67,26 +72,16 @@ Default value: `0`
67
72
  | 1 | random | `яна і ён` or `яна й ён` |
68
73
  | 2 | always | `яна й ён` |
69
74
 
70
- ### OVERRIDE_toTarask
75
+ ### OVERRIDE_taraskevize
71
76
 
72
- Type:
77
+ Type: `(text: string) => string`
73
78
 
74
- ```
75
- (
76
- text: string,
77
- replaceWithDict: (
78
- text: string,
79
- dict?: [RegExp, string | ((...substrings: string[]) => string)][]
80
- ) => string,
81
- wordlist: [RegExp, string][],
82
- softers: [RegExp, string][],
83
- afterTarask: (text: string) => string
84
- ) => string
85
- ```
79
+ Default value: internal function `taraskevize`
86
80
 
87
- Default value: internal function `toTarask`
81
+ Can be overridden in order to make additional changes to the text.
82
+ An override usually relies on the private API exposed via `Taraskevizer._` (for example, `Taraskevizer._.replaceWithDict`).
88
83
 
89
- ## HtmlOptions
84
+ ## html
90
85
 
91
86
  ### g
92
87
 
@@ -101,7 +96,7 @@ Do replace `г`(`h`) by `ґ`(`g`) in cyrillic alphabet?
101
96
  | true | `<tarH>г</tarH>валт <tarH>Г</tarH>валт` |
102
97
  | false | `<tarH>ґ</tarH>валт <tarH>Ґ</tarH>валт` |
103
98
 
104
- ## NonHtmlOptions
99
+ ## nonHtml
105
100
 
106
101
  ### ansiColors
107
102
 
package/dist/bin.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { tarask, taraskToHtml } from "./index.js";
2
+ import { ALPHABET, REPLACE_J, Taraskevizer, VARIATION } from "./index.js";
3
3
  import readline from "readline/promises";
4
4
  import { readFile } from "fs/promises";
5
5
  const prefix = "\x1B[34m[taraskevizer]\x1B[0m ";
@@ -15,12 +15,12 @@ if (checkForOptions(["-v", "--version"])) {
15
15
  printWithPrefix(version);
16
16
  process.exit(0);
17
17
  }
18
- const taraskOptions = {};
19
- const nonHtmlOptions = {
18
+ const general = {};
19
+ const nonHtml = {
20
20
  variations: 2,
21
21
  ansiColors: true
22
22
  };
23
- const htmlOptions = { g: true };
23
+ const html = { g: true };
24
24
  let isHtml = false;
25
25
  const toHashTable = (dict) => {
26
26
  const result = {};
@@ -33,56 +33,56 @@ const optionDict = toHashTable([
33
33
  [
34
34
  ["--latin", "-l"],
35
35
  () => {
36
- taraskOptions.abc = 1;
36
+ general.abc = ALPHABET.LATIN;
37
37
  }
38
38
  ],
39
39
  [
40
40
  ["--arabic", "-a"],
41
41
  () => {
42
- taraskOptions.abc = 2;
42
+ general.abc = ALPHABET.ARABIC;
43
43
  }
44
44
  ],
45
45
  [
46
46
  ["--greek", "-gr"],
47
47
  () => {
48
- taraskOptions.abc = 3;
48
+ general.abc = ALPHABET.GREEK;
49
49
  }
50
50
  ],
51
51
  [
52
52
  ["--jrandom", "-jr"],
53
53
  () => {
54
- taraskOptions.j = 1;
54
+ general.j = REPLACE_J.RANDOM;
55
55
  }
56
56
  ],
57
57
  [
58
58
  ["--jalways", "-ja"],
59
59
  () => {
60
- taraskOptions.j = 2;
60
+ general.j = REPLACE_J.ALWAYS;
61
61
  }
62
62
  ],
63
63
  [
64
64
  ["--h", "-h"],
65
65
  () => {
66
- nonHtmlOptions.h = true;
67
- htmlOptions.g = false;
66
+ nonHtml.h = true;
67
+ html.g = false;
68
68
  }
69
69
  ],
70
70
  [
71
71
  ["--no-variations", "-nv"],
72
72
  () => {
73
- nonHtmlOptions.variations = 0;
73
+ nonHtml.variations = VARIATION.NO;
74
74
  }
75
75
  ],
76
76
  [
77
77
  ["--first-variation-only", "-fvo"],
78
78
  () => {
79
- nonHtmlOptions.variations = 1;
79
+ nonHtml.variations = VARIATION.FIRST;
80
80
  }
81
81
  ],
82
82
  [
83
83
  ["--no-color", "-nc"],
84
84
  () => {
85
- nonHtmlOptions.ansiColors = false;
85
+ nonHtml.ansiColors = false;
86
86
  }
87
87
  ],
88
88
  [
@@ -105,7 +105,8 @@ let text = process.argv.length ? process.argv.join(" ") : await readline.createI
105
105
  input: process.stdin,
106
106
  output: process.stdout
107
107
  }).question(prefix + "Enter the text:\n");
108
+ const taraskevizer = new Taraskevizer({ general, html, nonHtml });
108
109
  process.stdout.write(
109
- isHtml ? taraskToHtml(text, taraskOptions, htmlOptions) : tarask(text, taraskOptions, nonHtmlOptions)
110
+ isHtml ? taraskevizer.convertToHtml(text) : taraskevizer.convert(text)
110
111
  );
111
112
  process.exit(0);
package/dist/index.cjs CHANGED
@@ -21,11 +21,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
21
21
  var src_exports = {};
22
22
  __export(src_exports, {
23
23
  ALPHABET: () => ALPHABET,
24
- J: () => J,
24
+ REPLACE_J: () => REPLACE_J,
25
+ Taraskevizer: () => Taraskevizer,
25
26
  VARIATION: () => VARIATION,
26
- gobj: () => gobj,
27
- tarask: () => tarask,
28
- taraskToHtml: () => taraskToHtml
27
+ gobj: () => gobj
29
28
  });
30
29
  module.exports = __toCommonJS(src_exports);
31
30
 
@@ -2141,9 +2140,22 @@ var NOFIX_CHAR = "  ";
2141
2140
  var NOFIX_REGEX = new RegExp(NOFIX_CHAR, "g");
2142
2141
  var OPTIONAL_WORDS_REGEX = /\(.*?\)/g;
2143
2142
  var G_REGEX = /[Ґґ]/g;
2144
- var ALPHABET = { CYRILLIC: 0, LATIN: 1, ARABIC: 2, GREEK: 3 };
2145
- var J = { NEVER: 0, RANDOM: 1, ALWAYS: 2 };
2146
- var VARIATION = { NO: 0, FIRST: 1, ALL: 2 };
2143
+ var ALPHABET = {
2144
+ CYRILLIC: 0,
2145
+ LATIN: 1,
2146
+ ARABIC: 2,
2147
+ GREEK: 3
2148
+ };
2149
+ var REPLACE_J = {
2150
+ NEVER: 0,
2151
+ RANDOM: 1,
2152
+ ALWAYS: 2
2153
+ };
2154
+ var VARIATION = {
2155
+ NO: 0,
2156
+ FIRST: 1,
2157
+ ALL: 2
2158
+ };
2147
2159
  var letters = {
2148
2160
  [ALPHABET.LATIN]: latinLetters,
2149
2161
  [ALPHABET.ARABIC]: arabLetters,
@@ -2177,38 +2189,6 @@ var afterTarask = [
2177
2189
  ($0, $1, $2) => /([ая]ў|ну)$/.test($2) ? $1 + "ь і" + $2 : $0
2178
2190
  ]
2179
2191
  ];
2180
- var process = (text, LEFT_ANGLE_BRACKET, options) => {
2181
- const { abc, j, OVERRIDE_toTarask: _toTarask = toTarask } = options;
2182
- const noFixArr = [];
2183
- text = ` ${text.trim()} `.replace(//g, "").replace(/<([,.]?)(.*?)>/gs, ($0, $1, $2) => {
2184
- console.log($0, ":", $1, ",", $2);
2185
- if ($1 === ",")
2186
- return LEFT_ANGLE_BRACKET + $2 + ">";
2187
- noFixArr.push($1 === "." ? $2 : $0);
2188
- return NOFIX_CHAR;
2189
- }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2190
- let splittedOrig, splitted;
2191
- splittedOrig = replaceWithDict(
2192
- replaceWithDict(text, letters[abc]),
2193
- lettersUpperCase[abc]
2194
- ).split(" ");
2195
- text = _toTarask(
2196
- text.toLowerCase(),
2197
- replaceWithDict,
2198
- wordlist,
2199
- softers,
2200
- afterTarask
2201
- );
2202
- if (j)
2203
- text = replaceIbyJ(text, j === J.ALWAYS);
2204
- if (abc === ALPHABET.GREEK)
2205
- text = replaceWithDict(text, thWords);
2206
- text = replaceWithDict(text, letters[abc]);
2207
- splitted = text.split(" ");
2208
- if (abc !== ALPHABET.ARABIC)
2209
- splitted = restoreCase(splitted, splittedOrig);
2210
- return { splittedOrig, splitted, noFixArr };
2211
- };
2212
2192
  var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
2213
2193
  var join = (textArr) => textArr.join(" ").replace(/&nbsp;/g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|&#40) ", "gu"), "$1");
2214
2194
  var finilize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
@@ -2217,55 +2197,6 @@ var replaceG = (text, replacer) => text.replace(
2217
2197
  // @ts-ignore
2218
2198
  replacer
2219
2199
  );
2220
- var getCompletedOptions = (options) => ({
2221
- abc: 0,
2222
- j: 0,
2223
- ...options
2224
- });
2225
- var taraskToHtml = (text, taraskOptions, htmlOptions = {}) => {
2226
- const options = getCompletedOptions(taraskOptions);
2227
- const wrapInTag = wrappers.html;
2228
- const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2229
- const { splitted, splittedOrig, noFixArr } = process(text, "&lt;", options);
2230
- highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2231
- text = join(splitted);
2232
- if (isCyrillic)
2233
- text = replaceG(
2234
- text,
2235
- htmlOptions.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2236
- );
2237
- return finilize(
2238
- applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2239
- const options2 = $0.slice(1, -1).split("|");
2240
- const main = options2.shift();
2241
- return `<tarL data-l='${options2}'>${main}</tarL>`;
2242
- }),
2243
- "<br>"
2244
- );
2245
- };
2246
- var tarask = (text, taraskOptions, nonHtmlOptions = {}) => {
2247
- const options = getCompletedOptions(taraskOptions);
2248
- const wrapInColorOf = wrappers.ansiColors;
2249
- const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2250
- const { splitted, splittedOrig, noFixArr } = process(text, "<", options);
2251
- if (nonHtmlOptions.ansiColors)
2252
- highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2253
- text = join(splitted);
2254
- if (isCyrillic && (nonHtmlOptions.h || nonHtmlOptions.ansiColors))
2255
- text = replaceG(
2256
- text,
2257
- nonHtmlOptions.ansiColors ? nonHtmlOptions.h ? ($0) => wrapInColorOf.variable(gobj[$0]) : wrapInColorOf.variable("$&") : ($0) => gobj[$0]
2258
- );
2259
- if ("variations" in nonHtmlOptions && nonHtmlOptions.variations !== VARIATION.ALL) {
2260
- const wordIndex = nonHtmlOptions.variations ?? 0;
2261
- const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2262
- text = text.replace(
2263
- OPTIONAL_WORDS_REGEX,
2264
- nonHtmlOptions.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2265
- );
2266
- }
2267
- return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2268
- };
2269
2200
  var restoreCase = (text, orig) => {
2270
2201
  for (let i = 0; i < text.length; i++) {
2271
2202
  const word = text[i];
@@ -2343,18 +2274,6 @@ var highlightChanges = (text, orig, isCyrillic, highlight) => {
2343
2274
  text[i] = word.slice(0, fromStart) + highlight(word.slice(fromStart, fromWordEnd + 1)) + word.slice(fromWordEnd + 1);
2344
2275
  }
2345
2276
  };
2346
- var toTarask = (text, replaceWithDict2, wordlist2, softers2, afterTarask2) => {
2347
- text = replaceWithDict2(text, wordlist2);
2348
- softening:
2349
- do {
2350
- text = replaceWithDict2(text, softers2);
2351
- for (const [pattern, result] of softers2)
2352
- if (result !== "$1дзьдз" && pattern.test(text))
2353
- continue softening;
2354
- break;
2355
- } while (true);
2356
- return replaceWithDict2(text, afterTarask2);
2357
- };
2358
2277
  var replaceWithDict = (text, dict = []) => {
2359
2278
  for (const [pattern, result] of dict)
2360
2279
  text = text.replace(
@@ -2369,12 +2288,123 @@ var replaceIbyJ = (text, always = false) => text.replace(
2369
2288
  /([аеёіоуыэюя] )і (ў?)/g,
2370
2289
  always ? ($0, $1, $2) => toJ($1, $2) : ($0, $1, $2) => Math.random() >= 0.5 ? toJ($1, $2) : $0
2371
2290
  );
2291
+ var Taraskevizer = class _Taraskevizer {
2292
+ abc = ALPHABET.CYRILLIC;
2293
+ j = REPLACE_J.NEVER;
2294
+ html = {
2295
+ g: false
2296
+ };
2297
+ nonHtml = {
2298
+ h: false,
2299
+ ansiColors: false,
2300
+ variations: VARIATION.ALL
2301
+ };
2302
+ constructor(options) {
2303
+ if (!options)
2304
+ return;
2305
+ const general = options.general;
2306
+ if (general) {
2307
+ if (general.abc)
2308
+ this.abc = general.abc;
2309
+ if (general.j)
2310
+ this.j = general.j;
2311
+ }
2312
+ if (options.OVERRIDE_taraskevize)
2313
+ this.taraskevize = options.OVERRIDE_taraskevize;
2314
+ Object.assign(this.html, options.html);
2315
+ Object.assign(this.nonHtml, options.nonHtml);
2316
+ }
2317
+ convert(text) {
2318
+ const wrapInColorOf = wrappers.ansiColors;
2319
+ const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2320
+ const { splitted, splittedOrig, noFixArr } = this.process(text, "<");
2321
+ if (this.nonHtml.ansiColors)
2322
+ highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2323
+ text = join(splitted);
2324
+ if (isCyrillic && (this.nonHtml.h || this.nonHtml.ansiColors))
2325
+ text = replaceG(
2326
+ text,
2327
+ this.nonHtml.ansiColors ? this.nonHtml.h ? ($0) => wrapInColorOf.variable(gobj[$0]) : wrapInColorOf.variable("$&") : ($0) => gobj[$0]
2328
+ );
2329
+ if ("variations" in this.nonHtml && this.nonHtml.variations !== VARIATION.ALL) {
2330
+ const wordIndex = this.nonHtml.variations ?? 0;
2331
+ const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2332
+ text = text.replace(
2333
+ OPTIONAL_WORDS_REGEX,
2334
+ this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2335
+ );
2336
+ }
2337
+ return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2338
+ }
2339
+ convertToHtml(text) {
2340
+ const wrapInTag = wrappers.html;
2341
+ const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2342
+ const { splitted, splittedOrig, noFixArr } = this.process(text, "&lt;");
2343
+ highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2344
+ text = join(splitted);
2345
+ if (isCyrillic)
2346
+ text = replaceG(
2347
+ text,
2348
+ this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2349
+ );
2350
+ return finilize(
2351
+ applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2352
+ const options = $0.slice(1, -1).split("|");
2353
+ const main = options.shift();
2354
+ return `<tarL data-l='${options}'>${main}</tarL>`;
2355
+ }),
2356
+ "<br>"
2357
+ );
2358
+ }
2359
+ process(text, LEFT_ANGLE_BRACKET) {
2360
+ const { abc, j } = this;
2361
+ const noFixArr = [];
2362
+ text = ` ${text.trim()} `.replace(//g, "").replace(/<([,.]?)(.*?)>/gs, ($0, $1, $2) => {
2363
+ if ($1 === ",")
2364
+ return LEFT_ANGLE_BRACKET + $2 + ">";
2365
+ noFixArr.push($1 === "." ? $2 : $0);
2366
+ return NOFIX_CHAR;
2367
+ }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2368
+ let splittedOrig, splitted;
2369
+ splittedOrig = replaceWithDict(
2370
+ replaceWithDict(text, letters[abc]),
2371
+ lettersUpperCase[abc]
2372
+ ).split(" ");
2373
+ text = this.taraskevize(text.toLowerCase());
2374
+ if (j)
2375
+ text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
2376
+ if (abc === ALPHABET.GREEK)
2377
+ text = replaceWithDict(text, thWords);
2378
+ text = replaceWithDict(text, letters[abc]);
2379
+ splitted = text.split(" ");
2380
+ if (abc !== ALPHABET.ARABIC)
2381
+ splitted = restoreCase(splitted, splittedOrig);
2382
+ return { splittedOrig, splitted, noFixArr };
2383
+ }
2384
+ taraskevize(text) {
2385
+ text = _Taraskevizer._.replaceWithDict(text, _Taraskevizer._.wordlist);
2386
+ softening:
2387
+ do {
2388
+ text = _Taraskevizer._.replaceWithDict(text, _Taraskevizer._.softers);
2389
+ for (const [pattern, result] of _Taraskevizer._.softers)
2390
+ if (result !== "$1дзьдз" && pattern.test(text))
2391
+ continue softening;
2392
+ break;
2393
+ } while (true);
2394
+ return _Taraskevizer._.replaceWithDict(text, _Taraskevizer._.afterTarask);
2395
+ }
2396
+ static _ = {
2397
+ wordlist,
2398
+ softers,
2399
+ replaceWithDict,
2400
+ afterTarask
2401
+ };
2402
+ };
2372
2403
  // Annotate the CommonJS export names for ESM import in node:
2373
2404
  0 && (module.exports = {
2374
2405
  ALPHABET,
2375
- J,
2406
+ REPLACE_J,
2407
+ Taraskevizer,
2376
2408
  VARIATION,
2377
- gobj,
2378
- tarask,
2379
- taraskToHtml
2409
+ gobj
2380
2410
  });
package/dist/index.d.ts CHANGED
@@ -8,7 +8,6 @@ type Variation = 0 | 1 | 2;
8
8
  type TaraskOptions = {
9
9
  abc: Alphabet;
10
10
  j: OptionJ;
11
- OVERRIDE_toTarask?: ToTarask;
12
11
  };
13
12
  type NonHtmlOptions = {
14
13
  ansiColors: boolean;
@@ -18,17 +17,11 @@ type NonHtmlOptions = {
18
17
  type HtmlOptions = {
19
18
  g: boolean;
20
19
  };
21
- type ReplaceWithDict = (text: string, dict?: ExtendedDict) => string;
22
- type ToTarask = (text: string, replaceWithDict: ReplaceWithDict, wordlist: Dict, softers: Dict, afterTarask: ExtendedDict) => string;
23
- type Tarask<TOptions extends object> = (text: string, taraskOptions?: DeepPartialReadonly<TaraskOptions>, options?: DeepPartialReadonly<TOptions>) => string;
24
- type Dict<T = RegExp> = [T, string][];
25
- type ExtendedDict = [
20
+ type Dict<T = RegExp> = readonly (readonly [T, string])[];
21
+ type ExtendedDict = readonly (readonly [
26
22
  RegExp,
27
23
  string | ((...substrings: string[]) => string)
28
- ][];
29
- type AlphabetDependentDict = {
30
- [key in Alphabet]?: Dict;
31
- };
24
+ ])[];
32
25
 
33
26
  declare const ALPHABET: {
34
27
  readonly CYRILLIC: 0;
@@ -36,7 +29,7 @@ declare const ALPHABET: {
36
29
  readonly ARABIC: 2;
37
30
  readonly GREEK: 3;
38
31
  };
39
- declare const J: {
32
+ declare const REPLACE_J: {
40
33
  readonly NEVER: 0;
41
34
  readonly RANDOM: 1;
42
35
  readonly ALWAYS: 2;
@@ -46,8 +39,34 @@ declare const VARIATION: {
46
39
  readonly FIRST: 1;
47
40
  readonly ALL: 2;
48
41
  };
49
- declare const taraskToHtml: Tarask<HtmlOptions>;
50
- declare const tarask: Tarask<NonHtmlOptions>;
42
+ declare class Taraskevizer {
43
+ abc: Alphabet;
44
+ j: OptionJ;
45
+ html: {
46
+ g: boolean;
47
+ };
48
+ nonHtml: {
49
+ h: boolean;
50
+ ansiColors: boolean;
51
+ variations: Variation;
52
+ };
53
+ constructor(options?: DeepPartialReadonly<{
54
+ general: TaraskOptions;
55
+ html: HtmlOptions;
56
+ nonHtml: NonHtmlOptions;
57
+ OVERRIDE_taraskevize(this: Taraskevizer, text: string): string;
58
+ }>);
59
+ convert(text: string): string;
60
+ convertToHtml(text: string): string;
61
+ private process;
62
+ private taraskevize;
63
+ static readonly _: {
64
+ readonly wordlist: Dict;
65
+ readonly softers: Dict;
66
+ readonly replaceWithDict: (text: string, dict?: ExtendedDict) => string;
67
+ readonly afterTarask: ExtendedDict;
68
+ };
69
+ }
51
70
 
52
71
  declare const gobj: {
53
72
  readonly г: "ґ";
@@ -56,4 +75,4 @@ declare const gobj: {
56
75
  readonly Ґ: "Г";
57
76
  };
58
77
 
59
- export { ALPHABET, type AlphabetDependentDict, type DeepPartialReadonly, type Dict, type ExtendedDict, type HtmlOptions, J, type NonHtmlOptions, type ReplaceWithDict, type Tarask, type TaraskOptions, type ToTarask, VARIATION, gobj, tarask, taraskToHtml };
78
+ export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, gobj };
package/dist/index.js CHANGED
@@ -2110,9 +2110,22 @@ var NOFIX_CHAR = "  ";
2110
2110
  var NOFIX_REGEX = new RegExp(NOFIX_CHAR, "g");
2111
2111
  var OPTIONAL_WORDS_REGEX = /\(.*?\)/g;
2112
2112
  var G_REGEX = /[Ґґ]/g;
2113
- var ALPHABET = { CYRILLIC: 0, LATIN: 1, ARABIC: 2, GREEK: 3 };
2114
- var J = { NEVER: 0, RANDOM: 1, ALWAYS: 2 };
2115
- var VARIATION = { NO: 0, FIRST: 1, ALL: 2 };
2113
+ var ALPHABET = {
2114
+ CYRILLIC: 0,
2115
+ LATIN: 1,
2116
+ ARABIC: 2,
2117
+ GREEK: 3
2118
+ };
2119
+ var REPLACE_J = {
2120
+ NEVER: 0,
2121
+ RANDOM: 1,
2122
+ ALWAYS: 2
2123
+ };
2124
+ var VARIATION = {
2125
+ NO: 0,
2126
+ FIRST: 1,
2127
+ ALL: 2
2128
+ };
2116
2129
  var letters = {
2117
2130
  [ALPHABET.LATIN]: latinLetters,
2118
2131
  [ALPHABET.ARABIC]: arabLetters,
@@ -2146,38 +2159,6 @@ var afterTarask = [
2146
2159
  ($0, $1, $2) => /([ая]ў|ну)$/.test($2) ? $1 + "ь і" + $2 : $0
2147
2160
  ]
2148
2161
  ];
2149
- var process = (text, LEFT_ANGLE_BRACKET, options) => {
2150
- const { abc, j, OVERRIDE_toTarask: _toTarask = toTarask } = options;
2151
- const noFixArr = [];
2152
- text = ` ${text.trim()} `.replace(//g, "").replace(/<([,.]?)(.*?)>/gs, ($0, $1, $2) => {
2153
- console.log($0, ":", $1, ",", $2);
2154
- if ($1 === ",")
2155
- return LEFT_ANGLE_BRACKET + $2 + ">";
2156
- noFixArr.push($1 === "." ? $2 : $0);
2157
- return NOFIX_CHAR;
2158
- }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2159
- let splittedOrig, splitted;
2160
- splittedOrig = replaceWithDict(
2161
- replaceWithDict(text, letters[abc]),
2162
- lettersUpperCase[abc]
2163
- ).split(" ");
2164
- text = _toTarask(
2165
- text.toLowerCase(),
2166
- replaceWithDict,
2167
- wordlist,
2168
- softers,
2169
- afterTarask
2170
- );
2171
- if (j)
2172
- text = replaceIbyJ(text, j === J.ALWAYS);
2173
- if (abc === ALPHABET.GREEK)
2174
- text = replaceWithDict(text, thWords);
2175
- text = replaceWithDict(text, letters[abc]);
2176
- splitted = text.split(" ");
2177
- if (abc !== ALPHABET.ARABIC)
2178
- splitted = restoreCase(splitted, splittedOrig);
2179
- return { splittedOrig, splitted, noFixArr };
2180
- };
2181
2162
  var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
2182
2163
  var join = (textArr) => textArr.join(" ").replace(/&nbsp;/g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|&#40) ", "gu"), "$1");
2183
2164
  var finilize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
@@ -2186,55 +2167,6 @@ var replaceG = (text, replacer) => text.replace(
2186
2167
  // @ts-ignore
2187
2168
  replacer
2188
2169
  );
2189
- var getCompletedOptions = (options) => ({
2190
- abc: 0,
2191
- j: 0,
2192
- ...options
2193
- });
2194
- var taraskToHtml = (text, taraskOptions, htmlOptions = {}) => {
2195
- const options = getCompletedOptions(taraskOptions);
2196
- const wrapInTag = wrappers.html;
2197
- const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2198
- const { splitted, splittedOrig, noFixArr } = process(text, "&lt;", options);
2199
- highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2200
- text = join(splitted);
2201
- if (isCyrillic)
2202
- text = replaceG(
2203
- text,
2204
- htmlOptions.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2205
- );
2206
- return finilize(
2207
- applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2208
- const options2 = $0.slice(1, -1).split("|");
2209
- const main = options2.shift();
2210
- return `<tarL data-l='${options2}'>${main}</tarL>`;
2211
- }),
2212
- "<br>"
2213
- );
2214
- };
2215
- var tarask = (text, taraskOptions, nonHtmlOptions = {}) => {
2216
- const options = getCompletedOptions(taraskOptions);
2217
- const wrapInColorOf = wrappers.ansiColors;
2218
- const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2219
- const { splitted, splittedOrig, noFixArr } = process(text, "<", options);
2220
- if (nonHtmlOptions.ansiColors)
2221
- highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2222
- text = join(splitted);
2223
- if (isCyrillic && (nonHtmlOptions.h || nonHtmlOptions.ansiColors))
2224
- text = replaceG(
2225
- text,
2226
- nonHtmlOptions.ansiColors ? nonHtmlOptions.h ? ($0) => wrapInColorOf.variable(gobj[$0]) : wrapInColorOf.variable("$&") : ($0) => gobj[$0]
2227
- );
2228
- if ("variations" in nonHtmlOptions && nonHtmlOptions.variations !== VARIATION.ALL) {
2229
- const wordIndex = nonHtmlOptions.variations ?? 0;
2230
- const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2231
- text = text.replace(
2232
- OPTIONAL_WORDS_REGEX,
2233
- nonHtmlOptions.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2234
- );
2235
- }
2236
- return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2237
- };
2238
2170
  var restoreCase = (text, orig) => {
2239
2171
  for (let i = 0; i < text.length; i++) {
2240
2172
  const word = text[i];
@@ -2312,18 +2244,6 @@ var highlightChanges = (text, orig, isCyrillic, highlight) => {
2312
2244
  text[i] = word.slice(0, fromStart) + highlight(word.slice(fromStart, fromWordEnd + 1)) + word.slice(fromWordEnd + 1);
2313
2245
  }
2314
2246
  };
2315
- var toTarask = (text, replaceWithDict2, wordlist2, softers2, afterTarask2) => {
2316
- text = replaceWithDict2(text, wordlist2);
2317
- softening:
2318
- do {
2319
- text = replaceWithDict2(text, softers2);
2320
- for (const [pattern, result] of softers2)
2321
- if (result !== "$1дзьдз" && pattern.test(text))
2322
- continue softening;
2323
- break;
2324
- } while (true);
2325
- return replaceWithDict2(text, afterTarask2);
2326
- };
2327
2247
  var replaceWithDict = (text, dict = []) => {
2328
2248
  for (const [pattern, result] of dict)
2329
2249
  text = text.replace(
@@ -2338,11 +2258,122 @@ var replaceIbyJ = (text, always = false) => text.replace(
2338
2258
  /([аеёіоуыэюя] )і (ў?)/g,
2339
2259
  always ? ($0, $1, $2) => toJ($1, $2) : ($0, $1, $2) => Math.random() >= 0.5 ? toJ($1, $2) : $0
2340
2260
  );
2261
+ var Taraskevizer = class _Taraskevizer {
2262
+ abc = ALPHABET.CYRILLIC;
2263
+ j = REPLACE_J.NEVER;
2264
+ html = {
2265
+ g: false
2266
+ };
2267
+ nonHtml = {
2268
+ h: false,
2269
+ ansiColors: false,
2270
+ variations: VARIATION.ALL
2271
+ };
2272
+ constructor(options) {
2273
+ if (!options)
2274
+ return;
2275
+ const general = options.general;
2276
+ if (general) {
2277
+ if (general.abc)
2278
+ this.abc = general.abc;
2279
+ if (general.j)
2280
+ this.j = general.j;
2281
+ }
2282
+ if (options.OVERRIDE_taraskevize)
2283
+ this.taraskevize = options.OVERRIDE_taraskevize;
2284
+ Object.assign(this.html, options.html);
2285
+ Object.assign(this.nonHtml, options.nonHtml);
2286
+ }
2287
+ convert(text) {
2288
+ const wrapInColorOf = wrappers.ansiColors;
2289
+ const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2290
+ const { splitted, splittedOrig, noFixArr } = this.process(text, "<");
2291
+ if (this.nonHtml.ansiColors)
2292
+ highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2293
+ text = join(splitted);
2294
+ if (isCyrillic && (this.nonHtml.h || this.nonHtml.ansiColors))
2295
+ text = replaceG(
2296
+ text,
2297
+ this.nonHtml.ansiColors ? this.nonHtml.h ? ($0) => wrapInColorOf.variable(gobj[$0]) : wrapInColorOf.variable("$&") : ($0) => gobj[$0]
2298
+ );
2299
+ if ("variations" in this.nonHtml && this.nonHtml.variations !== VARIATION.ALL) {
2300
+ const wordIndex = this.nonHtml.variations ?? 0;
2301
+ const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2302
+ text = text.replace(
2303
+ OPTIONAL_WORDS_REGEX,
2304
+ this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2305
+ );
2306
+ }
2307
+ return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2308
+ }
2309
+ convertToHtml(text) {
2310
+ const wrapInTag = wrappers.html;
2311
+ const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2312
+ const { splitted, splittedOrig, noFixArr } = this.process(text, "&lt;");
2313
+ highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2314
+ text = join(splitted);
2315
+ if (isCyrillic)
2316
+ text = replaceG(
2317
+ text,
2318
+ this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2319
+ );
2320
+ return finilize(
2321
+ applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2322
+ const options = $0.slice(1, -1).split("|");
2323
+ const main = options.shift();
2324
+ return `<tarL data-l='${options}'>${main}</tarL>`;
2325
+ }),
2326
+ "<br>"
2327
+ );
2328
+ }
2329
+ process(text, LEFT_ANGLE_BRACKET) {
2330
+ const { abc, j } = this;
2331
+ const noFixArr = [];
2332
+ text = ` ${text.trim()} `.replace(//g, "").replace(/<([,.]?)(.*?)>/gs, ($0, $1, $2) => {
2333
+ if ($1 === ",")
2334
+ return LEFT_ANGLE_BRACKET + $2 + ">";
2335
+ noFixArr.push($1 === "." ? $2 : $0);
2336
+ return NOFIX_CHAR;
2337
+ }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2338
+ let splittedOrig, splitted;
2339
+ splittedOrig = replaceWithDict(
2340
+ replaceWithDict(text, letters[abc]),
2341
+ lettersUpperCase[abc]
2342
+ ).split(" ");
2343
+ text = this.taraskevize(text.toLowerCase());
2344
+ if (j)
2345
+ text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
2346
+ if (abc === ALPHABET.GREEK)
2347
+ text = replaceWithDict(text, thWords);
2348
+ text = replaceWithDict(text, letters[abc]);
2349
+ splitted = text.split(" ");
2350
+ if (abc !== ALPHABET.ARABIC)
2351
+ splitted = restoreCase(splitted, splittedOrig);
2352
+ return { splittedOrig, splitted, noFixArr };
2353
+ }
2354
+ taraskevize(text) {
2355
+ text = _Taraskevizer._.replaceWithDict(text, _Taraskevizer._.wordlist);
2356
+ softening:
2357
+ do {
2358
+ text = _Taraskevizer._.replaceWithDict(text, _Taraskevizer._.softers);
2359
+ for (const [pattern, result] of _Taraskevizer._.softers)
2360
+ if (result !== "$1дзьдз" && pattern.test(text))
2361
+ continue softening;
2362
+ break;
2363
+ } while (true);
2364
+ return _Taraskevizer._.replaceWithDict(text, _Taraskevizer._.afterTarask);
2365
+ }
2366
+ static _ = {
2367
+ wordlist,
2368
+ softers,
2369
+ replaceWithDict,
2370
+ afterTarask
2371
+ };
2372
+ };
2341
2373
  export {
2342
2374
  ALPHABET,
2343
- J,
2375
+ REPLACE_J,
2376
+ Taraskevizer,
2344
2377
  VARIATION,
2345
- gobj,
2346
- tarask,
2347
- taraskToHtml
2378
+ gobj
2348
2379
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "taraskevizer",
3
- "version": "3.1.3",
3
+ "version": "4.0.0",
4
4
  "author": "GooseOb",
5
5
  "repository": {
6
6
  "type": "git",
@@ -14,7 +14,6 @@
14
14
  "bun-types": "^1.0.17",
15
15
  "husky": "^8.0.3",
16
16
  "prettier": "^3.1.1",
17
- "simple-git": "^3.21.0",
18
17
  "tsup": "^7.2.0",
19
18
  "typescript": "^5.3.3"
20
19
  },