taraskevizer 3.1.3 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,37 +7,42 @@ $ npm i taraskevizer
7
7
  ## Usage
8
8
 
9
9
  ```js
10
- import { tarask, taraskToHtml, ALPHABET, J, VARIATION } from 'taraskevizer';
10
+ import { Taraskevizer, ALPHABET, REPLACE_J as J, VARIATION } from 'taraskevizer';
11
11
 
12
- tarask('планета'); // "плянэта"
12
+ const taraskevizer = new Taraskevizer();
13
+ taraskevizer.convert('планета'); // "плянэта"
13
14
 
14
- tarask(
15
- 'планета і Гродна',
16
- {
15
+ const taraskevizer = new Taraskevizer({
16
+ general: {
17
17
  abc: ALPHABET.CYRILLIC,
18
18
  j: J.ALWAYS,
19
19
  },
20
- {
20
+ nonHtml: {
21
21
  ansiColors: true,
22
22
  variations: VARIATION.FIRST,
23
23
  h: false,
24
- }
25
- ); // "пл\x1b[32mя\x1b[0mн\x1b[32mэ\x1b[0mта \x1b[32mй\x1b[0m \x1b[35mГорадня\x1b[0m"
24
+ },
25
+ });
26
+ taraskevizer.convert('планета і Гродна'); // "пл\x1b[32mя\x1b[0mн\x1b[32mэ\x1b[0mта \x1b[32mй\x1b[0m \x1b[35mГорадня\x1b[0m"
26
27
 
27
- taraskToHtml(
28
- 'энергія планеты',
29
- {
28
+ const taraskevizer = new Taraskevizer({
29
+ general: {
30
30
  abc: ALPHABET.LATIN,
31
31
  },
32
- {
32
+ html: {
33
33
  g: false, // ignored, because alphabet is set to latin
34
- }
35
- ); // "en<tarF>erg</tarF>ija p<tarF>lan</tarF>ety"
34
+ },
35
+ });
36
+ taraskevizer.convertToHtml('энергія планеты'); // "en<tarF>erg</tarF>ija p<tarF>lan</tarF>ety"
37
+
38
+ // properties can be reassigned after the object is created
39
+ taraskevizer.abc = ALPHABET.ARABIC;
40
+ taraskevizer.html.g = true;
36
41
  ```
37
42
 
38
43
  ### Function signatures are in [this file](./dist/index.d.ts) (not available if project is not built)
39
44
 
40
- ## TaraskOptions
45
+ ## general
41
46
 
42
47
  Type: `object`
43
48
 
@@ -67,26 +72,16 @@ Default value: `0`
67
72
  | 1 | random | `яна і ён` or `яна й ён` |
68
73
  | 2 | always | `яна й ён` |
69
74
 
70
- ### OVERRIDE_toTarask
75
+ ### OVERRIDE_taraskevize
71
76
 
72
- Type:
77
+ Type: `(text: string) => string`
73
78
 
74
- ```
75
- (
76
- text: string,
77
- replaceWithDict: (
78
- text: string,
79
- dict?: [RegExp, string | ((...substrings: string[]) => string)][]
80
- ) => string,
81
- wordlist: [RegExp, string][],
82
- softers: [RegExp, string][],
83
- afterTarask: (text: string) => string
84
- ) => string
85
- ```
79
+ Default value: internal function `taraskevize`
86
80
 
87
- Default value: internal function `toTarask`
81
+ Can be overridden in order to make additional changes to the text.
82
+ This function usually uses the private API exposed through the `__tarask__` export.
88
83
 
89
- ## HtmlOptions
84
+ ## html
90
85
 
91
86
  ### g
92
87
 
@@ -101,7 +96,7 @@ Do replace `г`(`h`) by `ґ`(`g`) in cyrillic alphabet?
101
96
  | true | `<tarH>ґ</tarH>валт <tarH>Ґ</tarH>валт` |
102
97
  | false | `<tarH>г</tarH>валт <tarH>Г</tarH>валт` |
103
98
 
104
- ## NonHtmlOptions
99
+ ## nonHtml
105
100
 
106
101
  ### ansiColors
107
102
 
package/dist/bin.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { tarask, taraskToHtml } from "./index.js";
2
+ import { ALPHABET, REPLACE_J, Taraskevizer, VARIATION } from "./index.js";
3
3
  import readline from "readline/promises";
4
4
  import { readFile } from "fs/promises";
5
5
  const prefix = "\x1B[34m[taraskevizer]\x1B[0m ";
@@ -15,12 +15,12 @@ if (checkForOptions(["-v", "--version"])) {
15
15
  printWithPrefix(version);
16
16
  process.exit(0);
17
17
  }
18
- const taraskOptions = {};
19
- const nonHtmlOptions = {
18
+ const general = {};
19
+ const nonHtml = {
20
20
  variations: 2,
21
21
  ansiColors: true
22
22
  };
23
- const htmlOptions = { g: true };
23
+ const html = { g: true };
24
24
  let isHtml = false;
25
25
  const toHashTable = (dict) => {
26
26
  const result = {};
@@ -33,56 +33,56 @@ const optionDict = toHashTable([
33
33
  [
34
34
  ["--latin", "-l"],
35
35
  () => {
36
- taraskOptions.abc = 1;
36
+ general.abc = ALPHABET.LATIN;
37
37
  }
38
38
  ],
39
39
  [
40
40
  ["--arabic", "-a"],
41
41
  () => {
42
- taraskOptions.abc = 2;
42
+ general.abc = ALPHABET.ARABIC;
43
43
  }
44
44
  ],
45
45
  [
46
46
  ["--greek", "-gr"],
47
47
  () => {
48
- taraskOptions.abc = 3;
48
+ general.abc = ALPHABET.GREEK;
49
49
  }
50
50
  ],
51
51
  [
52
52
  ["--jrandom", "-jr"],
53
53
  () => {
54
- taraskOptions.j = 1;
54
+ general.j = REPLACE_J.RANDOM;
55
55
  }
56
56
  ],
57
57
  [
58
58
  ["--jalways", "-ja"],
59
59
  () => {
60
- taraskOptions.j = 2;
60
+ general.j = REPLACE_J.ALWAYS;
61
61
  }
62
62
  ],
63
63
  [
64
64
  ["--h", "-h"],
65
65
  () => {
66
- nonHtmlOptions.h = true;
67
- htmlOptions.g = false;
66
+ nonHtml.h = true;
67
+ html.g = false;
68
68
  }
69
69
  ],
70
70
  [
71
71
  ["--no-variations", "-nv"],
72
72
  () => {
73
- nonHtmlOptions.variations = 0;
73
+ nonHtml.variations = VARIATION.NO;
74
74
  }
75
75
  ],
76
76
  [
77
77
  ["--first-variation-only", "-fvo"],
78
78
  () => {
79
- nonHtmlOptions.variations = 1;
79
+ nonHtml.variations = VARIATION.FIRST;
80
80
  }
81
81
  ],
82
82
  [
83
83
  ["--no-color", "-nc"],
84
84
  () => {
85
- nonHtmlOptions.ansiColors = false;
85
+ nonHtml.ansiColors = false;
86
86
  }
87
87
  ],
88
88
  [
@@ -105,7 +105,8 @@ let text = process.argv.length ? process.argv.join(" ") : await readline.createI
105
105
  input: process.stdin,
106
106
  output: process.stdout
107
107
  }).question(prefix + "Enter the text:\n");
108
+ const taraskevizer = new Taraskevizer({ general, html, nonHtml });
108
109
  process.stdout.write(
109
- isHtml ? taraskToHtml(text, taraskOptions, htmlOptions) : tarask(text, taraskOptions, nonHtmlOptions)
110
+ isHtml ? taraskevizer.convertToHtml(text) : taraskevizer.convert(text)
110
111
  );
111
112
  process.exit(0);
package/dist/index.cjs CHANGED
@@ -21,11 +21,11 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
21
21
  var src_exports = {};
22
22
  __export(src_exports, {
23
23
  ALPHABET: () => ALPHABET,
24
- J: () => J,
24
+ REPLACE_J: () => REPLACE_J,
25
+ Taraskevizer: () => Taraskevizer,
25
26
  VARIATION: () => VARIATION,
26
- gobj: () => gobj,
27
- tarask: () => tarask,
28
- taraskToHtml: () => taraskToHtml
27
+ __tarask__: () => __tarask__,
28
+ gobj: () => gobj
29
29
  });
30
30
  module.exports = __toCommonJS(src_exports);
31
31
 
@@ -2141,9 +2141,22 @@ var NOFIX_CHAR = "  ";
2141
2141
  var NOFIX_REGEX = new RegExp(NOFIX_CHAR, "g");
2142
2142
  var OPTIONAL_WORDS_REGEX = /\(.*?\)/g;
2143
2143
  var G_REGEX = /[Ґґ]/g;
2144
- var ALPHABET = { CYRILLIC: 0, LATIN: 1, ARABIC: 2, GREEK: 3 };
2145
- var J = { NEVER: 0, RANDOM: 1, ALWAYS: 2 };
2146
- var VARIATION = { NO: 0, FIRST: 1, ALL: 2 };
2144
+ var ALPHABET = {
2145
+ CYRILLIC: 0,
2146
+ LATIN: 1,
2147
+ ARABIC: 2,
2148
+ GREEK: 3
2149
+ };
2150
+ var REPLACE_J = {
2151
+ NEVER: 0,
2152
+ RANDOM: 1,
2153
+ ALWAYS: 2
2154
+ };
2155
+ var VARIATION = {
2156
+ NO: 0,
2157
+ FIRST: 1,
2158
+ ALL: 2
2159
+ };
2147
2160
  var letters = {
2148
2161
  [ALPHABET.LATIN]: latinLetters,
2149
2162
  [ALPHABET.ARABIC]: arabLetters,
@@ -2177,38 +2190,6 @@ var afterTarask = [
2177
2190
  ($0, $1, $2) => /([ая]ў|ну)$/.test($2) ? $1 + "ь і" + $2 : $0
2178
2191
  ]
2179
2192
  ];
2180
- var process = (text, LEFT_ANGLE_BRACKET, options) => {
2181
- const { abc, j, OVERRIDE_toTarask: _toTarask = toTarask } = options;
2182
- const noFixArr = [];
2183
- text = ` ${text.trim()} `.replace(//g, "").replace(/<([,.]?)(.*?)>/gs, ($0, $1, $2) => {
2184
- console.log($0, ":", $1, ",", $2);
2185
- if ($1 === ",")
2186
- return LEFT_ANGLE_BRACKET + $2 + ">";
2187
- noFixArr.push($1 === "." ? $2 : $0);
2188
- return NOFIX_CHAR;
2189
- }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2190
- let splittedOrig, splitted;
2191
- splittedOrig = replaceWithDict(
2192
- replaceWithDict(text, letters[abc]),
2193
- lettersUpperCase[abc]
2194
- ).split(" ");
2195
- text = _toTarask(
2196
- text.toLowerCase(),
2197
- replaceWithDict,
2198
- wordlist,
2199
- softers,
2200
- afterTarask
2201
- );
2202
- if (j)
2203
- text = replaceIbyJ(text, j === J.ALWAYS);
2204
- if (abc === ALPHABET.GREEK)
2205
- text = replaceWithDict(text, thWords);
2206
- text = replaceWithDict(text, letters[abc]);
2207
- splitted = text.split(" ");
2208
- if (abc !== ALPHABET.ARABIC)
2209
- splitted = restoreCase(splitted, splittedOrig);
2210
- return { splittedOrig, splitted, noFixArr };
2211
- };
2212
2193
  var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
2213
2194
  var join = (textArr) => textArr.join(" ").replace(/&nbsp;/g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|&#40) ", "gu"), "$1");
2214
2195
  var finilize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
@@ -2217,55 +2198,6 @@ var replaceG = (text, replacer) => text.replace(
2217
2198
  // @ts-ignore
2218
2199
  replacer
2219
2200
  );
2220
- var getCompletedOptions = (options) => ({
2221
- abc: 0,
2222
- j: 0,
2223
- ...options
2224
- });
2225
- var taraskToHtml = (text, taraskOptions, htmlOptions = {}) => {
2226
- const options = getCompletedOptions(taraskOptions);
2227
- const wrapInTag = wrappers.html;
2228
- const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2229
- const { splitted, splittedOrig, noFixArr } = process(text, "&lt;", options);
2230
- highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2231
- text = join(splitted);
2232
- if (isCyrillic)
2233
- text = replaceG(
2234
- text,
2235
- htmlOptions.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2236
- );
2237
- return finilize(
2238
- applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2239
- const options2 = $0.slice(1, -1).split("|");
2240
- const main = options2.shift();
2241
- return `<tarL data-l='${options2}'>${main}</tarL>`;
2242
- }),
2243
- "<br>"
2244
- );
2245
- };
2246
- var tarask = (text, taraskOptions, nonHtmlOptions = {}) => {
2247
- const options = getCompletedOptions(taraskOptions);
2248
- const wrapInColorOf = wrappers.ansiColors;
2249
- const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2250
- const { splitted, splittedOrig, noFixArr } = process(text, "<", options);
2251
- if (nonHtmlOptions.ansiColors)
2252
- highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2253
- text = join(splitted);
2254
- if (isCyrillic && (nonHtmlOptions.h || nonHtmlOptions.ansiColors))
2255
- text = replaceG(
2256
- text,
2257
- nonHtmlOptions.ansiColors ? nonHtmlOptions.h ? ($0) => wrapInColorOf.variable(gobj[$0]) : wrapInColorOf.variable("$&") : ($0) => gobj[$0]
2258
- );
2259
- if ("variations" in nonHtmlOptions && nonHtmlOptions.variations !== VARIATION.ALL) {
2260
- const wordIndex = nonHtmlOptions.variations ?? 0;
2261
- const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2262
- text = text.replace(
2263
- OPTIONAL_WORDS_REGEX,
2264
- nonHtmlOptions.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2265
- );
2266
- }
2267
- return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2268
- };
2269
2201
  var restoreCase = (text, orig) => {
2270
2202
  for (let i = 0; i < text.length; i++) {
2271
2203
  const word = text[i];
@@ -2343,18 +2275,6 @@ var highlightChanges = (text, orig, isCyrillic, highlight) => {
2343
2275
  text[i] = word.slice(0, fromStart) + highlight(word.slice(fromStart, fromWordEnd + 1)) + word.slice(fromWordEnd + 1);
2344
2276
  }
2345
2277
  };
2346
- var toTarask = (text, replaceWithDict2, wordlist2, softers2, afterTarask2) => {
2347
- text = replaceWithDict2(text, wordlist2);
2348
- softening:
2349
- do {
2350
- text = replaceWithDict2(text, softers2);
2351
- for (const [pattern, result] of softers2)
2352
- if (result !== "$1дзьдз" && pattern.test(text))
2353
- continue softening;
2354
- break;
2355
- } while (true);
2356
- return replaceWithDict2(text, afterTarask2);
2357
- };
2358
2278
  var replaceWithDict = (text, dict = []) => {
2359
2279
  for (const [pattern, result] of dict)
2360
2280
  text = text.replace(
@@ -2369,12 +2289,124 @@ var replaceIbyJ = (text, always = false) => text.replace(
2369
2289
  /([аеёіоуыэюя] )і (ў?)/g,
2370
2290
  always ? ($0, $1, $2) => toJ($1, $2) : ($0, $1, $2) => Math.random() >= 0.5 ? toJ($1, $2) : $0
2371
2291
  );
2292
+ var __tarask__ = {
2293
+ wordlist,
2294
+ softers,
2295
+ replaceWithDict,
2296
+ afterTarask
2297
+ };
2298
+ var Taraskevizer = class {
2299
+ abc = ALPHABET.CYRILLIC;
2300
+ j = REPLACE_J.NEVER;
2301
+ html = {
2302
+ g: false
2303
+ };
2304
+ nonHtml = {
2305
+ h: false,
2306
+ ansiColors: false,
2307
+ variations: VARIATION.ALL
2308
+ };
2309
+ constructor(options) {
2310
+ if (!options)
2311
+ return;
2312
+ const general = options.general;
2313
+ if (general) {
2314
+ if (general.abc)
2315
+ this.abc = general.abc;
2316
+ if (general.j)
2317
+ this.j = general.j;
2318
+ }
2319
+ if (options.OVERRIDE_taraskevize)
2320
+ this.taraskevize = options.OVERRIDE_taraskevize;
2321
+ Object.assign(this.html, options.html);
2322
+ Object.assign(this.nonHtml, options.nonHtml);
2323
+ }
2324
+ convert(text) {
2325
+ const wrapInColorOf = wrappers.ansiColors;
2326
+ const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2327
+ const { splitted, splittedOrig, noFixArr } = this.process(text, "<");
2328
+ if (this.nonHtml.ansiColors)
2329
+ highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2330
+ text = join(splitted);
2331
+ if (isCyrillic && (this.nonHtml.h || this.nonHtml.ansiColors))
2332
+ text = replaceG(
2333
+ text,
2334
+ this.nonHtml.ansiColors ? this.nonHtml.h ? ($0) => wrapInColorOf.variable(gobj[$0]) : wrapInColorOf.variable("$&") : ($0) => gobj[$0]
2335
+ );
2336
+ if ("variations" in this.nonHtml && this.nonHtml.variations !== VARIATION.ALL) {
2337
+ const wordIndex = this.nonHtml.variations ?? 0;
2338
+ const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2339
+ text = text.replace(
2340
+ OPTIONAL_WORDS_REGEX,
2341
+ this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2342
+ );
2343
+ }
2344
+ return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2345
+ }
2346
+ convertToHtml(text) {
2347
+ const wrapInTag = wrappers.html;
2348
+ const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2349
+ const { splitted, splittedOrig, noFixArr } = this.process(text, "&lt;");
2350
+ highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2351
+ text = join(splitted);
2352
+ if (isCyrillic)
2353
+ text = replaceG(
2354
+ text,
2355
+ this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2356
+ );
2357
+ return finilize(
2358
+ applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2359
+ const options = $0.slice(1, -1).split("|");
2360
+ const main = options.shift();
2361
+ return `<tarL data-l='${options}'>${main}</tarL>`;
2362
+ }),
2363
+ "<br>"
2364
+ );
2365
+ }
2366
+ process(text, LEFT_ANGLE_BRACKET) {
2367
+ const { abc, j } = this;
2368
+ const noFixArr = [];
2369
+ text = ` ${text.trim()} `.replace(//g, "").replace(/<([,.]?)(.*?)>/gs, ($0, $1, $2) => {
2370
+ if ($1 === ",")
2371
+ return LEFT_ANGLE_BRACKET + $2 + ">";
2372
+ noFixArr.push($1 === "." ? $2 : $0);
2373
+ return NOFIX_CHAR;
2374
+ }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2375
+ let splittedOrig, splitted;
2376
+ splittedOrig = replaceWithDict(
2377
+ replaceWithDict(text, letters[abc]),
2378
+ lettersUpperCase[abc]
2379
+ ).split(" ");
2380
+ text = this.taraskevize(text.toLowerCase());
2381
+ if (j)
2382
+ text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
2383
+ if (abc === ALPHABET.GREEK)
2384
+ text = replaceWithDict(text, thWords);
2385
+ text = replaceWithDict(text, letters[abc]);
2386
+ splitted = text.split(" ");
2387
+ if (abc !== ALPHABET.ARABIC)
2388
+ splitted = restoreCase(splitted, splittedOrig);
2389
+ return { splittedOrig, splitted, noFixArr };
2390
+ }
2391
+ taraskevize(text) {
2392
+ text = replaceWithDict(text, wordlist);
2393
+ softening:
2394
+ do {
2395
+ text = replaceWithDict(text, softers);
2396
+ for (const [pattern, result] of softers)
2397
+ if (result !== "$1дзьдз" && pattern.test(text))
2398
+ continue softening;
2399
+ break;
2400
+ } while (true);
2401
+ return replaceWithDict(text, afterTarask);
2402
+ }
2403
+ };
2372
2404
  // Annotate the CommonJS export names for ESM import in node:
2373
2405
  0 && (module.exports = {
2374
2406
  ALPHABET,
2375
- J,
2407
+ REPLACE_J,
2408
+ Taraskevizer,
2376
2409
  VARIATION,
2377
- gobj,
2378
- tarask,
2379
- taraskToHtml
2410
+ __tarask__,
2411
+ gobj
2380
2412
  });
package/dist/index.d.ts CHANGED
@@ -8,7 +8,6 @@ type Variation = 0 | 1 | 2;
8
8
  type TaraskOptions = {
9
9
  abc: Alphabet;
10
10
  j: OptionJ;
11
- OVERRIDE_toTarask?: ToTarask;
12
11
  };
13
12
  type NonHtmlOptions = {
14
13
  ansiColors: boolean;
@@ -18,17 +17,11 @@ type NonHtmlOptions = {
18
17
  type HtmlOptions = {
19
18
  g: boolean;
20
19
  };
21
- type ReplaceWithDict = (text: string, dict?: ExtendedDict) => string;
22
- type ToTarask = (text: string, replaceWithDict: ReplaceWithDict, wordlist: Dict, softers: Dict, afterTarask: ExtendedDict) => string;
23
- type Tarask<TOptions extends object> = (text: string, taraskOptions?: DeepPartialReadonly<TaraskOptions>, options?: DeepPartialReadonly<TOptions>) => string;
24
- type Dict<T = RegExp> = [T, string][];
25
- type ExtendedDict = [
20
+ type Dict<T = RegExp> = readonly (readonly [T, string])[];
21
+ type ExtendedDict = readonly (readonly [
26
22
  RegExp,
27
23
  string | ((...substrings: string[]) => string)
28
- ][];
29
- type AlphabetDependentDict = {
30
- [key in Alphabet]?: Dict;
31
- };
24
+ ])[];
32
25
 
33
26
  declare const ALPHABET: {
34
27
  readonly CYRILLIC: 0;
@@ -36,7 +29,7 @@ declare const ALPHABET: {
36
29
  readonly ARABIC: 2;
37
30
  readonly GREEK: 3;
38
31
  };
39
- declare const J: {
32
+ declare const REPLACE_J: {
40
33
  readonly NEVER: 0;
41
34
  readonly RANDOM: 1;
42
35
  readonly ALWAYS: 2;
@@ -46,8 +39,34 @@ declare const VARIATION: {
46
39
  readonly FIRST: 1;
47
40
  readonly ALL: 2;
48
41
  };
49
- declare const taraskToHtml: Tarask<HtmlOptions>;
50
- declare const tarask: Tarask<NonHtmlOptions>;
42
+ declare const __tarask__: {
43
+ readonly wordlist: Dict;
44
+ readonly softers: Dict;
45
+ readonly replaceWithDict: (text: string, dict?: ExtendedDict) => string;
46
+ readonly afterTarask: ExtendedDict;
47
+ };
48
+ declare class Taraskevizer {
49
+ abc: Alphabet;
50
+ j: OptionJ;
51
+ html: {
52
+ g: boolean;
53
+ };
54
+ nonHtml: {
55
+ h: boolean;
56
+ ansiColors: boolean;
57
+ variations: Variation;
58
+ };
59
+ constructor(options?: DeepPartialReadonly<{
60
+ general: TaraskOptions;
61
+ html: HtmlOptions;
62
+ nonHtml: NonHtmlOptions;
63
+ OVERRIDE_taraskevize(this: Taraskevizer, text: string): string;
64
+ }>);
65
+ convert(text: string): string;
66
+ convertToHtml(text: string): string;
67
+ private process;
68
+ protected taraskevize(text: string): string;
69
+ }
51
70
 
52
71
  declare const gobj: {
53
72
  readonly г: "ґ";
@@ -56,4 +75,4 @@ declare const gobj: {
56
75
  readonly Ґ: "Г";
57
76
  };
58
77
 
59
- export { ALPHABET, type AlphabetDependentDict, type DeepPartialReadonly, type Dict, type ExtendedDict, type HtmlOptions, J, type NonHtmlOptions, type ReplaceWithDict, type Tarask, type TaraskOptions, type ToTarask, VARIATION, gobj, tarask, taraskToHtml };
78
+ export { ALPHABET, type HtmlOptions, type NonHtmlOptions, REPLACE_J, type TaraskOptions, Taraskevizer, VARIATION, __tarask__, gobj };
package/dist/index.js CHANGED
@@ -2110,9 +2110,22 @@ var NOFIX_CHAR = "  ";
2110
2110
  var NOFIX_REGEX = new RegExp(NOFIX_CHAR, "g");
2111
2111
  var OPTIONAL_WORDS_REGEX = /\(.*?\)/g;
2112
2112
  var G_REGEX = /[Ґґ]/g;
2113
- var ALPHABET = { CYRILLIC: 0, LATIN: 1, ARABIC: 2, GREEK: 3 };
2114
- var J = { NEVER: 0, RANDOM: 1, ALWAYS: 2 };
2115
- var VARIATION = { NO: 0, FIRST: 1, ALL: 2 };
2113
+ var ALPHABET = {
2114
+ CYRILLIC: 0,
2115
+ LATIN: 1,
2116
+ ARABIC: 2,
2117
+ GREEK: 3
2118
+ };
2119
+ var REPLACE_J = {
2120
+ NEVER: 0,
2121
+ RANDOM: 1,
2122
+ ALWAYS: 2
2123
+ };
2124
+ var VARIATION = {
2125
+ NO: 0,
2126
+ FIRST: 1,
2127
+ ALL: 2
2128
+ };
2116
2129
  var letters = {
2117
2130
  [ALPHABET.LATIN]: latinLetters,
2118
2131
  [ALPHABET.ARABIC]: arabLetters,
@@ -2146,38 +2159,6 @@ var afterTarask = [
2146
2159
  ($0, $1, $2) => /([ая]ў|ну)$/.test($2) ? $1 + "ь і" + $2 : $0
2147
2160
  ]
2148
2161
  ];
2149
- var process = (text, LEFT_ANGLE_BRACKET, options) => {
2150
- const { abc, j, OVERRIDE_toTarask: _toTarask = toTarask } = options;
2151
- const noFixArr = [];
2152
- text = ` ${text.trim()} `.replace(//g, "").replace(/<([,.]?)(.*?)>/gs, ($0, $1, $2) => {
2153
- console.log($0, ":", $1, ",", $2);
2154
- if ($1 === ",")
2155
- return LEFT_ANGLE_BRACKET + $2 + ">";
2156
- noFixArr.push($1 === "." ? $2 : $0);
2157
- return NOFIX_CHAR;
2158
- }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2159
- let splittedOrig, splitted;
2160
- splittedOrig = replaceWithDict(
2161
- replaceWithDict(text, letters[abc]),
2162
- lettersUpperCase[abc]
2163
- ).split(" ");
2164
- text = _toTarask(
2165
- text.toLowerCase(),
2166
- replaceWithDict,
2167
- wordlist,
2168
- softers,
2169
- afterTarask
2170
- );
2171
- if (j)
2172
- text = replaceIbyJ(text, j === J.ALWAYS);
2173
- if (abc === ALPHABET.GREEK)
2174
- text = replaceWithDict(text, thWords);
2175
- text = replaceWithDict(text, letters[abc]);
2176
- splitted = text.split(" ");
2177
- if (abc !== ALPHABET.ARABIC)
2178
- splitted = restoreCase(splitted, splittedOrig);
2179
- return { splittedOrig, splitted, noFixArr };
2180
- };
2181
2162
  var applyNoFix = (arr2, text) => arr2.length ? text.replace(NOFIX_REGEX, () => arr2.shift()) : text;
2182
2163
  var join = (textArr) => textArr.join(" ").replace(/&nbsp;/g, " ").replace(new RegExp(" (\\p{P}|\\p{S}|\\d|&#40) ", "gu"), "$1");
2183
2164
  var finilize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
@@ -2186,55 +2167,6 @@ var replaceG = (text, replacer) => text.replace(
2186
2167
  // @ts-ignore
2187
2168
  replacer
2188
2169
  );
2189
- var getCompletedOptions = (options) => ({
2190
- abc: 0,
2191
- j: 0,
2192
- ...options
2193
- });
2194
- var taraskToHtml = (text, taraskOptions, htmlOptions = {}) => {
2195
- const options = getCompletedOptions(taraskOptions);
2196
- const wrapInTag = wrappers.html;
2197
- const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2198
- const { splitted, splittedOrig, noFixArr } = process(text, "&lt;", options);
2199
- highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2200
- text = join(splitted);
2201
- if (isCyrillic)
2202
- text = replaceG(
2203
- text,
2204
- htmlOptions.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2205
- );
2206
- return finilize(
2207
- applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2208
- const options2 = $0.slice(1, -1).split("|");
2209
- const main = options2.shift();
2210
- return `<tarL data-l='${options2}'>${main}</tarL>`;
2211
- }),
2212
- "<br>"
2213
- );
2214
- };
2215
- var tarask = (text, taraskOptions, nonHtmlOptions = {}) => {
2216
- const options = getCompletedOptions(taraskOptions);
2217
- const wrapInColorOf = wrappers.ansiColors;
2218
- const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2219
- const { splitted, splittedOrig, noFixArr } = process(text, "<", options);
2220
- if (nonHtmlOptions.ansiColors)
2221
- highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2222
- text = join(splitted);
2223
- if (isCyrillic && (nonHtmlOptions.h || nonHtmlOptions.ansiColors))
2224
- text = replaceG(
2225
- text,
2226
- nonHtmlOptions.ansiColors ? nonHtmlOptions.h ? ($0) => wrapInColorOf.variable(gobj[$0]) : wrapInColorOf.variable("$&") : ($0) => gobj[$0]
2227
- );
2228
- if ("variations" in nonHtmlOptions && nonHtmlOptions.variations !== VARIATION.ALL) {
2229
- const wordIndex = nonHtmlOptions.variations ?? 0;
2230
- const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2231
- text = text.replace(
2232
- OPTIONAL_WORDS_REGEX,
2233
- nonHtmlOptions.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2234
- );
2235
- }
2236
- return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2237
- };
2238
2170
  var restoreCase = (text, orig) => {
2239
2171
  for (let i = 0; i < text.length; i++) {
2240
2172
  const word = text[i];
@@ -2312,18 +2244,6 @@ var highlightChanges = (text, orig, isCyrillic, highlight) => {
2312
2244
  text[i] = word.slice(0, fromStart) + highlight(word.slice(fromStart, fromWordEnd + 1)) + word.slice(fromWordEnd + 1);
2313
2245
  }
2314
2246
  };
2315
- var toTarask = (text, replaceWithDict2, wordlist2, softers2, afterTarask2) => {
2316
- text = replaceWithDict2(text, wordlist2);
2317
- softening:
2318
- do {
2319
- text = replaceWithDict2(text, softers2);
2320
- for (const [pattern, result] of softers2)
2321
- if (result !== "$1дзьдз" && pattern.test(text))
2322
- continue softening;
2323
- break;
2324
- } while (true);
2325
- return replaceWithDict2(text, afterTarask2);
2326
- };
2327
2247
  var replaceWithDict = (text, dict = []) => {
2328
2248
  for (const [pattern, result] of dict)
2329
2249
  text = text.replace(
@@ -2338,11 +2258,123 @@ var replaceIbyJ = (text, always = false) => text.replace(
2338
2258
  /([аеёіоуыэюя] )і (ў?)/g,
2339
2259
  always ? ($0, $1, $2) => toJ($1, $2) : ($0, $1, $2) => Math.random() >= 0.5 ? toJ($1, $2) : $0
2340
2260
  );
2261
+ var __tarask__ = {
2262
+ wordlist,
2263
+ softers,
2264
+ replaceWithDict,
2265
+ afterTarask
2266
+ };
2267
+ var Taraskevizer = class {
2268
+ abc = ALPHABET.CYRILLIC;
2269
+ j = REPLACE_J.NEVER;
2270
+ html = {
2271
+ g: false
2272
+ };
2273
+ nonHtml = {
2274
+ h: false,
2275
+ ansiColors: false,
2276
+ variations: VARIATION.ALL
2277
+ };
2278
+ constructor(options) {
2279
+ if (!options)
2280
+ return;
2281
+ const general = options.general;
2282
+ if (general) {
2283
+ if (general.abc)
2284
+ this.abc = general.abc;
2285
+ if (general.j)
2286
+ this.j = general.j;
2287
+ }
2288
+ if (options.OVERRIDE_taraskevize)
2289
+ this.taraskevize = options.OVERRIDE_taraskevize;
2290
+ Object.assign(this.html, options.html);
2291
+ Object.assign(this.nonHtml, options.nonHtml);
2292
+ }
2293
+ convert(text) {
2294
+ const wrapInColorOf = wrappers.ansiColors;
2295
+ const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2296
+ const { splitted, splittedOrig, noFixArr } = this.process(text, "<");
2297
+ if (this.nonHtml.ansiColors)
2298
+ highlightChanges(splitted, splittedOrig, isCyrillic, wrapInColorOf.fix);
2299
+ text = join(splitted);
2300
+ if (isCyrillic && (this.nonHtml.h || this.nonHtml.ansiColors))
2301
+ text = replaceG(
2302
+ text,
2303
+ this.nonHtml.ansiColors ? this.nonHtml.h ? ($0) => wrapInColorOf.variable(gobj[$0]) : wrapInColorOf.variable("$&") : ($0) => gobj[$0]
2304
+ );
2305
+ if ("variations" in this.nonHtml && this.nonHtml.variations !== VARIATION.ALL) {
2306
+ const wordIndex = this.nonHtml.variations ?? 0;
2307
+ const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2308
+ text = text.replace(
2309
+ OPTIONAL_WORDS_REGEX,
2310
+ this.nonHtml.ansiColors ? ($0) => wrapInColorOf.variable(replacer($0)) : replacer
2311
+ );
2312
+ }
2313
+ return finilize(applyNoFix(noFixArr, text).replace(/&#40/g, "("), "\n");
2314
+ }
2315
+ convertToHtml(text) {
2316
+ const wrapInTag = wrappers.html;
2317
+ const isCyrillic = this.abc === ALPHABET.CYRILLIC;
2318
+ const { splitted, splittedOrig, noFixArr } = this.process(text, "&lt;");
2319
+ highlightChanges(splitted, splittedOrig, isCyrillic, wrapInTag.fix);
2320
+ text = join(splitted);
2321
+ if (isCyrillic)
2322
+ text = replaceG(
2323
+ text,
2324
+ this.html.g ? wrapInTag.letterH("$&") : ($0) => wrapInTag.letterH(gobj[$0])
2325
+ );
2326
+ return finilize(
2327
+ applyNoFix(noFixArr, text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2328
+ const options = $0.slice(1, -1).split("|");
2329
+ const main = options.shift();
2330
+ return `<tarL data-l='${options}'>${main}</tarL>`;
2331
+ }),
2332
+ "<br>"
2333
+ );
2334
+ }
2335
+ process(text, LEFT_ANGLE_BRACKET) {
2336
+ const { abc, j } = this;
2337
+ const noFixArr = [];
2338
+ text = ` ${text.trim()} `.replace(//g, "").replace(/<([,.]?)(.*?)>/gs, ($0, $1, $2) => {
2339
+ if ($1 === ",")
2340
+ return LEFT_ANGLE_BRACKET + $2 + ">";
2341
+ noFixArr.push($1 === "." ? $2 : $0);
2342
+ return NOFIX_CHAR;
2343
+ }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(new RegExp("(\\p{P}|\\p{S}|\\d)", "gu"), " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2344
+ let splittedOrig, splitted;
2345
+ splittedOrig = replaceWithDict(
2346
+ replaceWithDict(text, letters[abc]),
2347
+ lettersUpperCase[abc]
2348
+ ).split(" ");
2349
+ text = this.taraskevize(text.toLowerCase());
2350
+ if (j)
2351
+ text = replaceIbyJ(text, j === REPLACE_J.ALWAYS);
2352
+ if (abc === ALPHABET.GREEK)
2353
+ text = replaceWithDict(text, thWords);
2354
+ text = replaceWithDict(text, letters[abc]);
2355
+ splitted = text.split(" ");
2356
+ if (abc !== ALPHABET.ARABIC)
2357
+ splitted = restoreCase(splitted, splittedOrig);
2358
+ return { splittedOrig, splitted, noFixArr };
2359
+ }
2360
+ taraskevize(text) {
2361
+ text = replaceWithDict(text, wordlist);
2362
+ softening:
2363
+ do {
2364
+ text = replaceWithDict(text, softers);
2365
+ for (const [pattern, result] of softers)
2366
+ if (result !== "$1дзьдз" && pattern.test(text))
2367
+ continue softening;
2368
+ break;
2369
+ } while (true);
2370
+ return replaceWithDict(text, afterTarask);
2371
+ }
2372
+ };
2341
2373
  export {
2342
2374
  ALPHABET,
2343
- J,
2375
+ REPLACE_J,
2376
+ Taraskevizer,
2344
2377
  VARIATION,
2345
- gobj,
2346
- tarask,
2347
- taraskToHtml
2378
+ __tarask__,
2379
+ gobj
2348
2380
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "taraskevizer",
3
- "version": "3.1.3",
3
+ "version": "4.1.0",
4
4
  "author": "GooseOb",
5
5
  "repository": {
6
6
  "type": "git",
@@ -14,7 +14,6 @@
14
14
  "bun-types": "^1.0.17",
15
15
  "husky": "^8.0.3",
16
16
  "prettier": "^3.1.1",
17
- "simple-git": "^3.21.0",
18
17
  "tsup": "^7.2.0",
19
18
  "typescript": "^5.3.3"
20
19
  },