taraskevizer 1.7.5 → 2.0.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,31 +7,40 @@ $ npm i taraskevizer
7
7
  ## Usage
8
8
 
9
9
  ```js
10
- import { tarask, ALPHABET } from 'taraskevizer';
11
-
12
- const result = tarask(text);
13
- // or
14
- const result = tarask(text, {
15
- html: {
16
- abc: ALPHABET.ARABIC,
17
- g: false,
18
- // ...
19
- },
20
- nonHtml: false,
21
- });
22
- ```
23
-
24
- ## API
10
+ import { tarask, taraskToHtml, ALPHABET, J, VARIATION } from 'taraskevizer';
25
11
 
26
- ### tarask(text, options?)
12
+ const taraskedText = tarask(text);
13
+ // планета -> плянэта
27
14
 
28
- Returns a `string`
29
-
30
- #### text
15
+ const taraskedText = tarask(
16
+ text,
17
+ {
18
+ abc: ALPHABET.CYRILLIC,
19
+ j: J.ALWAYS,
20
+ },
21
+ {
22
+ nodeColors: true,
23
+ variations: VARIATION.FIRST,
24
+ h: false,
25
+ }
26
+ );
27
+ // планета і Гродна -> пл\x1b[32mя\x1b[0mн\x1b[32mэ\x1b[0mта \x1b[32mй\x1b[0m \x1b[35mГорадня\x1b[0m
28
+
29
+ const taraskedText = taraskToHtml(
30
+ text,
31
+ {
32
+ abc: ALPHABET.LATIN,
33
+ },
34
+ {
35
+ g: false, // ignored, because alphabet is set to latin
36
+ }
37
+ );
38
+ // энергія планеты -> en<tarF>erg</tarF>ija p<tarF>lan</tarF>ety
39
+ ```
31
40
 
32
- Type: `string`
41
+ ### Function signatures are in [this file](./dist/index.d.ts)
33
42
 
34
- ## Options
43
+ ## TaraskOptions
35
44
 
36
45
  Type: `object`
37
46
 
@@ -64,15 +73,28 @@ When to replace `і`(`i`) by `й`(`j`) after vowels:
64
73
  2 = always
65
74
  ```
66
75
 
67
- ### html
76
+ ### OVERRIDE_toTarask
68
77
 
69
- Type: `boolean|object`
78
+ Type:
70
79
 
71
- Default value: `false`
80
+ ```
81
+ (
82
+ text: string,
83
+ replaceWithDict: (
84
+ text: string,
85
+ dict?: [RegExp, string | ((...substrings: string[]) => string)][]
86
+ ) => string,
87
+ wordlist: [RegExp, string][],
88
+ softers: [RegExp, string][],
89
+ afterTarask: (text: string) => string
90
+ ) => string
91
+ ```
92
+
93
+ Default value: internal function `toTarask`
72
94
 
73
- If `true|object`, some parts of a text are wrapped in HTML tags.
95
+ ## HtmlOptions
74
96
 
75
- #### html.g
97
+ ### g
76
98
 
77
99
  Type: `boolean`
78
100
 
@@ -86,21 +108,15 @@ false: <tarH>г</tarH> <tarH>Г</tarH>
86
108
  true: <tarH>ґ</tarH> <tarH>Ґ</tarH>
87
109
  ```
88
110
 
89
- ### nonHtml
90
-
91
- Type: `boolean|object`
92
-
93
- Default value: `false`
94
-
95
- If `html` is defined, will be ignored
111
+ ## NonHtmlOptions
96
112
 
97
- #### nonHtml.nodeColors
113
+ ### nodeColors
98
114
 
99
115
  Type: `boolean`
100
116
 
101
117
  Default value: `false`
102
118
 
103
- #### nonHtml.h
119
+ ### h
104
120
 
105
121
  Type: `boolean`
106
122
 
@@ -114,7 +130,7 @@ false: Ґ ґ
114
130
  true: Г г
115
131
  ```
116
132
 
117
- #### nonHtml.variations
133
+ ### variations
118
134
 
119
135
  Type: `number`
120
136
 
@@ -128,25 +144,6 @@ Which variation should be if a part of word is variable?
128
144
  2 = all: (Гродна|Горадня)
129
145
  ```
130
146
 
131
- ### OVERRIDE_toTarask
132
-
133
- Type:
134
-
135
- ```
136
- (
137
- text: string,
138
- replaceWithDict: (
139
- text: string,
140
- dict?: [RegExp, string | ((...substrings: string[]) => string)][]
141
- ) => string,
142
- wordlist: [RegExp, string][],
143
- softers: [RegExp, string][],
144
- afterTarask: (text: string) => string
145
- ) => string
146
- ```
147
-
148
- Default value: internal function `toTarask`
149
-
150
147
  ## HTML tags
151
148
 
152
149
  ### tarF
package/bin/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { tarask } from '../dist/index.js';
2
+ import { tarask, taraskToHtml } from '../dist/index.js';
3
3
  import { readFile } from 'fs/promises';
4
4
 
5
5
  const print = (...msgs) => {
@@ -20,72 +20,75 @@ if (checkForOptions(['-v', '--version'])) {
20
20
  process.exit(0);
21
21
  }
22
22
 
23
- const stgs = {
24
- nonHtml: {
25
- variations: 2,
26
- nodeColors: true,
27
- },
28
- };
23
+ /** @type {Partial<import('../dist/types.js').TaraskOptions>} */
24
+ const taraskOptions = {};
25
+ /** @type {Partial<import('../dist/types.js').NonHtmlOptions>} */
26
+ const nonHtmlOptions = { variations: 2, nodeColors: true };
27
+ /** @type {Partial<import('../dist/types.js').HtmlOptions>} */
28
+ const htmlOptions = { g: true };
29
+
30
+ let isHtml = false;
29
31
 
30
32
  const optionDict = [
31
33
  [
32
34
  ['--latin', '-l'],
33
35
  () => {
34
- stgs.abc = 1;
36
+ taraskOptions.abc = 1;
35
37
  },
36
38
  ],
37
39
  [
38
40
  ['--arabic', '-a'],
39
41
  () => {
40
- stgs.abc = 2;
42
+ taraskOptions.abc = 2;
41
43
  },
42
44
  ],
43
45
  [
44
46
  ['--greek', '-gr'],
45
47
  () => {
46
- stgs.abc = 3;
48
+ taraskOptions.abc = 3;
47
49
  },
48
50
  ],
49
51
  [
50
52
  ['--jrandom', '-jr'],
51
53
  () => {
52
- stgs.j = 1;
54
+ taraskOptions.j = 1;
53
55
  },
54
56
  ],
55
57
  [
56
58
  ['--jalways', '-ja'],
57
59
  () => {
58
- stgs.j = 2;
60
+ taraskOptions.j = 2;
59
61
  },
60
62
  ],
61
63
  [
62
64
  ['--h', '-h'],
63
65
  () => {
64
- stgs.nonHtml.h = true;
66
+ nonHtmlOptions.h = true;
67
+ htmlOptions.g = false;
65
68
  },
66
69
  ],
67
70
  [
68
71
  ['--no-variations', '-nv'],
69
72
  () => {
70
- stgs.nonHtml.variations = 0;
73
+ nonHtmlOptions.variations = 0;
71
74
  },
72
75
  ],
73
76
  [
74
77
  ['--first-variation-only', '-fvo'],
75
78
  () => {
76
- stgs.nonHtml.variations = 1;
79
+ nonHtmlOptions.variations = 1;
77
80
  },
78
81
  ],
79
82
  [
80
83
  ['--no-color', '-nc'],
81
84
  () => {
82
- stgs.nonHtml.nodeColors = false;
85
+ nonHtmlOptions.nodeColors = false;
83
86
  },
84
87
  ],
85
88
  [
86
89
  ['--html', '-html'],
87
90
  () => {
88
- stgs.html = {};
91
+ isHtml = true;
89
92
  },
90
93
  ],
91
94
  ];
@@ -102,4 +105,8 @@ optionEater: while (true) {
102
105
 
103
106
  const text = process.argv.join(' ');
104
107
 
105
- console.log(tarask(text, stgs));
108
+ console.log(
109
+ isHtml
110
+ ? taraskToHtml(text, taraskOptions, htmlOptions)
111
+ : tarask(text, taraskOptions, nonHtmlOptions)
112
+ );
package/dist/index.cjs CHANGED
@@ -24,7 +24,8 @@ __export(src_exports, {
24
24
  J: () => J,
25
25
  VARIATION: () => VARIATION,
26
26
  gobj: () => gobj,
27
- tarask: () => tarask
27
+ tarask: () => tarask,
28
+ taraskToHtml: () => taraskToHtml
28
29
  });
29
30
  module.exports = __toCommonJS(src_exports);
30
31
 
@@ -1362,7 +1363,7 @@ var rawWordlist = [
1362
1363
  [/экумен/, "экумэн"],
1363
1364
  [/елісейск/, "элізэйск"],
1364
1365
  [/энцыкла/, "энцыкля"],
1365
- [/энерг/, "энэрг"],
1366
+ [/энерг/, "энэрґ"],
1366
1367
  [/эфект/, "эфэкт"],
1367
1368
  [/эфемер/, "эфэмэр"],
1368
1369
  [/эцюд/, "этуд"],
@@ -1617,7 +1618,7 @@ var rawWordlist = [
1617
1618
  [/сцей /, "сьц(ей|яў) "],
1618
1619
  [/ (г|(?:най)?ч)асьц\(ей\)\(яў\) /, " $1асьцей "],
1619
1620
  /* а > у */
1620
- [/(абед| дон|енск|[іы]зм|завод|інстытут| канал|крым|(?:кле|пола|слу)цк|лёндан|мадрыд| рым)а [^-]/, "$1у "],
1621
+ [/(абед| дон|енск|[іы]зм|завод|інстытут| канал|крым|(?:кле|пола|слу)цк|лёндан|мадрыд| рым)а (?!-)/, "$1у "],
1621
1622
  [/(бэрлін|нясвіж|парыж)а /, "$1(а|у) "],
1622
1623
  [/дуная /, "дунаю "],
1623
1624
  /* Грэцкія назовы */
@@ -2015,7 +2016,7 @@ var gwords = [
2015
2016
  "аґрэст",
2016
2017
  " біґ ",
2017
2018
  " бразґ",
2018
- "буґацьці",
2019
+ "буґацці",
2019
2020
  "бурґ",
2020
2021
  "вашынґт",
2021
2022
  "возґр",
@@ -2098,9 +2099,8 @@ var gwords = [
2098
2099
  "райхстаґ",
2099
2100
  " уґанд",
2100
2101
  // 'фоґель',
2101
- " цуґл",
2102
+ " цуґл"
2102
2103
  // 'шваґер'
2103
- "энэрґ"
2104
2104
  ].reverse();
2105
2105
  var gobj = {
2106
2106
  "г": "ґ",
@@ -2126,13 +2126,12 @@ for (const word of gwords)
2126
2126
  wordlist.push([RegExp(word.replace(/ґ/g, "г"), "g"), word]);
2127
2127
 
2128
2128
  // src/tarask.ts
2129
- var isObject = (arg) => typeof arg === "object";
2130
2129
  var isUpperCase = (str) => str === str.toUpperCase();
2131
- var getLastLetter = (word) => {
2132
- for (let i = word.length - 1; i >= 0; i--)
2133
- if (/\p{L}/u.test(word[i]))
2134
- return word[i];
2135
- throw new Error(`the last letter of the word ${word} not found`);
2130
+ var getLastLetter = (word, i) => {
2131
+ for (let i2 = word.length - 1; i2 >= 0; i2--)
2132
+ if (/\p{L}/u.test(word[i2]))
2133
+ return word[i2];
2134
+ throw new Error(`the last letter of the word ${word} not found. index: ${i}`);
2136
2135
  };
2137
2136
  var NOFIX_CHAR = " ￿ ";
2138
2137
  var NOFIX_REGEX = new RegExp(NOFIX_CHAR, "g");
@@ -2175,17 +2174,14 @@ var afterTarask = [
2175
2174
  ($0, $1, $2) => /([ая]ў|ну)$/.test($2) ? $1 + "ь і" + $2 : $0
2176
2175
  ]
2177
2176
  ];
2178
- var tarask = (text, options = {}) => {
2179
- const { abc = 0, j = 0, html = false, nonHtml = false } = options;
2180
- const isHtmlObject = isObject(html);
2181
- const isNonHtmlObject = isObject(nonHtml);
2182
- const apply = html ? tagApplications.html : tagApplications.nonHtml;
2183
- const noFix = [];
2184
- const LEFT_ANGLE_BRACKET = html ? "&lt;" : "<";
2177
+ var noFix = [];
2178
+ var process = (text, LEFT_ANGLE_BRACKET, options) => {
2179
+ const { abc, j, OVERRIDE_toTarask: _toTarask = toTarask } = options;
2180
+ const noFix2 = [];
2185
2181
  text = ` ${text.trim()} `.replace(/�/g, "").replace(/<([,.]?)([.\s]*?)>/g, ($0, $1, $2) => {
2186
2182
  if ($1 === ",")
2187
2183
  return LEFT_ANGLE_BRACKET + $2 + ">";
2188
- noFix[noFix.length] = $1 === "." ? $2 : $0;
2184
+ noFix2[noFix2.length] = $1 === "." ? $2 : $0;
2189
2185
  return NOFIX_CHAR;
2190
2186
  }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(/(\p{P}|\p{S}|\d)/gu, " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2191
2187
  let splittedOrig, splitted;
@@ -2193,7 +2189,7 @@ var tarask = (text, options = {}) => {
2193
2189
  replaceWithDict(text, letters[abc]),
2194
2190
  lettersUpperCase[abc]
2195
2191
  ).split(" ");
2196
- text = (options.OVERRIDE_toTarask || toTarask)(
2192
+ text = _toTarask(
2197
2193
  text.toLowerCase(),
2198
2194
  replaceWithDict,
2199
2195
  wordlist,
@@ -2208,35 +2204,69 @@ var tarask = (text, options = {}) => {
2208
2204
  splitted = text.split(" ");
2209
2205
  if (abc !== ALPHABET.ARABIC)
2210
2206
  splitted = restoreCase(splitted, splittedOrig);
2211
- if (html || isNonHtmlObject && nonHtml.nodeColors)
2212
- splitted = toTags(
2213
- splitted,
2214
- splittedOrig,
2215
- abc === ALPHABET.CYRILLIC,
2216
- apply.F
2207
+ return { splittedOrig, splitted };
2208
+ };
2209
+ var applyNoFix = (text) => {
2210
+ if (noFix.length)
2211
+ text = text.replace(NOFIX_REGEX, () => noFix.shift());
2212
+ noFix = [];
2213
+ return text;
2214
+ };
2215
+ var join = (textArr) => textArr.join(" ").replace(/&nbsp;/g, " ").replace(/ (\p{P}|\p{S}|\d|&#40) /gu, "$1");
2216
+ var finilize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
2217
+ var replaceG = (text, replacer) => text.replace(
2218
+ G_REGEX,
2219
+ // @ts-ignore
2220
+ replacer
2221
+ );
2222
+ var getCompletedOptions = (options) => ({
2223
+ abc: 0,
2224
+ j: 0,
2225
+ ...options
2226
+ });
2227
+ var taraskToHtml = (text, taraskOptions, htmlOptions = {}) => {
2228
+ const options = getCompletedOptions(taraskOptions);
2229
+ const apply = tagApplications.html;
2230
+ const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2231
+ const { splitted, splittedOrig } = process(text, "&lt;", options);
2232
+ addTags(splitted, splittedOrig, isCyrillic, apply.F);
2233
+ text = join(splitted);
2234
+ if (isCyrillic)
2235
+ text = replaceG(
2236
+ text,
2237
+ htmlOptions.g ? apply.H("$&") : ($0) => apply.H(gobj[$0])
2217
2238
  );
2218
- text = splitted.join(" ").replace(/&nbsp;/g, " ").replace(/ (\p{P}|\p{S}|\d|&#40) /gu, "$1");
2219
- let gReplacer;
2220
- if (abc === ALPHABET.CYRILLIC) {
2221
- if (isHtmlObject) {
2222
- gReplacer = html.g ? apply.H("$&") : ($0) => apply.H(gobj[$0]);
2223
- } else if (isNonHtmlObject) {
2224
- if (nonHtml.nodeColors) {
2225
- gReplacer = nonHtml.h ? ($0) => apply.H(gobj[$0]) : apply.H("$&");
2226
- } else if (nonHtml.h) {
2227
- gReplacer = ($0) => gobj[$0];
2228
- }
2229
- }
2230
- }
2231
- if (gReplacer)
2239
+ return finilize(
2240
+ applyNoFix(text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2241
+ const options2 = $0.slice(1, -1).split("|");
2242
+ const main = options2.shift();
2243
+ return `<tarL data-l='${options2}'>${main}</tarL>`;
2244
+ }),
2245
+ "<br>"
2246
+ );
2247
+ };
2248
+ var tarask = (text, taraskOptions, nonHtmlOptions = {}) => {
2249
+ const options = getCompletedOptions(taraskOptions);
2250
+ const apply = tagApplications.nonHtml;
2251
+ const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2252
+ const { splitted, splittedOrig } = process(text, "&lt;", options);
2253
+ if (nonHtmlOptions.nodeColors)
2254
+ addTags(splitted, splittedOrig, isCyrillic, apply.F);
2255
+ text = join(splitted);
2256
+ if (isCyrillic && (nonHtmlOptions.h || nonHtmlOptions.nodeColors))
2257
+ text = replaceG(
2258
+ text,
2259
+ nonHtmlOptions.nodeColors ? nonHtmlOptions.h ? ($0) => apply.H(gobj[$0]) : apply.H("$&") : ($0) => gobj[$0]
2260
+ );
2261
+ if ("variations" in nonHtmlOptions && nonHtmlOptions.variations !== VARIATION.ALL) {
2262
+ const wordIndex = nonHtmlOptions.variations ?? 0;
2263
+ const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2232
2264
  text = text.replace(
2233
- G_REGEX,
2234
- // @ts-ignore
2235
- gReplacer
2265
+ OPTIONAL_WORDS_REGEX,
2266
+ nonHtmlOptions.nodeColors ? ($0) => tagApplications.nonHtml.L(replacer($0)) : replacer
2236
2267
  );
2237
- if (noFix.length)
2238
- text = text.replace(NOFIX_REGEX, () => noFix.shift());
2239
- return (html ? finalizer.html(text) : finalizer.nonHtml(text, nonHtml)).replace(/ \t /g, " ").trim();
2268
+ }
2269
+ return finilize(applyNoFix(text).replace(/&#40/g, "("), "\n");
2240
2270
  };
2241
2271
  var restoreCase = (text, orig) => {
2242
2272
  for (let i = 0; i < text.length; i++) {
@@ -2252,7 +2282,7 @@ var restoreCase = (text, orig) => {
2252
2282
  continue;
2253
2283
  if (word === "зь") {
2254
2284
  text[i] = isUpperCase(orig[i + 1]) ? "ЗЬ" : "Зь";
2255
- } else if (isUpperCase(getLastLetter(oWord))) {
2285
+ } else if (isUpperCase(getLastLetter(oWord, i))) {
2256
2286
  text[i] = word.toUpperCase();
2257
2287
  } else {
2258
2288
  text[i] = word[0] === "(" ? word.replace(
@@ -2263,13 +2293,13 @@ var restoreCase = (text, orig) => {
2263
2293
  }
2264
2294
  return text;
2265
2295
  };
2266
- var toTags = (text, orig, isCyrillic, applyF) => {
2296
+ var addTags = (text, orig, isCyrillic, applyF) => {
2267
2297
  for (let i = 0; i < text.length; i++) {
2268
2298
  const word = text[i];
2269
2299
  const oWord = orig[i];
2270
2300
  if (oWord === word)
2271
2301
  continue;
2272
- const wordH = isCyrillic ? word.replace(G_REGEX, ($0) => gobj[$0]) : word;
2302
+ const wordH = isCyrillic ? replaceG(word, ($0) => gobj[$0]) : word;
2273
2303
  if (oWord === wordH)
2274
2304
  continue;
2275
2305
  if (!/\(/.test(word)) {
@@ -2314,7 +2344,6 @@ var toTags = (text, orig, isCyrillic, applyF) => {
2314
2344
  }
2315
2345
  text[i] = word.slice(0, fromStart) + applyF(word.slice(fromStart, fromWordEnd + 1)) + word.slice(fromWordEnd + 1);
2316
2346
  }
2317
- return text;
2318
2347
  };
2319
2348
  var toTarask = (text, replaceWithDict2, wordlist2, softers2, afterTarask2) => {
2320
2349
  text = replaceWithDict2(text, wordlist2);
@@ -2342,29 +2371,12 @@ var replaceIbyJ = (text, always = false) => text.replace(
2342
2371
  /([аеёіоуыэюя] )і (ў?)/g,
2343
2372
  always ? ($0, $1, $2) => toJ($1, $2) : ($0, $1, $2) => Math.random() >= 0.5 ? toJ($1, $2) : $0
2344
2373
  );
2345
- var finalizer = {
2346
- html: (text) => text.replace(OPTIONAL_WORDS_REGEX, ($0) => {
2347
- const options = $0.slice(1, -1).split("|");
2348
- const main = options.shift();
2349
- return `<tarL data-l='${options}'>${main}</tarL>`;
2350
- }).replace(/ \n /g, "<br>"),
2351
- nonHtml(text, options) {
2352
- if (isObject(options) && "variations" in options && options.variations !== VARIATION.ALL) {
2353
- const WORD_INDEX = options.variations ?? 0;
2354
- const replacer = ($0) => $0.slice(1, -1).split("|")[WORD_INDEX];
2355
- text = text.replace(
2356
- OPTIONAL_WORDS_REGEX,
2357
- options.nodeColors ? ($0) => tagApplications.nonHtml.L(replacer($0)) : replacer
2358
- );
2359
- }
2360
- return text.replace(/&#40/g, "(").replace(/ \n /g, "\n");
2361
- }
2362
- };
2363
2374
  // Annotate the CommonJS export names for ESM import in node:
2364
2375
  0 && (module.exports = {
2365
2376
  ALPHABET,
2366
2377
  J,
2367
2378
  VARIATION,
2368
2379
  gobj,
2369
- tarask
2380
+ tarask,
2381
+ taraskToHtml
2370
2382
  });
package/dist/index.d.cts CHANGED
@@ -1,13 +1,15 @@
1
1
  type ModifyObjectType<T, TResultObj> = T extends object ? T extends (...args: any[]) => any ? T : TResultObj : T;
2
- type DeepPartial<T> = ModifyObjectType<T, {
3
- [P in keyof T]?: DeepPartial<T[P]>;
4
- }>;
5
- type DeepReadonly<T> = ModifyObjectType<T, {
6
- readonly [P in keyof T]: DeepReadonly<T[P]>;
2
+ type DeepPartialReadonly<T> = ModifyObjectType<T, {
3
+ readonly [P in keyof T]?: DeepPartialReadonly<T[P]>;
7
4
  }>;
8
5
  type Alphabet = 0 | 1 | 2 | 3;
9
6
  type OptionJ = 0 | 1 | 2;
10
7
  type Variation = 0 | 1 | 2;
8
+ type TaraskOptions = {
9
+ abc: Alphabet;
10
+ j: OptionJ;
11
+ OVERRIDE_toTarask?: ToTarask;
12
+ };
11
13
  type NonHtmlOptions = {
12
14
  nodeColors: boolean;
13
15
  h: boolean;
@@ -18,15 +20,7 @@ type HtmlOptions = {
18
20
  };
19
21
  type ReplaceWithDict = (text: string, dict?: ExtendedDict) => string;
20
22
  type ToTarask = (text: string, replaceWithDict: ReplaceWithDict, wordlist: Dict, softers: Dict, afterTarask: ExtendedDict) => string;
21
- type TaraskOptionsStrict = {
22
- abc: Alphabet;
23
- j: OptionJ;
24
- html: boolean | HtmlOptions;
25
- nonHtml: boolean | NonHtmlOptions;
26
- OVERRIDE_toTarask?: ToTarask;
27
- };
28
- type TaraskOptions = DeepPartial<TaraskOptionsStrict>;
29
- type Tarask = (text: string, options?: DeepReadonly<TaraskOptions>) => string;
23
+ type Tarask<TOptions extends object> = (text: string, taraskOptions?: DeepPartialReadonly<TaraskOptions>, options?: DeepPartialReadonly<TOptions>) => string;
30
24
  type Dict<T = RegExp> = [T, string][];
31
25
  type ExtendedDict = [
32
26
  RegExp,
@@ -52,7 +46,8 @@ declare const VARIATION: {
52
46
  readonly FIRST: 1;
53
47
  readonly ALL: 2;
54
48
  };
55
- declare const tarask: Tarask;
49
+ declare const taraskToHtml: Tarask<HtmlOptions>;
50
+ declare const tarask: Tarask<NonHtmlOptions>;
56
51
 
57
52
  declare const gobj: {
58
53
  readonly г: "ґ";
@@ -61,4 +56,4 @@ declare const gobj: {
61
56
  readonly Ґ: "Г";
62
57
  };
63
58
 
64
- export { ALPHABET, AlphabetDependentDict, Dict, ExtendedDict, HtmlOptions, J, NonHtmlOptions, ReplaceWithDict, Tarask, TaraskOptions, TaraskOptionsStrict, ToTarask, VARIATION, gobj, tarask };
59
+ export { ALPHABET, AlphabetDependentDict, DeepPartialReadonly, Dict, ExtendedDict, HtmlOptions, J, NonHtmlOptions, ReplaceWithDict, Tarask, TaraskOptions, ToTarask, VARIATION, gobj, tarask, taraskToHtml };
package/dist/index.d.ts CHANGED
@@ -1,13 +1,15 @@
1
1
  type ModifyObjectType<T, TResultObj> = T extends object ? T extends (...args: any[]) => any ? T : TResultObj : T;
2
- type DeepPartial<T> = ModifyObjectType<T, {
3
- [P in keyof T]?: DeepPartial<T[P]>;
4
- }>;
5
- type DeepReadonly<T> = ModifyObjectType<T, {
6
- readonly [P in keyof T]: DeepReadonly<T[P]>;
2
+ type DeepPartialReadonly<T> = ModifyObjectType<T, {
3
+ readonly [P in keyof T]?: DeepPartialReadonly<T[P]>;
7
4
  }>;
8
5
  type Alphabet = 0 | 1 | 2 | 3;
9
6
  type OptionJ = 0 | 1 | 2;
10
7
  type Variation = 0 | 1 | 2;
8
+ type TaraskOptions = {
9
+ abc: Alphabet;
10
+ j: OptionJ;
11
+ OVERRIDE_toTarask?: ToTarask;
12
+ };
11
13
  type NonHtmlOptions = {
12
14
  nodeColors: boolean;
13
15
  h: boolean;
@@ -18,15 +20,7 @@ type HtmlOptions = {
18
20
  };
19
21
  type ReplaceWithDict = (text: string, dict?: ExtendedDict) => string;
20
22
  type ToTarask = (text: string, replaceWithDict: ReplaceWithDict, wordlist: Dict, softers: Dict, afterTarask: ExtendedDict) => string;
21
- type TaraskOptionsStrict = {
22
- abc: Alphabet;
23
- j: OptionJ;
24
- html: boolean | HtmlOptions;
25
- nonHtml: boolean | NonHtmlOptions;
26
- OVERRIDE_toTarask?: ToTarask;
27
- };
28
- type TaraskOptions = DeepPartial<TaraskOptionsStrict>;
29
- type Tarask = (text: string, options?: DeepReadonly<TaraskOptions>) => string;
23
+ type Tarask<TOptions extends object> = (text: string, taraskOptions?: DeepPartialReadonly<TaraskOptions>, options?: DeepPartialReadonly<TOptions>) => string;
30
24
  type Dict<T = RegExp> = [T, string][];
31
25
  type ExtendedDict = [
32
26
  RegExp,
@@ -52,7 +46,8 @@ declare const VARIATION: {
52
46
  readonly FIRST: 1;
53
47
  readonly ALL: 2;
54
48
  };
55
- declare const tarask: Tarask;
49
+ declare const taraskToHtml: Tarask<HtmlOptions>;
50
+ declare const tarask: Tarask<NonHtmlOptions>;
56
51
 
57
52
  declare const gobj: {
58
53
  readonly г: "ґ";
@@ -61,4 +56,4 @@ declare const gobj: {
61
56
  readonly Ґ: "Г";
62
57
  };
63
58
 
64
- export { ALPHABET, AlphabetDependentDict, Dict, ExtendedDict, HtmlOptions, J, NonHtmlOptions, ReplaceWithDict, Tarask, TaraskOptions, TaraskOptionsStrict, ToTarask, VARIATION, gobj, tarask };
59
+ export { ALPHABET, AlphabetDependentDict, DeepPartialReadonly, Dict, ExtendedDict, HtmlOptions, J, NonHtmlOptions, ReplaceWithDict, Tarask, TaraskOptions, ToTarask, VARIATION, gobj, tarask, taraskToHtml };
package/dist/index.js CHANGED
@@ -1332,7 +1332,7 @@ var rawWordlist = [
1332
1332
  [/экумен/, "экумэн"],
1333
1333
  [/елісейск/, "элізэйск"],
1334
1334
  [/энцыкла/, "энцыкля"],
1335
- [/энерг/, "энэрг"],
1335
+ [/энерг/, "энэрґ"],
1336
1336
  [/эфект/, "эфэкт"],
1337
1337
  [/эфемер/, "эфэмэр"],
1338
1338
  [/эцюд/, "этуд"],
@@ -1587,7 +1587,7 @@ var rawWordlist = [
1587
1587
  [/сцей /, "сьц(ей|яў) "],
1588
1588
  [/ (г|(?:най)?ч)асьц\(ей\)\(яў\) /, " $1асьцей "],
1589
1589
  /* а > у */
1590
- [/(абед| дон|енск|[іы]зм|завод|інстытут| канал|крым|(?:кле|пола|слу)цк|лёндан|мадрыд| рым)а [^-]/, "$1у "],
1590
+ [/(абед| дон|енск|[іы]зм|завод|інстытут| канал|крым|(?:кле|пола|слу)цк|лёндан|мадрыд| рым)а (?!-)/, "$1у "],
1591
1591
  [/(бэрлін|нясвіж|парыж)а /, "$1(а|у) "],
1592
1592
  [/дуная /, "дунаю "],
1593
1593
  /* Грэцкія назовы */
@@ -1985,7 +1985,7 @@ var gwords = [
1985
1985
  "аґрэст",
1986
1986
  " біґ ",
1987
1987
  " бразґ",
1988
- "буґацьці",
1988
+ "буґацці",
1989
1989
  "бурґ",
1990
1990
  "вашынґт",
1991
1991
  "возґр",
@@ -2068,9 +2068,8 @@ var gwords = [
2068
2068
  "райхстаґ",
2069
2069
  " уґанд",
2070
2070
  // 'фоґель',
2071
- " цуґл",
2071
+ " цуґл"
2072
2072
  // 'шваґер'
2073
- "энэрґ"
2074
2073
  ].reverse();
2075
2074
  var gobj = {
2076
2075
  "г": "ґ",
@@ -2096,13 +2095,12 @@ for (const word of gwords)
2096
2095
  wordlist.push([RegExp(word.replace(/ґ/g, "г"), "g"), word]);
2097
2096
 
2098
2097
  // src/tarask.ts
2099
- var isObject = (arg) => typeof arg === "object";
2100
2098
  var isUpperCase = (str) => str === str.toUpperCase();
2101
- var getLastLetter = (word) => {
2102
- for (let i = word.length - 1; i >= 0; i--)
2103
- if (/\p{L}/u.test(word[i]))
2104
- return word[i];
2105
- throw new Error(`the last letter of the word ${word} not found`);
2099
+ var getLastLetter = (word, i) => {
2100
+ for (let i2 = word.length - 1; i2 >= 0; i2--)
2101
+ if (/\p{L}/u.test(word[i2]))
2102
+ return word[i2];
2103
+ throw new Error(`the last letter of the word ${word} not found. index: ${i}`);
2106
2104
  };
2107
2105
  var NOFIX_CHAR = " ￿ ";
2108
2106
  var NOFIX_REGEX = new RegExp(NOFIX_CHAR, "g");
@@ -2145,17 +2143,14 @@ var afterTarask = [
2145
2143
  ($0, $1, $2) => /([ая]ў|ну)$/.test($2) ? $1 + "ь і" + $2 : $0
2146
2144
  ]
2147
2145
  ];
2148
- var tarask = (text, options = {}) => {
2149
- const { abc = 0, j = 0, html = false, nonHtml = false } = options;
2150
- const isHtmlObject = isObject(html);
2151
- const isNonHtmlObject = isObject(nonHtml);
2152
- const apply = html ? tagApplications.html : tagApplications.nonHtml;
2153
- const noFix = [];
2154
- const LEFT_ANGLE_BRACKET = html ? "&lt;" : "<";
2146
+ var noFix = [];
2147
+ var process = (text, LEFT_ANGLE_BRACKET, options) => {
2148
+ const { abc, j, OVERRIDE_toTarask: _toTarask = toTarask } = options;
2149
+ const noFix2 = [];
2155
2150
  text = ` ${text.trim()} `.replace(/�/g, "").replace(/<([,.]?)([.\s]*?)>/g, ($0, $1, $2) => {
2156
2151
  if ($1 === ",")
2157
2152
  return LEFT_ANGLE_BRACKET + $2 + ">";
2158
- noFix[noFix.length] = $1 === "." ? $2 : $0;
2153
+ noFix2[noFix2.length] = $1 === "." ? $2 : $0;
2159
2154
  return NOFIX_CHAR;
2160
2155
  }).replace(/г'(?![еёіюя])/g, "ґ").replace(/([\n\t])/g, " $1 ").replace(/ - /g, " — ").replace(/(\p{P}|\p{S}|\d)/gu, " $1 ").replace(/ ['`’] (?=\S)/g, "ʼ").replace(/\(/g, "&#40");
2161
2156
  let splittedOrig, splitted;
@@ -2163,7 +2158,7 @@ var tarask = (text, options = {}) => {
2163
2158
  replaceWithDict(text, letters[abc]),
2164
2159
  lettersUpperCase[abc]
2165
2160
  ).split(" ");
2166
- text = (options.OVERRIDE_toTarask || toTarask)(
2161
+ text = _toTarask(
2167
2162
  text.toLowerCase(),
2168
2163
  replaceWithDict,
2169
2164
  wordlist,
@@ -2178,35 +2173,69 @@ var tarask = (text, options = {}) => {
2178
2173
  splitted = text.split(" ");
2179
2174
  if (abc !== ALPHABET.ARABIC)
2180
2175
  splitted = restoreCase(splitted, splittedOrig);
2181
- if (html || isNonHtmlObject && nonHtml.nodeColors)
2182
- splitted = toTags(
2183
- splitted,
2184
- splittedOrig,
2185
- abc === ALPHABET.CYRILLIC,
2186
- apply.F
2176
+ return { splittedOrig, splitted };
2177
+ };
2178
+ var applyNoFix = (text) => {
2179
+ if (noFix.length)
2180
+ text = text.replace(NOFIX_REGEX, () => noFix.shift());
2181
+ noFix = [];
2182
+ return text;
2183
+ };
2184
+ var join = (textArr) => textArr.join(" ").replace(/&nbsp;/g, " ").replace(/ (\p{P}|\p{S}|\d|&#40) /gu, "$1");
2185
+ var finilize = (text, newLine) => text.replace(/ \t /g, " ").replace(/ \n /g, newLine).trim();
2186
+ var replaceG = (text, replacer) => text.replace(
2187
+ G_REGEX,
2188
+ // @ts-ignore
2189
+ replacer
2190
+ );
2191
+ var getCompletedOptions = (options) => ({
2192
+ abc: 0,
2193
+ j: 0,
2194
+ ...options
2195
+ });
2196
+ var taraskToHtml = (text, taraskOptions, htmlOptions = {}) => {
2197
+ const options = getCompletedOptions(taraskOptions);
2198
+ const apply = tagApplications.html;
2199
+ const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2200
+ const { splitted, splittedOrig } = process(text, "&lt;", options);
2201
+ addTags(splitted, splittedOrig, isCyrillic, apply.F);
2202
+ text = join(splitted);
2203
+ if (isCyrillic)
2204
+ text = replaceG(
2205
+ text,
2206
+ htmlOptions.g ? apply.H("$&") : ($0) => apply.H(gobj[$0])
2187
2207
  );
2188
- text = splitted.join(" ").replace(/&nbsp;/g, " ").replace(/ (\p{P}|\p{S}|\d|&#40) /gu, "$1");
2189
- let gReplacer;
2190
- if (abc === ALPHABET.CYRILLIC) {
2191
- if (isHtmlObject) {
2192
- gReplacer = html.g ? apply.H("$&") : ($0) => apply.H(gobj[$0]);
2193
- } else if (isNonHtmlObject) {
2194
- if (nonHtml.nodeColors) {
2195
- gReplacer = nonHtml.h ? ($0) => apply.H(gobj[$0]) : apply.H("$&");
2196
- } else if (nonHtml.h) {
2197
- gReplacer = ($0) => gobj[$0];
2198
- }
2199
- }
2200
- }
2201
- if (gReplacer)
2208
+ return finilize(
2209
+ applyNoFix(text).replace(OPTIONAL_WORDS_REGEX, ($0) => {
2210
+ const options2 = $0.slice(1, -1).split("|");
2211
+ const main = options2.shift();
2212
+ return `<tarL data-l='${options2}'>${main}</tarL>`;
2213
+ }),
2214
+ "<br>"
2215
+ );
2216
+ };
2217
+ var tarask = (text, taraskOptions, nonHtmlOptions = {}) => {
2218
+ const options = getCompletedOptions(taraskOptions);
2219
+ const apply = tagApplications.nonHtml;
2220
+ const isCyrillic = options.abc === ALPHABET.CYRILLIC;
2221
+ const { splitted, splittedOrig } = process(text, "&lt;", options);
2222
+ if (nonHtmlOptions.nodeColors)
2223
+ addTags(splitted, splittedOrig, isCyrillic, apply.F);
2224
+ text = join(splitted);
2225
+ if (isCyrillic && (nonHtmlOptions.h || nonHtmlOptions.nodeColors))
2226
+ text = replaceG(
2227
+ text,
2228
+ nonHtmlOptions.nodeColors ? nonHtmlOptions.h ? ($0) => apply.H(gobj[$0]) : apply.H("$&") : ($0) => gobj[$0]
2229
+ );
2230
+ if ("variations" in nonHtmlOptions && nonHtmlOptions.variations !== VARIATION.ALL) {
2231
+ const wordIndex = nonHtmlOptions.variations ?? 0;
2232
+ const replacer = ($0) => $0.slice(1, -1).split("|")[wordIndex];
2202
2233
  text = text.replace(
2203
- G_REGEX,
2204
- // @ts-ignore
2205
- gReplacer
2234
+ OPTIONAL_WORDS_REGEX,
2235
+ nonHtmlOptions.nodeColors ? ($0) => tagApplications.nonHtml.L(replacer($0)) : replacer
2206
2236
  );
2207
- if (noFix.length)
2208
- text = text.replace(NOFIX_REGEX, () => noFix.shift());
2209
- return (html ? finalizer.html(text) : finalizer.nonHtml(text, nonHtml)).replace(/ \t /g, " ").trim();
2237
+ }
2238
+ return finilize(applyNoFix(text).replace(/&#40/g, "("), "\n");
2210
2239
  };
2211
2240
  var restoreCase = (text, orig) => {
2212
2241
  for (let i = 0; i < text.length; i++) {
@@ -2222,7 +2251,7 @@ var restoreCase = (text, orig) => {
2222
2251
  continue;
2223
2252
  if (word === "зь") {
2224
2253
  text[i] = isUpperCase(orig[i + 1]) ? "ЗЬ" : "Зь";
2225
- } else if (isUpperCase(getLastLetter(oWord))) {
2254
+ } else if (isUpperCase(getLastLetter(oWord, i))) {
2226
2255
  text[i] = word.toUpperCase();
2227
2256
  } else {
2228
2257
  text[i] = word[0] === "(" ? word.replace(
@@ -2233,13 +2262,13 @@ var restoreCase = (text, orig) => {
2233
2262
  }
2234
2263
  return text;
2235
2264
  };
2236
- var toTags = (text, orig, isCyrillic, applyF) => {
2265
+ var addTags = (text, orig, isCyrillic, applyF) => {
2237
2266
  for (let i = 0; i < text.length; i++) {
2238
2267
  const word = text[i];
2239
2268
  const oWord = orig[i];
2240
2269
  if (oWord === word)
2241
2270
  continue;
2242
- const wordH = isCyrillic ? word.replace(G_REGEX, ($0) => gobj[$0]) : word;
2271
+ const wordH = isCyrillic ? replaceG(word, ($0) => gobj[$0]) : word;
2243
2272
  if (oWord === wordH)
2244
2273
  continue;
2245
2274
  if (!/\(/.test(word)) {
@@ -2284,7 +2313,6 @@ var toTags = (text, orig, isCyrillic, applyF) => {
2284
2313
  }
2285
2314
  text[i] = word.slice(0, fromStart) + applyF(word.slice(fromStart, fromWordEnd + 1)) + word.slice(fromWordEnd + 1);
2286
2315
  }
2287
- return text;
2288
2316
  };
2289
2317
  var toTarask = (text, replaceWithDict2, wordlist2, softers2, afterTarask2) => {
2290
2318
  text = replaceWithDict2(text, wordlist2);
@@ -2312,28 +2340,11 @@ var replaceIbyJ = (text, always = false) => text.replace(
2312
2340
  /([аеёіоуыэюя] )і (ў?)/g,
2313
2341
  always ? ($0, $1, $2) => toJ($1, $2) : ($0, $1, $2) => Math.random() >= 0.5 ? toJ($1, $2) : $0
2314
2342
  );
2315
- var finalizer = {
2316
- html: (text) => text.replace(OPTIONAL_WORDS_REGEX, ($0) => {
2317
- const options = $0.slice(1, -1).split("|");
2318
- const main = options.shift();
2319
- return `<tarL data-l='${options}'>${main}</tarL>`;
2320
- }).replace(/ \n /g, "<br>"),
2321
- nonHtml(text, options) {
2322
- if (isObject(options) && "variations" in options && options.variations !== VARIATION.ALL) {
2323
- const WORD_INDEX = options.variations ?? 0;
2324
- const replacer = ($0) => $0.slice(1, -1).split("|")[WORD_INDEX];
2325
- text = text.replace(
2326
- OPTIONAL_WORDS_REGEX,
2327
- options.nodeColors ? ($0) => tagApplications.nonHtml.L(replacer($0)) : replacer
2328
- );
2329
- }
2330
- return text.replace(/&#40/g, "(").replace(/ \n /g, "\n");
2331
- }
2332
- };
2333
2343
  export {
2334
2344
  ALPHABET,
2335
2345
  J,
2336
2346
  VARIATION,
2337
2347
  gobj,
2338
- tarask
2348
+ tarask,
2349
+ taraskToHtml
2339
2350
  };
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "taraskevizer",
3
3
  "author": "GooseOb",
4
4
  "license": "MIT",
5
- "version": "1.7.5",
5
+ "version": "2.0.1",
6
6
  "private": false,
7
7
  "homepage": "https://gooseob.github.io/taraskevizatar/",
8
8
  "main": "dist/index.js",
@@ -20,7 +20,7 @@
20
20
  "scripts": {
21
21
  "build": "tsup --config build-config/index.js",
22
22
  "build:bun_EXPERIMENTAL": "bun ./build-config/bun.ts",
23
- "dev": "esrun --watch=src/*,test/* --send-code-mode=temporaryFile test",
23
+ "dev": "esrun --watch=src/*,test/*,bin/* --send-code-mode=temporaryFile test",
24
24
  "dev:bun": "bun ./test/bun-watch.ts",
25
25
  "dev-bun": "bun test --watch",
26
26
  "test": "esrun --send-code-mode=temporaryFile test",