taraskevizer 0.0.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts DELETED
@@ -1,3 +0,0 @@
1
- export * from './types';
2
- export { tarask, taraskSync } from './tarask';
3
- export { gobj } from './dict';
package/src/tarask.ts DELETED
@@ -1,258 +0,0 @@
1
- import {
2
- wordlist,
3
- softers,
4
- arabLetters,
5
- latinLetters,
6
- latinLettersUpperCase,
7
- gobj,
8
- } from './dict';
9
- import {
10
- Tarask,
11
- TaraskAsync,
12
- Dict,
13
- AlphabetDependentDict,
14
- TaraskOptions,
15
- } from './types';
16
- import * as debug from './tools.debug';
17
-
18
/**
 * Tells whether upper-casing `str` leaves it unchanged.
 * Strings without cased letters (digits, punctuation, the empty string)
 * therefore also count as upper case.
 */
const isUpperCase = (str: string): boolean => str.toUpperCase() === str;
19
-
20
/** Placeholder put in place of every fragment that must pass through the conversion untouched. */
const NOFIX_CHAR = ' \uffff ';
/** Finds every placeholder so the saved fragments can be put back. */
const NOFIX_REGEX = new RegExp(NOFIX_CHAR, 'g');
/** A parenthesised group; the finalizers split its content on `|`. */
const OPTIONAL_WORDS_REGEX = /\(.*?\)/g;
/** Either case of the letter "ґ". */
const G_REGEX = /[Ґґ]/g;

// Values of the `abc` option.
const CYRILLIC = 0;
const LATIN = 1;
const ARABIC = 2;

// Values of the `j` option.
const J_NEVER = 0;
const J_RANDOM = 1;
const J_ALWAYS = 2;

/** Lower-case transliteration tables, keyed by alphabet (Cyrillic has none). */
const letters = {
  [LATIN]: latinLetters,
  [ARABIC]: arabLetters,
} satisfies AlphabetDependentDict;

/** Upper-case transliteration tables; only the Latin alphabet has one. */
const lettersUpperCase = {
  [LATIN]: latinLettersUpperCase,
} satisfies AlphabetDependentDict;

/**
 * Replacements applied to the joined HTML output, keyed by alphabet.
 * Each key is the replacement string and each value the pattern it replaces.
 */
const additionalReplacements = {
  [CYRILLIC]: {
    '$1У': /([АЕЁІОУЫЭЮЯ])<tarF>Ў<\/tarF>/g,
    ' У': / <tarF>Ў<\/tarF>(?=\p{Lu})/gu,
  },
  [LATIN]: {
    '$1U': /([AEIOUY])<tarF>Ŭ<\/tarF>/g,
    ' U': / <tarF>Ŭ<\/tarF>(?=\p{Lu})/gu,
  },
  [ARABIC]: {},
} satisfies AlphabetDependentDict;
50
-
51
/**
 * Synchronously converts `text` and returns the result as a string.
 *
 * Angle-bracket markup in the input:
 * - `<,…>` is emitted as literal `<…>` (the opening bracket becomes `&lt;`
 *   in HTML mode) and its content is still converted;
 * - `<.…>` keeps its content untouched and drops the brackets;
 * - `<…>` is kept untouched, brackets included.
 *
 * @param text    Source text.
 * @param options `abc` selects the alphabet (defaults to 0), `j` controls the
 *                "і" → "й" replacement (defaults to 0, i.e. off), `html`
 *                turns on HTML output when it is an options object.
 * @returns The converted text, trimmed.
 */
export const taraskSync: Tarask = (text, { abc = 0, j = 0, html }) => {
  // Fragments excluded from conversion, in order of appearance.
  const noFix: string[] = [];

  const LEFT_ANGLE_BRACKET = html ? '&lt;' : '<';

  text = ` ${text.trim()} `
    // `[\s\S]` accepts exactly what `(?:.|\s)` accepted, but has no
    // overlapping alternatives, so an unclosed `<` followed by whitespace
    // can no longer trigger exponential backtracking.
    .replace(/<([,.]?)([\s\S]*?)>/g, ($0, $1, $2) => {
      if ($1 === ',') return LEFT_ANGLE_BRACKET + $2 + '>';
      noFix[noFix.length] = $1 === '.' ? $2 : $0;
      return NOFIX_CHAR;
    })
    .replace(/г'(?![еёіюя])/g, 'ґ')
    .replace(/(\n|\t)/g, ' $1 ')
    .replace(/ - /g, ' — ')
    // Surround punctuation, symbols and digits with spaces so that they
    // become separate tokens after splitting on ' '.
    .replace(/(\p{P}|\p{S}|\d)/gu, ' $1 ')
    .replace(/ ['`’] (?=\S)/g, 'ʼ')
    // Hide literal parentheses: "(" is reserved for optional-word groups.
    .replace(/\(/g, '&#40');

  let splittedOrig: string[], splitted: string[];
  // Original tokens (transliterated only), used as the reference for case
  // restoration and change highlighting.
  splittedOrig = replaceWithDict(
    replaceWithDict(text, letters[abc]),
    lettersUpperCase[abc]
  ).split(' ');

  text = toTarask(text.toLowerCase());
  if (j) text = replaceIbyJ(text, j === J_ALWAYS);
  text = replaceWithDict(text, letters[abc]);

  splitted = text.split(' ');
  if (abc !== ARABIC) splitted = restoreCase(splitted, splittedOrig);
  if (html) splitted = toHtmlTags(splitted, splittedOrig, abc);

  text = splitted
    .join(' ')
    .replace(/&nbsp;/g, ' ')
    // Undo the padding added around punctuation, symbols and digits.
    .replace(/ (\p{P}|\p{S}|\d|&#40) /gu, '$1');

  if (html) {
    text = replaceWithDict(text, additionalReplacements[abc]);
    if (abc === CYRILLIC) {
      // Two separate calls keep each `replace` overload type-checked.
      text = html.g
        ? text.replace(G_REGEX, '<tarH>$&</tarH>')
        : text.replace(G_REGEX, ($0) => `<tarH>${gobj[$0]}</tarH>`);
    }
  }

  if (noFix.length) {
    // A placeholder with no saved fragment left is kept as it is instead of
    // being replaced by the text "undefined".
    text = text.replace(NOFIX_REGEX, () => noFix.shift() ?? NOFIX_CHAR);
  }

  return (html ? finalizer.html : finalizer.text)(text).trim();
};
102
-
103
/**
 * Promise-returning variant of `taraskSync`: takes the same arguments and
 * resolves with its result, or rejects if `taraskSync` throws.
 */
export const tarask: TaraskAsync = async (...args) => taraskSync(...args);
105
-
106
/**
 * Re-applies the letter case of the original tokens to the converted ones.
 *
 * @param text Converted (lower-case) tokens; modified in place.
 * @param orig Original tokens at the same positions.
 * @returns The same `text` array.
 */
function restoreCase(text: string[], orig: string[]): string[] {
  for (let i = 0; i < text.length; i++) {
    const word = text[i];
    const oWord = orig[i];
    // Without a counterpart in `orig` there is no case to restore.
    if (oWord === undefined || word === oWord) continue;
    if (word === oWord.toLowerCase()) {
      // Only the case differs: take the original token as it is.
      text[i] = oWord;
      continue;
    }
    // Nothing to capitalise in an empty word or when the original does not
    // start with an upper-case character.
    if (!word || !oWord[0] || !isUpperCase(oWord[0])) continue;
    if (word === 'зь') {
      // The case of the following original token decides between "ЗЬ" and
      // "Зь"; a missing token is treated like an empty one.
      text[i] = isUpperCase(orig[i + 1] ?? '') ? 'ЗЬ' : 'Зь';
    } else if (isUpperCase(oWord[oWord.length - 1])) {
      // First and last characters are upper case: upper-case the whole word.
      text[i] = word.toUpperCase();
    } else {
      text[i] =
        word[0] === '('
          ? // Optional-word group: capitalise every alternative before ")".
            word.replace(/.*?(?=\))/, ($0) =>
              $0.replace(/[(|]./g, ($0) => $0.toUpperCase())
            )
          : word[0].toUpperCase() + word.slice(1);
    }
  }

  return text;
}
132
-
133
/**
 * Wraps the parts of each converted token that differ from the original
 * token in `<tarF>` tags.
 *
 * @param text Converted tokens; modified in place.
 * @param orig Original tokens at the same positions.
 * @param abc  Target alphabet; the "ь" shortcut is applied for Cyrillic only.
 * @returns The same `text` array.
 */
function toHtmlTags(
  text: string[],
  orig: string[],
  abc: TaraskOptions['abc']
): string[] {
  for (let i = 0; i < text.length; i++) {
    const word = text[i];
    const oWord = orig[i];
    // Without a counterpart in `orig` there is nothing to compare against.
    if (oWord === undefined || oWord === word) continue;
    // The same word with every "ґ" mapped through `gobj`.
    const wordH = word.replace(G_REGEX, ($0) => gobj[$0]);
    if (oWord === wordH) continue;
    if (!/\(/.test(word)) {
      if (word.length === oWord.length) {
        // Same length: mark every position whose character differs.
        const LettersText = word.split('');
        for (let x = 0; x < LettersText.length; x++) {
          if (LettersText[x] !== oWord[x])
            LettersText[x] = `<tarF>${LettersText[x]}</tarF>`;
        }
        text[i] = LettersText.join('');
        continue;
      }
      if (abc === CYRILLIC) {
        // The words differ only by inserted "ь": mark just those letters.
        const word1 = word.replace(/ь/g, '');
        switch (oWord) {
          case word1:
            text[i] = word.replace(/ь/g, '<tarF>ь</tarF>');
            continue;
          case word1 + 'ь':
            // The final "ь" was already in the original, so it stays unmarked.
            text[i] = word.slice(0, -1).replace(/ь/g, '<tarF>ь</tarF>') + 'ь';
            continue;
        }
      }
    }

    const oWordEnd = oWord.length - 1;
    let fromStart = 0;
    let fromWordEnd = word.length - 1;
    let fromOWordEnd = oWordEnd;

    // Length of the common prefix.
    while (wordH[fromStart] === oWord[fromStart]) ++fromStart;
    // Common suffix. The scan must not run into the common prefix in either
    // word, otherwise the slices below overlap and letters get duplicated.
    while (
      fromWordEnd >= fromStart &&
      fromOWordEnd >= fromStart &&
      wordH[fromWordEnd] === oWord[fromOWordEnd]
    )
      --fromWordEnd, --fromOWordEnd;

    if (oWord.length < word.length) {
      if (fromOWordEnd === oWordEnd) {
        // No common suffix at all: mark the whole word.
        text[i] = `<tarF>${word}</tarF>`;
        continue;
      }
      if (fromWordEnd < 0) fromWordEnd = 0;
    }

    text[i] =
      word.slice(0, fromStart) +
      '<tarF>' +
      word.slice(fromStart, fromWordEnd + 1) +
      '</tarF>' +
      word.slice(fromWordEnd + 1);
  }

  return text;
}
194
-
195
/**
 * Applies the word list, then the `softers` rules until none of them matches
 * any more, then a fixed set of follow-up replacements.
 *
 * @param text Lower-cased text whose tokens are separated by single spaces.
 * @returns The converted text.
 */
function toTarask(text: string): string {
  text = replaceWithDict(text, wordlist);

  // Repeat the softening pass while any rule other than '$1дзьдз' still
  // matches the result.
  let needsAnotherPass: boolean;
  do {
    text = replaceWithDict(text, softers);
    needsAnotherPass = false;
    for (const key in softers) {
      if (key === '$1дзьдз') continue;
      if (softers[key].test(text)) {
        needsAnotherPass = true;
        break;
      }
    }
  } while (needsAnotherPass);

  // Turns "е" into "я" when the captured tail contains exactly one vowel.
  const iaReplacer = <TStart extends ' б' | ' н', T extends string>(
    $0: `${TStart}е${T}`,
    $1: TStart,
    $2: T
  ) => {
    const vowels = $2.match(/[аеёіоуыэюя]/g);
    return vowels?.length === 1 ? $1 + 'я' + $2 : $0;
  };

  text = text.replace(/ [уў]асьнігл /g, ' уаснігл ');
  text = text.replace(/ сь(?=нід |мі )/g, ' с');
  text = text.replace(/( б)е(зь? \S+)/g, iaReplacer);
  text = text.replace(/( н)е( \S+)/g, iaReplacer);
  // Add "ь" to the preposition when the next word starts with "і" and ends
  // with "аў", "яў" or "ну".
  return text.replace(/( (?:б[ея]|пра|цера)?з) і(\S*)/g, ($0, $1, $2) =>
    /([ая]ў|ну)$/.test($2) ? $1 + 'ь і' + $2 : $0
  );
}
219
-
220
/**
 * Applies every entry of `dict` to `text` in the dictionary's own order.
 *
 * @param text Text to transform.
 * @param dict Map from replacement string (the key) to the pattern it
 *             replaces (the value); `null` or omitted means "no replacements".
 * @returns The transformed text.
 */
function replaceWithDict(text: string, dict: Dict | null = null): string {
  if (dict === null) return text;
  for (const key in dict) text = text.replace(dict[key], key);

  return text;
}
225
-
226
/** Vowels after which the standalone word "і" may become "й". */
type Vow = 'а' | 'е' | 'ё' | 'і' | 'о' | 'у' | 'ы' | 'э' | 'ю' | 'я';

/** Signature of `toJ`, spelling out the exact string it produces. */
type ToJ = <TVow extends `${Vow} `, TU extends '' | 'ў'>(
  vow: TVow,
  shortU: TU
) => `${TVow}й ${TU extends 'ў' ? 'у' : ''}`;

/**
 * Builds the replacement for "<vowel> і <ў?>": "і" becomes "й", and a
 * following "ў" (if captured) becomes "у".
 */
const toJ: ToJ = (vow, shortU) =>
  `${vow}й ${shortU ? 'у' : ''}` as ReturnType<ToJ>;

/**
 * Replaces the standalone word "і" that follows a word ending in a vowel
 * with "й".
 *
 * @param text   Text whose tokens are separated by single spaces.
 * @param always When true every occurrence is replaced; otherwise each
 *               occurrence is replaced when `Math.random() >= 0.5`.
 * @returns The text with the replacements applied.
 */
function replaceIbyJ(text: string, always = false): string {
  const pattern = /([аеёіоуыэюя] )і (ў?)/g;
  if (always) {
    return text.replace(pattern, (_match, vow, shortU) => toJ(vow, shortU));
  }
  return text.replace(pattern, (match, vow, shortU) =>
    Math.random() >= 0.5 ? toJ(vow, shortU) : match
  );
}
244
-
245
/**
 * Last processing step, one variant per output format.
 * Both variants resolve parenthesised groups of `|`-separated alternatives.
 */
const finalizer = {
  // HTML: the first alternative becomes the content of a <tarL> element, the
  // remaining ones go comma-separated into its `data-l` attribute; token-level
  // line breaks become <br>.
  html: (text) =>
    text
      .replace(OPTIONAL_WORDS_REGEX, (group) => {
        const [main, ...alternatives] = group.slice(1, -1).split('|');
        return `<tarL data-l='${alternatives}'>${main}</tarL>`;
      })
      .replace(/ \n /g, '<br>'),
  // Plain text: keep only the first alternative, then restore the literal
  // parentheses that were hidden as "&#40".
  text: (text) =>
    text
      .replace(
        OPTIONAL_WORDS_REGEX,
        (group) => group.slice(1, -1).split('|', 1)[0]
      )
      .replace(/&#40/g, '('),
} satisfies Record<string, (text: string) => string>;
@@ -1,20 +0,0 @@
1
- import { Dict } from './types';
2
type LogFn = (...args: unknown[]) => void;
declare const console: { log: LogFn };

/**
 * Debugging variant of `replaceWithDict`: applies the dictionary entry by
 * entry and stops at the first entry after which `regex` matches the text.
 *
 * @param text  Text to transform.
 * @param dict  Map from replacement string (the key) to the pattern it
 *              replaces (the value); `null` means "no replacements".
 * @param regex Pattern that must not match the text after any replacement.
 * @returns The transformed text.
 * @throws {Error} With the message `'<key>': /<pattern>/`, naming the entry
 *                 after which `regex` matched.
 */
export function replaceWithDict(
  text: string,
  dict: Dict | null = null,
  regex: RegExp
): string {
  if (dict === null) return text;
  for (const key in dict) {
    text = text.replace(dict[key], key);
    // A global or sticky regex resumes from `lastIndex`; reset it so that
    // every check scans the whole text.
    regex.lastIndex = 0;
    if (regex.test(text)) throw new Error(`'${key}': /${dict[key]}/`);
  }

  return text;
}

/** Forwards all arguments to `console.log`. */
export const log: LogFn = (...msgs) => {
  console.log(...msgs);
};
package/src/tsconfig.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "compilerOptions": {
3
- "lib": ["es6"]
4
- }
5
- }
package/src/types.ts DELETED
@@ -1,15 +0,0 @@
1
/** Value of the `abc` option: one of three alphabet codes. */
type Alphabet = 0 | 1 | 2;

/** Value of the `j` option: one of three mode codes. */
type J = 0 | 1 | 2;

/** Settings of the HTML output mode. */
export type HtmlOptions = { g: boolean };

/** Options accepted by the conversion functions. */
export type TaraskOptions = {
  abc: Alphabet;
  j: J;
  /** `false` for plain-text output, an options object for HTML output. */
  html: false | HtmlOptions;
};

/** Maps a function type to the same function returning a promise; `never` for anything else. */
type Promisify<T> = T extends (...args: infer TArgs) => infer TReturn
  ? (...args: TArgs) => Promise<TReturn>
  : never;

/** Synchronous converter. */
export type Tarask = (text: string, options: TaraskOptions) => string;

/** Promise-returning converter with the same parameters as `Tarask`. */
export type TaraskAsync = Promisify<Tarask>;

/** Replacement table: the key is the replacement string, the value the pattern it replaces. */
export type Dict = Record<string, RegExp>;

/** Optional replacement table for each alphabet. */
export type AlphabetDependentDict = Partial<Record<Alphabet, Dict>>;
package/tsup.config.js DELETED
@@ -1,24 +0,0 @@
1
- import { defineConfig } from 'tsup';
2
- import path from 'path';
3
- import { readFile, writeFile } from 'fs/promises';
4
- import postprocess from './postprocess';
5
- import noDebugFiles from './esbuild-plugins/no-debug-files';
6
-
7
// Build configuration: bundles src/index.ts to CommonJS and ESM with type
// declarations, then post-processes both emitted entry files.
export default defineConfig({
  entry: ['src/index.ts'],
  clean: true,
  sourcemap: false,
  splitting: false,
  minify: false,
  format: ['cjs', 'esm'],
  dts: true,
  esbuildPlugins: [noDebugFiles],
  async onSuccess() {
    // Wait for every rewrite so that the hook finishes only after the files
    // are on disk and a read/write failure rejects the hook instead of
    // becoming an unhandled rejection. The arrow function keeps `this`.
    await Promise.all(
      ['js', 'mjs'].map(async (ext) => {
        const filePath = path.resolve(this.outDir, 'index.' + ext);
        const text = await readFile(filePath, 'utf8');
        await writeFile(filePath, postprocess(text, ext));
      })
    );
  },
});