taraskevizer 8.0.11 → 8.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dict/iawords.d.ts +2 -0
- package/dist/dict/iawords.js +97 -0
- package/dist/dict/wordlist.js +3 -89
- package/dist/pipelines.d.ts +1 -1
- package/dist/pipelines.js +2 -11
- package/dist/steps/index.d.ts +1 -0
- package/dist/steps/index.js +1 -0
- package/dist/steps/phonetize.d.ts +2 -0
- package/dist/steps/phonetize.js +21 -0
- package/package.json +1 -1
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { iwords } from './iwords.js';
|
|
2
|
+
import { toOneLine } from './lib.js';
|
|
3
|
+
/**
 * Builds the regex source for ` ${word}` followed by a lookahead.
 *
 * The lookahead accepts a space, an optional `(`, and then one of:
 *  - any run of the listed consonants ending in `о` or `ё`,
 *  - one of the alternatives in `words` (its `(` groups are made non-capturing),
 *  - `i` followed by one of the `iwords` alternatives.
 *
 * @param {string} word - the word to match after a leading space.
 * @param {string} words - regex alternation source for the following word.
 * @returns {string} regex source (not a compiled RegExp).
 */
const ia = (word, words) => {
    // Turn every "(" into "(?:" so the supplied groups do not capture.
    const nonCapturingWords = words.replace(/\(/g, '(?:');
    const alternatives = [
        '[бвгджзйклмнпстфцчшў]*[оё]',
        nonCapturingWords,
        `i(?:${iwords})`,
    ].join('|');
    return ` ${word}(?= \\(?(?:${alternatives}))`;
};
|
|
4
|
+
// Rules for the words «не» and «без». Each entry is a two-element array:
// the pattern source returned by `ia(word, words)` and the replacement text
// (' ня' / ' бяз'). The `words` argument is a multi-line list of regex
// fragments passed through `toOneLine` — presumably joined into a single
// alternation; confirm in ./lib.js. Whitespace inside the template literals
// (e.g. the trailing space in `кла(д |л|ў)` or `ян `) is part of the pattern.
export const iaWords = [

    // «не» → « ня» before the listed word beginnings.
    [
        ia('не', toOneLine(`(вы)?(бач[ауы]
бег[ла]
блыта[юц]
брал[аі]
буд(зе|у(ць|чы)?)
ваш
веда(л[аі]|ць|ю(ць|чы)?|ў)
дума(л[аі]|ць|ю(ць|чы)?|ў)
зна(л[аі]|ць|ю(ць|чы)?)
кла(д |л|ў)
ма(е|ючы|ю(ць|чы)?)
ме(л|ў|ць)
мы(ц|л[аі])
пі(са|шу)(цц|чы|ць )?
ў?пэўн
ска(ж(а|уць)|за )
спал[аі]
трэба
чу[елўцю])
веды
веліч
выраш
вы[дп]ал[еіяю]
выпад
гор(ш|ай)
гу(к|чн|ст)
дрэнн
іхн
карт\\S{0,4}[ \\)]
каш(ай?|амі|у|ы)
лёгк
літар
лішн
(мен|бол|леп)(ш|ей)
мап\\S{0,4}[ \\)]
медз
мякк
наш([аы]я|ую|ых)
прыйдзеш
руш[аы]
ста(ў|не|нуць|л[аі])
таго
тое
тыя
ўвод
ўс[еёя]
фарб(а(ў|мі)?|у|ы)?
ян `)),
        ' ня',
    ],

    // «без» → « бяз» before the listed word beginnings.
    // NOTE(review): this list has `(мен|бол|леп|ш|ей)` where the «не» list has
    // `(мен|бол|леп)(ш|ей)`; as written, bare `ш` and `ей` are alternatives of
    // their own. Looks like a typo — confirm the intended pattern.
    [
        ia('без', toOneLine(`(вы)?(клад[ау]
мела
мытых
піса[нц]
ска(жа|жуць|за)
спал[аі]
чу[елўцю])
ваш
ведаў
велічы
выраш
вы[дп]але
выпад
гор(ш|ай)
гу(к|чн|ст)
дрэнн
іхн
карт\\S{0,4}[ \\)]
каш(ы|аў)
лёгк
літар
лішн
медз
(мен|бол|леп|ш|ей)
мап\\S{0,4}[ \\)]
мякк
наш(ую|ых)
пішучы
руша
стаў
таго
ўвод
ўс[еёя]
фарб(аў|ы)?
цукр[ау]
ян`)),
        ' бяз',
    ],
];
|
package/dist/dict/wordlist.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { iwords } from './iwords.js';
|
|
2
|
-
import { dictFrom, toOneLine } from './lib.js';
|
|
1
|
+
import { iaWords } from './iawords.js';
|
|
2
|
+
import { dictFrom } from './lib.js';
|
|
3
3
|
const chemicalElements1 = [
|
|
4
4
|
'сканд|ванад|рубід|род|ірыд|рэзэрфорд',
|
|
5
5
|
'стронц|бар|цэр|лютэц|самар| тор|амэрыц|кальц|кюр|дармштат|лівэрмор|натр'
|
|
@@ -11,7 +11,6 @@ const chemicalElements2 = [
|
|
|
11
11
|
];
|
|
12
12
|
chemicalElements2[1] = chemicalElements2[0] + '|айнштайн|мендзялев|сыборг|гас|флеров';
|
|
13
13
|
const chemicalElements3 = ' гал|бэрыл|тул|бэркл|набэл';
|
|
14
|
-
const ia = (word, words) => ` ${word}(?= \\(?(?:[бвгджзйклмнпстфцчшў]*[оё]|${words.replace(/\(/g, '(?:')}|i(?:${iwords})))`;
|
|
15
14
|
const gwords = [
|
|
16
15
|
' ґб ',
|
|
17
16
|
'ґг',
|
|
@@ -1415,92 +1414,7 @@ export const wordlist = dictFrom.raw([
|
|
|
1415
1414
|
[/ фасці(?=[вў])/, ' фа(сьці|ста)'],
|
|
1416
1415
|
|
|
1417
1416
|
[/ зеленалужск/, ' (зеленалуск|зялёналуск)'],
|
|
1418
|
-
|
|
1419
|
-
[ia('не', toOneLine(`(вы)?(бач[ауы]
|
|
1420
|
-
бег[ла]
|
|
1421
|
-
блыта[юц]
|
|
1422
|
-
брал[аі]
|
|
1423
|
-
буд(зе|у(ць|чы)?)
|
|
1424
|
-
ваш
|
|
1425
|
-
веда(л[аі]|ць|ю(ць|чы)?|ў)
|
|
1426
|
-
дума(л[аі]|ць|ю(ць|чы)?|ў)
|
|
1427
|
-
зна(л[аі]|ць|ю(ць|чы)?)
|
|
1428
|
-
кла(д |л|ў)
|
|
1429
|
-
ма(е|ючы|ю(ць|чы)?)
|
|
1430
|
-
ме(л|ў|ць)
|
|
1431
|
-
мы(ц|л[аі])
|
|
1432
|
-
пі(са|шу)(цц|чы|ць )?
|
|
1433
|
-
ў?пэўн
|
|
1434
|
-
ска(ж(а|уць)|за )
|
|
1435
|
-
спал[аі]
|
|
1436
|
-
трэба
|
|
1437
|
-
чу[елўцю])
|
|
1438
|
-
веды
|
|
1439
|
-
веліч
|
|
1440
|
-
выраш
|
|
1441
|
-
вы[дп]ал[еіяю]
|
|
1442
|
-
выпад
|
|
1443
|
-
гор(ш|ай)
|
|
1444
|
-
гу(к|чн|ст)
|
|
1445
|
-
дрэнн
|
|
1446
|
-
іхн
|
|
1447
|
-
карт\\S{0,4}[ \\)]
|
|
1448
|
-
каш(ай?|амі|у|ы)
|
|
1449
|
-
лёгк
|
|
1450
|
-
літар
|
|
1451
|
-
лішн
|
|
1452
|
-
(мен|бол|леп)(ш|ей)
|
|
1453
|
-
мап\\S{0,4}[ \\)]
|
|
1454
|
-
медз
|
|
1455
|
-
мякк
|
|
1456
|
-
наш([аы]я|ую|ых)
|
|
1457
|
-
прыйдзеш
|
|
1458
|
-
руш[аы]
|
|
1459
|
-
ста(ў|не|нуць|л[аі])
|
|
1460
|
-
таго
|
|
1461
|
-
тое
|
|
1462
|
-
тыя
|
|
1463
|
-
ўвод
|
|
1464
|
-
ўс[еёя]
|
|
1465
|
-
фарб(а(ў|мі)?|у|ы)?
|
|
1466
|
-
ян `)), ' ня'],
|
|
1467
|
-
|
|
1468
|
-
[ia('без', toOneLine(`(вы)?(клад[ау]
|
|
1469
|
-
мела
|
|
1470
|
-
мытых
|
|
1471
|
-
піса[нц]
|
|
1472
|
-
ска(жа|жуць|за)
|
|
1473
|
-
спал[аі]
|
|
1474
|
-
чу[елўцю])
|
|
1475
|
-
ваш
|
|
1476
|
-
ведаў
|
|
1477
|
-
велічы
|
|
1478
|
-
выраш
|
|
1479
|
-
вы[дп]але
|
|
1480
|
-
выпад
|
|
1481
|
-
гор(ш|ай)
|
|
1482
|
-
гу(к|чн|ст)
|
|
1483
|
-
дрэнн
|
|
1484
|
-
іхн
|
|
1485
|
-
карт\\S{0,4}[ \\)]
|
|
1486
|
-
каш(ы|аў)
|
|
1487
|
-
лёгк
|
|
1488
|
-
літар
|
|
1489
|
-
лішн
|
|
1490
|
-
медз
|
|
1491
|
-
(мен|бол|леп|ш|ей)
|
|
1492
|
-
мап\\S{0,4}[ \\)]
|
|
1493
|
-
мякк
|
|
1494
|
-
наш(ую|ых)
|
|
1495
|
-
пішучы
|
|
1496
|
-
руша
|
|
1497
|
-
стаў
|
|
1498
|
-
таго
|
|
1499
|
-
ўвод
|
|
1500
|
-
ўс[еёя]
|
|
1501
|
-
фарб(аў|ы)?
|
|
1502
|
-
цукр[ау]
|
|
1503
|
-
ян`)), ' бяз'],
|
|
1417
|
+
...iaWords,
|
|
1504
1418
|
|
|
1505
1419
|
[/(\S)([іы]) (аа|ба|генэ|кры|тэ)зіс /, '$1($2|ая) $3(зіс|за) '],
|
|
1506
1420
|
[/ага (аа|ба|генэ|кры|тэ)зіс[ау] /, '(ага|ай) $1(зісу|зы) '],
|
package/dist/pipelines.d.ts
CHANGED
|
@@ -44,5 +44,5 @@ export declare const html: (TaraskStep<import("./steps/types").SplittedTextStora
|
|
|
44
44
|
/**
|
|
45
45
|
* Pipeline for phonetizing.
|
|
46
46
|
*/
|
|
47
|
-
export declare const phonetic: (TaraskStep<import("./steps/resolve-syntax").SpecialSyntaxStorage> | TaraskStep<import("./steps/whitespaces").WhiteSpaceStorage>)[];
|
|
47
|
+
export declare const phonetic: (TaraskStep<import("./steps/types").SplittedTextStorage> | TaraskStep<import("./steps/resolve-syntax").SpecialSyntaxStorage> | TaraskStep<import("./steps/whitespaces").WhiteSpaceStorage>)[];
|
|
48
48
|
export {};
|
package/dist/pipelines.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
import { applyVariationsHtml, applyVariationsNonHtml, applyGHtml, applyGNonHtml, highlightDiffStep, highlightDiffStepNonHtml, applyNoFix, convertAlphabet, convertAlphabetLowerCase, joinSplittedText, prepare, replaceIbyJ, resolveSpecialSyntax, restoreCaseStep, restoreWhitespaces, storeSplittedAbcConvertedOrig, storeSplittedText, taraskevize, whitespacesToSpaces, trim, finalize, toLowerCase, } from './steps/index.js';
|
|
2
|
+
import { applyVariationsHtml, applyVariationsNonHtml, applyGHtml, applyGNonHtml, highlightDiffStep, highlightDiffStepNonHtml, applyNoFix, convertAlphabet, convertAlphabetLowerCase, joinSplittedText, prepare, replaceIbyJ, resolveSpecialSyntax, restoreCaseStep, restoreWhitespaces, storeSplittedAbcConvertedOrig, storeSplittedText, taraskevize, phonetize, whitespacesToSpaces, trim, finalize, toLowerCase, iotacizeJi, } from './steps/index.js';
|
|
3
3
|
import { htmlWrappers } from './lib/wrappers.js';
|
|
4
4
|
const resolveSpecialSyntaxWithLAB = resolveSpecialSyntax('<');
|
|
5
5
|
const finalizeWithNewLine = finalize('\n');
|
|
@@ -46,13 +46,4 @@ export const plainText = createPipeline(resolveSpecialSyntaxWithLAB, applyGNonHt
|
|
|
46
46
|
|
|
47
47
|
export const html = createPipeline(resolveSpecialSyntax('<'), applyGHtml, applyVariationsHtml, finalize('<br>'), highlightDiffStep(htmlWrappers.fix));
|
|
48
48
|
|
|
49
|
-
export const phonetic = [
|
|
50
|
-
trim,
|
|
51
|
-
resolveSpecialSyntaxWithLAB,
|
|
52
|
-
prepare,
|
|
53
|
-
whitespacesToSpaces,
|
|
54
|
-
convertAlphabet,
|
|
55
|
-
restoreWhitespaces,
|
|
56
|
-
applyNoFix,
|
|
57
|
-
finalizeWithNewLine,
|
|
58
|
-
];
|
|
49
|
+
/**
 * Pipeline for phonetizing.
 *
 * Derived from `plainText`: every element identical to `taraskevize` is
 * replaced by the pair `phonetize`, `iotacizeJi`; all other elements are kept
 * in their original order.
 */
export const phonetic = plainText.flatMap((step) => {
    if (step === taraskevize) {
        return [phonetize, iotacizeJi];
    }
    return step;
});
|
package/dist/steps/index.d.ts
CHANGED
|
@@ -22,6 +22,7 @@ export * from './restore-case';
|
|
|
22
22
|
export * from './store-splitted-abc-converted-orig';
|
|
23
23
|
export * from './store-splitted-text';
|
|
24
24
|
export * from './taraskevize';
|
|
25
|
+
export * from './phonetize';
|
|
25
26
|
export * from './whitespaces';
|
|
26
27
|
export * from './trim';
|
|
27
28
|
export * from './finalize';
|
package/dist/steps/index.js
CHANGED
|
@@ -11,6 +11,7 @@ export * from './restore-case.js';
|
|
|
11
11
|
export * from './store-splitted-abc-converted-orig.js';
|
|
12
12
|
export * from './store-splitted-text.js';
|
|
13
13
|
export * from './taraskevize.js';
|
|
14
|
+
export * from './phonetize.js';
|
|
14
15
|
export * from './whitespaces.js';
|
|
15
16
|
export * from './trim.js';
|
|
16
17
|
export * from './finalize.js';
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { iaWords } from '../dict/iawords.js';
|
|
2
|
+
import { noSoften, softeners } from '../dict/index.js';
|
|
3
|
+
import { iwords } from '../dict/iwords.js';
|
|
4
|
+
import { afterTarask, dictFrom, mutatingStep, replaceWithDict, } from '../lib/index.js';
|
|
5
|
+
/**
 * Step that rewrites `text` through several dictionaries in sequence:
 * `noSoften` concatenated with `iaWords`, then `softeners` (repeatedly),
 * then `afterTarask`. The callback's return value is handed to `mutatingStep`.
 *
 * NOTE(review): `iaWords` entries are built from string pattern sources, and
 * wordlist.js spreads them into `dictFrom.raw([...])`, whereas here they are
 * concatenated directly. Confirm that `replaceWithDict` treats string patterns
 * as regex sources; otherwise wrap them with `dictFrom.raw` first.
 */
export const phonetize = mutatingStep(({ text }) => {
    text = replaceWithDict(text, noSoften.concat(iaWords));
    // Apply the softeners, then re-check: if any softener pattern still
    // matches, run another pass. Entries whose replacement is '$1дзьдз' are
    // skipped in this check, so they never trigger a further pass on their own.
    softening: do {
        text = replaceWithDict(text, softeners);
        for (const [pattern, result] of softeners)
            if (result !== '$1дзьдз' && pattern.test(text))
                continue softening;
        break;
    } while (true);
    // Remove every U+E0FF character, then apply `afterTarask`.
    // NOTE(review): as rendered here the two 'не пра' replaces have identical
    // pattern and replacement; presumably they differ by a non-printing
    // character (guarding "не пра" around `afterTarask`) — confirm against the
    // published file before editing this line.
    return replaceWithDict(text.replace(/\ue0ff/g, '').replace(/не пра/g, 'не пра'), afterTarask).replace(/не пра/g, 'не пра');
});
|
|
16
|
+
// Rules applied by `iotacizeJi`, passed through `dictFrom.raw`:
//  1. after one of the letters аеёіоуыэюя and a space, "і ў" becomes "й у";
//  2. in the same position, "і " becomes "й ";
//  3. " і" immediately followed by a match of `iwords` becomes " йі"
//     (given as a string — presumably treated as a regex source by
//     `dictFrom.raw`; confirm in ../lib).
// Rule 1 is listed before rule 2, whose pattern would also match its input.
const idict = dictFrom.raw([
    [/([аеёіоуыэюя] )і ў/, '$1й у'],
    [/([аеёіоуыэюя] )і /, '$1й '],
    [` і(?=${iwords})`, ' йі'],
]);
|
|
21
|
+
/**
 * Step that runs the storage's `text` through the `idict` rules and returns
 * the result to `mutatingStep`.
 */
export const iotacizeJi = mutatingStep((storage) => {
    const { text } = storage;
    return replaceWithDict(text, idict);
});
|