skrits 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/CODE_OF_CONDUCT.md +87 -0
- package/CONTRIBUTING.md +62 -0
- package/README.md +145 -0
- package/SECURITY.md +25 -0
- package/SUPPORT.md +13 -0
- package/dist/core/caseHelper.d.ts +5 -0
- package/dist/core/caseHelper.d.ts.map +1 -0
- package/dist/core/caseHelper.js +37 -0
- package/dist/core/index.d.ts +5 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +21 -0
- package/dist/core/stringUtils.d.ts +9 -0
- package/dist/core/stringUtils.d.ts.map +1 -0
- package/dist/core/stringUtils.js +28 -0
- package/dist/core/tokenizer.d.ts +4 -0
- package/dist/core/tokenizer.d.ts.map +1 -0
- package/dist/core/tokenizer.js +15 -0
- package/dist/core/transliteration.d.ts +8 -0
- package/dist/core/transliteration.d.ts.map +1 -0
- package/dist/core/transliteration.js +85 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +47 -0
- package/dist/leet.d.ts +25 -0
- package/dist/leet.d.ts.map +1 -0
- package/dist/leet.js +108 -0
- package/dist/leetrovacki.d.ts +37 -0
- package/dist/leetrovacki.d.ts.map +1 -0
- package/dist/leetrovacki.js +109 -0
- package/dist/satrovacki.d.ts +25 -0
- package/dist/satrovacki.d.ts.map +1 -0
- package/dist/satrovacki.js +157 -0
- package/dist/skrit.d.ts +28 -0
- package/dist/skrit.d.ts.map +1 -0
- package/dist/skrit.js +167 -0
- package/dist/utrovacki.d.ts +19 -0
- package/dist/utrovacki.d.ts.map +1 -0
- package/dist/utrovacki.js +100 -0
- package/package.json +32 -0
- package/src/core/caseHelper.ts +31 -0
- package/src/core/index.ts +4 -0
- package/src/core/stringUtils.ts +25 -0
- package/src/core/tokenizer.ts +12 -0
- package/src/core/transliteration.ts +84 -0
- package/src/index.ts +34 -0
- package/src/leet.ts +118 -0
- package/src/leetrovacki.ts +141 -0
- package/src/satrovacki.ts +176 -0
- package/src/skrit.ts +188 -0
- package/src/utrovacki.ts +111 -0
- package/tests/leet.test.ts +117 -0
- package/tests/leetrovacki.test.ts +76 -0
- package/tests/satrovacki.test.ts +112 -0
- package/tests/skrit.test.ts +80 -0
- package/tests/transliteration.test.ts +64 -0
- package/tests/utrovacki.test.ts +83 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import {
|
|
2
|
+
LEET_TABLE,
|
|
3
|
+
BASIC_LEET_PROFILE,
|
|
4
|
+
availableProfiles,
|
|
5
|
+
getLeetProfile,
|
|
6
|
+
buildFullLeetProfile,
|
|
7
|
+
applyLeet,
|
|
8
|
+
looksLikeLeet,
|
|
9
|
+
LeetEncoder,
|
|
10
|
+
DEFAULT_LEET_DENSITY,
|
|
11
|
+
} from '../src/leet';
|
|
12
|
+
|
|
13
|
+
// Jest suite for the helpers exported by '../src/leet': profile listing and
// lookup, the per-letter variant table, density-driven substitution, the
// leet detector and the LeetEncoder class.
describe('Leet', () => {
  describe('availableProfiles', () => {
    test('returns three profiles', () => {
      const profiles = availableProfiles();
      // The title promises exactly three profiles, so pin the count as well as
      // the membership; spreading first keeps the check valid for any iterable.
      expect([...profiles]).toHaveLength(3);
      expect(profiles).toContain('basic');
      expect(profiles).toContain('readable');
      expect(profiles).toContain('full');
    });
  });

  describe('LEET_TABLE', () => {
    test('has 26 letters', () => {
      expect(Object.keys(LEET_TABLE).length).toBe(26);
    });

    test('each letter has at least one variant', () => {
      for (const variants of Object.values(LEET_TABLE)) {
        expect(variants.length).toBeGreaterThan(0);
      }
    });
  });

  describe('getLeetProfile', () => {
    test('basic profile returns BASIC_LEET_PROFILE', () => {
      const profile = getLeetProfile('basic');
      expect(profile).toEqual(BASIC_LEET_PROFILE);
    });

    test('unknown profile throws', () => {
      expect(() => getLeetProfile('unknown')).toThrow();
    });

    test('custom map returned as-is', () => {
      // A custom map passed as the second argument is expected back unchanged.
      const custom = { a: 'x' };
      expect(getLeetProfile('basic', custom)).toEqual(custom);
    });
  });

  describe('buildFullLeetProfile', () => {
    test('complexity 0 returns first variant', () => {
      const profile = buildFullLeetProfile(0);
      expect(profile['a']).toBe(LEET_TABLE['a'][0]);
    });

    test('complexity 1 returns second variant where available', () => {
      const profile = buildFullLeetProfile(1);
      expect(profile['a']).toBe(LEET_TABLE['a'][1]);
    });
  });

  describe('applyLeet', () => {
    test('density 1.0 replaces all mappable chars', () => {
      const result = applyLeet('a', { a: '4' }, 1.0);
      expect(result).toBe('4');
    });

    test('density 0.0 replaces nothing', () => {
      const result = applyLeet('aaaa', { a: '4' }, 0.0);
      expect(result).toBe('aaaa');
    });

    test('non-mapped chars unchanged', () => {
      const result = applyLeet('xyz', { a: '4' }, 1.0);
      expect(result).toBe('xyz');
    });

    test('invalid density throws', () => {
      // One value above 1 and one below 0 are both expected to be rejected.
      expect(() => applyLeet('a', { a: '4' }, 1.5)).toThrow();
      expect(() => applyLeet('a', { a: '4' }, -0.1)).toThrow();
    });
  });

  describe('looksLikeLeet', () => {
    test('leet text detected', () => {
      expect(looksLikeLeet('h3ll0')).toBe(true);
    });

    test('plain text not leet', () => {
      expect(looksLikeLeet('hello')).toBe(false);
    });

    test('single signal char not enough', () => {
      expect(looksLikeLeet('h3llo')).toBe(false); // only one signal char
    });
  });

  describe('LeetEncoder', () => {
    test('basic profile encodes', () => {
      const enc = new LeetEncoder({ profile: 'basic', density: 1.0 });
      // 'a' -> '4' at density 1.0
      const result = enc.encode('aaa');
      expect(result).not.toBe('aaa');
    });

    test('density 0 encodes nothing', () => {
      const enc = new LeetEncoder({ profile: 'basic', density: 0.0 });
      expect(enc.encode('aeiou')).toBe('aeiou');
    });

    test('default density is 0.86', () => {
      // Compared against the exported constant rather than a literal.
      const enc = new LeetEncoder();
      expect(enc.density).toBe(DEFAULT_LEET_DENSITY);
    });
  });
});
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { Leetrovacki } from '../src/leetrovacki';
|
|
2
|
+
|
|
3
|
+
// Jest suite for the Leetrovacki class from '../src/leetrovacki'. Covers word
// and sentence encoding on both bases, constructor validation of leetDensity,
// the zaStyle / njeStyle options, and the leet profile options.
describe('Leetrovacki', () => {
  describe('basic encoding', () => {
    test('encodes word with leet applied', () => {
      const encoded = new Leetrovacki({ base: 'satro', leetDensity: 1.0 }).encodeWord('bazen');
      // zenba with leet at 100% density: z->2, e->3, n unchanged, b unchanged, a->4
      expect(encoded).not.toBe('bazen');
      expect(typeof encoded).toBe('string');
    });

    test('short words unchanged', () => {
      const encoder = new Leetrovacki();
      const encoded = encoder.encodeWord('na');
      expect(encoded).toBe('na');
    });

    test('utro base uses utro format', () => {
      const options = { base: 'utro', leetDensity: 0.0 };
      const encoded = new Leetrovacki(options).encodeWord('bazen');
      // zaStyle='24' replaces 'za', njeStyle='n73' replaces 'nje' - these always apply in utro mode
      expect(encoded).toContain('24');
      expect(encoded).toContain('n73');
    });

    test('sentence encode', () => {
      // Only non-emptiness is checked; leetDensity is left at its default here.
      const sentence = new Leetrovacki({ base: 'satro' }).encode('bazen voda');
      expect(sentence.length).toBeGreaterThan(0);
    });
  });

  describe('validation', () => {
    test('invalid density throws', () => {
      const buildAboveRange = () => new Leetrovacki({ leetDensity: 1.5 });
      expect(buildAboveRange).toThrow();
    });

    test('invalid density negative throws', () => {
      const buildBelowRange = () => new Leetrovacki({ leetDensity: -0.1 });
      expect(buildBelowRange).toThrow();
    });
  });

  describe('za style', () => {
    test('zaStyle z4 used in utro mode', () => {
      const encoder = new Leetrovacki({ base: 'utro', zaStyle: 'z4', leetDensity: 0.0 });
      expect(encoder.encodeWord('bazen')).toContain('z4');
    });

    test('zaStyle 24 used in utro mode', () => {
      const encoder = new Leetrovacki({ base: 'utro', zaStyle: '24', leetDensity: 0.0 });
      expect(encoder.encodeWord('bazen')).toContain('24');
    });
  });

  describe('nje style', () => {
    test('njeStyle n73 in utro mode', () => {
      const encoder = new Leetrovacki({ base: 'utro', njeStyle: 'n73', leetDensity: 0.0 });
      expect(encoder.encodeWord('bazen')).toContain('n73');
    });
  });

  describe('leet profiles', () => {
    test('basic profile', () => {
      const encoder = new Leetrovacki({ base: 'satro', leetProfile: 'basic', leetDensity: 1.0 });
      const encoded = encoder.encodeWord('bazen');
      expect(typeof encoded).toBe('string');
    });

    test('full profile complexity', () => {
      const encoder = new Leetrovacki({
        base: 'satro',
        leetProfile: 'full',
        leetComplexity: 0,
        leetDensity: 1.0,
      });
      const encoded = encoder.encodeWord('bazen');
      expect(typeof encoded).toBe('string');
    });
  });
});
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { Satrovacki } from '../src/satrovacki';
|
|
2
|
+
|
|
3
|
+
// Jest suite for the Satrovacki class from '../src/satrovacki': word and
// sentence encoding (Latin and Cyrillic), user-supplied exceptions, decoding,
// round-trips, canDecodeWord and the minWordLength option.
describe('Satrovacki', () => {
  // One default-configured instance is shared by every test that needs no options.
  const s = new Satrovacki();

  describe('encode', () => {
    test('basic latin word', () => {
      expect(s.encodeWord('bazen')).toBe('zenba');
    });

    test('Beograd title case', () => {
      expect(s.encodeWord('Beograd')).toBe('Gradbeo');
    });

    test('riba', () => {
      expect(s.encodeWord('riba')).toBe('bari');
    });

    test('uppercase', () => {
      expect(s.encodeWord('BAZEN')).toBe('ZENBA');
    });

    test('word shorter than minWordLength unchanged', () => {
      expect(s.encodeWord('na')).toBe('na');
    });

    test('word exactly at minWordLength encoded', () => {
      expect(s.encodeWord('pas')).not.toBe('pas');
    });

    test('sentence with punctuation', () => {
      const result = s.encode('Zdravo, svete!');
      expect(result).toContain(',');
      expect(result).toContain('!');
    });

    test('numbers unchanged', () => {
      expect(s.encode('test 123')).toContain('123');
    });

    test('cyrillic word', () => {
      // Земун -> Мунзе
      expect(s.encodeWord('Земун')).toBe('Мунзе');
    });

    test('cyrillic sentence', () => {
      const result = s.encode('Земун закон матори');
      // Accept any character of the Cyrillic block (U+0400-U+04FF) plus
      // whitespace. A hand-listed class built on А-Я/а-я misses the lowercase
      // Serbian letters ђ, ј, љ, њ, ћ and џ (U+0452-U+045F), so the check
      // would break as soon as the sample sentence contained one of them.
      expect(result).toMatch(/^[\u0400-\u04FF\s]+$/u);
    });

    test('exception brate -> tebra', () => {
      const exc = new Satrovacki({ exceptions: { brate: 'tebra' } });
      expect(exc.encodeWord('brate')).toBe('tebra');
    });

    test('exception case-insensitive key', () => {
      // The exception is registered in lowercase but looked up with an uppercase word.
      const exc = new Satrovacki({ exceptions: { brate: 'tebra' } });
      expect(exc.encodeWord('BRATE')).toBe('TEBRA');
    });

    test('matori unchanged (exception)', () => {
      const exc = new Satrovacki({ exceptions: { matori: 'matori' } });
      expect(exc.encodeWord('matori')).toBe('matori');
    });

    test('syllabic r - no vowels fallback', () => {
      // "brk" - all consonants except r which is syllabic
      const result = s.encodeWord('brk');
      expect(typeof result).toBe('string');
      expect(result.length).toBe(3);
    });
  });

  describe('decode', () => {
    test('decode zenba -> bazen', () => {
      expect(s.decodeWord('zenba')).toBe('bazen');
    });

    test('decode Gradbeo -> Beograd', () => {
      expect(s.decodeWord('Gradbeo')).toBe('Beograd');
    });

    test('roundtrip', () => {
      const words = ['bazen', 'zemlja', 'srbija', 'kafa', 'beograd'];
      for (const w of words) {
        expect(s.decodeWord(s.encodeWord(w))).toBe(w);
      }
    });

    test('cyrillic roundtrip', () => {
      const encoded = s.encodeWord('Земун');
      expect(s.decodeWord(encoded)).toBe('Земун');
    });
  });

  describe('canDecodeWord', () => {
    test('encoded word can be decoded', () => {
      expect(s.canDecodeWord('zenba')).toBe(true);
    });

    test('short word cannot be decoded', () => {
      expect(s.canDecodeWord('na')).toBe(false);
    });
  });

  describe('options', () => {
    test('custom minWordLength', () => {
      const s4 = new Satrovacki({ minWordLength: 4 });
      expect(s4.encodeWord('pas')).toBe('pas'); // 3 < 4
    });
  });
});
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { encodeText, detectMode, detectLeetBase, _deleetTextBasic } from '../src/skrit';
|
|
2
|
+
import { Satrovacki } from '../src/satrovacki';
|
|
3
|
+
import { Utrovacki } from '../src/utrovacki';
|
|
4
|
+
|
|
5
|
+
// Jest suite for the routing helpers exported by '../src/skrit': mode
// detection, the encodeText entry point in each mode, the basic de-leet
// helper and leet-base detection.
describe('Skrit Router', () => {
  describe('detectMode', () => {
    test('plain text detected as satro', () => {
      const mode = detectMode('bazen voda');
      expect(mode).toBe('satro');
    });

    test('utro encoded text detected as utro', () => {
      // Encode some utro words first
      const utroText = new Utrovacki().encode('bazen zemlja kafa');
      expect(detectMode(utroText)).toBe('utro');
    });
  });

  describe('encodeText', () => {
    test('satro mode', () => {
      expect(encodeText('bazen', { mode: 'satro' })).toBe('zenba');
    });

    test('utro mode', () => {
      expect(encodeText('bazen', { mode: 'utro' })).toBe('uzenzabanje');
    });

    test('auto mode on plain text encodes as satro', () => {
      // On plain text that isn't encoded, should encode to satro
      expect(encodeText('bazen', { mode: 'auto' })).toBe('zenba');
    });

    test('auto mode decodes utro', () => {
      const utroWord = new Utrovacki().encodeWord('bazen');
      const decoded = encodeText(utroWord, { mode: 'auto' });
      expect(decoded).toBe('bazen');
    });

    test('auto mode decodes satro', () => {
      // detectFrom is passed explicitly here, unlike in the utro case above.
      const satroWord = new Satrovacki().encodeWord('bazen');
      const decoded = encodeText(satroWord, { mode: 'auto', detectFrom: 'satro' });
      expect(decoded).toBe('bazen');
    });

    test('leet mode', () => {
      const options = { mode: 'leet', leetDensity: 1.0 };
      expect(encodeText('bazen', options)).not.toBe('bazen');
    });
  });

  describe('_deleetTextBasic', () => {
    test('reverses basic leet', () => {
      const plain = _deleetTextBasic('h3ll0');
      expect(plain).toBe('hello');
    });

    test('4 -> a', () => {
      const plain = _deleetTextBasic('4');
      expect(plain).toBe('a');
    });

    test('00 -> u', () => {
      const plain = _deleetTextBasic('00');
      expect(plain).toBe('u');
    });

    test('non-leet chars unchanged', () => {
      const plain = _deleetTextBasic('xyz');
      expect(plain).toBe('xyz');
    });
  });

  describe('detectLeetBase', () => {
    test('returns satro or utro', () => {
      // Only membership in the two known bases is asserted, not which one.
      const allowedBases = ['satro', 'utro'];
      expect(allowedBases).toContain(detectLeetBase('zenba'));
    });
  });
});
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { cyrillicToLatin, latinToCyrillic, containsCyrillic } from '../src/core/transliteration';
|
|
2
|
+
|
|
3
|
+
// Jest suite for the script-conversion helpers exported by
// '../src/core/transliteration': Cyrillic -> Latin, Latin -> Cyrillic
// (including its two optional arguments) and Cyrillic detection.
describe('Transliteration', () => {
  describe('cyrillicToLatin', () => {
    test('basic cyrillic to latin', () => {
      expect(cyrillicToLatin('земун')).toBe('zemun');
    });

    test('uppercase cyrillic', () => {
      expect(cyrillicToLatin('ЗЕМУН')).toBe('ZEMUN');
    });

    test('digraphs', () => {
      // Single Cyrillic letters that map to two-letter Latin sequences.
      expect(cyrillicToLatin('љ')).toBe('lj');
      expect(cyrillicToLatin('њ')).toBe('nj');
      expect(cyrillicToLatin('џ')).toBe('dž');
    });

    test('mixed text passes through non-cyrillic', () => {
      expect(cyrillicToLatin('abc')).toBe('abc');
      // Also exercise a genuinely mixed input, as the title promises: the
      // Latin prefix must survive while the Cyrillic tail is converted.
      expect(cyrillicToLatin('zemун')).toBe('zemun');
    });
  });

  describe('latinToCyrillic', () => {
    test('basic latin to cyrillic', () => {
      expect(latinToCyrillic('zemun')).toBe('земун');
    });

    test('digraphs', () => {
      expect(latinToCyrillic('lj')).toBe('љ');
      expect(latinToCyrillic('nj')).toBe('њ');
    });

    test('plain c -> ц by default', () => {
      expect(latinToCyrillic('c')).toBe('ц');
    });

    test('plain c -> ч with plainCTarget=ч', () => {
      // The third positional argument selects the target for a plain 'c'.
      expect(latinToCyrillic('c', false, 'ч')).toBe('ч');
    });

    test('soft tj -> ć when enabled', () => {
      // The second positional argument switches the 'tj' handling on.
      expect(latinToCyrillic('tj', true)).toBe('ћ');
    });

    test('tj stays as t+j when disabled', () => {
      expect(latinToCyrillic('tj', false)).toBe('тј');
    });
  });

  describe('containsCyrillic', () => {
    test('cyrillic text detected', () => {
      expect(containsCyrillic('земун')).toBe(true);
    });

    test('latin text not detected', () => {
      expect(containsCyrillic('zemun')).toBe(false);
    });

    test('mixed text detected', () => {
      expect(containsCyrillic('zemун')).toBe(true);
    });
  });
});
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { Utrovacki } from '../src/utrovacki';
|
|
2
|
+
|
|
3
|
+
// Jest suite for the Utrovacki class from '../src/utrovacki': encoding with
// default and custom affixes, decoding, round-trips, canDecodeWord and the
// _splitEncodedParts helper.
describe('Utrovacki', () => {
  // Default-configured instance shared by all tests that need no options.
  const u = new Utrovacki();

  describe('encode', () => {
    test('bazen -> uzenzabanje', () => {
      const encoded = u.encodeWord('bazen');
      expect(encoded).toBe('uzenzabanje');
    });

    test('title case', () => {
      // Zemun: z=consonant, e=vowel at idx1, splitIdx=2 (after 'ze'), part2='mun'
      // encoded = u + mun + za + ze + nje = 'umunzazenje' -> title -> 'Umunzazenje'
      const encoded = u.encodeWord('Zemun');
      expect(encoded).toBe('Umunzazenje');
    });

    test('short word unchanged', () => {
      const encoded = u.encodeWord('na');
      expect(encoded).toBe('na');
    });

    test('cyrillic', () => {
      // Output must consist solely of characters from U+0400-U+04FF.
      expect(u.encodeWord('Земун')).toMatch(/^[\u0400-\u04FF]+$/u);
    });

    test('custom affixes', () => {
      const affixes = { prefix: 'ku', infix: 'ma', suffix: 'la' };
      const encoded = new Utrovacki(affixes).encodeWord('bazen');
      expect(encoded).toContain('ku');
      expect(encoded).toContain('ma');
      expect(encoded).toContain('la');
    });

    test('sentence', () => {
      expect(u.encode('bazen voda')).toContain('nje');
    });
  });

  describe('decode', () => {
    test('decode uzenzabanje -> bazen', () => {
      const decoded = u.decodeWord('uzenzabanje');
      expect(decoded).toBe('bazen');
    });

    test('roundtrip', () => {
      ['bazen', 'zemlja', 'kafa'].forEach((word) => {
        expect(u.decodeWord(u.encodeWord(word))).toBe(word);
      });
    });

    test('cyrillic roundtrip', () => {
      const original = 'земун';
      expect(u.decodeWord(u.encodeWord(original))).toBe('земун');
    });
  });

  describe('canDecodeWord', () => {
    test('utro word detectable', () => {
      const decodable = u.canDecodeWord('uzenzabanje');
      expect(decodable).toBe(true);
    });

    test('plain word not utro', () => {
      const decodable = u.canDecodeWord('bazen');
      expect(decodable).toBe(false);
    });

    test('short word false', () => {
      const decodable = u.canDecodeWord('uz');
      expect(decodable).toBe(false);
    });
  });

  describe('_splitEncodedParts', () => {
    test('splits correctly', () => {
      // The expected pair is the two halves of the original word 'bazen'.
      expect(u._splitEncodedParts('uzenzabanje')).toEqual(['ba', 'zen']);
    });

    test('invalid returns null', () => {
      const parts = u._splitEncodedParts('bazen');
      expect(parts).toBeNull();
    });
  });
});
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2020",
|
|
4
|
+
"module": "commonjs",
|
|
5
|
+
"lib": ["ES2020"],
|
|
6
|
+
"outDir": "./dist",
|
|
7
|
+
"rootDir": "./src",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"declaration": true,
|
|
11
|
+
"declarationMap": true,
|
|
12
|
+
"sourceMap": true,
|
|
13
|
+
"skipLibCheck": true
|
|
14
|
+
},
|
|
15
|
+
"include": ["src/**/*"],
|
|
16
|
+
"exclude": ["node_modules", "dist", "tests"]
|
|
17
|
+
}
|