braille-codec 0.0.1-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +80 -0
- package/LICENSE +201 -0
- package/README.md +10 -0
- package/bin/braille-decode +66 -0
- package/dist/decoder/constants/char_english.d.ts +1 -0
- package/dist/decoder/constants/char_shortcut.d.ts +1 -0
- package/dist/decoder/constants/choseong.d.ts +1 -0
- package/dist/decoder/constants/index.d.ts +10 -0
- package/dist/decoder/constants/indicators.d.ts +6 -0
- package/dist/decoder/constants/jongseong.d.ts +1 -0
- package/dist/decoder/constants/jungsong.d.ts +1 -0
- package/dist/decoder/constants/number.d.ts +1 -0
- package/dist/decoder/constants/symbol.d.ts +1 -0
- package/dist/decoder/constants/utils.d.ts +3 -0
- package/dist/decoder/index.d.ts +51 -0
- package/dist/decoder/index.test.d.ts +1 -0
- package/dist/index.cjs +770 -0
- package/dist/index.d.ts +53 -0
- package/dist/index.mjs +768 -0
- package/package.json +38 -0
- package/rollup.config.js +18 -0
- package/src/decoder/constants/char_english.ts +30 -0
- package/src/decoder/constants/char_shortcut.ts +33 -0
- package/src/decoder/constants/choseong.ts +18 -0
- package/src/decoder/constants/index.ts +110 -0
- package/src/decoder/constants/indicators.ts +8 -0
- package/src/decoder/constants/jongseong.ts +19 -0
- package/src/decoder/constants/jungsong.ts +25 -0
- package/src/decoder/constants/number.ts +14 -0
- package/src/decoder/constants/symbol.ts +34 -0
- package/src/decoder/constants/utils.ts +14 -0
- package/src/decoder/index.test.ts +77 -0
- package/src/decoder/index.ts +527 -0
- package/src/index.ts +3 -0
- package/tsconfig.json +16 -0
- package/vitest.config.ts +8 -0
|
@@ -0,0 +1,527 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BRAILLE_UNICODE_START,
|
|
3
|
+
ASCII_BRAILLE_MAP,
|
|
4
|
+
KOREAN_CHOSEONG,
|
|
5
|
+
KOREAN_JUNGSEONG,
|
|
6
|
+
KOREAN_JONGSEONG,
|
|
7
|
+
KOREAN_SHORTCUTS,
|
|
8
|
+
ENGLISH_ALPHABET,
|
|
9
|
+
NUMBERS,
|
|
10
|
+
SYMBOLS,
|
|
11
|
+
NUMBER_INDICATOR,
|
|
12
|
+
ENGLISH_INDICATOR,
|
|
13
|
+
ENGLISH_TERMINATOR,
|
|
14
|
+
UPPERCASE_INDICATOR,
|
|
15
|
+
KOREAN_PART_INDICATOR,
|
|
16
|
+
KOREAN_CONSONANT_INDICATOR,
|
|
17
|
+
} from './constants';
|
|
18
|
+
|
|
19
|
+
/** A cell sequence taken from one of the constant tables, filed under its first cell. */
interface PatternEntry {
  /** Text the sequence decodes to. */
  text: string;
  /** Number of braille cells in the sequence. */
  len: number;
  /** Comma-joined cell values exactly as written in the source table. */
  key: string;
}

/** Outcome of a successful pattern match: the decoded text and the cells it consumed. */
interface PatternMatch {
  text: string;
  len: number;
}

/**
 * Braille Decoder
 * Supports English, Special Characters, and Korean.
 * Reference: braillify/libs/braillify/src
 *
 * Cells are handled as integers: the offset of a Unicode braille character
 * from BRAILLE_UNICODE_START. Only offsets 0x00-0x3f are treated as braille;
 * every other character is copied to the output unchanged.
 */
export class Decoder {
  // Pre-computed lookup tables, keyed by the first cell (as a decimal string)
  // of each pattern. Each bucket is ordered longest pattern first.
  private jungseongMap: Map<string, PatternEntry[]> = new Map();
  private shortcutMap: Map<string, PatternEntry[]> = new Map();
  private symbolMap: Map<string, PatternEntry[]> = new Map();

  // Jamo in the order used by the precomposed Hangul syllable formula.
  private readonly CHOSEONG = 'ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ';
  private readonly JUNGSEONG = 'ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ';
  private readonly JONGSEONG = ['', 'ㄱ', 'ㄲ', 'ㄳ', 'ㄴ', 'ㄵ', 'ㄶ', 'ㄷ', 'ㄹ', 'ㄺ', 'ㄻ', 'ㄼ', 'ㄽ', 'ㄾ', 'ㄿ', 'ㅀ', 'ㅁ', 'ㅂ', 'ㅄ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅊ', 'ㅋ', 'ㅌ', 'ㅍ', 'ㅎ'];

  // Single-cell shortcut syllables that are re-read as a bare choseong when a
  // jungseong or another shortcut follows them.
  private static readonly CHOSEONG_LIKE_SHORTCUTS: ReadonlySet<string> = new Set([
    '가', '나', '다', '마', '바', '사', '자', '카', '타', '파', '하',
  ]);

  // Jamo -> index within the strings/array above.
  private choMap: Record<string, number> = {};
  private jungMap: Record<string, number> = {};
  private jongMap: Record<string, number> = {};

  constructor() {
    this.initializeLookupTables();
    this.CHOSEONG.split('').forEach((jamo, idx) => {
      this.choMap[jamo] = idx;
    });
    this.JUNGSEONG.split('').forEach((jamo, idx) => {
      this.jungMap[jamo] = idx;
    });
    this.JONGSEONG.forEach((jamo, idx) => {
      this.jongMap[jamo] = idx;
    });
  }

  /**
   * Initialize lookup tables from constants for efficient decoding
   */
  private initializeLookupTables(): void {
    this.jungseongMap = this.buildLookup(Object.entries(KOREAN_JUNGSEONG));
    this.shortcutMap = this.buildLookup(Object.entries(KOREAN_SHORTCUTS));
    this.symbolMap = this.buildLookup(Object.entries(SYMBOLS));
  }

  /**
   * Groups `[key, text]` table entries by the first cell of `key`.
   *
   * @param entries Pairs whose key is a comma-separated list of cell values.
   * @returns Map from first cell (decimal string) to its entries, each bucket
   *          sorted by descending length so longer patterns are tried first.
   */
  private buildLookup(entries: ReadonlyArray<readonly [string, string]>): Map<string, PatternEntry[]> {
    const lookup = new Map<string, PatternEntry[]>();
    for (const [key, text] of entries) {
      const cells = key.split(',').map(Number);
      const firstCell = cells[0].toString();
      let bucket = lookup.get(firstCell);
      if (!bucket) {
        bucket = [];
        lookup.set(firstCell, bucket);
      }
      bucket.push({ text, len: cells.length, key });
    }
    for (const bucket of lookup.values()) {
      bucket.sort((a, b) => b.len - a.len);
    }
    return lookup;
  }

  /**
   * Converts ASCII Braille (BRL) to Unicode Braille.
   * Example: asciiBrailleToUnicode('abcd') -> '⠁⠃⠉⠙'
   *
   * Characters that have no entry in ASCII_BRAILLE_MAP are kept as they are.
   */
  public asciiBrailleToUnicode(input: string): string {
    return input
      .split('')
      .map((char) => {
        const dotPattern = ASCII_BRAILLE_MAP[char];
        if (dotPattern === undefined) return char;
        return String.fromCharCode(BRAILLE_UNICODE_START + dotPattern);
      })
      .join('');
  }

  /**
   * Translates Unicode Braille to Text.
   * Supports Korean, English, Numbers, and Symbols.
   *
   * The input is split on '\n' and each line is decoded independently, so
   * English/number mode never carries over from one line to the next.
   */
  public translateToText(input: string): string {
    return input
      .split('\n')
      .map((line) => this.translateToTextOneLine(line))
      .join('\n');
  }

  /**
   * Compose Korean syllable from jamo
   *
   * @param cho  Initial consonant (compatibility jamo).
   * @param jung Vowel (compatibility jamo).
   * @param jong Optional final consonant; an unrecognised value is treated as "none".
   * @returns The precomposed syllable, or the plain concatenation of the
   *          arguments when `cho` or `jung` is not a known jamo.
   */
  private composeKoreanSyllable(cho: string, jung: string, jong: string = ''): string {
    const choIdx = this.choMap[cho];
    const jungIdx = this.jungMap[jung];
    const jongIdx = this.jongMap[jong] ?? 0;
    if (choIdx === undefined || jungIdx === undefined) {
      return cho + jung + jong;
    }
    return String.fromCharCode(0xAC00 + (choIdx * 21 + jungIdx) * 28 + jongIdx);
  }

  /**
   * Decompose Korean syllable to jamo
   *
   * Only the first UTF-16 code unit of `syllable` is inspected.
   *
   * @returns `[choseong, jungseong, jongseong]` (jongseong is '' when absent),
   *          or null when that code unit is outside U+AC00..U+D7A3.
   */
  private decomposeSyllable(syllable: string): [string, string, string] | null {
    const code = syllable.charCodeAt(0);
    if (!(code >= 0xAC00 && code <= 0xD7A3)) {
      return null;
    }
    const offset = code - 0xAC00;
    const choIdx = Math.floor(offset / (21 * 28));
    const jungIdx = Math.floor((offset % (21 * 28)) / 28);
    const jongIdx = offset % 28;
    return [this.CHOSEONG[choIdx], this.JUNGSEONG[jungIdx], this.JONGSEONG[jongIdx]];
  }

  /**
   * Add jongseong to existing syllable
   *
   * The final consonant of the LAST character of `syllable` is replaced by
   * `jong`; any preceding characters are kept untouched, so multi-syllable
   * text is not truncated. When the last character is not a precomposed
   * Hangul syllable, `jong` is simply appended.
   */
  private addJongseongToSyllable(syllable: string, jong: string): string {
    const head = syllable.slice(0, -1);
    const last = syllable.slice(-1);
    const decomposed = this.decomposeSyllable(last);
    if (decomposed) {
      const [cho, jung] = decomposed;
      return head + this.composeKoreanSyllable(cho, jung, jong);
    }
    return syllable + jong;
  }

  /**
   * Translates Unicode Braille to Text (single line).
   * Supports Korean, English, Numbers, and Symbols.
   *
   * The decoder is a single left-to-right pass with three pieces of state:
   * English mode, number mode, and a pending Korean choseong/jungseong pair
   * that is emitted as soon as the syllable is known to be complete.
   *
   * @param input Text containing Unicode braille cells; any other character
   *              is passed through unchanged and ends number mode.
   * @returns The decoded text.
   */
  public translateToTextOneLine(input: string): string {
    // Cell value per UTF-16 code unit: 0..0x3f for braille, 255 for '\n',
    // -1 for anything that is not a (6-dot) braille character.
    const dots = input.split('').map((char) => {
      const code = char.charCodeAt(0);
      if (code >= BRAILLE_UNICODE_START && code <= BRAILLE_UNICODE_START + 0x3f) {
        return code - BRAILLE_UNICODE_START;
      }
      if (char === '\n') return 255;
      return -1; // Not a braille character
    });

    let result = '';
    let i = 0;
    let isEnglishMode = false;
    let isNumberMode = false;
    let pendingKoreanCho = ''; // Pending Korean choseong
    let pendingKoreanJung = ''; // Pending Korean jungseong

    // Emits whatever Korean jamo are pending and clears them. A lone
    // jungseong is completed with the silent initial 'ㅇ'.
    const flushPendingKorean = () => {
      if (pendingKoreanCho && pendingKoreanJung) {
        result += this.composeKoreanSyllable(pendingKoreanCho, pendingKoreanJung);
        pendingKoreanCho = '';
        pendingKoreanJung = '';
      } else if (pendingKoreanCho) {
        result += pendingKoreanCho;
        pendingKoreanCho = '';
      } else if (pendingKoreanJung) {
        result += this.composeKoreanSyllable('ㅇ', pendingKoreanJung);
        pendingKoreanJung = '';
      }
    };

    while (i < dots.length) {
      const dot = dots[i];

      // Non-braille character: copy through.
      if (dot === -1) {
        flushPendingKorean();
        result += input[i];
        isNumberMode = false;
        i++;
        continue;
      }

      if (dot === 255) {
        flushPendingKorean();
        result += '\n';
        isNumberMode = false;
        i++;
        continue;
      }

      // Indicators
      if (dot === NUMBER_INDICATOR) {
        flushPendingKorean();
        isNumberMode = true;
        i++;
        continue;
      }

      if (dot === ENGLISH_INDICATOR) {
        flushPendingKorean();
        isEnglishMode = true;
        i++;
        continue;
      }

      if (dot === ENGLISH_TERMINATOR && isEnglishMode) {
        isEnglishMode = false;
        i++;
        continue;
      }

      if (dot === 0) {
        // Space
        flushPendingKorean();
        // Check if we should skip space after number mode before Korean text (not English)
        if (isNumberMode && i + 1 < dots.length) {
          const nextDot = dots[i + 1];
          // Check if next is Korean (shortcut, choseong, jungseong, or jongseong)
          // But NOT English indicator
          const isNextKorean = (this.matchShortcut(dots, i + 1) !== null ||
            KOREAN_CHOSEONG[nextDot] !== undefined ||
            this.matchJungseong(dots, i + 1) !== null ||
            KOREAN_JONGSEONG[nextDot] !== undefined) &&
            nextDot !== ENGLISH_INDICATOR;
          if (isNextKorean) {
            // Skip space between number and Korean
            isNumberMode = false;
            i++;
            continue;
          }
        }
        result += ' ';
        isNumberMode = false;
        i++;
        continue;
      }

      // Number Mode
      if (isNumberMode) {
        if (NUMBERS[dot]) {
          result += NUMBERS[dot];
          i++;
          continue;
        } else if (dot === 2) { // ⠂ Comma in number mode
          result += ',';
          i++;
          continue;
        } else if (dot === 50) { // ⠲ Dot in number mode
          result += '.';
          i++;
          continue;
        } else if (dot === 36) { // ⠤ Hyphen in number mode
          result += '‐'; // U+2010
          i++;
          continue;
        } else if (dot === KOREAN_CONSONANT_INDICATOR) {
          // ⠸ in number mode starts asterisk sequence
          i++;
          // All following ⠢ are asterisks
          while (i < dots.length && dots[i] === 34) { // ⠢
            result += '∗'; // U+2217
            i++;
          }
          continue;
        } else if (dot === 7) { // ⠇ in number mode (end marker?)
          // Skipped without producing output.
          i++;
          continue;
        } else {
          // Anything else ends number mode and is decoded normally below.
          isNumberMode = false;
        }
      }

      // English Mode
      if (isEnglishMode) {
        if (dot === UPPERCASE_INDICATOR) {
          i++;
          // Check for double uppercase (word)
          if (i < dots.length && dots[i] === UPPERCASE_INDICATOR) {
            i++;
            // Word uppercase: runs until a space or the English terminator.
            // It also stops at pass-through characters so the main loop can
            // copy them instead of them being silently dropped here.
            while (
              i < dots.length &&
              dots[i] !== 0 &&
              dots[i] !== ENGLISH_TERMINATOR &&
              dots[i] !== -1 &&
              dots[i] !== 255
            ) {
              const wordLetter = ENGLISH_ALPHABET[dots[i]];
              if (wordLetter) {
                result += wordLetter.toUpperCase();
              } else {
                // Try symbols in English mode
                const wordSym = this.matchSymbol(dots, i);
                if (wordSym) {
                  result += wordSym.text;
                  i += wordSym.len - 1;
                }
              }
              i++;
            }
            continue;
          }

          // Single capital: applies to the next cell only.
          const capital = i < dots.length ? ENGLISH_ALPHABET[dots[i]] : undefined;
          if (capital) {
            result += capital.toUpperCase();
            i++;
          }
          // If no letter follows (including end of input) the indicator is
          // dropped and the next cell, if any, is decoded from scratch on the
          // next iteration. Falling through here would pair a stale `dot`
          // with the already-advanced `i`, or read past the end of `dots`.
          continue;
        }

        const letter = ENGLISH_ALPHABET[dot];
        if (letter) {
          result += letter;
          i++;
          continue;
        }
        // Not a letter: fall through to symbol/Korean handling.
      }

      // Symbols (Check multi-dot symbols first)
      // But check if this could be Korean choseong followed by jungseong or shortcut
      const sym = this.matchSymbol(dots, i);
      if (sym) {
        // Check if this dot is also a choseong and followed by jungseong or shortcut
        if (KOREAN_CHOSEONG[dot] && i + 1 < dots.length) {
          const nextJung = this.matchJungseong(dots, i + 1);
          const nextShortcut = this.matchShortcut(dots, i + 1);
          if (nextJung || nextShortcut) {
            // This is choseong, not symbol
            flushPendingKorean();
            pendingKoreanCho = KOREAN_CHOSEONG[dot];
            i++;
            continue;
          }
        }
        flushPendingKorean();
        result += sym.text;
        i += sym.len;
        continue;
      }

      // Korean Mode
      // 1. Shortcuts (Check multi-dot shortcuts first)
      const shortcut = this.matchShortcut(dots, i);
      if (shortcut) {
        // Check if there's a pending choseong (e.g., ㅅ + 옥 = 속, ㄴ + 영 = 녕)
        if (pendingKoreanCho) {
          // Decompose shortcut and combine with pending choseong
          const decomposed = this.decomposeSyllable(shortcut.text);
          if (decomposed && decomposed[0] === 'ㅇ') {
            // Replace ㅇ with pending choseong
            result += this.composeKoreanSyllable(pendingKoreanCho, decomposed[1], decomposed[2]);
            pendingKoreanCho = '';
            pendingKoreanJung = '';
            i += shortcut.len;
            continue;
          }
        }

        // Check if this is a single-char shortcut that could be choseong
        if (
          shortcut.len === 1 &&
          Decoder.CHOSEONG_LIKE_SHORTCUTS.has(shortcut.text) &&
          i + 1 < dots.length
        ) {
          // Check if next is a shortcut or jungseong (NOT jongseong)
          const nextShortcut = this.matchShortcut(dots, i + 1);
          const nextJung = this.matchJungseong(dots, i + 1);

          // If followed by shortcut or jungseong, interpret as choseong
          if ((nextShortcut || nextJung) && KOREAN_CHOSEONG[dot]) {
            flushPendingKorean();
            pendingKoreanCho = KOREAN_CHOSEONG[dot];
            i++;
            continue;
          }
        }

        // Shortcut is a complete syllable
        // Check if the cell right after the shortcut is a jongseong. The
        // look-ahead must skip the WHOLE shortcut, not just one cell, or a
        // multi-cell shortcut would have its own second cell read as a final.
        const afterShortcut = i + shortcut.len;
        if (afterShortcut < dots.length && KOREAN_JONGSEONG[dots[afterShortcut]]) {
          // Add jongseong to shortcut
          flushPendingKorean();
          result += this.addJongseongToSyllable(shortcut.text, KOREAN_JONGSEONG[dots[afterShortcut]]);
          i = afterShortcut + 1;
          continue;
        }

        flushPendingKorean();
        result += shortcut.text;
        i += shortcut.len;
        continue;
      }

      // 2. Choseong
      if (KOREAN_CHOSEONG[dot]) {
        flushPendingKorean();
        pendingKoreanCho = KOREAN_CHOSEONG[dot];
        i++;
        continue;
      }

      // 3. Jungseong
      const jung = this.matchJungseong(dots, i);
      if (jung) {
        if (pendingKoreanCho && !pendingKoreanJung) {
          // Choseong + Jungseong
          pendingKoreanJung = jung.text;
        } else {
          // Either a full cho+jung pair is already pending, or nothing is:
          // emit what is pending and start a new syllable with implicit ㅇ.
          flushPendingKorean();
          pendingKoreanCho = 'ㅇ';
          pendingKoreanJung = jung.text;
        }
        i += jung.len;
        continue;
      }

      // 4. Jongseong
      if (KOREAN_JONGSEONG[dot]) {
        if (pendingKoreanCho && pendingKoreanJung) {
          // Complete syllable with jongseong
          result += this.composeKoreanSyllable(pendingKoreanCho, pendingKoreanJung, KOREAN_JONGSEONG[dot]);
          pendingKoreanCho = '';
          pendingKoreanJung = '';
        } else {
          // Jongseong without pending syllable
          flushPendingKorean();
          result += KOREAN_JONGSEONG[dot];
        }
        i++;
        continue;
      }

      // 5. Korean Part/Consonant Indicators
      if (dot === KOREAN_PART_INDICATOR || dot === KOREAN_CONSONANT_INDICATOR) {
        flushPendingKorean();
        i++;
        if (i < dots.length) {
          const nextDot = dots[i];
          // Try to find in choseong or jongseong maps
          const jamo = KOREAN_CHOSEONG[nextDot] || KOREAN_JONGSEONG[nextDot];
          if (jamo) {
            result += jamo;
            i++;
            continue;
          }
        }
        // Indicator not followed by a jamo: drop the indicator only.
        continue;
      }

      // Unknown pattern: skipped without output.
      flushPendingKorean();
      i++;
    }

    flushPendingKorean();
    return result;
  }

  /**
   * Finds the longest table pattern that starts at `dots[index]`.
   *
   * @returns The match, or null when `index` is out of range or nothing matches.
   */
  private matchPattern(lookup: Map<string, PatternEntry[]>, dots: number[], index: number): PatternMatch | null {
    if (index < 0 || index >= dots.length) return null;

    const candidates = lookup.get(dots[index].toString());
    if (!candidates) return null;

    // Buckets are sorted longest first, so the first hit is the longest match.
    for (const candidate of candidates) {
      const end = index + candidate.len;
      if (end <= dots.length && dots.slice(index, end).join(',') === candidate.key) {
        return { text: candidate.text, len: candidate.len };
      }
    }
    return null;
  }

  /** Matches a KOREAN_JUNGSEONG pattern starting at `dots[index]`. */
  private matchJungseong(dots: number[], index: number): { text: string; len: number } | null {
    return this.matchPattern(this.jungseongMap, dots, index);
  }

  /** Matches a KOREAN_SHORTCUTS pattern starting at `dots[index]`. */
  private matchShortcut(dots: number[], index: number): { text: string; len: number } | null {
    return this.matchPattern(this.shortcutMap, dots, index);
  }

  /** Matches a SYMBOLS pattern starting at `dots[index]`. */
  private matchSymbol(dots: number[], index: number): { text: string; len: number } | null {
    return this.matchPattern(this.symbolMap, dots, index);
  }
}
|
package/src/index.ts
ADDED
package/tsconfig.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ESNext",
|
|
4
|
+
"module": "ESNext",
|
|
5
|
+
"lib": ["ESNext", "DOM"],
|
|
6
|
+
"moduleResolution": "bundler",
|
|
7
|
+
"esModuleInterop": true,
|
|
8
|
+
"strict": true,
|
|
9
|
+
"skipLibCheck": true,
|
|
10
|
+
"declaration": true,
|
|
11
|
+
"outDir": "./dist",
|
|
12
|
+
"rootDir": "./src"
|
|
13
|
+
},
|
|
14
|
+
"include": ["src/**/*.ts"],
|
|
15
|
+
"exclude": ["node_modules", "dist"]
|
|
16
|
+
}
|