konglish 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +522 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -6
- package/dist/index.d.ts +2 -6
- package/dist/index.js +522 -37
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
/** Options accepted by `latinToHangul` and by the `Konglish` class. */
type LatinToHangulOptions = {
    /**
     * Word dictionary.
     * - key: Latin word (the whole input string may also be used as a key)
     * - value: candidate Hangul pronunciations (the first one is used)
     */
    dictionary?: Record<string, string[]>;
    /**
     * Number of hypotheses kept at each beam-search step.
     * Defaults to 8; values below 8 are raised to 8 by the implementation.
     */
    beamWidth?: number;
    /**
     * Whether a word missing from the pronunciation dictionary is spelled out
     * with letter-to-sound rules instead. Defaults to `true`.
     */
    enableFallbackPhones?: boolean;
};
|
|
9
6
|
/**
 * Converts Latin-script text to Hangul.
 *
 * @param input - Text to convert; characters outside `a-z`/`A-Z` are kept as-is.
 * @param options - Optional dictionary and search settings.
 * @returns The best Hangul rendering, or `input` unchanged when none is produced.
 */
declare function latinToHangul(input: string, options?: LatinToHangulOptions): string;
|
|
10
7
|
|
|
@@ -17,7 +14,6 @@ declare class Konglish {
|
|
|
17
14
|
private readonly defaultOptions;
|
|
18
15
|
constructor(defaultOptions?: LatinToHangulOptions);
|
|
19
16
|
latinToHangul(input: string, options?: LatinToHangulOptions): string;
|
|
20
|
-
latinToHangulAsync(input: string, options?: LatinToHangulOptions): Promise<string>;
|
|
21
17
|
}
|
|
22
18
|
|
|
23
19
|
/** Built-in dictionary: Latin word (looked up in lower case) to candidate Hangul spellings. */
declare const customDictionary: Record<string, string[]>;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,10 +1,7 @@
|
|
|
1
1
|
/** Options accepted by `latinToHangul` and by the `Konglish` class. */
type LatinToHangulOptions = {
    /**
     * Word dictionary.
     * - key: Latin word (the whole input string may also be used as a key)
     * - value: candidate Hangul pronunciations (the first one is used)
     */
    dictionary?: Record<string, string[]>;
    /**
     * Number of hypotheses kept at each beam-search step.
     * Defaults to 8; values below 8 are raised to 8 by the implementation.
     */
    beamWidth?: number;
    /**
     * Whether a word missing from the pronunciation dictionary is spelled out
     * with letter-to-sound rules instead. Defaults to `true`.
     */
    enableFallbackPhones?: boolean;
};
|
|
9
6
|
/**
 * Converts Latin-script text to Hangul.
 *
 * @param input - Text to convert; characters outside `a-z`/`A-Z` are kept as-is.
 * @param options - Optional dictionary and search settings.
 * @returns The best Hangul rendering, or `input` unchanged when none is produced.
 */
declare function latinToHangul(input: string, options?: LatinToHangulOptions): string;
|
|
10
7
|
|
|
@@ -17,7 +14,6 @@ declare class Konglish {
|
|
|
17
14
|
private readonly defaultOptions;
|
|
18
15
|
constructor(defaultOptions?: LatinToHangulOptions);
|
|
19
16
|
latinToHangul(input: string, options?: LatinToHangulOptions): string;
|
|
20
|
-
latinToHangulAsync(input: string, options?: LatinToHangulOptions): Promise<string>;
|
|
21
17
|
}
|
|
22
18
|
|
|
23
19
|
/** Built-in dictionary: Latin word (looked up in lower case) to candidate Hangul spellings. */
declare const customDictionary: Record<string, string[]>;
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import { dictionary } from 'cmu-pronouncing-dictionary';
|
|
2
|
+
|
|
3
|
+
// src/pron/latinTokorea.ts
|
|
4
|
+
|
|
1
5
|
// src/pron/dictionary.ts
|
|
2
6
|
var customDictionary = {
|
|
3
7
|
"a": ["\uC5B4"],
|
|
@@ -24159,7 +24163,20 @@ var customDictionary = {
|
|
|
24159
24163
|
};
|
|
24160
24164
|
|
|
24161
24165
|
// src/pron/latinTokorea.ts
|
|
24162
|
-
var
|
|
24166
|
+
// Default beam width; resolveOptions also uses it as the minimum width it will return.
var DEFAULT_BEAM_WIDTH = 8;
|
|
24167
|
+
/**
 * Converts Latin-script text to Hangul.
 *
 * The whole trimmed, lower-cased input is first looked up in the caller's
 * dictionary and then in the built-in one; a hit returns its first entry.
 * Otherwise the input is tokenized and transliterated word by word.
 *
 * @param {string} input Text to convert.
 * @param {{dictionary?: Record<string, string[]>, beamWidth?: number, enableFallbackPhones?: boolean}} [options]
 * @returns {string} Best rendering, or `input` unchanged when no candidate exists.
 */
function latinToHangul(input, options) {
  const fullKey = input.trim().toLowerCase();

  // Own properties only: a bare `dict[key]` also finds inherited members such
  // as `constructor`, whose `.length` is non-zero while `[0]` is undefined.
  const firstOwnEntry = (dict) => {
    if (!dict || !Object.prototype.hasOwnProperty.call(dict, fullKey)) {
      return undefined;
    }
    const entry = dict[fullKey];
    return Array.isArray(entry) && entry.length > 0 ? entry[0] : undefined;
  };

  const userFullHit = firstOwnEntry(options?.dictionary);
  if (userFullHit !== undefined) {
    return userFullHit;
  }
  const builtInFullHit = firstOwnEntry(customDictionary);
  if (builtInFullHit !== undefined) {
    return builtInFullHit;
  }

  // Only the single best candidate is needed, hence limit = 1.
  const resolved = resolveOptions(options, 1);
  const tokens = tokenizePreservingSpecialChars(input);
  const [best] = transliterateTokens(tokens, resolved);
  return best ?? input;
}
|
|
24163
24180
|
function tokenizePreservingSpecialChars(input) {
|
|
24164
24181
|
const tokens = [];
|
|
24165
24182
|
const regex = /([a-zA-Z]+)|([^a-zA-Z]+)/g;
|
|
@@ -24174,47 +24191,521 @@ function tokenizePreservingSpecialChars(input) {
|
|
|
24174
24191
|
}
|
|
24175
24192
|
return tokens;
|
|
24176
24193
|
}
|
|
24177
|
-
function
|
|
24178
|
-
const
|
|
24179
|
-
|
|
24180
|
-
|
|
24181
|
-
|
|
24182
|
-
|
|
24183
|
-
|
|
24184
|
-
|
|
24185
|
-
|
|
24194
|
+
/**
 * Normalizes caller options into the internal search settings.
 *
 * @param {{dictionary?: Record<string, string[]>, beamWidth?: number, enableFallbackPhones?: boolean}|undefined} options
 * @param {number} limit Maximum number of final candidates wanted.
 * @returns {{limit: number, beamWidth: number, enableFallbackPhones: boolean, dictionaryOverride: (Record<string, string[]>|undefined)}}
 */
function resolveOptions(options, limit) {
  // Math.max would propagate NaN, and a NaN width disables every
  // `length >= beamWidth` cut-off, so an unusable value falls back to the default.
  const requested = Number(options?.beamWidth ?? DEFAULT_BEAM_WIDTH);
  const desiredBeam = Number.isNaN(requested) ? DEFAULT_BEAM_WIDTH : requested;
  return {
    limit,
    // Never narrower than the default width or the number of results requested.
    beamWidth: Math.max(desiredBeam, limit, DEFAULT_BEAM_WIDTH),
    enableFallbackPhones: options?.enableFallbackPhones ?? true,
    dictionaryOverride: options?.dictionary
  };
}
|
|
24203
|
+
/**
 * Transliterates the word tokens of a token list and re-inserts the results
 * between the untouched non-word tokens.
 *
 * @param {{type: string, text: string}[]} tokens
 * @param {object} options Resolved options, passed through to buildWordCandidates.
 * @returns {string[]} One rebuilt string per candidate; empty when there are
 *   no word tokens or no candidates.
 */
function transliterateTokens(tokens, options) {
  const wordTexts = [];
  for (const token of tokens) {
    if (token.type === "word") {
      wordTexts.push(token.text);
    }
  }
  if (wordTexts.length === 0) {
    return [];
  }

  const rebuilt = [];
  for (const candidate of buildWordCandidates(wordTexts.join(" "), options)) {
    rebuilt.push(applyCandidateToTokens(tokens, candidate));
  }
  return rebuilt;
}
|
|
24213
|
+
/**
 * Rebuilds a string from tokens, replacing the n-th word token with the n-th
 * space-separated piece of `candidate`.
 *
 * @param {{type: string, text: string}[]} tokens
 * @param {string} candidate Space-separated replacement words.
 * @returns {string} Word tokens with no matching piece keep their own text.
 */
function applyCandidateToTokens(tokens, candidate) {
  const replacements = candidate.split(" ");
  const pieces = [];
  let cursor = 0;
  for (const token of tokens) {
    if (token.type !== "word") {
      pieces.push(token.text);
      continue;
    }
    const replacement = replacements[cursor];
    cursor += 1;
    pieces.push(replacement ?? token.text);
  }
  return pieces.join("");
}
|
|
24225
|
+
/**
 * Produces ranked Hangul renderings for a space-separated run of words.
 *
 * Per-word candidates come from three sources, in priority order: dictionary
 * overrides (score from 0), CMU pronunciations (from 100), and letter-to-sound
 * fallback (from 200, only when the CMU lookup fails and fallback is enabled).
 * Lower scores rank higher. Words are joined by a beam search.
 *
 * A word with no usable candidate passes through untransliterated. Before,
 * it emptied the beam and discarded every other word's result, and an
 * empty-text candidate shifted later words onto the wrong tokens.
 *
 * @param {string} input Words separated by whitespace.
 * @param {{beamWidth: number, limit: number, enableFallbackPhones: boolean, dictionaryOverride?: Record<string, string[]>}} options
 * @returns {string[]} At most `limit` distinct candidates, best first.
 */
function buildWordCandidates(input, options) {
  const text = normalizeInput(input);
  if (!text) {
    return [];
  }
  const words = text.split(/\s+/).filter(Boolean);
  const { beamWidth, limit } = options;

  // Used to recover the original casing of a passed-through word; only
  // trusted when normalization kept the word count unchanged.
  const rawWords = String(input).trim().split(/\s+/);
  const rawAligned = rawWords.length === words.length;

  const ranked = (texts, base) => texts.map((s, i) => ({ text: s, score: base + i }));

  let beam = [{ text: "", score: 0 }];
  for (let wi = 0; wi < words.length; wi++) {
    const w = words[wi];
    const override = ranked(overrideCandidates(w, options.dictionaryOverride), 0);
    const phones = cmuLookupPhones(w);
    const dictCands = phones ? ranked(arpabetToKoCandidates(phones, beamWidth), 100) : [];
    const fb = phones || !options.enableFallbackPhones ? null : fallbackPhones(w);
    const fbCands = fb ? ranked(arpabetToKoCandidates(fb, beamWidth), 200) : [];

    // Blank candidates would disappear when the result is trimmed and split.
    const usable = [...override, ...dictCands, ...fbCands].filter(
      (c) => c.text.trim() !== ""
    );
    let wordBeam = takeTopK(dedupeByText(usable), beamWidth);
    if (wordBeam.length === 0) {
      const raw = rawAligned && rawWords[wi].toLowerCase() === w ? rawWords[wi] : w;
      wordBeam = [{ text: raw, score: 300 }];
    }

    beam = combineBeams(beam, wordBeam, beamWidth);
    if (wi !== words.length - 1) {
      beam = beam.map((x) => ({ text: `${x.text} `, score: x.score }));
    }
  }

  beam.sort((a, b) => a.score - b.score);
  const out = [];
  const seen = /* @__PURE__ */ new Set();
  for (const x of beam) {
    const s = x.text.trim();
    if (!s || seen.has(s)) {
      continue;
    }
    seen.add(s);
    out.push(s);
    if (out.length >= limit) {
      break;
    }
  }
  return out;
}
|
|
24191
|
-
function
|
|
24192
|
-
const key =
|
|
24193
|
-
const
|
|
24194
|
-
|
|
24269
|
+
/**
 * Returns dictionary spellings for one word: the caller's entry when it has
 * one, otherwise the built-in entry, otherwise an empty list.
 *
 * @param {string} word Looked up in lower case.
 * @param {Record<string, string[]>|undefined} overrides Caller dictionary.
 * @returns {string[]} Candidate spellings, each coerced to a string.
 */
function overrideCandidates(word, overrides) {
  const key = word.toLowerCase();

  // Own properties only: an inherited member such as `constructor` is a
  // function with a non-zero `.length`, and calling `.map` on it throws.
  const ownList = (dict) => {
    if (!dict || !Object.prototype.hasOwnProperty.call(dict, key)) {
      return null;
    }
    const entry = dict[key];
    return Array.isArray(entry) && entry.length > 0 ? entry : null;
  };

  const hit = ownList(overrides) ?? ownList(customDictionary);
  return hit ? hit.map(String) : [];
}
|
|
24196
|
-
function
|
|
24197
|
-
|
|
24279
|
+
/**
 * Looks a word up in the imported CMU pronouncing dictionary.
 *
 * The key is tried in lower case first and then in upper case, each with the
 * `(1)` and `(2)` alternate-pronunciation suffixes.
 * NOTE(review): the published package appears to key entries in lower case,
 * which the previous upper-case-only lookup would never match; confirm
 * against the package README.
 *
 * @param {string} word
 * @returns {string[]|null} Upper-case ARPABET phones with stress digits
 *   removed, or null when the word has no usable entry.
 */
function cmuLookupPhones(word) {
  if (!dictionary) {
    return null;
  }
  const lower = word.toLowerCase();
  const upper = word.toUpperCase();
  const bases = lower === upper ? [lower] : [lower, upper];

  let raw = null;
  for (const base of bases) {
    for (const key of [base, `${base}(1)`, `${base}(2)`]) {
      if (!Object.prototype.hasOwnProperty.call(dictionary, key)) {
        continue;
      }
      const value = dictionary[key];
      if (typeof value === "string" && value.trim() !== "") {
        raw = value;
        break;
      }
    }
    if (raw !== null) {
      break;
    }
  }
  if (raw === null) {
    return null;
  }

  // Phones are upper-cased because VOWELS and the consonant maps use upper-case symbols.
  const phones = raw
    .trim()
    .toUpperCase()
    .split(/\s+/)
    .map((p) => p.replace(/[0-9]/g, ""))
    .filter(Boolean);
  // An empty list is truthy and would suppress the letter-to-sound fallback.
  return phones.length > 0 ? phones : null;
}
|
|
24199
|
-
function
|
|
24200
|
-
const
|
|
24201
|
-
const
|
|
24202
|
-
|
|
24203
|
-
|
|
24204
|
-
|
|
24205
|
-
|
|
24206
|
-
|
|
24207
|
-
|
|
24208
|
-
|
|
24288
|
+
/**
 * Guesses ARPABET phones for a word from its spelling alone.
 *
 * A leading "pre" followed by a consonant letter becomes P R IY. After that
 * the word is scanned left to right: the two-letter groups listed below are
 * replaced as a unit, any other letter is mapped on its own, and characters
 * outside a-z are skipped.
 *
 * @param {string} word
 * @returns {string[]|null} Phones, or null when nothing could be mapped.
 */
function fallbackPhones(word) {
  const lowered = word.toLowerCase();
  const TWO_LETTER_GROUPS = [
    ["er", ["ER"]],
    ["ir", ["ER"]],
    ["ur", ["ER"]],
    ["ch", ["CH"]],
    ["sh", ["SH"]],
    ["th", ["TH"]],
    ["ng", ["NG"]],
    ["qu", ["K", "W"]]
  ];

  const result = [];
  let pos = 0;
  if (/^pre[bcdfghjklmnpqrstvwxyz]/.test(lowered)) {
    result.push("P", "R", "IY");
    pos = 3;
  }

  while (pos < lowered.length) {
    const group = TWO_LETTER_GROUPS.find(([letters]) => lowered.startsWith(letters, pos));
    if (group) {
      result.push(...group[1]);
      pos += 2;
      continue;
    }
    const letter = lowered[pos];
    if ("aeiouy".includes(letter)) {
      result.push(vowelToArpabet(letter));
    } else if (/[a-z]/.test(letter)) {
      result.push(consToArpabet(letter));
    }
    pos += 1;
  }

  return result.length ? result : null;
}
|
|
24347
|
+
/**
 * Maps a single vowel letter to an ARPABET vowel.
 *
 * @param {string} ch One of a, e, i, o, u, y.
 * @returns {string} The mapped phone; "AH" for any other input.
 */
function vowelToArpabet(ch) {
  const byLetter = new Map([
    ["a", "AE"],
    ["e", "EH"],
    ["i", "IY"],
    ["o", "OW"],
    ["u", "UW"],
    ["y", "IY"]
  ]);
  return byLetter.get(ch) ?? "AH";
}
|
|
24365
|
+
/**
 * Maps a single consonant letter to an ARPABET consonant.
 *
 * @param {string} ch A lower-case consonant letter.
 * @returns {string} The mapped phone; "HH" for any other input.
 */
function consToArpabet(ch) {
  const byLetter = new Map([
    ["b", "B"],
    ["c", "K"],
    ["d", "D"],
    ["f", "F"],
    ["g", "G"],
    ["h", "HH"],
    ["j", "JH"],
    ["k", "K"],
    ["l", "L"],
    ["m", "M"],
    ["n", "N"],
    ["p", "P"],
    ["q", "K"],
    ["r", "R"],
    ["s", "S"],
    ["t", "T"],
    ["v", "V"],
    ["w", "W"],
    ["x", "K"],
    ["z", "Z"]
  ]);
  return byLetter.get(ch) ?? "HH";
}
|
|
24411
|
+
// ARPABET symbols treated as vowels (syllable nuclei) by syllabifyArpabet.
var VOWELS = /* @__PURE__ */ new Set(
  "AA AE AH AO AW AY EH ER EY IH IY OW OY UH UW AX".split(" ")
);
|
|
24429
|
+
/**
 * Turns a phone sequence into ranked Hangul spellings.
 *
 * The phones are split into syllables, each syllable contributes its own
 * candidates, and a beam of width `limit` joins them left to right.
 *
 * @param {string[]} phones ARPABET phones without stress digits.
 * @param {number} limit Beam width and maximum number of results.
 * @returns {string[]} Distinct spellings, lowest score first.
 */
function arpabetToKoCandidates(phones, limit) {
  if (!phones.length) {
    return [];
  }

  const hypotheses = syllabifyArpabet(phones).reduce(
    (acc, syllable) => combineBeams(acc, buildKoSyllableCandidates(syllable, limit), limit),
    [{ text: "", score: 0 }]
  );
  hypotheses.sort((a, b) => a.score - b.score);

  const unique = [];
  const emitted = /* @__PURE__ */ new Set();
  for (const { text } of hypotheses) {
    if (emitted.has(text)) {
      continue;
    }
    emitted.add(text);
    unique.push(text);
    if (unique.length >= limit) {
      break;
    }
  }
  return unique;
}
|
|
24451
|
+
/**
 * Groups phones into syllables of the form { onset, vowel, coda }.
 *
 * Consonants before the first vowel form its onset; every later consonant is
 * first attached to the preceding vowel as coda. A second pass then moves the
 * last coda consonant of a syllable to the next syllable when that one has no
 * onset. Consonants with no vowel at all produce no syllable.
 *
 * @param {string[]} phones
 * @returns {{onset: string[], vowel: string, coda: string[]}[]}
 */
function syllabifyArpabet(phones) {
  const syllables = [];
  let pending = { onset: [], vowel: null, coda: [] };
  const commit = () => {
    if (pending.vowel) {
      syllables.push(pending);
    }
    pending = { onset: [], vowel: null, coda: [] };
  };

  for (const phone of phones) {
    if (!VOWELS.has(phone)) {
      (pending.vowel ? pending.coda : pending.onset).push(phone);
      continue;
    }
    if (pending.vowel) {
      commit();
    }
    pending.vowel = phone;
  }
  commit();

  for (let idx = 1; idx < syllables.length; idx++) {
    const previous = syllables[idx - 1];
    const current = syllables[idx];
    if (previous.coda.length === 0 || current.onset.length !== 0) {
      continue;
    }
    const shifted = previous.coda.pop();
    if (shifted) {
      current.onset.unshift(shifted);
    }
  }
  return syllables;
}
|
|
24216
|
-
|
|
24217
|
-
|
|
24487
|
+
/**
 * Builds scored Hangul renderings for one syllable.
 *
 * - Every onset consonant except the last becomes its own block with the
 *   filler vowel ㅡ (penalty 3).
 * - The last onset consonant, the vowel and the first coda consonant form the
 *   main block.
 * - Remaining coda consonants become their own ㅡ blocks (penalty 4).
 *
 * If the first coda consonant has no final-consonant form but does have a
 * real initial (SH, TH, DH), it is also written as its own ㅡ block. Before,
 * it was dropped, so F IY SH rendered as "피".
 *
 * @param {{onset: string[], vowel: string, coda: string[]}} syllable
 * @param {number} limit Beam width and maximum number of results.
 * @returns {{text: string, score: number}[]} Lowest score first.
 */
function buildKoSyllableCandidates({ onset, vowel, coda }, limit) {
  const FILLER_VOWEL = "\u3161";
  const SILENT_INITIAL = "\u3147";
  const standalone = (phone, penalty) =>
    consMap(phone).map((x) => ({
      text: composeHangul(x.cho, FILLER_VOWEL, ""),
      score: x.score + penalty
    }));

  let beam = [{ text: "", score: 0 }];
  for (const p of onset.slice(0, -1)) {
    beam = combineBeams(beam, standalone(p, 3), limit);
  }

  const mainOnset = onset.length === 0 ? null : onset[onset.length - 1];
  const onsetCands = mainOnset ? consMap(mainOnset) : [{ cho: SILENT_INITIAL, score: 0 }];
  const vowelCands = vowelMap(vowel);

  const codaFirst = coda.length ? coda[0] : null;
  const codaCands = codaFirst ? consToJongMap(codaFirst) : [{ jong: "", score: 0 }];
  // Phones that only map to the silent initial (W, Y, unknown symbols) stay
  // dropped, because a standalone block for them would add a spurious "으".
  const firstCodaLost =
    Boolean(codaFirst) &&
    codaCands.every((cd) => !cd.jong) &&
    consMap(codaFirst).some((x) => x.cho !== SILENT_INITIAL);
  const trailing = firstCodaLost ? coda : coda.slice(1);

  const main = [];
  for (const o of onsetCands) {
    for (const v of vowelCands) {
      for (const cd of codaCands) {
        // A vowel may supply its own final (ER adds ㄹ) when the coda has none.
        const jong = pickJong(cd.jong, v.addJong);
        main.push({
          text: composeHangul(o.cho, v.jung, jong),
          score: o.score + v.score + cd.score + (v.addJong ? 1 : 0)
        });
      }
    }
  }
  beam = combineBeams(beam, takeTopK(main, limit), limit);

  for (const extra of trailing) {
    beam = combineBeams(beam, standalone(extra, 4), limit);
  }
  return takeTopK(beam, limit);
}
|
|
24528
|
+
/**
 * Maps an ARPABET consonant to candidate Hangul initial consonants.
 *
 * Lower scores are preferred. `ZH` was missing and fell through to the
 * silent-ㅇ default; it now maps to ㅈ like `Z` and `JH`.
 *
 * @param {string} p ARPABET consonant symbol.
 * @returns {{cho: string, score: number}[]} A fresh array on every call;
 *   unknown symbols yield the silent initial ㅇ with score 5.
 */
function consMap(p) {
  const one = (cho, score = 0) => [{ cho, score }];
  const two = (first, second, secondScore) => [
    { cho: first, score: 0 },
    { cho: second, score: secondScore }
  ];
  switch (p) {
    case "P":
    case "F":
      return one("\u314D");
    case "B":
    case "V":
      return one("\u3142");
    case "T":
      return one("\u314C");
    case "D":
    case "DH":
      return one("\u3137");
    case "K":
      return two("\u314B", "\u3131", 1);
    case "G":
      return two("\u3131", "\u314B", 1);
    case "S":
    case "SH":
      return one("\u3145");
    case "Z":
    case "JH":
    case "ZH":
      return one("\u3148");
    case "CH":
      return one("\u314A");
    case "HH":
      return one("\u314E");
    case "M":
      return one("\u3141");
    case "N":
      return one("\u3134");
    case "NG":
      return one("\u3147");
    case "L":
    case "R":
      return one("\u3139");
    case "W":
    case "Y":
      return one("\u3147", 2);
    case "TH":
      return two("\u3145", "\u3137", 2);
    default:
      return one("\u3147", 5);
  }
}
|
|
24578
|
+
/**
 * Maps an ARPABET consonant to candidate Hangul final consonants.
 *
 * @param {string} p ARPABET consonant symbol.
 * @returns {{jong: string, score: number}[]} A single score-0 candidate; its
 *   `jong` is "" for symbols with no final-consonant form.
 */
function consToJongMap(p) {
  const finalsByPhone = [
    ["\u3142", ["P", "B", "F", "V"]],
    ["\u3137", ["T", "D", "S", "Z", "JH", "CH"]],
    ["\u3131", ["K", "G"]],
    ["\u3141", ["M"]],
    ["\u3134", ["N"]],
    ["\u3147", ["NG"]],
    ["\u3139", ["L", "R"]],
    ["\u314E", ["HH"]]
  ];
  const match = finalsByPhone.find(([, phones]) => phones.includes(p));
  return [{ jong: match ? match[0] : "", score: 0 }];
}
|
|
24611
|
+
/**
 * Maps an ARPABET vowel to candidate Hangul medial vowels.
 *
 * Lower scores are preferred. A candidate may carry `addJong`, a final
 * consonant the vowel itself contributes (ER offers ㄹ).
 *
 * @param {string} v ARPABET vowel symbol.
 * @returns {{jung: string, score: number, addJong?: string}[]} Unknown
 *   symbols yield ㅡ with score 5.
 */
function vowelMap(v) {
  const plain = (jung, score = 0) => ({ jung, score });
  const table = new Map([
    ["IY", [plain("\u3163")]],
    ["IH", [plain("\u3163")]],
    ["EH", [plain("\u3154")]],
    ["AE", [plain("\u3150")]],
    ["AA", [plain("\u314F")]],
    ["AH", [plain("\u3153")]],
    ["AO", [plain("\u3157"), plain("\u3153", 1)]],
    ["OW", [plain("\u3157")]],
    ["UH", [plain("\u315C"), plain("\u3153", 2)]],
    ["UW", [plain("\u315C")]],
    ["ER", [plain("\u3153"), { jung: "\u3153", addJong: "\u3139", score: 1 }]],
    ["EY", [plain("\u3154")]],
    ["AY", [plain("\u314F"), plain("\u3150", 2)]],
    ["AW", [plain("\u314F"), plain("\u3157", 2)]],
    ["OY", [plain("\u3157")]]
  ]);
  return table.get(v) ?? [plain("\u3161", 5)];
}
|
|
24647
|
+
// Compatibility jamo in the order used by the precomposed Hangul syllable
// formula: 19 initials, 21 medials, 28 finals (index 0 = no final).
var CHO = Array.from(
  "\u3131\u3132\u3134\u3137\u3138\u3139\u3141\u3142\u3143\u3145\u3146\u3147\u3148\u3149\u314A\u314B\u314C\u314D\u314E"
);
// The medials are the contiguous code points U+314F through U+3163.
var JUNG = Array.from({ length: 21 }, (_, offset) => String.fromCharCode(0x314f + offset));
var JONG = [
  "",
  ...Array.from(
    "\u3131\u3132\u3133\u3134\u3135\u3136\u3137\u3139\u313A\u313B\u313C\u313D\u313E\u313F\u3140\u3141\u3142\u3144\u3145\u3146\u3147\u3148\u314A\u314B\u314C\u314D\u314E"
  )
];
// Reverse lookups: jamo -> position in the table above.
var choIndex = new Map(Array.from(CHO.entries(), ([position, jamo]) => [jamo, position]));
var jungIndex = new Map(Array.from(JUNG.entries(), ([position, jamo]) => [jamo, position]));
var jongIndex = new Map(Array.from(JONG.entries(), ([position, jamo]) => [jamo, position]));
|
|
24653
|
+
/**
 * Composes one precomposed Hangul syllable from compatibility jamo.
 *
 * @param {string} cho Initial consonant; unknown values fall back to ㅇ.
 * @param {string} jung Medial vowel; unknown values fall back to ㅡ.
 * @param {string} [jong=""] Final consonant; unknown values mean no final.
 * @returns {string} A single character in the U+AC00 syllable block.
 */
function composeHangul(cho, jung, jong = "") {
  const SYLLABLE_BASE = 0xac00;
  const MEDIAL_COUNT = 21;
  const FINAL_COUNT = 28;

  const lead = choIndex.get(cho) ?? choIndex.get("\u3147") ?? 11;
  const medial = jungIndex.get(jung) ?? jungIndex.get("\u3161") ?? 18;
  const tail = jongIndex.get(normalizeJong(jong)) ?? 0;

  const offset = (lead * MEDIAL_COUNT + medial) * FINAL_COUNT + tail;
  return String.fromCharCode(SYLLABLE_BASE + offset);
}
|
|
24659
|
+
/**
 * Returns `j` when it is a valid final consonant, otherwise "".
 *
 * The former special cases for ㅋ, ㅌ and ㅍ could never run: all three are
 * entries of the final-consonant table, so the membership test returned them
 * first. They are removed; results are unchanged.
 *
 * @param {string|undefined|null} j Candidate final consonant.
 * @returns {string}
 */
function normalizeJong(j) {
  if (!j) {
    return "";
  }
  return jongIndex.has(j) ? j : "";
}
|
|
24672
|
+
/**
 * Chooses the final consonant for a syllable: the coda's own final when it
 * has one, otherwise the final contributed by the vowel, otherwise none.
 *
 * @param {string|undefined} j1 Final from the coda consonant.
 * @param {string|undefined} j2 Final contributed by the vowel.
 * @returns {string} The chosen final after normalizeJong.
 */
function pickJong(j1, j2) {
  let chosen = "";
  if (j1) {
    chosen = j1;
  } else if (j2) {
    chosen = j2;
  }
  return normalizeJong(chosen);
}
|
|
24675
|
+
/**
 * Reduces text to lower-case words separated by single spaces.
 *
 * - Hyphens between two letters are removed, joining the parts.
 * - Underscores and apostrophes are removed.
 * - Remaining hyphens and all other punctuation become spaces.
 *
 * The hyphen rule uses lookarounds so the neighbouring letters are not
 * consumed. The previous capturing pattern skipped every other hyphen around
 * single letters: "a-b-c" became "ab c" instead of "abc".
 *
 * @param {unknown} s Value to normalize; coerced with String().
 * @returns {string}
 */
function normalizeInput(s) {
  return String(s)
    .toLowerCase()
    .replace(/(?<=[a-z])-+(?=[a-z])/g, "")
    .replace(/[^a-z0-9\s\-_']/g, " ")
    .replace(/[_']/g, "")
    .replace(/-/g, " ")
    .trim()
    .replace(/\s+/g, " ");
}
|
|
24678
|
+
/**
 * Keeps one entry per distinct `text`: the one with the lowest score, or the
 * first seen when scores tie. Output follows first-appearance order.
 *
 * @param {Iterable<{text: string, score: number}>} arr
 * @returns {{text: string, score: number}[]}
 */
function dedupeByText(arr) {
  const lowestByText = /* @__PURE__ */ new Map();
  for (const entry of arr) {
    const current = lowestByText.get(entry.text);
    const replaces = current ? entry.score < current.score : true;
    if (replaces) {
      lowestByText.set(entry.text, entry);
    }
  }
  return Array.from(lowestByText.values());
}
|
|
24687
|
+
/**
 * Joins every hypothesis in `left` with every hypothesis in `right`
 * (texts concatenated, scores added) and keeps the best `limit` of them.
 *
 * @param {{text: string, score: number}[]} left
 * @param {{text: string, score: number}[]} right
 * @param {number} limit
 * @returns {{text: string, score: number}[]}
 */
function combineBeams(left, right, limit) {
  const joined = left.flatMap((head) =>
    right.map((tail) => ({
      text: head.text + tail.text,
      score: head.score + tail.score
    }))
  );
  return takeTopK(joined, limit);
}
|
|
24696
|
+
/**
 * Returns up to `k` entries with the lowest scores, one per distinct `text`.
 *
 * A copy is sorted, so the caller's array keeps its order; the previous
 * version sorted the argument in place.
 *
 * @param {{text: string, score: number}[]} arr
 * @param {number} k Maximum number of entries. The limit is checked after each
 *   push, so a non-empty input yields at least one entry.
 * @returns {{text: string, score: number}[]} Lowest score first.
 */
function takeTopK(arr, k) {
  const ranked = [...arr].sort((a, b) => a.score - b.score);
  const out = [];
  const seen = /* @__PURE__ */ new Set();
  for (const x of ranked) {
    if (seen.has(x.text)) {
      continue;
    }
    seen.add(x.text);
    out.push(x);
    if (out.length >= k) {
      break;
    }
  }
  return out;
}
|
|
24219
24710
|
|
|
24220
24711
|
// src/konglish.ts
|
|
@@ -24232,12 +24723,6 @@ var Konglish = class {
|
|
|
24232
24723
|
latinToHangul(input, options) {
|
|
24233
24724
|
return latinToHangul(input, mergeOptions(this.defaultOptions, options));
|
|
24234
24725
|
}
|
|
24235
|
-
latinToHangulAsync(input, options) {
|
|
24236
|
-
return latinToHangulAsync(
|
|
24237
|
-
input,
|
|
24238
|
-
mergeOptions(this.defaultOptions, options)
|
|
24239
|
-
);
|
|
24240
|
-
}
|
|
24241
24726
|
};
|
|
24242
24727
|
|
|
24243
24728
|
export { Konglish, customDictionary, latinToHangul };
|