konglish 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +522 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -6
- package/dist/index.d.ts +2 -6
- package/dist/index.js +522 -37
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
var cmuPronouncingDictionary = require('cmu-pronouncing-dictionary');
|
|
4
|
+
|
|
5
|
+
// src/pron/latinTokorea.ts
|
|
6
|
+
|
|
3
7
|
// src/pron/dictionary.ts
|
|
4
8
|
var customDictionary = {
|
|
5
9
|
"a": ["\uC5B4"],
|
|
@@ -24161,7 +24165,20 @@ var customDictionary = {
|
|
|
24161
24165
|
};
|
|
24162
24166
|
|
|
24163
24167
|
// src/pron/latinTokorea.ts
|
|
24164
|
-
var
|
|
24168
|
+
// Beam width used when the caller does not supply `beamWidth`; resolveOptions
// also passes it to Math.max, so the effective beam width is never below it.
var DEFAULT_BEAM_WIDTH = 8;
|
|
24169
|
+
/**
 * Transliterates Latin-script text into Hangul.
 *
 * Lookup order:
 *   1. the whole trimmed, lower-cased input in `options.dictionary`;
 *   2. the same key in the built-in `customDictionary`;
 *   3. token-by-token transliteration through `transliterateTokens`.
 *
 * Only non-empty, array-valued dictionary entries count as hits. A plain
 * property read also sees inherited members, so an input such as
 * "constructor" used to resolve to `Object` (whose `length` is 1) and the
 * function returned `undefined` instead of text.
 *
 * @param {string} input - Text to transliterate.
 * @param {object} [options] - Optional settings. `dictionary` maps lower-case
 *   keys to arrays of candidates; the whole object is also forwarded to
 *   `resolveOptions`.
 * @returns {string} The first candidate found, or `input` unchanged when no
 *   candidate is produced.
 */
function latinToHangul(input, options) {
  const fullKey = input.trim().toLowerCase();

  // Returns the candidate list stored under fullKey, or null when the entry is
  // missing, empty, or not an array (e.g. an inherited prototype member).
  const candidatesFor = (dict) => {
    const entry = dict?.[fullKey];
    return Array.isArray(entry) && entry.length > 0 ? entry : null;
  };

  const userFullHit = candidatesFor(options?.dictionary);
  if (userFullHit) {
    return userFullHit[0];
  }

  const builtInFullHit = candidatesFor(customDictionary);
  if (builtInFullHit) {
    return builtInFullHit[0];
  }

  // Only the single best candidate is needed here, hence limit = 1.
  const resolved = resolveOptions(options, 1);
  const tokens = tokenizePreservingSpecialChars(input);
  const [best] = transliterateTokens(tokens, resolved);
  return best ?? input;
}
|
|
24165
24182
|
function tokenizePreservingSpecialChars(input) {
|
|
24166
24183
|
const tokens = [];
|
|
24167
24184
|
const regex = /([a-zA-Z]+)|([^a-zA-Z]+)/g;
|
|
@@ -24176,47 +24193,521 @@ function tokenizePreservingSpecialChars(input) {
|
|
|
24176
24193
|
}
|
|
24177
24194
|
return tokens;
|
|
24178
24195
|
}
|
|
24179
|
-
function
|
|
24180
|
-
const
|
|
24181
|
-
|
|
24182
|
-
|
|
24183
|
-
|
|
24184
|
-
|
|
24185
|
-
|
|
24186
|
-
|
|
24187
|
-
|
|
24196
|
+
/**
 * Builds the internal option record used by the transliteration pipeline.
 *
 * The effective beam width is the largest of the requested width, `limit`
 * and DEFAULT_BEAM_WIDTH. Values that do not convert to a number are replaced
 * by DEFAULT_BEAM_WIDTH first: Math.max returns NaN as soon as one argument is
 * NaN, and a NaN width would make every `length >= width` cut-off false so
 * the beams would never be truncated.
 *
 * @param {object} [options] - Caller options (`beamWidth`,
 *   `enableFallbackPhones`, `dictionary`).
 * @param {number} limit - Maximum number of results the caller wants.
 * @returns {{limit: number, beamWidth: number, enableFallbackPhones: boolean, dictionaryOverride: (object|undefined)}}
 */
function resolveOptions(options, limit) {
  // Mirrors the numeric coercion Math.max would apply, minus the NaN outcome.
  const toWidth = (value) => {
    const numeric = Number(value);
    return Number.isNaN(numeric) ? DEFAULT_BEAM_WIDTH : numeric;
  };
  const desiredBeam = toWidth(options?.beamWidth ?? DEFAULT_BEAM_WIDTH);
  return {
    limit,
    beamWidth: Math.max(desiredBeam, toWidth(limit), DEFAULT_BEAM_WIDTH),
    enableFallbackPhones: options?.enableFallbackPhones ?? true,
    dictionaryOverride: options?.dictionary
  };
}
|
|
24205
|
+
/**
 * Produces transliterated variants of a token sequence.
 *
 * The texts of all "word" tokens are joined with single spaces and handed to
 * buildWordCandidates; every candidate it returns is then woven back into the
 * full token list by applyCandidateToTokens.
 *
 * @param {Array<{type: string, text: string}>} tokens - Tokenized input.
 * @param {object} options - Resolved options, passed through to buildWordCandidates.
 * @returns {string[]} One output string per candidate; empty when there are
 *   no word tokens or no candidates.
 */
function transliterateTokens(tokens, options) {
  const wordTexts = [];
  for (const token of tokens) {
    if (token.type === "word") {
      wordTexts.push(token.text);
    }
  }
  if (wordTexts.length === 0) {
    return [];
  }
  const variants = buildWordCandidates(wordTexts.join(" "), options);
  // Mapping an empty candidate list yields an empty result, so no extra guard is needed.
  return variants.map((variant) => applyCandidateToTokens(tokens, variant));
}
|
|
24215
|
+
/**
 * Rebuilds the original text with its word tokens replaced by the
 * space-separated words of `candidate`, in order.
 *
 * Non-word tokens are copied verbatim. A word token that has no matching
 * candidate word keeps its original text.
 *
 * @param {Array<{type: string, text: string}>} tokens - Tokenized input.
 * @param {string} candidate - Space-separated transliterated words.
 * @returns {string} The reassembled text.
 */
function applyCandidateToTokens(tokens, candidate) {
  const replacements = candidate.split(" ");
  const pieces = [];
  let cursor = 0;
  for (const token of tokens) {
    if (token.type !== "word") {
      pieces.push(token.text);
      continue;
    }
    // The cursor advances for every word token, whether or not a replacement exists.
    const replacement = replacements[cursor];
    cursor += 1;
    pieces.push(replacement ?? token.text);
  }
  return pieces.join("");
}
|
|
24227
|
+
/**
 * Generates ranked Hangul candidates for a phrase.
 *
 * For each word, candidates come from three sources with fixed base scores
 * (lower scores rank first):
 *   - dictionary overrides (base 0),
 *   - phones returned by cmuLookupPhones (base 100),
 *   - fallbackPhones, tried only when the lookup found nothing and
 *     `options.enableFallbackPhones` is truthy (base 200).
 * Per-word candidates are merged into a running phrase beam, with a single
 * space between consecutive words.
 *
 * @param {string} input - Phrase to transliterate.
 * @param {{beamWidth: number, limit: number, enableFallbackPhones: boolean, dictionaryOverride: (object|undefined)}} options
 * @returns {string[]} Distinct, trimmed candidates in ascending score order;
 *   collection stops once `limit` candidates have been gathered.
 */
function buildWordCandidates(input, options) {
  const normalized = normalizeInput(input);
  if (!normalized) {
    return [];
  }
  const wordList = normalized.split(/\s+/).filter(Boolean);
  const { beamWidth, limit } = options;

  // Turns an ordered list of texts into scored entries: base + position.
  const rank = (texts, base) => texts.map((text, position) => ({ text, score: base + position }));

  let beam = [{ text: "", score: 0 }];
  wordList.forEach((word, index) => {
    const fromOverride = rank(overrideCandidates(word, options.dictionaryOverride), 0);
    const phones = cmuLookupPhones(word);
    const fromLookup = phones ? rank(arpabetToKoCandidates(phones, beamWidth), 100) : [];
    const guessed = phones || !options.enableFallbackPhones ? null : fallbackPhones(word);
    const fromGuess = guessed ? rank(arpabetToKoCandidates(guessed, beamWidth), 200) : [];

    const wordBeam = takeTopK(
      dedupeByText([...fromOverride, ...fromLookup, ...fromGuess]),
      beamWidth
    );
    beam = combineBeams(beam, wordBeam, beamWidth);

    const isLastWord = index === wordList.length - 1;
    if (!isLastWord) {
      beam = beam.map(({ text, score }) => ({ text: `${text} `, score }));
    }
  });

  beam.sort((a, b) => a.score - b.score);

  // A Set keeps insertion order, so it serves as both the "seen" filter and the output list.
  const picked = new Set();
  for (const { text } of beam) {
    const trimmed = text.trim();
    if (!trimmed) {
      continue;
    }
    picked.add(trimmed);
    if (picked.size >= limit) {
      break;
    }
  }
  return [...picked];
}
|
|
24193
|
-
function
|
|
24194
|
-
const key =
|
|
24195
|
-
const
|
|
24196
|
-
|
|
24271
|
+
/**
 * Returns the dictionary candidates for a single word.
 *
 * A non-empty entry in the user-supplied `overrides` wins; otherwise the
 * built-in `customDictionary` is consulted. Keys are compared in lower case.
 *
 * Entries are accepted only when they are arrays. Plain property reads also
 * return inherited members, so the word "constructor" used to yield the
 * `Object` function (length 1) and the subsequent `.map` call threw a
 * TypeError.
 *
 * @param {string} word - Word to look up.
 * @param {object} [overrides] - User dictionary mapping lower-case words to
 *   candidate arrays.
 * @returns {string[]} Candidates converted with `String`, or an empty array
 *   when neither dictionary has a usable entry.
 */
function overrideCandidates(word, overrides) {
  const key = word.toLowerCase();

  const userHit = overrides?.[key];
  if (Array.isArray(userHit) && userHit.length > 0) {
    return userHit.map(String);
  }

  const builtIn = customDictionary[key];
  if (!Array.isArray(builtIn) || builtIn.length === 0) {
    return [];
  }
  return builtIn.map(String);
}
|
|
24198
|
-
function
|
|
24199
|
-
|
|
24281
|
+
/**
 * Looks a word up in the CMU pronouncing dictionary and returns its phones
 * with the stress digits removed.
 *
 * The word is tried in upper case first (the original behaviour) and then in
 * lower case, each also with the "(1)" and "(2)" variant suffixes.
 * NOTE(review): the lower-case attempt was added because current releases of
 * the `cmu-pronouncing-dictionary` data appear to use lower-case keys —
 * confirm against the installed version.
 *
 * Only own properties are considered, so inherited members of the dictionary
 * object can never be mistaken for an entry.
 *
 * @param {string} word - Word to look up.
 * @returns {string[]|null} Phones such as ["HH", "AH", "L", "OW"], or null
 *   when the dictionary is unavailable or has no string entry for the word.
 */
function cmuLookupPhones(word) {
  const entries = cmuPronouncingDictionary.dictionary;
  if (!entries) {
    return null;
  }

  const keys = [];
  for (const base of [word.toUpperCase(), word.toLowerCase()]) {
    keys.push(base, `${base}(1)`, `${base}(2)`);
  }

  // First key that holds a non-nullish own value decides, as `??` chaining did before.
  const hitKey = keys.find(
    (key) => Object.prototype.hasOwnProperty.call(entries, key) && entries[key] != null
  );
  const raw = hitKey === undefined ? undefined : entries[hitKey];
  if (!raw || typeof raw !== "string") {
    return null;
  }

  return raw
    .trim()
    .split(/\s+/)
    .map((phone) => phone.replace(/[0-9]/g, ""))
    .filter(Boolean);
}
|
|
24201
|
-
function
|
|
24202
|
-
const
|
|
24203
|
-
const
|
|
24204
|
-
|
|
24205
|
-
|
|
24206
|
-
|
|
24207
|
-
|
|
24208
|
-
|
|
24209
|
-
|
|
24210
|
-
|
|
24290
|
+
/**
 * Guesses ARPABET-style phones from spelling alone. buildWordCandidates uses
 * it when the dictionary lookup finds nothing and fallback is enabled.
 *
 * Rules, applied left to right on the lower-cased word:
 *   - a leading "pre" followed by a consonant letter becomes P R IY;
 *   - the two-letter groups er/ir/ur, ch, sh, th, ng and qu map to fixed phones;
 *   - any other letter goes through vowelToArpabet (a, e, i, o, u, y) or
 *     consToArpabet (remaining a-z letters);
 *   - characters outside a-z are skipped.
 *
 * @param {string} word - Word to convert.
 * @returns {string[]|null} The guessed phones, or null when none were produced.
 */
function fallbackPhones(word) {
  const twoLetterRules = new Map([
    ["er", ["ER"]],
    ["ir", ["ER"]],
    ["ur", ["ER"]],
    ["ch", ["CH"]],
    ["sh", ["SH"]],
    ["th", ["TH"]],
    ["ng", ["NG"]],
    ["qu", ["K", "W"]]
  ]);

  const lowered = word.toLowerCase();
  const result = [];
  let pos = 0;

  if (/^pre[bcdfghjklmnpqrstvwxyz]/.test(lowered)) {
    result.push("P", "R", "IY");
    pos = 3;
  }

  while (pos < lowered.length) {
    const grouped = twoLetterRules.get(lowered.slice(pos, pos + 2));
    if (grouped) {
      result.push(...grouped);
      pos += 2;
      continue;
    }

    const letter = lowered[pos];
    if ("aeiouy".includes(letter)) {
      result.push(vowelToArpabet(letter));
    } else if (/[a-z]/.test(letter)) {
      result.push(consToArpabet(letter));
    }
    pos += 1;
  }

  return result.length ? result : null;
}
|
|
24349
|
+
/**
 * Maps a single lower-case vowel letter to one ARPABET vowel symbol.
 *
 * @param {string} ch - One of "a", "e", "i", "o", "u", "y".
 * @returns {string} The mapped symbol; "AH" for any other input.
 */
function vowelToArpabet(ch) {
  const byLetter = new Map([
    ["a", "AE"],
    ["e", "EH"],
    ["i", "IY"],
    ["o", "OW"],
    ["u", "UW"],
    ["y", "IY"]
  ]);
  return byLetter.get(ch) ?? "AH";
}
|
|
24367
|
+
/**
 * Maps a single lower-case consonant letter to one ARPABET consonant symbol.
 *
 * "c", "k", "q" and "x" all map to "K". "y" has no entry in this table.
 *
 * @param {string} ch - A lower-case consonant letter.
 * @returns {string} The mapped symbol; "HH" for any input without an entry.
 */
function consToArpabet(ch) {
  const byLetter = new Map([
    ["b", "B"], ["c", "K"], ["d", "D"], ["f", "F"], ["g", "G"],
    ["h", "HH"], ["j", "JH"], ["k", "K"], ["l", "L"], ["m", "M"],
    ["n", "N"], ["p", "P"], ["q", "K"], ["r", "R"], ["s", "S"],
    ["t", "T"], ["v", "V"], ["w", "W"], ["x", "K"], ["z", "Z"]
  ]);
  return byLetter.get(ch) ?? "HH";
}
|
|
24413
|
+
// Phone symbols that syllabifyArpabet treats as syllable nuclei.
var VOWELS = /* @__PURE__ */ new Set(
  "AA AE AH AO AW AY EH ER EY IH IY OW OY UH UW AX".split(" ")
);
|
|
24431
|
+
/**
 * Converts a phone sequence into ranked Hangul spellings.
 *
 * The phones are split into syllables, each syllable is expanded into scored
 * Hangul candidates, and the per-syllable candidates are concatenated through
 * a beam that keeps at most `limit` entries per step.
 *
 * @param {string[]} phones - Phone symbols without stress digits.
 * @param {number} limit - Beam width and maximum number of results.
 * @returns {string[]} Distinct spellings in ascending score order.
 */
function arpabetToKoCandidates(phones, limit) {
  if (!phones.length) {
    return [];
  }

  const ranked = syllabifyArpabet(phones).reduce(
    (partial, syllable) =>
      combineBeams(partial, buildKoSyllableCandidates(syllable, limit), limit),
    [{ text: "", score: 0 }]
  );
  ranked.sort((a, b) => a.score - b.score);

  // A Set keeps insertion order, so it doubles as the de-duplicated output.
  const unique = new Set();
  for (const { text } of ranked) {
    unique.add(text);
    if (unique.size >= limit) {
      break;
    }
  }
  return [...unique];
}
|
|
24453
|
+
/**
 * Groups a flat phone list into syllables of the form { onset, vowel, coda }.
 *
 * Every phone found in VOWELS starts a new syllable. Consonants seen before
 * the first vowel form that syllable's onset; consonants after a vowel are
 * collected in its coda. In a second pass, whenever a syllable has a
 * non-empty coda and the following syllable has no onset, the last coda
 * consonant is moved to the front of the following onset.
 *
 * Consonants in a sequence that contains no vowel at all are dropped.
 *
 * @param {string[]} phones - Phone symbols without stress digits.
 * @returns {Array<{onset: string[], vowel: string, coda: string[]}>}
 */
function syllabifyArpabet(phones) {
  const syllables = [];
  let leadingConsonants = [];
  let current = null;

  for (const phone of phones) {
    if (VOWELS.has(phone)) {
      current = { onset: leadingConsonants, vowel: phone, coda: [] };
      leadingConsonants = [];
      syllables.push(current);
    } else if (current === null) {
      leadingConsonants.push(phone);
    } else {
      current.coda.push(phone);
    }
  }

  syllables.slice(0, -1).forEach((syllable, index) => {
    const following = syllables[index + 1];
    if (syllable.coda.length === 0 || following.onset.length !== 0) {
      return;
    }
    const moved = syllable.coda.pop();
    if (moved) {
      following.onset.unshift(moved);
    }
  });

  return syllables;
}
|
|
24218
|
-
|
|
24219
|
-
|
|
24489
|
+
/**
 * Expands one { onset, vowel, coda } syllable into scored Hangul strings.
 *
 * - Every onset consonant except the last becomes its own syllable carried by
 *   the vowel "\u3161" (penalty 3).
 * - The last onset consonant (or "\u3147" when there is no onset), the vowel and
 *   the first coda consonant form the main Hangul syllable. A final consonant
 *   requested by the vowel mapping (`addJong`) is used only when the coda
 *   supplies none, and costs one extra point.
 * - Every further coda consonant becomes its own "\u3161" syllable (penalty 4).
 *
 * @param {{onset: string[], vowel: string, coda: string[]}} syllable
 * @param {number} limit - Maximum number of candidates kept at each step.
 * @returns {Array<{text: string, score: number}>} Candidates, lowest score first.
 */
function buildKoSyllableCandidates({ onset, vowel, coda }, limit) {
  // Renders a consonant that cannot sit inside the main syllable block.
  const standalone = (phone, penalty) =>
    consMap(phone).map((option) => ({
      text: composeHangul(option.cho, "\u3161", ""),
      score: option.score + penalty
    }));

  let beam = [{ text: "", score: 0 }];

  for (const phone of onset.slice(0, -1)) {
    beam = combineBeams(beam, standalone(phone, 3), limit);
  }

  const lastOnset = onset.length === 0 ? null : onset[onset.length - 1];
  const initials = lastOnset ? consMap(lastOnset) : [{ cho: "\u3147", score: 0 }];
  const medials = vowelMap(vowel);
  const firstCoda = coda.length ? coda[0] : null;
  const finals = firstCoda ? consToJongMap(firstCoda) : [{ jong: "", score: 0 }];

  const mainSyllables = initials.flatMap((initial) =>
    medials.flatMap((medial) =>
      finals.map((final) => ({
        text: composeHangul(initial.cho, medial.jung, pickJong(final.jong, medial.addJong)),
        score: initial.score + medial.score + final.score + (medial.addJong ? 1 : 0)
      }))
    )
  );
  beam = combineBeams(beam, takeTopK(mainSyllables, limit), limit);

  for (const phone of coda.slice(1)) {
    beam = combineBeams(beam, standalone(phone, 4), limit);
  }

  return takeTopK(beam, limit);
}
|
|
24530
|
+
/**
 * Maps a consonant phone to candidate Hangul initial consonants.
 *
 * Each candidate carries a penalty score; lower scores are preferred by the
 * beam code that consumes them. Phones without an entry fall back to "\u3147"
 * with a penalty of 5.
 *
 * "ZH" is mapped to "\u3148", like "Z" and "JH". It previously had no case and
 * fell through to that fallback.
 *
 * @param {string} p - Consonant phone symbol.
 * @returns {Array<{cho: string, score: number}>} Candidates, best first.
 */
function consMap(p) {
  switch (p) {
    case "P":
      return [{ cho: "\u314D", score: 0 }];
    case "B":
      return [{ cho: "\u3142", score: 0 }];
    case "T":
      return [{ cho: "\u314C", score: 0 }];
    case "D":
      return [{ cho: "\u3137", score: 0 }];
    case "K":
      return [{ cho: "\u314B", score: 0 }, { cho: "\u3131", score: 1 }];
    case "G":
      return [{ cho: "\u3131", score: 0 }, { cho: "\u314B", score: 1 }];
    case "F":
      return [{ cho: "\u314D", score: 0 }];
    case "V":
      return [{ cho: "\u3142", score: 0 }];
    case "S":
      return [{ cho: "\u3145", score: 0 }];
    case "Z":
    case "ZH":
    case "JH":
      return [{ cho: "\u3148", score: 0 }];
    case "CH":
      return [{ cho: "\u314A", score: 0 }];
    case "SH":
      return [{ cho: "\u3145", score: 0 }];
    case "HH":
      return [{ cho: "\u314E", score: 0 }];
    case "M":
      return [{ cho: "\u3141", score: 0 }];
    case "N":
      return [{ cho: "\u3134", score: 0 }];
    case "NG":
      return [{ cho: "\u3147", score: 0 }];
    case "L":
    case "R":
      return [{ cho: "\u3139", score: 0 }];
    case "W":
    case "Y":
      // No dedicated initial exists for these; "\u3147" is used with a small penalty.
      return [{ cho: "\u3147", score: 2 }];
    case "TH":
      return [{ cho: "\u3145", score: 0 }, { cho: "\u3137", score: 2 }];
    case "DH":
      return [{ cho: "\u3137", score: 0 }];
    default:
      return [{ cho: "\u3147", score: 5 }];
  }
}
|
|
24580
|
+
/**
 * Maps a consonant phone to the Hangul final consonant used for it.
 *
 * Several phones share one final. Phones without an entry yield an empty
 * final.
 *
 * @param {string} p - Consonant phone symbol.
 * @returns {Array<{jong: string, score: number}>} A single candidate with score 0.
 */
function consToJongMap(p) {
  const finalsByPhones = [
    ["\u3142", ["P", "B", "F", "V"]],
    ["\u3137", ["T", "D", "S", "Z", "JH", "CH"]],
    ["\u3131", ["K", "G"]],
    ["\u3141", ["M"]],
    ["\u3134", ["N"]],
    ["\u3147", ["NG"]],
    ["\u3139", ["L", "R"]],
    ["\u314E", ["HH"]]
  ];
  const match = finalsByPhones.find(([, phones]) => phones.includes(p));
  return [{ jong: match ? match[0] : "", score: 0 }];
}
|
|
24613
|
+
/**
 * Maps a vowel phone to candidate Hangul medial vowels.
 *
 * Each candidate carries a penalty score (lower is preferred). The second
 * "ER" candidate additionally requests the final consonant "\u3139" through
 * `addJong`. Phones without an entry yield "\u3161" with a penalty of 5.
 *
 * @param {string} v - Vowel phone symbol.
 * @returns {Array<{jung: string, score: number, addJong?: string}>} Candidates, best first.
 */
function vowelMap(v) {
  // Rows are [jung, score] or [jung, score, addJong].
  const rowsByPhone = new Map([
    ["IY", [["\u3163", 0]]],
    ["IH", [["\u3163", 0]]],
    ["EH", [["\u3154", 0]]],
    ["AE", [["\u3150", 0]]],
    ["AA", [["\u314F", 0]]],
    ["AH", [["\u3153", 0]]],
    ["AO", [["\u3157", 0], ["\u3153", 1]]],
    ["OW", [["\u3157", 0]]],
    ["UH", [["\u315C", 0], ["\u3153", 2]]],
    ["UW", [["\u315C", 0]]],
    ["ER", [["\u3153", 0], ["\u3153", 1, "\u3139"]]],
    ["EY", [["\u3154", 0]]],
    ["AY", [["\u314F", 0], ["\u3150", 2]]],
    ["AW", [["\u314F", 0], ["\u3157", 2]]],
    ["OY", [["\u3157", 0]]]
  ]);
  const rows = rowsByPhone.get(v) ?? [["\u3161", 5]];
  return rows.map(([jung, score, addJong]) =>
    addJong === undefined ? { jung, score } : { jung, addJong, score }
  );
}
|
|
24649
|
+
// Jamo tables used by composeHangul. A jamo's position in its table is the
// index that enters the syllable code computation.
// Initial consonants (19 entries).
var CHO = [..."\u3131\u3132\u3134\u3137\u3138\u3139\u3141\u3142\u3143\u3145\u3146\u3147\u3148\u3149\u314A\u314B\u314C\u314D\u314E"];
// Medial vowels (21 entries): the consecutive code units U+314F through U+3163.
var JUNG = Array.from({ length: 21 }, (_, offset) => String.fromCharCode(0x314f + offset));
// Final consonants (28 entries); index 0 is the empty string, meaning "no final".
var JONG = ["", ..."\u3131\u3132\u3133\u3134\u3135\u3136\u3137\u3139\u313A\u313B\u313C\u313D\u313E\u313F\u3140\u3141\u3142\u3144\u3145\u3146\u3147\u3148\u314A\u314B\u314C\u314D\u314E"];
// Reverse lookups: jamo -> position in the corresponding table.
var choIndex = new Map(Array.from(CHO.entries(), ([position, jamo]) => [jamo, position]));
var jungIndex = new Map(Array.from(JUNG.entries(), ([position, jamo]) => [jamo, position]));
var jongIndex = new Map(Array.from(JONG.entries(), ([position, jamo]) => [jamo, position]));
|
|
24655
|
+
/**
 * Composes one precomposed Hangul syllable from its jamo.
 *
 * An unknown initial falls back to "\u3147", an unknown medial to "\u3161", and
 * a final that normalizeJong does not resolve to a known entry to "no final".
 *
 * @param {string} cho - Initial consonant jamo.
 * @param {string} jung - Medial vowel jamo.
 * @param {string} [jong=""] - Final consonant jamo, or "" for none.
 * @returns {string} A single-character string.
 */
function composeHangul(cho, jung, jong = "") {
  const FIRST_SYLLABLE = 0xac00; // 44032
  const MEDIAL_COUNT = 21;
  const FINAL_COUNT = 28;

  const initial = choIndex.get(cho) ?? choIndex.get("\u3147") ?? 11;
  const medial = jungIndex.get(jung) ?? jungIndex.get("\u3161") ?? 18;
  const final = jongIndex.get(normalizeJong(jong)) ?? 0;

  const offset = (initial * MEDIAL_COUNT + medial) * FINAL_COUNT + final;
  return String.fromCharCode(FIRST_SYLLABLE + offset);
}
|
|
24661
|
+
/**
 * Normalizes a final-consonant jamo before it is looked up in `jongIndex`.
 *
 * The aspirated consonants "\u314B", "\u314C" and "\u314D" are folded to
 * "\u3131", "\u3137" and "\u3142". This folding has to run before the
 * membership test: those three jamo are themselves present in `jongIndex`,
 * so with the membership test first the folding branches could never execute.
 *
 * @param {string} j - Candidate final consonant; may be empty or undefined.
 * @returns {string} A key present in `jongIndex`, or "" when `j` is empty or
 *   not a known final.
 */
function normalizeJong(j) {
  if (!j) {
    return "";
  }
  switch (j) {
    case "\u314B":
      return "\u3131";
    case "\u314C":
      return "\u3137";
    case "\u314D":
      return "\u3142";
    default:
      return jongIndex.has(j) ? j : "";
  }
}
|
|
24674
|
+
/**
 * Chooses the final consonant for a syllable: `j1` when it is truthy,
 * otherwise `j2`, otherwise none. The choice is passed through normalizeJong.
 *
 * @param {string} [j1] - Preferred final consonant.
 * @param {string} [j2] - Alternative used when `j1` is empty or missing.
 * @returns {string} The normalized final consonant.
 */
function pickJong(j1, j2) {
  let chosen = "";
  if (j1) {
    chosen = j1;
  } else if (j2) {
    chosen = j2;
  }
  return normalizeJong(chosen);
}
|
|
24677
|
+
/**
 * Canonicalizes raw text before it is split into words.
 *
 * Steps, in order:
 *   1. convert to a lower-case string;
 *   2. remove hyphen runs that sit directly between two letters ("e-mail" -> "email");
 *   3. replace every character other than a-z, 0-9, whitespace, "-", "_" and "'" with a space;
 *   4. delete "_" and "'";
 *   5. turn the remaining hyphens into spaces;
 *   6. trim and collapse whitespace runs to single spaces.
 *
 * Step 2 uses look-around assertions so the neighbouring letters are not
 * consumed by the match. With consuming groups, a letter shared by two
 * hyphens could only take part in one match, so "a-b-c" became "ab c" while
 * "a-b" became "ab".
 *
 * @param {*} s - Value to normalize; converted with `String`.
 * @returns {string} The normalized text, possibly empty.
 */
function normalizeInput(s) {
  return String(s)
    .toLowerCase()
    .replace(/(?<=[a-z])-+(?=[a-z])/g, "")
    .replace(/[^a-z0-9\s\-_']/g, " ")
    .replace(/[_']/g, "")
    .replace(/-/g, " ")
    .trim()
    .replace(/\s+/g, " ");
}
|
|
24680
|
+
/**
 * Removes entries with duplicate `text`, keeping for each text the entry with
 * the lowest score (the earliest one on ties).
 *
 * The result is ordered by the first appearance of each text.
 *
 * @param {Array<{text: string, score: number}>} arr - Scored entries.
 * @returns {Array<{text: string, score: number}>} A new array with unique texts.
 */
function dedupeByText(arr) {
  const lowestByText = new Map();
  arr.forEach((entry) => {
    const known = lowestByText.get(entry.text);
    const isImprovement = !known || entry.score < known.score;
    if (isImprovement) {
      // Overwriting an existing key keeps its original position in the Map.
      lowestByText.set(entry.text, entry);
    }
  });
  return Array.from(lowestByText.values());
}
|
|
24689
|
+
/**
 * Joins every entry of `left` with every entry of `right` (texts
 * concatenated, scores added) and hands the products to takeTopK together
 * with `limit`.
 *
 * Products are generated with `left` as the outer and `right` as the inner
 * sequence.
 *
 * @param {Array<{text: string, score: number}>} left - Existing hypotheses.
 * @param {Array<{text: string, score: number}>} right - Extensions to append.
 * @param {number} limit - Forwarded to takeTopK.
 * @returns {Array<{text: string, score: number}>} Whatever takeTopK returns.
 */
function combineBeams(left, right, limit) {
  const products = left.flatMap((head) =>
    right.map((tail) => ({
      text: head.text + tail.text,
      score: head.score + tail.score
    }))
  );
  return takeTopK(products, limit);
}
|
|
24698
|
+
/**
 * Returns up to `k` entries with the lowest scores, one per distinct `text`.
 *
 * Changes from the previous implementation:
 *   - the input array is no longer sorted in place; a copy is ranked, so the
 *     caller's array keeps its order (none of the callers visible in this
 *     file rely on the old side effect);
 *   - `k <= 0` yields an empty array. The cut-off used to be checked only
 *     after the first push, so one entry was returned even for `k = 0`.
 *
 * @param {Array<{text: string, score: number}>} arr - Scored entries.
 * @param {number} k - Maximum number of entries to return.
 * @returns {Array<{text: string, score: number}>} Entries in ascending score
 *   order; entries with equal scores keep their input order.
 */
function takeTopK(arr, k) {
  if (k <= 0) {
    return [];
  }
  const ranked = [...arr].sort((a, b) => a.score - b.score);
  const out = [];
  const seen = /* @__PURE__ */ new Set();
  for (const entry of ranked) {
    if (seen.has(entry.text)) {
      continue;
    }
    seen.add(entry.text);
    out.push(entry);
    if (out.length >= k) {
      break;
    }
  }
  return out;
}
|
|
24221
24712
|
|
|
24222
24713
|
// src/konglish.ts
|
|
@@ -24234,12 +24725,6 @@ var Konglish = class {
|
|
|
24234
24725
|
latinToHangul(input, options) {
|
|
24235
24726
|
return latinToHangul(input, mergeOptions(this.defaultOptions, options));
|
|
24236
24727
|
}
|
|
24237
|
-
latinToHangulAsync(input, options) {
|
|
24238
|
-
return latinToHangulAsync(
|
|
24239
|
-
input,
|
|
24240
|
-
mergeOptions(this.defaultOptions, options)
|
|
24241
|
-
);
|
|
24242
|
-
}
|
|
24243
24728
|
};
|
|
24244
24729
|
|
|
24245
24730
|
exports.Konglish = Konglish;
|