ipa-hangul 1.1.0 → 1.2.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/index.js +31 -9
- package/dist/index.mjs +31 -9
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -258,6 +258,9 @@ function matchConsonant(text, pos) {
|
|
|
258
258
|
function preprocessIPA(ipa) {
|
|
259
259
|
return ipa.replace(/\([^)]*\)/g, "").replace(/[\/\[\]]/g, "").replace(/ˈ/g, ".[P]").replace(/ˌ/g, ".[S]").replace(/[′']/g, ".").replace(/\.+/g, ".").replace(/^\./g, "").trim();
|
|
260
260
|
}
|
|
261
|
+
function hasIPAVowel(text) {
|
|
262
|
+
return /[iɪeɛæɑɒɔʌəɜɝʊuoa]/.test(text);
|
|
263
|
+
}
|
|
261
264
|
function parseSyllables(text) {
|
|
262
265
|
const syllables = [];
|
|
263
266
|
const parts = text.split(".");
|
|
@@ -271,7 +274,17 @@ function parseSyllables(text) {
|
|
|
271
274
|
syllables.push({ text: part, stress: "none" });
|
|
272
275
|
}
|
|
273
276
|
}
|
|
274
|
-
|
|
277
|
+
const merged = [];
|
|
278
|
+
for (let i = 0; i < syllables.length; i++) {
|
|
279
|
+
const curr = syllables[i];
|
|
280
|
+
const next = syllables[i + 1];
|
|
281
|
+
if (next && next.stress !== "none" && curr.stress === "none" && !hasIPAVowel(curr.text)) {
|
|
282
|
+
next.text = curr.text + next.text;
|
|
283
|
+
continue;
|
|
284
|
+
}
|
|
285
|
+
merged.push(curr);
|
|
286
|
+
}
|
|
287
|
+
return merged;
|
|
275
288
|
}
|
|
276
289
|
function splitByLongVowel(text) {
|
|
277
290
|
const segments = [];
|
|
@@ -403,21 +416,30 @@ function convertSegment(tokens) {
|
|
|
403
416
|
}
|
|
404
417
|
return result.join("");
|
|
405
418
|
}
|
|
406
|
-
function
|
|
419
|
+
function isCompleteSyllable(code) {
|
|
420
|
+
return code >= 44032 && code <= 55203;
|
|
421
|
+
}
|
|
422
|
+
function getFirstStressedSyllable(text) {
|
|
423
|
+
let firstCompleteIdx = -1;
|
|
407
424
|
for (let i = 0; i < text.length; i++) {
|
|
408
425
|
const code = text.charCodeAt(i);
|
|
409
|
-
if (code
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
rest: text.substring(i + 1)
|
|
413
|
-
};
|
|
426
|
+
if (isCompleteSyllable(code)) {
|
|
427
|
+
firstCompleteIdx = i;
|
|
428
|
+
break;
|
|
414
429
|
}
|
|
415
430
|
}
|
|
416
|
-
|
|
431
|
+
if (firstCompleteIdx === -1) {
|
|
432
|
+
if (text.length === 0) return { first: "", rest: "" };
|
|
433
|
+
return { first: text[0], rest: text.substring(1) };
|
|
434
|
+
}
|
|
435
|
+
return {
|
|
436
|
+
first: text.substring(0, firstCompleteIdx + 1),
|
|
437
|
+
rest: text.substring(firstCompleteIdx + 1)
|
|
438
|
+
};
|
|
417
439
|
}
|
|
418
440
|
function applyStressMarker(hangul, stress, format) {
|
|
419
441
|
if (!format || stress === "none") return hangul;
|
|
420
|
-
const { first, rest } =
|
|
442
|
+
const { first, rest } = getFirstStressedSyllable(hangul);
|
|
421
443
|
if (stress === "primary") {
|
|
422
444
|
if (format === "markdown") {
|
|
423
445
|
return `**${first}**${rest}`;
|
package/dist/index.mjs
CHANGED
|
@@ -234,6 +234,9 @@ function matchConsonant(text, pos) {
|
|
|
234
234
|
function preprocessIPA(ipa) {
|
|
235
235
|
return ipa.replace(/\([^)]*\)/g, "").replace(/[\/\[\]]/g, "").replace(/ˈ/g, ".[P]").replace(/ˌ/g, ".[S]").replace(/[′']/g, ".").replace(/\.+/g, ".").replace(/^\./g, "").trim();
|
|
236
236
|
}
|
|
237
|
+
function hasIPAVowel(text) {
|
|
238
|
+
return /[iɪeɛæɑɒɔʌəɜɝʊuoa]/.test(text);
|
|
239
|
+
}
|
|
237
240
|
function parseSyllables(text) {
|
|
238
241
|
const syllables = [];
|
|
239
242
|
const parts = text.split(".");
|
|
@@ -247,7 +250,17 @@ function parseSyllables(text) {
|
|
|
247
250
|
syllables.push({ text: part, stress: "none" });
|
|
248
251
|
}
|
|
249
252
|
}
|
|
250
|
-
|
|
253
|
+
const merged = [];
|
|
254
|
+
for (let i = 0; i < syllables.length; i++) {
|
|
255
|
+
const curr = syllables[i];
|
|
256
|
+
const next = syllables[i + 1];
|
|
257
|
+
if (next && next.stress !== "none" && curr.stress === "none" && !hasIPAVowel(curr.text)) {
|
|
258
|
+
next.text = curr.text + next.text;
|
|
259
|
+
continue;
|
|
260
|
+
}
|
|
261
|
+
merged.push(curr);
|
|
262
|
+
}
|
|
263
|
+
return merged;
|
|
251
264
|
}
|
|
252
265
|
function splitByLongVowel(text) {
|
|
253
266
|
const segments = [];
|
|
@@ -379,21 +392,30 @@ function convertSegment(tokens) {
|
|
|
379
392
|
}
|
|
380
393
|
return result.join("");
|
|
381
394
|
}
|
|
382
|
-
function
|
|
395
|
+
function isCompleteSyllable(code) {
|
|
396
|
+
return code >= 44032 && code <= 55203;
|
|
397
|
+
}
|
|
398
|
+
function getFirstStressedSyllable(text) {
|
|
399
|
+
let firstCompleteIdx = -1;
|
|
383
400
|
for (let i = 0; i < text.length; i++) {
|
|
384
401
|
const code = text.charCodeAt(i);
|
|
385
|
-
if (code
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
rest: text.substring(i + 1)
|
|
389
|
-
};
|
|
402
|
+
if (isCompleteSyllable(code)) {
|
|
403
|
+
firstCompleteIdx = i;
|
|
404
|
+
break;
|
|
390
405
|
}
|
|
391
406
|
}
|
|
392
|
-
|
|
407
|
+
if (firstCompleteIdx === -1) {
|
|
408
|
+
if (text.length === 0) return { first: "", rest: "" };
|
|
409
|
+
return { first: text[0], rest: text.substring(1) };
|
|
410
|
+
}
|
|
411
|
+
return {
|
|
412
|
+
first: text.substring(0, firstCompleteIdx + 1),
|
|
413
|
+
rest: text.substring(firstCompleteIdx + 1)
|
|
414
|
+
};
|
|
393
415
|
}
|
|
394
416
|
function applyStressMarker(hangul, stress, format) {
|
|
395
417
|
if (!format || stress === "none") return hangul;
|
|
396
|
-
const { first, rest } =
|
|
418
|
+
const { first, rest } = getFirstStressedSyllable(hangul);
|
|
397
419
|
if (stress === "primary") {
|
|
398
420
|
if (format === "markdown") {
|
|
399
421
|
return `**${first}**${rest}`;
|