henkan 2.3.1 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +96 -74
- package/dist/index.cjs.js.map +2 -2
- package/dist/index.mjs +95 -74
- package/dist/index.mjs.map +2 -2
- package/dist/types/constants.d.ts.map +1 -1
- package/dist/types/types.d.ts +2 -2
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +7 -0
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +1 -0
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertJawiktionaryAsync.md +1 -1
- package/docs/api/functions/convertJawiktionarySync.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/convertTanakaCorpusWithFurigana.md +1 -1
- package/docs/api/functions/createEntryMaps.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/generateFurigana.md +33 -0
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getValidForms.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/getWordDefinitions.md +1 -1
- package/docs/api/functions/getWordDefinitionsWithFurigana.md +1 -1
- package/docs/api/interfaces/DefaultNoteInfo.md +4 -4
- package/docs/api/interfaces/Definition.md +4 -4
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/EntryMaps.md +7 -7
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/JaWiktionaryEntry.md +7 -7
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +23 -23
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/NoteHeaderKeys.md +7 -7
- package/docs/api/interfaces/Phrase.md +5 -5
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +7 -7
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +8 -8
- package/docs/api/interfaces/Word.md +15 -15
- package/docs/api/interfaces/WordDefinitionPair.md +4 -4
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictTranslation.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/KanjiEntryMap.md +1 -1
- package/docs/api/type-aliases/KanjiSVGMap.md +1 -1
- package/docs/api/type-aliases/KanjiWordsMap.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/docs/api/type-aliases/WordDefinitionsMap.md +1 -1
- package/docs/api/type-aliases/WordExamplesMap.md +1 -1
- package/docs/api/type-aliases/WordIDEntryMap.md +1 -1
- package/package.json +1 -1
- package/src/constants.ts +2 -1
- package/src/types.ts +2 -4
- package/src/utils.ts +127 -100
package/dist/index.mjs
CHANGED
|
@@ -92,8 +92,9 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
92
92
|
["tsugaru-ben", ["dialect::tsugaru-ben", "Dialect: Tsugaru-ben"]],
|
|
93
93
|
["aichi dialect", ["dialect::aichi", "Dialect: Aichi"]],
|
|
94
94
|
["tochigi dialect", ["dialect::tochigi", "Dialect: Tochigi"]],
|
|
95
|
-
["lit", ["literal_meaning", "Literal meaning"]],
|
|
96
95
|
["expl", ["explanation", "Explanation"]],
|
|
96
|
+
["fig", ["figurative", "Figurative"]],
|
|
97
|
+
["lit", ["literal_meaning", "Literal meaning"]],
|
|
97
98
|
["tm", ["trademark", "Trademark"]],
|
|
98
99
|
["adjective (keiyoushi)", ["adjective::i", "\u3044-adjective", "\u5F62\u5BB9\u8A5E"]],
|
|
99
100
|
["'taru' adjective", ["adjective::taru", "\u305F\u308B-adjective", "\u5F62\u5BB9\u52D5\u8A5E"]],
|
|
@@ -1278,6 +1279,22 @@ function katakanaToHiragana(input) {
|
|
|
1278
1279
|
}
|
|
1279
1280
|
return output.join("").normalize("NFC");
|
|
1280
1281
|
}
|
|
1282
|
+
async function generateFurigana(text, bindedFunction) {
|
|
1283
|
+
if (!text.includes("\u30FB"))
|
|
1284
|
+
return String(
|
|
1285
|
+
await bindedFunction(text, {
|
|
1286
|
+
to: "hiragana",
|
|
1287
|
+
mode: "furigana"
|
|
1288
|
+
})
|
|
1289
|
+
);
|
|
1290
|
+
else
|
|
1291
|
+
return (await Promise.all(
|
|
1292
|
+
text.split("\u30FB").map(async (t) => {
|
|
1293
|
+
const tFurigana = await generateFurigana(t, bindedFunction);
|
|
1294
|
+
return tFurigana;
|
|
1295
|
+
})
|
|
1296
|
+
)).join("");
|
|
1297
|
+
}
|
|
1281
1298
|
function getValidForms(readings, kanjiForms, wordIsCommon) {
|
|
1282
1299
|
const kanjiFormRestrictions = /* @__PURE__ */ new Set();
|
|
1283
1300
|
const validReadings = readings.filter(
|
|
@@ -1373,13 +1390,16 @@ function convertJMdict(xmlString, examples) {
|
|
|
1373
1390
|
const meaningObj = { partOfSpeech: [], translations: [] };
|
|
1374
1391
|
meaningObj.partOfSpeech = meaning.pos;
|
|
1375
1392
|
meaningObj.translations = [];
|
|
1376
|
-
for (const gloss of meaning.gloss)
|
|
1377
|
-
|
|
1378
|
-
|
|
1393
|
+
for (const gloss of meaning.gloss) {
|
|
1394
|
+
const translation = String(gloss._ ?? gloss).trim();
|
|
1395
|
+
const type = typeof gloss.$ === "object" && typeof gloss.$.g_type === "string" ? gloss.$.g_type : void 0;
|
|
1396
|
+
if (translation.length > 0 && type !== void 0 && type.length > 0)
|
|
1379
1397
|
meaningObj.translations.push({
|
|
1380
|
-
translation
|
|
1381
|
-
type
|
|
1398
|
+
translation,
|
|
1399
|
+
type
|
|
1382
1400
|
});
|
|
1401
|
+
else meaningObj.translations.push(translation);
|
|
1402
|
+
}
|
|
1383
1403
|
if (isStringArray(meaning.xref)) meaningObj.references = meaning.xref;
|
|
1384
1404
|
if (isStringArray(meaning.stagk))
|
|
1385
1405
|
meaningObj.kanjiFormRestrictions = meaning.stagk;
|
|
@@ -1509,7 +1529,7 @@ function convertTanakaCorpus(tanakaString) {
|
|
|
1509
1529
|
if (a !== void 0 && b !== void 0 && a.startsWith("A: ") && b.startsWith("B: ")) {
|
|
1510
1530
|
a = a.replace("A: ", "");
|
|
1511
1531
|
b = b.replace("B: ", "");
|
|
1512
|
-
const idMatch = regexps.tanakaID.exec(a)?.groups
|
|
1532
|
+
const idMatch = regexps.tanakaID.exec(a)?.groups.id?.trim();
|
|
1513
1533
|
const idParts = String(idMatch).split("_");
|
|
1514
1534
|
const id = `${Number(idParts[0])}_${Number(idParts[1])}`;
|
|
1515
1535
|
const aParts = a.replace(regexps.tanakaID, "").split(" ");
|
|
@@ -1517,17 +1537,15 @@ function convertTanakaCorpus(tanakaString) {
|
|
|
1517
1537
|
const bParts = [];
|
|
1518
1538
|
for (const part of bRawParts) {
|
|
1519
1539
|
const partMatches = regexps.tanakaPart.exec(part);
|
|
1520
|
-
const baseForm = partMatches?.groups
|
|
1540
|
+
const baseForm = partMatches?.groups.base;
|
|
1521
1541
|
const examplePart = { baseForm };
|
|
1522
|
-
const reading = partMatches?.groups
|
|
1523
|
-
const glossNumber = partMatches?.groups
|
|
1524
|
-
const inflectedForm = partMatches?.groups
|
|
1542
|
+
const reading = partMatches?.groups.reading;
|
|
1543
|
+
const glossNumber = partMatches?.groups.glossnum;
|
|
1544
|
+
const inflectedForm = partMatches?.groups.inflection;
|
|
1525
1545
|
if (reading !== void 0)
|
|
1526
1546
|
if (regexps.tanakaReferenceID.test(reading)) {
|
|
1527
1547
|
const referenceID = regexps.tanakaReferenceID.exec(reading);
|
|
1528
|
-
examplePart.referenceID = `${Number(
|
|
1529
|
-
referenceID?.groups["entryid"]
|
|
1530
|
-
)}`;
|
|
1548
|
+
examplePart.referenceID = `${Number(referenceID?.groups.entryid)}`;
|
|
1531
1549
|
} else examplePart.reading = reading;
|
|
1532
1550
|
if (glossNumber !== void 0)
|
|
1533
1551
|
examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
|
|
@@ -1557,13 +1575,10 @@ async function convertTanakaCorpusWithFurigana(tanakaString) {
|
|
|
1557
1575
|
await kuroshiro.init(new KuromojiAnalyzer());
|
|
1558
1576
|
const convert = kuroshiro.convert.bind(kuroshiro);
|
|
1559
1577
|
for (let i = 0; i < tanakaArray.length; i++)
|
|
1560
|
-
|
|
1561
|
-
tanakaArray[i].
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
mode: "furigana"
|
|
1565
|
-
})
|
|
1566
|
-
);
|
|
1578
|
+
tanakaArray[i].furigana = await generateFurigana(
|
|
1579
|
+
tanakaArray[i].phrase,
|
|
1580
|
+
convert
|
|
1581
|
+
);
|
|
1567
1582
|
return tanakaArray;
|
|
1568
1583
|
}
|
|
1569
1584
|
function convertRadkFile(radkBuffer, kanjiDic) {
|
|
@@ -1766,17 +1781,22 @@ function createEntryMaps(jmDict, kanjiDic, tanakaExamples, wordDefinitionPairs,
|
|
|
1766
1781
|
};
|
|
1767
1782
|
}
|
|
1768
1783
|
function mapEntry(entry) {
|
|
1784
|
+
const senses = Array.from(
|
|
1785
|
+
entry.senses.filter(
|
|
1786
|
+
(sense) => isObjectArray(sense.form_of) && sense.form_of.every((form) => typeof form.word === "string") === true || isStringArray(sense.glosses)
|
|
1787
|
+
)
|
|
1788
|
+
);
|
|
1769
1789
|
return {
|
|
1770
1790
|
word: entry.word,
|
|
1771
1791
|
pos_title: entry.pos_title,
|
|
1772
|
-
senses
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
}
|
|
1792
|
+
...senses.length > 0 ? {
|
|
1793
|
+
senses: entry.senses.map((sense) => ({
|
|
1794
|
+
...isObjectArray(sense.form_of) ? {
|
|
1795
|
+
form_of: sense.form_of.map((form) => String(form.word))
|
|
1796
|
+
} : {},
|
|
1797
|
+
glosses: sense.glosses
|
|
1798
|
+
}))
|
|
1799
|
+
} : {},
|
|
1780
1800
|
...isObjectArray(entry.forms) && entry.forms.every((form) => typeof form.form === "string") === true ? { forms: entry.forms.map((form) => String(form.form)) } : {}
|
|
1781
1801
|
};
|
|
1782
1802
|
}
|
|
@@ -1809,15 +1829,16 @@ async function convertJawiktionaryAsync(stream) {
|
|
|
1809
1829
|
return entries;
|
|
1810
1830
|
}
|
|
1811
1831
|
function parseEntry(entry, definitions, definitionMap) {
|
|
1812
|
-
|
|
1813
|
-
const
|
|
1814
|
-
|
|
1815
|
-
if (!
|
|
1816
|
-
definitionMap.
|
|
1817
|
-
|
|
1818
|
-
|
|
1832
|
+
if (entry.senses !== void 0)
|
|
1833
|
+
for (const sense of entry.senses) {
|
|
1834
|
+
const definition = sense.glosses.join("");
|
|
1835
|
+
if (!definitions.some((def) => def.definition === definition)) {
|
|
1836
|
+
if (!definitionMap.has(definition))
|
|
1837
|
+
definitionMap.set(definition, { count: 1 });
|
|
1838
|
+
else definitionMap.get(definition).count++;
|
|
1839
|
+
definitions.push({ definition });
|
|
1840
|
+
}
|
|
1819
1841
|
}
|
|
1820
|
-
}
|
|
1821
1842
|
}
|
|
1822
1843
|
function getWordDefinitions(wiktionaryEntries, jmDict) {
|
|
1823
1844
|
const entries = /* @__PURE__ */ new Map();
|
|
@@ -1866,19 +1887,20 @@ function getWordDefinitions(wiktionaryEntries, jmDict) {
|
|
|
1866
1887
|
let valid = false;
|
|
1867
1888
|
if (validKanjiForms.has(entry.word)) {
|
|
1868
1889
|
valid = true;
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1890
|
+
if (entry.senses !== void 0)
|
|
1891
|
+
for (const sense of entry.senses) {
|
|
1892
|
+
if (sense.form_of !== void 0 && sense.form_of.some((form) => validForms.has(form)))
|
|
1893
|
+
validFormOfEntries.add(entry.word);
|
|
1894
|
+
for (const gloss of sense.glosses) {
|
|
1895
|
+
let hasForm = false;
|
|
1896
|
+
for (const r of validForms)
|
|
1897
|
+
if (gloss.includes(r)) {
|
|
1898
|
+
hasForm = true;
|
|
1899
|
+
break;
|
|
1900
|
+
}
|
|
1901
|
+
if (hasForm) validGlossesEntries.add(entry.word);
|
|
1902
|
+
}
|
|
1880
1903
|
}
|
|
1881
|
-
}
|
|
1882
1904
|
if (entry.forms !== void 0) {
|
|
1883
1905
|
for (const form of entry.forms)
|
|
1884
1906
|
if (validForms.has(form)) validFormsEntries.add(entry.word);
|
|
@@ -2084,25 +2106,26 @@ function getWordDefinitions(wiktionaryEntries, jmDict) {
|
|
|
2084
2106
|
const hasForms = ent.forms !== void 0 && ent.forms.some((form) => pair.forms.has(form));
|
|
2085
2107
|
if (pair.kanjiForms !== void 0 && pair.kanjiForms.has(ent.word)) {
|
|
2086
2108
|
kanjiFormEntries.push(ent);
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2109
|
+
if (ent.senses !== void 0)
|
|
2110
|
+
for (const sense of ent.senses) {
|
|
2111
|
+
if (hasValidFormOf && sense.form_of !== void 0) {
|
|
2112
|
+
for (const form of sense.form_of)
|
|
2113
|
+
if (pair.forms.has(form)) {
|
|
2114
|
+
const elem = titleFormMap.get(form);
|
|
2115
|
+
if (elem === void 0)
|
|
2116
|
+
titleFormMap.set(form, /* @__PURE__ */ new Set([ent.word]));
|
|
2117
|
+
else elem.add(ent.word);
|
|
2118
|
+
}
|
|
2119
|
+
}
|
|
2120
|
+
for (const gloss of sense.glosses)
|
|
2121
|
+
for (const f of pair.forms)
|
|
2122
|
+
if (gloss.includes(f)) {
|
|
2123
|
+
const elem = refsMap.get(f);
|
|
2124
|
+
if (elem === void 0)
|
|
2125
|
+
refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
|
|
2126
|
+
else elem.add(ent.word);
|
|
2127
|
+
}
|
|
2096
2128
|
}
|
|
2097
|
-
for (const gloss of sense.glosses)
|
|
2098
|
-
for (const f of pair.forms)
|
|
2099
|
-
if (gloss.includes(f)) {
|
|
2100
|
-
const elem = refsMap.get(f);
|
|
2101
|
-
if (elem === void 0)
|
|
2102
|
-
refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
|
|
2103
|
-
else elem.add(ent.word);
|
|
2104
|
-
}
|
|
2105
|
-
}
|
|
2106
2129
|
if (hasValidForms && ent.forms !== void 0) {
|
|
2107
2130
|
for (const form of ent.forms)
|
|
2108
2131
|
if (pair.forms.has(form)) forms.add(form);
|
|
@@ -2183,13 +2206,10 @@ async function getWordDefinitionsWithFurigana(entryList, jmDict) {
|
|
|
2183
2206
|
for (let i = 0; i < japaneseDefinitions.length; i++) {
|
|
2184
2207
|
const pair = japaneseDefinitions[i];
|
|
2185
2208
|
for (let j = 0; j < pair.definitions.length; j++)
|
|
2186
|
-
|
|
2187
|
-
pair.definitions[j].
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
mode: "furigana"
|
|
2191
|
-
})
|
|
2192
|
-
);
|
|
2209
|
+
pair.definitions[j].furigana = await generateFurigana(
|
|
2210
|
+
pair.definitions[j].definition,
|
|
2211
|
+
convert
|
|
2212
|
+
);
|
|
2193
2213
|
japaneseDefinitions[i] = pair;
|
|
2194
2214
|
}
|
|
2195
2215
|
return japaneseDefinitions;
|
|
@@ -2933,6 +2953,7 @@ export {
|
|
|
2933
2953
|
createEntryMaps,
|
|
2934
2954
|
generateAnkiNote,
|
|
2935
2955
|
generateAnkiNotesFile,
|
|
2956
|
+
generateFurigana,
|
|
2936
2957
|
getKanji,
|
|
2937
2958
|
getKanjiExtended,
|
|
2938
2959
|
getValidForms,
|