henkan 2.3.1 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +96 -74
- package/dist/index.cjs.js.map +2 -2
- package/dist/index.mjs +95 -74
- package/dist/index.mjs.map +2 -2
- package/dist/types/constants.d.ts.map +1 -1
- package/dist/types/types.d.ts +2 -2
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +7 -0
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +1 -0
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertJawiktionaryAsync.md +1 -1
- package/docs/api/functions/convertJawiktionarySync.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/convertTanakaCorpusWithFurigana.md +1 -1
- package/docs/api/functions/createEntryMaps.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/generateFurigana.md +33 -0
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getValidForms.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/getWordDefinitions.md +1 -1
- package/docs/api/functions/getWordDefinitionsWithFurigana.md +1 -1
- package/docs/api/interfaces/DefaultNoteInfo.md +4 -4
- package/docs/api/interfaces/Definition.md +4 -4
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/EntryMaps.md +7 -7
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/JaWiktionaryEntry.md +7 -7
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +23 -23
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/NoteHeaderKeys.md +7 -7
- package/docs/api/interfaces/Phrase.md +5 -5
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +7 -7
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +8 -8
- package/docs/api/interfaces/Word.md +15 -15
- package/docs/api/interfaces/WordDefinitionPair.md +4 -4
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictTranslation.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/KanjiEntryMap.md +1 -1
- package/docs/api/type-aliases/KanjiSVGMap.md +1 -1
- package/docs/api/type-aliases/KanjiWordsMap.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/docs/api/type-aliases/WordDefinitionsMap.md +1 -1
- package/docs/api/type-aliases/WordExamplesMap.md +1 -1
- package/docs/api/type-aliases/WordIDEntryMap.md +1 -1
- package/package.json +1 -1
- package/src/constants.ts +2 -1
- package/src/types.ts +2 -4
- package/src/utils.ts +127 -100
package/dist/index.cjs.js
CHANGED
|
@@ -42,6 +42,7 @@ __export(index_exports, {
|
|
|
42
42
|
createEntryMaps: () => createEntryMaps,
|
|
43
43
|
generateAnkiNote: () => generateAnkiNote,
|
|
44
44
|
generateAnkiNotesFile: () => generateAnkiNotesFile,
|
|
45
|
+
generateFurigana: () => generateFurigana,
|
|
45
46
|
getKanji: () => getKanji,
|
|
46
47
|
getKanjiExtended: () => getKanjiExtended,
|
|
47
48
|
getValidForms: () => getValidForms,
|
|
@@ -154,8 +155,9 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
154
155
|
["tsugaru-ben", ["dialect::tsugaru-ben", "Dialect: Tsugaru-ben"]],
|
|
155
156
|
["aichi dialect", ["dialect::aichi", "Dialect: Aichi"]],
|
|
156
157
|
["tochigi dialect", ["dialect::tochigi", "Dialect: Tochigi"]],
|
|
157
|
-
["lit", ["literal_meaning", "Literal meaning"]],
|
|
158
158
|
["expl", ["explanation", "Explanation"]],
|
|
159
|
+
["fig", ["figurative", "Figurative"]],
|
|
160
|
+
["lit", ["literal_meaning", "Literal meaning"]],
|
|
159
161
|
["tm", ["trademark", "Trademark"]],
|
|
160
162
|
["adjective (keiyoushi)", ["adjective::i", "\u3044-adjective", "\u5F62\u5BB9\u8A5E"]],
|
|
161
163
|
["'taru' adjective", ["adjective::taru", "\u305F\u308B-adjective", "\u5F62\u5BB9\u52D5\u8A5E"]],
|
|
@@ -1340,6 +1342,22 @@ function katakanaToHiragana(input) {
|
|
|
1340
1342
|
}
|
|
1341
1343
|
return output.join("").normalize("NFC");
|
|
1342
1344
|
}
|
|
1345
|
+
async function generateFurigana(text, bindedFunction) {
|
|
1346
|
+
if (!text.includes("\u30FB"))
|
|
1347
|
+
return String(
|
|
1348
|
+
await bindedFunction(text, {
|
|
1349
|
+
to: "hiragana",
|
|
1350
|
+
mode: "furigana"
|
|
1351
|
+
})
|
|
1352
|
+
);
|
|
1353
|
+
else
|
|
1354
|
+
return (await Promise.all(
|
|
1355
|
+
text.split("\u30FB").map(async (t) => {
|
|
1356
|
+
const tFurigana = await generateFurigana(t, bindedFunction);
|
|
1357
|
+
return tFurigana;
|
|
1358
|
+
})
|
|
1359
|
+
)).join("");
|
|
1360
|
+
}
|
|
1343
1361
|
function getValidForms(readings, kanjiForms, wordIsCommon) {
|
|
1344
1362
|
const kanjiFormRestrictions = /* @__PURE__ */ new Set();
|
|
1345
1363
|
const validReadings = readings.filter(
|
|
@@ -1435,13 +1453,16 @@ function convertJMdict(xmlString, examples) {
|
|
|
1435
1453
|
const meaningObj = { partOfSpeech: [], translations: [] };
|
|
1436
1454
|
meaningObj.partOfSpeech = meaning.pos;
|
|
1437
1455
|
meaningObj.translations = [];
|
|
1438
|
-
for (const gloss of meaning.gloss)
|
|
1439
|
-
|
|
1440
|
-
|
|
1456
|
+
for (const gloss of meaning.gloss) {
|
|
1457
|
+
const translation = String(gloss._ ?? gloss).trim();
|
|
1458
|
+
const type = typeof gloss.$ === "object" && typeof gloss.$.g_type === "string" ? gloss.$.g_type : void 0;
|
|
1459
|
+
if (translation.length > 0 && type !== void 0 && type.length > 0)
|
|
1441
1460
|
meaningObj.translations.push({
|
|
1442
|
-
translation
|
|
1443
|
-
type
|
|
1461
|
+
translation,
|
|
1462
|
+
type
|
|
1444
1463
|
});
|
|
1464
|
+
else meaningObj.translations.push(translation);
|
|
1465
|
+
}
|
|
1445
1466
|
if (isStringArray(meaning.xref)) meaningObj.references = meaning.xref;
|
|
1446
1467
|
if (isStringArray(meaning.stagk))
|
|
1447
1468
|
meaningObj.kanjiFormRestrictions = meaning.stagk;
|
|
@@ -1571,7 +1592,7 @@ function convertTanakaCorpus(tanakaString) {
|
|
|
1571
1592
|
if (a !== void 0 && b !== void 0 && a.startsWith("A: ") && b.startsWith("B: ")) {
|
|
1572
1593
|
a = a.replace("A: ", "");
|
|
1573
1594
|
b = b.replace("B: ", "");
|
|
1574
|
-
const idMatch = regexps.tanakaID.exec(a)?.groups
|
|
1595
|
+
const idMatch = regexps.tanakaID.exec(a)?.groups.id?.trim();
|
|
1575
1596
|
const idParts = String(idMatch).split("_");
|
|
1576
1597
|
const id = `${Number(idParts[0])}_${Number(idParts[1])}`;
|
|
1577
1598
|
const aParts = a.replace(regexps.tanakaID, "").split(" ");
|
|
@@ -1579,17 +1600,15 @@ function convertTanakaCorpus(tanakaString) {
|
|
|
1579
1600
|
const bParts = [];
|
|
1580
1601
|
for (const part of bRawParts) {
|
|
1581
1602
|
const partMatches = regexps.tanakaPart.exec(part);
|
|
1582
|
-
const baseForm = partMatches?.groups
|
|
1603
|
+
const baseForm = partMatches?.groups.base;
|
|
1583
1604
|
const examplePart = { baseForm };
|
|
1584
|
-
const reading = partMatches?.groups
|
|
1585
|
-
const glossNumber = partMatches?.groups
|
|
1586
|
-
const inflectedForm = partMatches?.groups
|
|
1605
|
+
const reading = partMatches?.groups.reading;
|
|
1606
|
+
const glossNumber = partMatches?.groups.glossnum;
|
|
1607
|
+
const inflectedForm = partMatches?.groups.inflection;
|
|
1587
1608
|
if (reading !== void 0)
|
|
1588
1609
|
if (regexps.tanakaReferenceID.test(reading)) {
|
|
1589
1610
|
const referenceID = regexps.tanakaReferenceID.exec(reading);
|
|
1590
|
-
examplePart.referenceID = `${Number(
|
|
1591
|
-
referenceID?.groups["entryid"]
|
|
1592
|
-
)}`;
|
|
1611
|
+
examplePart.referenceID = `${Number(referenceID?.groups.entryid)}`;
|
|
1593
1612
|
} else examplePart.reading = reading;
|
|
1594
1613
|
if (glossNumber !== void 0)
|
|
1595
1614
|
examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
|
|
@@ -1619,13 +1638,10 @@ async function convertTanakaCorpusWithFurigana(tanakaString) {
|
|
|
1619
1638
|
await kuroshiro.init(new KuromojiAnalyzer());
|
|
1620
1639
|
const convert = kuroshiro.convert.bind(kuroshiro);
|
|
1621
1640
|
for (let i = 0; i < tanakaArray.length; i++)
|
|
1622
|
-
|
|
1623
|
-
tanakaArray[i].
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
mode: "furigana"
|
|
1627
|
-
})
|
|
1628
|
-
);
|
|
1641
|
+
tanakaArray[i].furigana = await generateFurigana(
|
|
1642
|
+
tanakaArray[i].phrase,
|
|
1643
|
+
convert
|
|
1644
|
+
);
|
|
1629
1645
|
return tanakaArray;
|
|
1630
1646
|
}
|
|
1631
1647
|
function convertRadkFile(radkBuffer, kanjiDic) {
|
|
@@ -1828,17 +1844,22 @@ function createEntryMaps(jmDict, kanjiDic, tanakaExamples, wordDefinitionPairs,
|
|
|
1828
1844
|
};
|
|
1829
1845
|
}
|
|
1830
1846
|
function mapEntry(entry) {
|
|
1847
|
+
const senses = Array.from(
|
|
1848
|
+
entry.senses.filter(
|
|
1849
|
+
(sense) => isObjectArray(sense.form_of) && sense.form_of.every((form) => typeof form.word === "string") === true || isStringArray(sense.glosses)
|
|
1850
|
+
)
|
|
1851
|
+
);
|
|
1831
1852
|
return {
|
|
1832
1853
|
word: entry.word,
|
|
1833
1854
|
pos_title: entry.pos_title,
|
|
1834
|
-
senses
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
}
|
|
1855
|
+
...senses.length > 0 ? {
|
|
1856
|
+
senses: entry.senses.map((sense) => ({
|
|
1857
|
+
...isObjectArray(sense.form_of) ? {
|
|
1858
|
+
form_of: sense.form_of.map((form) => String(form.word))
|
|
1859
|
+
} : {},
|
|
1860
|
+
glosses: sense.glosses
|
|
1861
|
+
}))
|
|
1862
|
+
} : {},
|
|
1842
1863
|
...isObjectArray(entry.forms) && entry.forms.every((form) => typeof form.form === "string") === true ? { forms: entry.forms.map((form) => String(form.form)) } : {}
|
|
1843
1864
|
};
|
|
1844
1865
|
}
|
|
@@ -1871,15 +1892,16 @@ async function convertJawiktionaryAsync(stream) {
|
|
|
1871
1892
|
return entries;
|
|
1872
1893
|
}
|
|
1873
1894
|
function parseEntry(entry, definitions, definitionMap) {
|
|
1874
|
-
|
|
1875
|
-
const
|
|
1876
|
-
|
|
1877
|
-
if (!
|
|
1878
|
-
definitionMap.
|
|
1879
|
-
|
|
1880
|
-
|
|
1895
|
+
if (entry.senses !== void 0)
|
|
1896
|
+
for (const sense of entry.senses) {
|
|
1897
|
+
const definition = sense.glosses.join("");
|
|
1898
|
+
if (!definitions.some((def) => def.definition === definition)) {
|
|
1899
|
+
if (!definitionMap.has(definition))
|
|
1900
|
+
definitionMap.set(definition, { count: 1 });
|
|
1901
|
+
else definitionMap.get(definition).count++;
|
|
1902
|
+
definitions.push({ definition });
|
|
1903
|
+
}
|
|
1881
1904
|
}
|
|
1882
|
-
}
|
|
1883
1905
|
}
|
|
1884
1906
|
function getWordDefinitions(wiktionaryEntries, jmDict) {
|
|
1885
1907
|
const entries = /* @__PURE__ */ new Map();
|
|
@@ -1928,19 +1950,20 @@ function getWordDefinitions(wiktionaryEntries, jmDict) {
|
|
|
1928
1950
|
let valid = false;
|
|
1929
1951
|
if (validKanjiForms.has(entry.word)) {
|
|
1930
1952
|
valid = true;
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1953
|
+
if (entry.senses !== void 0)
|
|
1954
|
+
for (const sense of entry.senses) {
|
|
1955
|
+
if (sense.form_of !== void 0 && sense.form_of.some((form) => validForms.has(form)))
|
|
1956
|
+
validFormOfEntries.add(entry.word);
|
|
1957
|
+
for (const gloss of sense.glosses) {
|
|
1958
|
+
let hasForm = false;
|
|
1959
|
+
for (const r of validForms)
|
|
1960
|
+
if (gloss.includes(r)) {
|
|
1961
|
+
hasForm = true;
|
|
1962
|
+
break;
|
|
1963
|
+
}
|
|
1964
|
+
if (hasForm) validGlossesEntries.add(entry.word);
|
|
1965
|
+
}
|
|
1942
1966
|
}
|
|
1943
|
-
}
|
|
1944
1967
|
if (entry.forms !== void 0) {
|
|
1945
1968
|
for (const form of entry.forms)
|
|
1946
1969
|
if (validForms.has(form)) validFormsEntries.add(entry.word);
|
|
@@ -2146,25 +2169,26 @@ function getWordDefinitions(wiktionaryEntries, jmDict) {
|
|
|
2146
2169
|
const hasForms = ent.forms !== void 0 && ent.forms.some((form) => pair.forms.has(form));
|
|
2147
2170
|
if (pair.kanjiForms !== void 0 && pair.kanjiForms.has(ent.word)) {
|
|
2148
2171
|
kanjiFormEntries.push(ent);
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
|
|
2172
|
+
if (ent.senses !== void 0)
|
|
2173
|
+
for (const sense of ent.senses) {
|
|
2174
|
+
if (hasValidFormOf && sense.form_of !== void 0) {
|
|
2175
|
+
for (const form of sense.form_of)
|
|
2176
|
+
if (pair.forms.has(form)) {
|
|
2177
|
+
const elem = titleFormMap.get(form);
|
|
2178
|
+
if (elem === void 0)
|
|
2179
|
+
titleFormMap.set(form, /* @__PURE__ */ new Set([ent.word]));
|
|
2180
|
+
else elem.add(ent.word);
|
|
2181
|
+
}
|
|
2182
|
+
}
|
|
2183
|
+
for (const gloss of sense.glosses)
|
|
2184
|
+
for (const f of pair.forms)
|
|
2185
|
+
if (gloss.includes(f)) {
|
|
2186
|
+
const elem = refsMap.get(f);
|
|
2187
|
+
if (elem === void 0)
|
|
2188
|
+
refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
|
|
2189
|
+
else elem.add(ent.word);
|
|
2190
|
+
}
|
|
2158
2191
|
}
|
|
2159
|
-
for (const gloss of sense.glosses)
|
|
2160
|
-
for (const f of pair.forms)
|
|
2161
|
-
if (gloss.includes(f)) {
|
|
2162
|
-
const elem = refsMap.get(f);
|
|
2163
|
-
if (elem === void 0)
|
|
2164
|
-
refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
|
|
2165
|
-
else elem.add(ent.word);
|
|
2166
|
-
}
|
|
2167
|
-
}
|
|
2168
2192
|
if (hasValidForms && ent.forms !== void 0) {
|
|
2169
2193
|
for (const form of ent.forms)
|
|
2170
2194
|
if (pair.forms.has(form)) forms.add(form);
|
|
@@ -2245,13 +2269,10 @@ async function getWordDefinitionsWithFurigana(entryList, jmDict) {
|
|
|
2245
2269
|
for (let i = 0; i < japaneseDefinitions.length; i++) {
|
|
2246
2270
|
const pair = japaneseDefinitions[i];
|
|
2247
2271
|
for (let j = 0; j < pair.definitions.length; j++)
|
|
2248
|
-
|
|
2249
|
-
pair.definitions[j].
|
|
2250
|
-
|
|
2251
|
-
|
|
2252
|
-
mode: "furigana"
|
|
2253
|
-
})
|
|
2254
|
-
);
|
|
2272
|
+
pair.definitions[j].furigana = await generateFurigana(
|
|
2273
|
+
pair.definitions[j].definition,
|
|
2274
|
+
convert
|
|
2275
|
+
);
|
|
2255
2276
|
japaneseDefinitions[i] = pair;
|
|
2256
2277
|
}
|
|
2257
2278
|
return japaneseDefinitions;
|
|
@@ -2996,6 +3017,7 @@ ${ankiNotes}`;
|
|
|
2996
3017
|
createEntryMaps,
|
|
2997
3018
|
generateAnkiNote,
|
|
2998
3019
|
generateAnkiNotesFile,
|
|
3020
|
+
generateFurigana,
|
|
2999
3021
|
getKanji,
|
|
3000
3022
|
getKanjiExtended,
|
|
3001
3023
|
getValidForms,
|