henkan 0.9.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +0 -0
- package/LICENSE.md +0 -0
- package/README.md +1 -1
- package/SECURITY.md +0 -0
- package/dist/index.cjs.js +358 -53
- package/dist/index.cjs.js.map +2 -2
- package/dist/index.mjs +353 -53
- package/dist/index.mjs.map +2 -2
- package/dist/types/types.d.ts +43 -5
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +32 -11
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +5 -0
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertJawiktionary.md +29 -0
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +2 -2
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +4 -4
- package/docs/api/functions/getKanji.md +12 -6
- package/docs/api/functions/getKanjiExtended.md +17 -11
- package/docs/api/functions/getWord.md +8 -2
- package/docs/api/functions/getWordDefinitions.md +45 -0
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +1 -1
- package/docs/api/interfaces/Definition.md +41 -0
- package/docs/api/interfaces/DictKanji.md +7 -7
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +12 -12
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +5 -5
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +7 -7
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +8 -8
- package/docs/api/interfaces/Word.md +24 -14
- package/docs/api/interfaces/WordDefinitionPair.md +31 -0
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/DictTranslation.md +13 -0
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +16 -9
package/CONTRIBUTING.md
CHANGED
|
File without changes
|
package/LICENSE.md
CHANGED
|
File without changes
|
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@ A Node.JS library that helps you convert any dictionary from [EDRDG](https://www
|
|
|
4
4
|
|
|
5
5
|
> The library is used for the [Full Japanese Study Deck](https://ankiweb.net/shared/info/1407096987), so some features might be tailored specifically to the deck.
|
|
6
6
|
|
|
7
|
-
[](https://www.npmjs.com/package/henkan) []()
|
|
7
|
+
[](https://www.npmjs.com/package/henkan) [](https://github.com/Ronokof/Henkan/blob/main/LICENSE.md)  [](https://codecov.io/gh/Ronokof/Henkan)
|
|
8
8
|
|
|
9
9
|
---
|
|
10
10
|
|
package/SECURITY.md
CHANGED
|
File without changes
|
package/dist/index.cjs.js
CHANGED
|
@@ -32,6 +32,7 @@ var index_exports = {};
|
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
capitalizeString: () => capitalizeString,
|
|
34
34
|
convertJMdict: () => convertJMdict,
|
|
35
|
+
convertJawiktionary: () => convertJawiktionary,
|
|
35
36
|
convertKanjiDic: () => convertKanjiDic,
|
|
36
37
|
convertKradFile: () => convertKradFile,
|
|
37
38
|
convertRadkFile: () => convertRadkFile,
|
|
@@ -41,6 +42,7 @@ __export(index_exports, {
|
|
|
41
42
|
getKanji: () => getKanji,
|
|
42
43
|
getKanjiExtended: () => getKanjiExtended,
|
|
43
44
|
getWord: () => getWord,
|
|
45
|
+
getWordDefinitions: () => getWordDefinitions,
|
|
44
46
|
isGrammar: () => isGrammar,
|
|
45
47
|
isKana: () => isKana,
|
|
46
48
|
isKanji: () => isKanji,
|
|
@@ -1135,6 +1137,7 @@ var import_libxmljs2 = __toESM(require("libxmljs2"));
|
|
|
1135
1137
|
var import_xml2js = __toESM(require("xml2js"));
|
|
1136
1138
|
var import_iconv_lite = __toESM(require("iconv-lite"));
|
|
1137
1139
|
var import_client_polly = require("@aws-sdk/client-polly");
|
|
1140
|
+
var import_readline = require("readline");
|
|
1138
1141
|
var Kuroshiro = require("kuroshiro");
|
|
1139
1142
|
var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
|
|
1140
1143
|
function capitalizeString(value) {
|
|
@@ -1150,6 +1153,7 @@ function isStringArray(arg) {
|
|
|
1150
1153
|
return arg !== null && arg !== void 0 && Array.isArray(arg) && arg.every((element) => typeof element === "string");
|
|
1151
1154
|
}
|
|
1152
1155
|
function shuffleArray(arr) {
|
|
1156
|
+
if (arr.length < 2) return arr;
|
|
1153
1157
|
const a = arr.slice();
|
|
1154
1158
|
for (let i = a.length - 1; i > 0; i--) {
|
|
1155
1159
|
const j = Math.floor(Math.random() * (i + 1));
|
|
@@ -1265,7 +1269,7 @@ function convertJMdict(xmlString, examples) {
|
|
|
1265
1269
|
}
|
|
1266
1270
|
if (isStringArray(meaning.dial))
|
|
1267
1271
|
meaningObj.dialects = meaning.dial;
|
|
1268
|
-
if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0 || meaningObj.translations
|
|
1272
|
+
if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0 || meaningObj.translations)
|
|
1269
1273
|
entryObj.meanings.push(meaningObj);
|
|
1270
1274
|
}
|
|
1271
1275
|
if (entryObj.meanings.length === usuallyInKanaMeanings)
|
|
@@ -1276,7 +1280,7 @@ function convertJMdict(xmlString, examples) {
|
|
|
1276
1280
|
entryObj.readings.filter(
|
|
1277
1281
|
(reading) => reading.notes === void 0 || !reading.notes.some(
|
|
1278
1282
|
(note) => notSearchedForms.has(note)
|
|
1279
|
-
) || reading.commonness
|
|
1283
|
+
) || reading.commonness
|
|
1280
1284
|
).map((reading) => reading.reading)
|
|
1281
1285
|
);
|
|
1282
1286
|
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
@@ -1377,9 +1381,13 @@ function convertKanjiDic(xmlString) {
|
|
|
1377
1381
|
rmObj.groups.push(groupObj);
|
|
1378
1382
|
}
|
|
1379
1383
|
if (isStringArray(rm.nanori)) rmObj.nanori = rm.nanori;
|
|
1380
|
-
if (rmObj.groups.length > 0
|
|
1384
|
+
if (rmObj.groups.length > 0 && kanjiObj.readingMeaning)
|
|
1385
|
+
kanjiObj.readingMeaning.push(rmObj);
|
|
1381
1386
|
}
|
|
1382
|
-
if (kanjiObj.
|
|
1387
|
+
if (kanjiObj.readingMeaning && kanjiObj.readingMeaning.length === 0)
|
|
1388
|
+
delete kanjiObj.readingMeaning;
|
|
1389
|
+
if (kanjiObj.kanji.length === 1 && kanjiObj.misc && kanjiObj.misc.strokeNumber.length > 0)
|
|
1390
|
+
dict.push(kanjiObj);
|
|
1383
1391
|
}
|
|
1384
1392
|
});
|
|
1385
1393
|
return dict;
|
|
@@ -1483,7 +1491,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1483
1491
|
(dictKanji) => dictKanji.kanji === kanji
|
|
1484
1492
|
);
|
|
1485
1493
|
if (foundKanji) kanjiList.push(foundKanji);
|
|
1486
|
-
else kanjiList.push({ kanji
|
|
1494
|
+
else kanjiList.push({ kanji });
|
|
1487
1495
|
}
|
|
1488
1496
|
j++;
|
|
1489
1497
|
kanjiLine = fileParsed[j];
|
|
@@ -1491,7 +1499,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1491
1499
|
if (kanjiLine.startsWith("$ ")) i = j - 1;
|
|
1492
1500
|
}
|
|
1493
1501
|
if (kanjiList.length > 0) radical.kanji = kanjiList;
|
|
1494
|
-
if (radical.radical.length
|
|
1502
|
+
if (radical.radical.length === 1 && radical.strokes.length > 0)
|
|
1495
1503
|
radicals.push(radical);
|
|
1496
1504
|
}
|
|
1497
1505
|
}
|
|
@@ -1552,6 +1560,265 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
|
|
|
1552
1560
|
throw err;
|
|
1553
1561
|
}
|
|
1554
1562
|
}
|
|
1563
|
+
/**
 * Reads a JSON Lines stream and collects the Japanese-language entries.
 *
 * Every line is parsed as JSON. A parsed value is kept only when it is an
 * object whose `lang_code` is `"ja"` and whose `lang` is `"日本語"`.
 *
 * @param {import("stream").Readable} stream Readable source of the JSONL text
 * (e.g. the result of `fs.createReadStream`). It is destroyed before the
 * returned promise settles, on success and on failure alike.
 * @returns {Promise<object[]>} The matching entries, in file order.
 * @throws {Error} `Invalid JSONL at line N: …` when a line is not valid JSON.
 */
async function convertJawiktionary(stream) {
  const rl = (0, import_readline.createInterface)({
    input: stream,
    // Treat "\r\n" as a single line break regardless of chunk timing.
    crlfDelay: Infinity
  });
  const entries = [];
  let lineNumber = 0;
  try {
    for await (const line of rl) {
      lineNumber++;
      let obj;
      try {
        obj = JSON.parse(line.trim());
      } catch (err) {
        throw new Error(
          `Invalid JSONL at line ${lineNumber}: ${err.message}`
        );
      }
      if (obj !== null && typeof obj === "object" && obj.lang_code === "ja" && obj.lang === "\u65E5\u672C\u8A9E")
        entries.push(obj);
    }
    return entries;
  } finally {
    // Release the reader and the underlying source on every exit path, so a
    // malformed line does not leave the stream open.
    rl.close();
    // `destroy()` is available on every Readable, whereas `close()` is not
    // (calling it on a generic Readable would throw a TypeError).
    if (typeof stream.destroy === "function") stream.destroy();
  }
}
|
|
1597
|
+
/**
 * Appends the definitions found in one dictionary entry to `definitions`.
 *
 * Each sense whose `glosses` is a string array yields one definition: the
 * glosses joined with `<br>`. A definition already present in `definitions`
 * is skipped; otherwise its counter in `definitionMap` is created (at 1) or
 * incremented, and the definition is appended.
 *
 * @param {object} entry Entry whose `senses` are inspected.
 * @param {{definition: string}[]} definitions Per-word list, mutated in place.
 * @param {Map<string, {count: number}>} definitionMap Shared usage counters,
 * mutated in place.
 * @returns {void}
 */
function parseEntry(entry, definitions, definitionMap) {
  if (!isValidArray(entry.senses)) return;
  for (const sense of entry.senses) {
    if (!isStringArray(sense.glosses)) continue;
    const text = sense.glosses.join("<br>");
    const alreadyListed = definitions.some((known) => known.definition === text);
    if (alreadyListed) continue;
    if (definitionMap.has(text)) definitionMap.get(text).count++;
    else definitionMap.set(text, { count: 1 });
    definitions.push({ definition: text });
  }
}
|
|
1611
|
+
/**
 * Pairs dictionary words with the Japanese definitions found in `entries`.
 *
 * The work happens in four passes:
 *  1. Collect every searchable reading / kanji form from `jmDict` and convert
 *     the words that contribute at least one new form with `getWord`.
 *  2. Classify each entry by how its title and forms relate to those sets.
 *  3. For every converted word, pick the entries that belong to it and feed
 *     them to `parseEntry`, which fills the word's definition list.
 *  4. Flag definitions used by more than one word (`mayNotBeAccurate`) and,
 *     when `generateFurigana === true`, attach furigana produced by Kuroshiro.
 *
 * @param {object[]} entries Entries as returned by `convertJawiktionary`.
 * @param {object[]} jmDict Dictionary words (with `readings`, optional `kanjiForms`).
 * @param {object[]} kanjiDic Kanji dictionary, forwarded to `getWord`.
 * @param {boolean} [generateFurigana] Generate furigana only when exactly `true`.
 * @returns {Promise<{wordID: *, definitions: object[]}[]>} One pair per word
 * that received at least one definition.
 */
async function getWordDefinitions(entries, jmDict, kanjiDic, generateFurigana) {
  // True when none of the notes marks the form as one that is not searched.
  const hasSearchableNotes = (notes) => notes === void 0 || !notes.some((note) => notSearchedForms.has(note));
  // Returns the first reading mentioned by a cross-reference gloss (one that
  // contains "漢字表記。" or "参照。"), or undefined when there is none.
  const findReferencedReading = (gloss, readings) => {
    // Non-string glosses cannot reference a reading (and have no `trim`).
    if (typeof gloss !== "string") return void 0;
    const text = gloss.trim();
    if (!text.includes("\u6F22\u5B57\u8868\u8A18\u3002") && !text.includes("\u53C2\u7167\u3002"))
      return void 0;
    for (const reading of readings) if (text.includes(reading)) return reading;
    return void 0;
  };
  // Adds `value` to the Set stored under `key`, creating the Set on first use.
  const addToSetMap = (map, key, value) => {
    const existing = map.get(key);
    if (existing) existing.add(value);
    else map.set(key, /* @__PURE__ */ new Set([value]));
  };

  const japaneseDefinitions = [];
  const definitionMap = /* @__PURE__ */ new Map();

  // Pass 1: global sets of searchable forms, plus the converted words.
  const validWords = [];
  const validReadings = /* @__PURE__ */ new Set();
  const validKanjiForms = /* @__PURE__ */ new Set();
  for (const dictWord of jmDict) {
    // A word is converted only if it contributes a form not seen before.
    let valid = false;
    for (const r of dictWord.readings)
      if ((hasSearchableNotes(r.notes) || r.commonness !== void 0) && !validReadings.has(r.reading)) {
        validReadings.add(r.reading);
        valid = true;
      }
    if (dictWord.kanjiForms)
      for (const kf of dictWord.kanjiForms)
        if ((hasSearchableNotes(kf.notes) || kf.commonness !== void 0) && !validKanjiForms.has(kf.form)) {
          validKanjiForms.add(kf.form);
          valid = true;
        }
    if (valid)
      validWords.push(
        getWord(void 0, void 0, kanjiDic, void 0, void 0, dictWord)
      );
  }

  // Pass 2: classify the entries once, independently of any single word.
  const validTitleEntries = [];
  const entriesWithFormTitlesGlobal = [];
  const entriesWithFormsGlobal = [];
  const validFormOfEntries = /* @__PURE__ */ new Set();
  const validGlossesEntries = /* @__PURE__ */ new Set();
  const validFormsEntries = /* @__PURE__ */ new Set();
  for (const entry of entries) {
    const titleIsKanjiForm = validKanjiForms.has(entry.word);
    const titleIsReading = validReadings.has(entry.word);
    if (titleIsKanjiForm) {
      if (isValidArray(entry.senses))
        for (const sense of entry.senses) {
          if (isValidArray(sense.form_of) && sense.form_of.some(
            (form) => form.word && typeof form.word === "string" && validReadings.has(form.word)
          ))
            validFormOfEntries.add(entry.word);
          else if (isValidArray(sense.glosses) && sense.glosses.length === 1) {
            if (findReferencedReading(sense.glosses[0], validReadings))
              validGlossesEntries.add(entry.word);
          }
        }
      if (isValidArray(entry.forms))
        for (const form of entry.forms)
          if (form.form && typeof form.form === "string" && validReadings.has(form.form))
            validFormsEntries.add(entry.word);
    } else if (titleIsReading) {
      // Reading-titled entries are recorded the same way whether or not they
      // list a known kanji form, so a single branch covers both cases.
      entriesWithFormTitlesGlobal.push(entry);
    }
    if (titleIsKanjiForm || titleIsReading) validTitleEntries.push(entry);
    if (isValidArray(entry.forms) && (titleIsKanjiForm || titleIsReading) && entry.forms.some(
      (form) => validKanjiForms.has(form.form) || validReadings.has(form.form)
    ))
      entriesWithFormsGlobal.push(entry);
  }

  // Pass 3: gather the definitions of every converted word.
  for (const word of validWords) {
    const definitions = [];
    const kanjiFormEntries = [];
    const readingWithFormsEntries = [];
    const readingEntries = [];
    // reading -> titles of the kanji-form entries that point at it
    const titleFormMap = /* @__PURE__ */ new Map();
    const readingForms = /* @__PURE__ */ new Set();
    const validWordReadings = new Set(
      word.readings.filter((r) => hasSearchableNotes(r.notes) || r.common === true).map((r) => r.reading)
    );
    const validWordKanjiForms = word.kanjiForms ? new Set(
      word.kanjiForms.filter((kf) => hasSearchableNotes(kf.notes) || kf.common === true).map((kf) => kf.kanjiForm)
    ) : void 0;
    const isWordKanjiForm = (value) => validWordKanjiForms !== void 0 && validWordKanjiForms.has(value);
    const entriesWithFormTitles = entriesWithFormTitlesGlobal.filter(
      (entry) => validWordReadings.has(entry.word)
    );
    const entriesWithForms = entriesWithFormsGlobal.filter(
      (entry) => isValidArray(entry.forms) && (isWordKanjiForm(entry.word) || validWordReadings.has(entry.word)) && entry.forms.some(
        (form) => isWordKanjiForm(form.form) || validWordReadings.has(form.form)
      )
    );
    for (const ent of validTitleEntries) {
      const validFormOf = validFormOfEntries.has(ent.word);
      const validGlosses = validGlossesEntries.has(ent.word);
      const validForms = validFormsEntries.has(ent.word);
      if (isWordKanjiForm(ent.word) && (validFormOf || validGlosses || validForms)) {
        kanjiFormEntries.push(ent);
        if ((validFormOf || validGlosses) && isValidArray(ent.senses))
          for (const sense of ent.senses) {
            if (validFormOf && isValidArray(sense.form_of)) {
              for (const form of sense.form_of)
                if (form.word && typeof form.word === "string" && validWordReadings.has(form.word))
                  addToSetMap(titleFormMap, form.word, ent.word);
            } else if (validGlosses && isStringArray(sense.glosses) && sense.glosses.length === 1) {
              const reading = findReferencedReading(
                sense.glosses[0],
                validWordReadings
              );
              if (reading) addToSetMap(titleFormMap, reading, ent.word);
            }
          }
        if (validForms && isValidArray(ent.forms))
          for (const form of ent.forms)
            if (form.form && typeof form.form === "string" && validWordReadings.has(form.form))
              readingForms.add(form.form);
      } else if (validWordReadings.has(ent.word) && isValidArray(ent.forms) && validWordKanjiForms !== void 0 && ent.forms.some((form) => validWordKanjiForms.has(form.form)))
        readingWithFormsEntries.push(ent);
      else if (word.kanjiForms === void 0 && validWordReadings.has(ent.word))
        readingEntries.push(ent);
    }
    for (const entry of entriesWithForms) {
      const titles = titleFormMap.get(entry.word);
      if (titles && entry.forms.some((form) => titles.has(form.form)))
        readingWithFormsEntries.push(entry);
    }
    for (const entry of entriesWithFormTitles)
      if (readingForms.has(entry.word)) readingWithFormsEntries.push(entry);

    let parsedReadingWithFormsEntries = false;
    const parseReadingWithFormsEntries = () => {
      // parseEntry skips definitions already in `definitions`, so one pass
      // over the referenced entries is enough.
      if (parsedReadingWithFormsEntries) return;
      parsedReadingWithFormsEntries = true;
      for (const ref of readingWithFormsEntries)
        parseEntry(ref, definitions, definitionMap);
    };
    for (const entry of kanjiFormEntries)
      // A "和語の漢字表記" entry is replaced by the entries it refers to.
      if (entry.pos_title === "\u548C\u8A9E\u306E\u6F22\u5B57\u8868\u8A18" && readingWithFormsEntries.length > 0)
        parseReadingWithFormsEntries();
      else parseEntry(entry, definitions, definitionMap);
    if (readingWithFormsEntries.length > 0) parseReadingWithFormsEntries();
    for (const readingEntry of readingEntries)
      parseEntry(readingEntry, definitions, definitionMap);
    if (definitions.length > 0)
      japaneseDefinitions.push({
        wordID: word.id,
        definitions
      });
  }

  // Pass 4: accuracy flags and optional furigana.
  const kuroshiro = generateFurigana === true ? new Kuroshiro.default() : null;
  if (kuroshiro !== null) await kuroshiro.init(new KuromojiAnalyzer());
  for (const pair of japaneseDefinitions)
    for (const def of pair.definitions) {
      const defCount = definitionMap.get(def.definition);
      // The same text attached to several words may belong to only one of them.
      if (defCount && defCount.count > 1) def.mayNotBeAccurate = true;
      // Definitions containing "・" are left without furigana.
      if (kuroshiro !== null && !def.definition.includes("\u30FB"))
        def.furigana = await kuroshiro.convert(def.definition, {
          to: "hiragana",
          mode: "furigana"
        });
    }
  return japaneseDefinitions;
}
|
|
1555
1822
|
function lookupWordNote(key, notes, tags, required, fallback) {
|
|
1556
1823
|
const info = noteMap.get(
|
|
1557
1824
|
key.toLowerCase()
|
|
@@ -1570,7 +1837,7 @@ var wordAddNoteArray = (arr, cb) => {
|
|
|
1570
1837
|
if (!arr) return;
|
|
1571
1838
|
for (const v of arr) cb(v);
|
|
1572
1839
|
};
|
|
1573
|
-
function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath) {
|
|
1840
|
+
function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeName, deckPath) {
|
|
1574
1841
|
try {
|
|
1575
1842
|
if (!dictWord && id && dict)
|
|
1576
1843
|
dictWord = dict.find((entry) => entry.id === id);
|
|
@@ -1711,11 +1978,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1711
1978
|
(kanji) => kanji.kanji === char
|
|
1712
1979
|
);
|
|
1713
1980
|
if (dictKanji) {
|
|
1714
|
-
const kanjiObj = getKanji(
|
|
1715
|
-
dictKanji.kanji,
|
|
1716
|
-
kanjiDic,
|
|
1717
|
-
void 0
|
|
1718
|
-
);
|
|
1981
|
+
const kanjiObj = getKanji(kanjiDic, void 0, dictKanji);
|
|
1719
1982
|
word.kanji.push({
|
|
1720
1983
|
kanji: kanjiObj.kanji,
|
|
1721
1984
|
...kanjiObj.meanings ? { meanings: kanjiObj.meanings } : {}
|
|
@@ -1730,28 +1993,29 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1730
1993
|
if (dictWord.hasPhrases === true && examples) {
|
|
1731
1994
|
const readings = new Set(
|
|
1732
1995
|
word.readings.filter(
|
|
1733
|
-
(reading) => reading.notes === void 0 || !reading.notes.some(
|
|
1996
|
+
(reading) => (reading.notes === void 0 || !reading.notes.some(
|
|
1734
1997
|
(note) => notSearchedForms.has(note)
|
|
1735
|
-
) || reading.common === true
|
|
1998
|
+
)) && (word.common === void 0 || reading.common === true)
|
|
1736
1999
|
).map((reading) => reading.reading)
|
|
1737
2000
|
);
|
|
1738
2001
|
const existValidKf = word.kanjiForms && word.kanjiForms.length > 0 ? word.kanjiForms.some(
|
|
1739
|
-
(kf) => kf.notes === void 0 || !kf.notes.some(
|
|
2002
|
+
(kf) => (kf.notes === void 0 || !kf.notes.some(
|
|
1740
2003
|
(note) => notSearchedForms.has(note)
|
|
1741
|
-
) || kf.common === true
|
|
2004
|
+
)) && (word.common === void 0 || kf.common === true)
|
|
1742
2005
|
) : void 0;
|
|
1743
2006
|
const kanjiForms = word.kanjiForms && word.kanjiForms.length > 0 ? new Set(
|
|
1744
2007
|
word.kanjiForms.filter((kanjiForm) => {
|
|
1745
2008
|
if (existValidKf === true)
|
|
1746
|
-
return kanjiForm.notes === void 0 || !kanjiForm.notes.some(
|
|
2009
|
+
return (kanjiForm.notes === void 0 || !kanjiForm.notes.some(
|
|
1747
2010
|
(note) => notSearchedForms.has(note)
|
|
1748
|
-
) || kanjiForm.common === true;
|
|
2011
|
+
)) && (word.common === void 0 || kanjiForm.common === true);
|
|
1749
2012
|
else return true;
|
|
1750
2013
|
}).map((kanjiForm) => kanjiForm.kanjiForm)
|
|
1751
2014
|
) : void 0;
|
|
1752
|
-
|
|
2015
|
+
let kanjiFormExamples = [];
|
|
1753
2016
|
const readingMatchingKanjiFormExamples = [];
|
|
1754
2017
|
const readingExamples = [];
|
|
2018
|
+
const readingMatchingKanjiForms = /* @__PURE__ */ new Set();
|
|
1755
2019
|
for (const example of examples)
|
|
1756
2020
|
for (let i = 0; i < example.parts.length; i++) {
|
|
1757
2021
|
const part = example.parts[i];
|
|
@@ -1759,12 +2023,18 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1759
2023
|
const readingAsInflectedFormMatch = part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
|
|
1760
2024
|
const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
|
|
1761
2025
|
if (kanjiForms && kanjiForms.has(part.baseForm) || referenceIDMatch) {
|
|
1762
|
-
if (readingAsReadingMatch || readingAsInflectedFormMatch)
|
|
2026
|
+
if (readingAsReadingMatch || readingAsInflectedFormMatch) {
|
|
1763
2027
|
readingMatchingKanjiFormExamples.push({
|
|
1764
2028
|
ex: example,
|
|
1765
2029
|
partIndex: i
|
|
1766
2030
|
});
|
|
1767
|
-
|
|
2031
|
+
readingMatchingKanjiForms.add(part.baseForm);
|
|
2032
|
+
} else
|
|
2033
|
+
kanjiFormExamples.push({
|
|
2034
|
+
ex: example,
|
|
2035
|
+
partIndex: i,
|
|
2036
|
+
form: part.baseForm
|
|
2037
|
+
});
|
|
1768
2038
|
break;
|
|
1769
2039
|
}
|
|
1770
2040
|
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
@@ -1773,11 +2043,16 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1773
2043
|
break;
|
|
1774
2044
|
}
|
|
1775
2045
|
}
|
|
2046
|
+
if (readingMatchingKanjiForms.size > 0)
|
|
2047
|
+
kanjiFormExamples = kanjiFormExamples.filter(
|
|
2048
|
+
(ex) => ex.form && readingMatchingKanjiForms.has(ex.form)
|
|
2049
|
+
);
|
|
1776
2050
|
const includeKanjiFormExamples = word.kanjiForms !== void 0;
|
|
1777
2051
|
let wordExamples = [
|
|
1778
2052
|
...includeKanjiFormExamples ? [...readingMatchingKanjiFormExamples, ...kanjiFormExamples] : [],
|
|
1779
2053
|
...!includeKanjiFormExamples ? readingExamples : []
|
|
1780
2054
|
];
|
|
2055
|
+
readingMatchingKanjiForms.clear();
|
|
1781
2056
|
const glossSpecificExamples = [];
|
|
1782
2057
|
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1783
2058
|
for (let i = 0; i < word.translations.length; i++) {
|
|
@@ -1822,41 +2097,58 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1822
2097
|
word.tags.push("word::has_meaning-specific_phrases");
|
|
1823
2098
|
}
|
|
1824
2099
|
}
|
|
2100
|
+
if (definitions) {
|
|
2101
|
+
const pair = definitions.find(
|
|
2102
|
+
(wdp) => wdp.wordID === word.id
|
|
2103
|
+
);
|
|
2104
|
+
if (pair) word.definitions = pair.definitions;
|
|
2105
|
+
}
|
|
1825
2106
|
return word;
|
|
1826
2107
|
} else throw new Error(`Word${id ? ` ${id}` : ""} not found`);
|
|
1827
2108
|
} catch (err) {
|
|
1828
2109
|
throw err;
|
|
1829
2110
|
}
|
|
1830
2111
|
}
|
|
1831
|
-
function getKanji(kanjiChar,
|
|
2112
|
+
function getKanji(dict, kanjiChar, dictKanji, jmDict, svgList, noteTypeName, deckPath) {
|
|
1832
2113
|
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
1833
2114
|
try {
|
|
1834
|
-
|
|
1835
|
-
(entry) => entry.kanji === kanjiChar
|
|
1836
|
-
);
|
|
2115
|
+
if (!dictKanji && kanjiChar)
|
|
2116
|
+
dictKanji = dict.find((entry) => entry.kanji === kanjiChar);
|
|
1837
2117
|
if (dictKanji) {
|
|
1838
2118
|
const kanji = {
|
|
1839
2119
|
kanji: dictKanji.kanji,
|
|
1840
2120
|
...dictKanji.misc ? { strokes: dictKanji.misc.strokeNumber } : {},
|
|
1841
2121
|
...dictKanji.misc && dictKanji.misc.grade ? { grade: dictKanji.misc.grade } : {},
|
|
1842
|
-
...dictKanji.misc && dictKanji.misc.frequency ? {
|
|
2122
|
+
...dictKanji.misc && dictKanji.misc.frequency ? { frequency: dictKanji.misc.frequency } : {},
|
|
1843
2123
|
noteID: `kanji_${dictKanji.kanji}`,
|
|
1844
2124
|
...noteTypeName ? { noteTypeName } : {},
|
|
1845
2125
|
...deckPath ? { deckPath } : {}
|
|
1846
2126
|
};
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
for (const
|
|
1853
|
-
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
2127
|
+
if (dictKanji.readingMeaning) {
|
|
2128
|
+
kanji.meanings = [];
|
|
2129
|
+
kanji.nanori = [];
|
|
2130
|
+
kanji.onyomi = [];
|
|
2131
|
+
kanji.kunyomi = [];
|
|
2132
|
+
for (const rm of dictKanji.readingMeaning) {
|
|
2133
|
+
if (rm.nanori && rm.nanori.length > 0)
|
|
2134
|
+
kanji.nanori.push(...rm.nanori);
|
|
2135
|
+
for (const group of rm.groups) {
|
|
2136
|
+
kanji.onyomi.push(
|
|
2137
|
+
...group.readings.filter((reading) => reading.type === "ja_on").map((reading) => reading.reading)
|
|
2138
|
+
);
|
|
2139
|
+
kanji.kunyomi.push(
|
|
2140
|
+
...group.readings.filter(
|
|
2141
|
+
(reading) => reading.type === "ja_kun"
|
|
2142
|
+
).map((reading) => reading.reading)
|
|
2143
|
+
);
|
|
2144
|
+
kanji.meanings.push(...group.meanings);
|
|
2145
|
+
}
|
|
1859
2146
|
}
|
|
2147
|
+
if (kanji.meanings && kanji.meanings.length === 0)
|
|
2148
|
+
delete kanji.meanings;
|
|
2149
|
+
if (kanji.nanori && kanji.nanori.length === 0) delete kanji.nanori;
|
|
2150
|
+
if (kanji.onyomi && kanji.onyomi.length === 0) delete kanji.onyomi;
|
|
2151
|
+
if (kanji.kunyomi && kanji.kunyomi.length === 0) delete kanji.kunyomi;
|
|
1860
2152
|
}
|
|
1861
2153
|
if (jmDict) {
|
|
1862
2154
|
let kanjiWords = jmDict.filter(
|
|
@@ -1870,6 +2162,7 @@ function getKanji(kanjiChar, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
|
1870
2162
|
void 0,
|
|
1871
2163
|
void 0,
|
|
1872
2164
|
void 0,
|
|
2165
|
+
void 0,
|
|
1873
2166
|
word,
|
|
1874
2167
|
void 0
|
|
1875
2168
|
);
|
|
@@ -1906,6 +2199,7 @@ function getKanji(kanjiChar, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
|
1906
2199
|
void 0,
|
|
1907
2200
|
void 0,
|
|
1908
2201
|
void 0,
|
|
2202
|
+
void 0,
|
|
1909
2203
|
word,
|
|
1910
2204
|
void 0
|
|
1911
2205
|
);
|
|
@@ -1975,17 +2269,19 @@ function getKanji(kanjiChar, dict, jmDict, svgList, noteTypeName, deckPath) {
|
|
|
1975
2269
|
...kanji.svg ? ["kanji::has_svg"] : []
|
|
1976
2270
|
);
|
|
1977
2271
|
return kanji;
|
|
1978
|
-
} else
|
|
2272
|
+
} else
|
|
2273
|
+
throw new Error(`Kanji not found${kanjiChar ? `: ${kanjiChar}` : ""}`);
|
|
1979
2274
|
} catch (err) {
|
|
1980
2275
|
throw err;
|
|
1981
2276
|
}
|
|
1982
2277
|
}
|
|
1983
|
-
function getKanjiExtended(
|
|
2278
|
+
function getKanjiExtended(info, dict, kanjiChar, dictKanji, useWords, jmDict, svgList, noteTypeName, deckPath) {
|
|
1984
2279
|
var _a, _b, _c, _d;
|
|
1985
2280
|
try {
|
|
1986
2281
|
const kanji = getKanji(
|
|
1987
|
-
kanjiChar,
|
|
1988
2282
|
dict,
|
|
2283
|
+
kanjiChar,
|
|
2284
|
+
dictKanji,
|
|
1989
2285
|
jmDict,
|
|
1990
2286
|
svgList,
|
|
1991
2287
|
noteTypeName,
|
|
@@ -1995,13 +2291,13 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
|
|
|
1995
2291
|
kanji.components = info.components;
|
|
1996
2292
|
if (info.mnemonic && info.mnemonic.length > 0)
|
|
1997
2293
|
kanji.mnemonic = info.mnemonic;
|
|
1998
|
-
if (
|
|
2294
|
+
if (useWords === true && info.words && info.words.length > 0)
|
|
1999
2295
|
kanji.words = info.words;
|
|
2000
2296
|
if (kanji.tags) {
|
|
2001
2297
|
kanji.tags.push(`kanji::components::${(_b = (_a = kanji.components) == null ? void 0 : _a.length) != null ? _b : 0}`);
|
|
2002
2298
|
if (kanji.mnemonic && kanji.mnemonic.length > 0)
|
|
2003
2299
|
kanji.tags.push("kanji::has_mnemonic");
|
|
2004
|
-
if (
|
|
2300
|
+
if (useWords === true && kanji.words) {
|
|
2005
2301
|
if (!kanji.tags.some((tag, index) => {
|
|
2006
2302
|
var _a2, _b2;
|
|
2007
2303
|
if (tag.startsWith("kanji::words::")) {
|
|
@@ -2016,8 +2312,8 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
|
|
|
2016
2312
|
kanji.tags.push(`kanji::words::${(_d = (_c = kanji.words) == null ? void 0 : _c.length) != null ? _d : 0}`);
|
|
2017
2313
|
}
|
|
2018
2314
|
}
|
|
2019
|
-
if (
|
|
2020
|
-
kanji.source = `https://jpdb.io/kanji/${kanji.kanji}
|
|
2315
|
+
if (info.fromJpdb === true && (kanji.mnemonic || kanji.components || kanji.words && useWords === true))
|
|
2316
|
+
kanji.source = `https://jpdb.io/kanji/${kanji.kanji}`;
|
|
2021
2317
|
return kanji;
|
|
2022
2318
|
} catch (err) {
|
|
2023
2319
|
throw err;
|
|
@@ -2106,12 +2402,6 @@ function generateAnkiNote(entry) {
|
|
|
2106
2402
|
entry.kanjiForms && kanjiFormsField ? kanjiFormsField : noKanjiForms
|
|
2107
2403
|
],
|
|
2108
2404
|
translationsField,
|
|
2109
|
-
entry.kanji ? entry.kanji.map(
|
|
2110
|
-
(kanjiEntry) => createEntry(
|
|
2111
|
-
`<span class="word word-kanji">${kanjiEntry.kanji}${kanjiEntry.meanings === void 0 ? " (no meanings)" : ""}</span>`,
|
|
2112
|
-
kanjiEntry.meanings
|
|
2113
|
-
)
|
|
2114
|
-
).join("") : '<span class="word word-kanji">(no kanji)</span>',
|
|
2115
2405
|
entry.phrases ? entry.phrases.map(
|
|
2116
2406
|
(phraseEntry, index) => createEntry(
|
|
2117
2407
|
`<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
|
|
@@ -2122,6 +2412,20 @@ function generateAnkiNote(entry) {
|
|
|
2122
2412
|
) ? true : void 0
|
|
2123
2413
|
)
|
|
2124
2414
|
).join("") : '<span class="word word-phrase">(no phrases) (Search on dictionaries!)</span>',
|
|
2415
|
+
entry.definitions ? entry.definitions.map(
|
|
2416
|
+
(definitionEntry) => {
|
|
2417
|
+
var _a;
|
|
2418
|
+
return createEntry(
|
|
2419
|
+
`<span class="word word-definition>"<span class="word word-definition-original">${definitionEntry.definition}</span><span class="word word-definition-furigana">${(_a = definitionEntry.furigana) != null ? _a : definitionEntry.definition}</span></span>`
|
|
2420
|
+
);
|
|
2421
|
+
}
|
|
2422
|
+
).join("") : '<span class="word word-definition">(no definitions) (Search on ja.wiktionary.org)</span>',
|
|
2423
|
+
entry.kanji ? entry.kanji.map(
|
|
2424
|
+
(kanjiEntry) => createEntry(
|
|
2425
|
+
`<span class="word word-kanji">${kanjiEntry.kanji}${kanjiEntry.meanings === void 0 ? " (no meanings)" : ""}</span>`,
|
|
2426
|
+
kanjiEntry.meanings
|
|
2427
|
+
)
|
|
2428
|
+
).join("") : '<span class="word word-kanji">(no kanji)</span>',
|
|
2125
2429
|
...entry.tags && entry.tags.length > 0 ? [
|
|
2126
2430
|
entry.tags.map(
|
|
2127
2431
|
(tag) => tag.trim().toLowerCase().replaceAll(" ", "::")
|
|
@@ -2278,13 +2582,13 @@ function generateAnkiNotesFile(list) {
|
|
|
2278
2582
|
if (ankiNotes.length === 0) throw new Error("Invalid list");
|
|
2279
2583
|
return `${headers.join("\n")}
|
|
2280
2584
|
${ankiNotes}`;
|
|
2281
|
-
} else
|
|
2282
|
-
return void 0;
|
|
2585
|
+
} else throw new Error("No entries available for Anki notes creation");
|
|
2283
2586
|
}
|
|
2284
2587
|
// Annotate the CommonJS export names for ESM import in node:
|
|
2285
2588
|
0 && (module.exports = {
|
|
2286
2589
|
capitalizeString,
|
|
2287
2590
|
convertJMdict,
|
|
2591
|
+
convertJawiktionary,
|
|
2288
2592
|
convertKanjiDic,
|
|
2289
2593
|
convertKradFile,
|
|
2290
2594
|
convertRadkFile,
|
|
@@ -2294,6 +2598,7 @@ ${ankiNotes}`;
|
|
|
2294
2598
|
getKanji,
|
|
2295
2599
|
getKanjiExtended,
|
|
2296
2600
|
getWord,
|
|
2601
|
+
getWordDefinitions,
|
|
2297
2602
|
isGrammar,
|
|
2298
2603
|
isKana,
|
|
2299
2604
|
isKanji,
|