henkan 0.9.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/CONTRIBUTING.md +0 -0
  2. package/LICENSE.md +0 -0
  3. package/README.md +1 -1
  4. package/SECURITY.md +0 -0
  5. package/dist/index.cjs.js +432 -77
  6. package/dist/index.cjs.js.map +2 -2
  7. package/dist/index.mjs +427 -77
  8. package/dist/index.mjs.map +2 -2
  9. package/dist/types/types.d.ts +43 -5
  10. package/dist/types/types.d.ts.map +1 -1
  11. package/dist/types/utils.d.ts +32 -11
  12. package/dist/types/utils.d.ts.map +1 -1
  13. package/docs/api/README.md +5 -0
  14. package/docs/api/functions/capitalizeString.md +1 -1
  15. package/docs/api/functions/convertJMdict.md +1 -1
  16. package/docs/api/functions/convertJawiktionary.md +29 -0
  17. package/docs/api/functions/convertKanjiDic.md +1 -1
  18. package/docs/api/functions/convertKradFile.md +1 -1
  19. package/docs/api/functions/convertRadkFile.md +1 -1
  20. package/docs/api/functions/convertTanakaCorpus.md +2 -2
  21. package/docs/api/functions/generateAnkiNote.md +1 -1
  22. package/docs/api/functions/generateAnkiNotesFile.md +4 -4
  23. package/docs/api/functions/getKanji.md +12 -6
  24. package/docs/api/functions/getKanjiExtended.md +17 -11
  25. package/docs/api/functions/getWord.md +8 -2
  26. package/docs/api/functions/getWordDefinitions.md +45 -0
  27. package/docs/api/functions/isStringArray.md +1 -1
  28. package/docs/api/functions/isValidArray.md +1 -1
  29. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  30. package/docs/api/functions/shuffleArray.md +1 -1
  31. package/docs/api/functions/synthesizeSpeech.md +1 -1
  32. package/docs/api/interfaces/Definition.md +41 -0
  33. package/docs/api/interfaces/DictKanji.md +7 -7
  34. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  35. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  36. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  37. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  38. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  39. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  40. package/docs/api/interfaces/DictMeaning.md +12 -12
  41. package/docs/api/interfaces/DictRadical.md +4 -4
  42. package/docs/api/interfaces/DictReading.md +5 -5
  43. package/docs/api/interfaces/DictWord.md +8 -8
  44. package/docs/api/interfaces/ExamplePart.md +7 -7
  45. package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
  46. package/docs/api/interfaces/Grammar.md +15 -15
  47. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  48. package/docs/api/interfaces/Kana.md +11 -11
  49. package/docs/api/interfaces/Kanji.md +22 -22
  50. package/docs/api/interfaces/KanjiComponent.md +3 -3
  51. package/docs/api/interfaces/KanjiForm.md +4 -4
  52. package/docs/api/interfaces/NoteAndTag.md +3 -3
  53. package/docs/api/interfaces/Phrase.md +5 -5
  54. package/docs/api/interfaces/Radical.md +16 -16
  55. package/docs/api/interfaces/Reading.md +5 -5
  56. package/docs/api/interfaces/ResultEntry.md +7 -7
  57. package/docs/api/interfaces/TanakaExample.md +7 -7
  58. package/docs/api/interfaces/Translation.md +3 -3
  59. package/docs/api/interfaces/UsefulRegExps.md +8 -8
  60. package/docs/api/interfaces/Word.md +24 -14
  61. package/docs/api/interfaces/WordDefinitionPair.md +31 -0
  62. package/docs/api/type-aliases/Dict.md +1 -1
  63. package/docs/api/type-aliases/DictName.md +1 -1
  64. package/docs/api/type-aliases/DictTranslation.md +13 -0
  65. package/docs/api/type-aliases/EntryType.md +1 -1
  66. package/docs/api/type-aliases/JLPT.md +1 -1
  67. package/docs/api/type-aliases/Result.md +1 -1
  68. package/package.json +16 -9
package/dist/index.mjs CHANGED
@@ -1085,6 +1085,7 @@ import iconv from "iconv-lite";
1085
1085
  import {
1086
1086
  SynthesizeSpeechCommand
1087
1087
  } from "@aws-sdk/client-polly";
1088
+ import { createInterface } from "readline";
1088
1089
  var Kuroshiro = __require("kuroshiro");
1089
1090
  var KuromojiAnalyzer = __require("kuroshiro-analyzer-kuromoji");
1090
1091
  function capitalizeString(value) {
@@ -1100,6 +1101,7 @@ function isStringArray(arg) {
1100
1101
  return arg !== null && arg !== void 0 && Array.isArray(arg) && arg.every((element) => typeof element === "string");
1101
1102
  }
1102
1103
  function shuffleArray(arr) {
1104
+ if (arr.length < 2) return arr;
1103
1105
  const a = arr.slice();
1104
1106
  for (let i = a.length - 1; i > 0; i--) {
1105
1107
  const j = Math.floor(Math.random() * (i + 1));
@@ -1215,7 +1217,7 @@ function convertJMdict(xmlString, examples) {
1215
1217
  }
1216
1218
  if (isStringArray(meaning.dial))
1217
1219
  meaningObj.dialects = meaning.dial;
1218
- if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0 || meaningObj.translations && meaningObj.translations.length > 0)
1220
+ if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0 || meaningObj.translations)
1219
1221
  entryObj.meanings.push(meaningObj);
1220
1222
  }
1221
1223
  if (entryObj.meanings.length === usuallyInKanaMeanings)
@@ -1224,9 +1226,9 @@ function convertJMdict(xmlString, examples) {
1224
1226
  if (examples) {
1225
1227
  const readings2 = new Set(
1226
1228
  entryObj.readings.filter(
1227
- (reading) => (!reading.notes || !reading.notes.some(
1229
+ (reading) => reading.notes === void 0 || !reading.notes.some(
1228
1230
  (note) => notSearchedForms.has(note)
1229
- )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1231
+ ) || reading.commonness
1230
1232
  ).map((reading) => reading.reading)
1231
1233
  );
1232
1234
  const kanjiForms2 = entryObj.kanjiForms ? new Set(
@@ -1327,9 +1329,13 @@ function convertKanjiDic(xmlString) {
1327
1329
  rmObj.groups.push(groupObj);
1328
1330
  }
1329
1331
  if (isStringArray(rm.nanori)) rmObj.nanori = rm.nanori;
1330
- if (rmObj.groups.length > 0) kanjiObj.readingMeaning.push(rmObj);
1332
+ if (rmObj.groups.length > 0 && kanjiObj.readingMeaning)
1333
+ kanjiObj.readingMeaning.push(rmObj);
1331
1334
  }
1332
- if (kanjiObj.kanji.length > 0) dict.push(kanjiObj);
1335
+ if (kanjiObj.readingMeaning && kanjiObj.readingMeaning.length === 0)
1336
+ delete kanjiObj.readingMeaning;
1337
+ if (kanjiObj.kanji.length === 1 && kanjiObj.misc && kanjiObj.misc.strokeNumber.length > 0)
1338
+ dict.push(kanjiObj);
1333
1339
  }
1334
1340
  });
1335
1341
  return dict;
@@ -1433,7 +1439,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
1433
1439
  (dictKanji) => dictKanji.kanji === kanji
1434
1440
  );
1435
1441
  if (foundKanji) kanjiList.push(foundKanji);
1436
- else kanjiList.push({ kanji, readingMeaning: [] });
1442
+ else kanjiList.push({ kanji });
1437
1443
  }
1438
1444
  j++;
1439
1445
  kanjiLine = fileParsed[j];
@@ -1441,7 +1447,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
1441
1447
  if (kanjiLine.startsWith("$ ")) i = j - 1;
1442
1448
  }
1443
1449
  if (kanjiList.length > 0) radical.kanji = kanjiList;
1444
- if (radical.radical.length > 0 && radical.strokes.length > 0)
1450
+ if (radical.radical.length === 1 && radical.strokes.length > 0)
1445
1451
  radicals.push(radical);
1446
1452
  }
1447
1453
  }
@@ -1502,6 +1508,265 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
1502
1508
  throw err;
1503
1509
  }
1504
1510
  }
1511
/**
 * Reads a JSON Lines stream and collects its Japanese entries.
 *
 * Each line is trimmed and parsed with `JSON.parse`. A parsed value is kept
 * only when it is a non-null object whose `lang_code` is `"ja"` and whose
 * `lang` is `"日本語"`; every other line is parsed but discarded.
 *
 * The readline interface and the input stream are released on both the
 * success and the failure path.
 *
 * @param {import("stream").Readable} stream - Readable source of JSONL text.
 * @returns {Promise<object[]>} The matching entries, in input order.
 * @throws {Error} `Invalid JSONL at line N: …` when a line is not valid JSON
 *   (the original parse error is attached as `cause` where supported).
 */
async function convertJawiktionary(stream) {
  const rl = createInterface({
    input: stream,
    crlfDelay: Infinity
  });
  const entries = [];
  let lineNumber = 0;
  try {
    for await (const line of rl) {
      lineNumber++;
      let obj;
      try {
        obj = JSON.parse(line.trim());
      } catch (err) {
        throw new Error(
          `Invalid JSONL at line ${lineNumber}: ${err.message}`,
          { cause: err }
        );
      }
      if (obj !== null && typeof obj === "object" && obj.lang_code === "ja" && obj.lang === "\u65E5\u672C\u8A9E")
        entries.push(obj);
    }
    return entries;
  } finally {
    rl.close();
    // Not every Readable has close() (file streams do, generic ones do not),
    // so only call it when present; destroy() is the generic release.
    if (typeof stream.close === "function") stream.close();
    if (typeof stream.destroy === "function") stream.destroy();
  }
}
1545
/**
 * Appends the definitions found in one entry to `definitions`.
 *
 * Every sense whose `glosses` passes `isStringArray` yields one definition:
 * its glosses joined with `<br>`. A definition already present in
 * `definitions` is skipped. For each definition that is added, its usage
 * counter in `definitionMap` is created at 1 or incremented.
 *
 * @param {object} entry - Entry whose `senses` are scanned; ignored when
 *   `entry.senses` does not pass `isValidArray`.
 * @param {{definition: string}[]} definitions - Mutated in place.
 * @param {Map<string, {count: number}>} definitionMap - Mutated in place.
 * @returns {void}
 */
function parseEntry(entry, definitions, definitionMap) {
  if (!isValidArray(entry.senses)) return;
  for (const sense of entry.senses) {
    if (!isStringArray(sense.glosses)) continue;
    const text = sense.glosses.join("<br>");
    const alreadyListed = definitions.some((item) => item.definition === text);
    if (alreadyListed) continue;
    if (definitionMap.has(text)) {
      definitionMap.get(text).count++;
    } else {
      definitionMap.set(text, { count: 1 });
    }
    definitions.push({ definition: text });
  }
}
1559
/**
 * Pairs dictionary words with definitions taken from monolingual entries.
 *
 * Steps, in order:
 *  1. Collect the searchable readings and kanji forms of `jmDict` and convert
 *     the words that contribute at least one of them with `getWord`.
 *  2. Classify `entries` by title (`entry.word`): titles that are kanji forms,
 *     titles that are readings, and entries whose `forms` reference a known
 *     reading or kanji form.
 *  3. For every word, pick the entries that belong to it and feed them to
 *     `parseEntry`, which fills the word's definition list.
 *  4. Flag every definition that `parseEntry` counted for more than one word
 *     with `mayNotBeAccurate`, and optionally attach furigana.
 *
 * @param {object[]} entries - Entries with `word`, and optionally `senses`
 *   (`glosses`, `form_of`), `forms` and `pos_title`.
 * @param {object[]} jmDict - Dictionary words with `readings` and optional
 *   `kanjiForms`; each is passed to `getWord` as its `dictWord` argument.
 * @param {object[]} kanjiDic - Forwarded unchanged to `getWord`.
 * @param {boolean} [generateFurigana] - Only the value `true` enables
 *   furigana generation through Kuroshiro.
 * @returns {Promise<{wordID: *, definitions: object[]}[]>} One pair per word
 *   that received at least one definition.
 */
async function getWordDefinitions(entries, jmDict, kanjiDic, generateFurigana) {
  // A form is searchable when it has no notes, when none of its notes is in
  // notSearchedForms, or when `keep` (its commonness marker) is set.
  const isSearchable = (notes, keep) => notes === void 0 || !notes.some((note) => notSearchedForms.has(note)) || keep;

  // Returns the first of `readings` contained in a gloss that carries one of
  // the markers "漢字表記。" / "参照。"; undefined otherwise. Non-string
  // glosses are ignored instead of throwing on `.trim()`.
  const findReferencedReading = (gloss, readings) => {
    if (typeof gloss !== "string") return void 0;
    const text = gloss.trim();
    if (!text.includes("\u6F22\u5B57\u8868\u8A18\u3002") && !text.includes("\u53C2\u7167\u3002"))
      return void 0;
    for (const reading of readings)
      if (text.includes(reading)) return reading;
    return void 0;
  };

  // Adds `title` to the set stored under `key`, creating the set on demand.
  const addTitle = (map, key, title) => {
    const titles = map.get(key);
    if (titles) titles.add(title);
    else map.set(key, /* @__PURE__ */ new Set([title]));
  };

  // Maps each title to the positions of the entries carrying it.
  const indexByTitle = (list) => {
    const index = /* @__PURE__ */ new Map();
    list.forEach((entry, position) => {
      const positions = index.get(entry.word);
      if (positions) positions.push(position);
      else index.set(entry.word, [position]);
    });
    return index;
  };

  // Same result as list.filter((e) => titles.has(e.word)), original order
  // included, without scanning the whole list for every word.
  const selectByTitle = (list, index, titles) => {
    const positions = [];
    for (const title of titles) {
      const hits = index.get(title);
      if (hits) for (const position of hits) positions.push(position);
    }
    positions.sort((a, b) => a - b);
    return positions.map((position) => list[position]);
  };

  const japaneseDefinitions = [];
  const definitionMap = /* @__PURE__ */ new Map();
  const validWords = [];
  const validReadings = /* @__PURE__ */ new Set();
  const validKanjiForms = /* @__PURE__ */ new Set();

  // Step 1.
  // NOTE(review): a word counts as valid only when it contributes a reading
  // or kanji form not seen in an earlier word, so a later word whose forms
  // were all seen before is skipped. Kept as is; confirm this is intended.
  for (const dictWord of jmDict) {
    let valid = false;
    for (const r of dictWord.readings)
      if (isSearchable(r.notes, r.commonness !== void 0) && !validReadings.has(r.reading)) {
        validReadings.add(r.reading);
        valid = true;
      }
    if (dictWord.kanjiForms) {
      for (const kf of dictWord.kanjiForms)
        if (isSearchable(kf.notes, kf.commonness !== void 0) && !validKanjiForms.has(kf.form)) {
          validKanjiForms.add(kf.form);
          valid = true;
        }
    }
    if (valid)
      validWords.push(
        getWord(void 0, void 0, kanjiDic, void 0, void 0, dictWord)
      );
  }

  // Step 2.
  const validTitleEntries = [];
  const entriesWithFormTitlesGlobal = [];
  const entriesWithFormsGlobal = [];
  const validFormOfEntries = /* @__PURE__ */ new Set();
  const validGlossesEntries = /* @__PURE__ */ new Set();
  const validFormsEntries = /* @__PURE__ */ new Set();
  for (const entry of entries) {
    const title = entry.word;
    let valid = false;
    if (validKanjiForms.has(title)) {
      valid = true;
      if (isValidArray(entry.senses))
        for (const sense of entry.senses) {
          if (isValidArray(sense.form_of) && sense.form_of.some(
            (form) => form.word && typeof form.word === "string" && validReadings.has(form.word)
          ))
            validFormOfEntries.add(title);
          else if (isValidArray(sense.glosses) && sense.glosses.length === 1) {
            if (findReferencedReading(sense.glosses[0], validReadings))
              validGlossesEntries.add(title);
          }
        }
      if (isValidArray(entry.forms)) {
        for (const form of entry.forms)
          if (form.form && typeof form.form === "string" && validReadings.has(form.form))
            validFormsEntries.add(title);
      }
    } else if (validReadings.has(title)) {
      // Every reading-titled entry is kept, whether or not its forms
      // reference a known kanji form.
      valid = true;
      entriesWithFormTitlesGlobal.push(entry);
    }
    if (valid) validTitleEntries.push(entry);
    // `valid` is true exactly when the title is a known kanji form or reading.
    if (valid && isValidArray(entry.forms) && entry.forms.some(
      (form) => validKanjiForms.has(form.form) || validReadings.has(form.form)
    ))
      entriesWithFormsGlobal.push(entry);
  }
  const titleEntryIndex = indexByTitle(validTitleEntries);
  const formTitleIndex = indexByTitle(entriesWithFormTitlesGlobal);
  const formsIndex = indexByTitle(entriesWithFormsGlobal);

  // Step 3.
  for (const word of validWords) {
    const definitions = [];
    const kanjiFormEntries = [];
    const readingWithFormsEntries = [];
    const readingEntries = [];
    const titleFormMap = /* @__PURE__ */ new Map();
    const readingForms = /* @__PURE__ */ new Set();
    const validWordReadings = new Set(
      word.readings.filter((r) => isSearchable(r.notes, r.common === true)).map((r) => r.reading)
    );
    const validWordKanjiForms = word.kanjiForms ? new Set(
      word.kanjiForms.filter((kf) => isSearchable(kf.notes, kf.common === true)).map((kf) => kf.kanjiForm)
    ) : void 0;
    const wordTitles = new Set(validWordReadings);
    if (validWordKanjiForms !== void 0)
      for (const form of validWordKanjiForms) wordTitles.add(form);

    const entriesWithFormTitles = selectByTitle(
      entriesWithFormTitlesGlobal,
      formTitleIndex,
      validWordReadings
    );
    // entriesWithFormsGlobal only holds entries whose `forms` already passed
    // isValidArray, so that check is not repeated here.
    const entriesWithForms = selectByTitle(
      entriesWithFormsGlobal,
      formsIndex,
      wordTitles
    ).filter(
      (entry) => entry.forms.some(
        (form) => validWordKanjiForms !== void 0 && validWordKanjiForms.has(form.form) || validWordReadings.has(form.form)
      )
    );

    // Every branch below needs the title to be one of this word's forms, so
    // only those entries are visited.
    for (const ent of selectByTitle(validTitleEntries, titleEntryIndex, wordTitles)) {
      const title = ent.word;
      const validFormOf = validFormOfEntries.has(title);
      const validGlosses = validGlossesEntries.has(title);
      const validForms = validFormsEntries.has(title);
      if (validWordKanjiForms !== void 0 && validWordKanjiForms.has(title) && (validFormOf || validGlosses || validForms)) {
        kanjiFormEntries.push(ent);
        if ((validFormOf || validGlosses) && isValidArray(ent.senses))
          for (const sense of ent.senses) {
            if (validFormOf && isValidArray(sense.form_of)) {
              for (const form of sense.form_of)
                if (form.word && typeof form.word === "string" && validWordReadings.has(form.word))
                  addTitle(titleFormMap, form.word, title);
            } else if (validGlosses && isStringArray(sense.glosses) && sense.glosses.length === 1) {
              const reading = findReferencedReading(
                sense.glosses[0],
                validWordReadings
              );
              if (reading) addTitle(titleFormMap, reading, title);
            }
          }
        if (validForms && isValidArray(ent.forms)) {
          for (const form of ent.forms)
            if (form.form && typeof form.form === "string" && validWordReadings.has(form.form))
              readingForms.add(form.form);
        }
      } else if (validWordReadings.has(title) && isValidArray(ent.forms) && validWordKanjiForms !== void 0 && ent.forms.some((form) => validWordKanjiForms.has(form.form)))
        readingWithFormsEntries.push(ent);
      else if (word.kanjiForms === void 0 && validWordReadings.has(title))
        readingEntries.push(ent);
    }
    for (const entry of entriesWithForms) {
      const elem = titleFormMap.get(entry.word);
      if (elem && entry.forms.some((form) => elem.has(form.form)))
        readingWithFormsEntries.push(entry);
    }
    for (const entry of entriesWithFormTitles)
      if (readingForms.has(entry.word)) readingWithFormsEntries.push(entry);

    // An entry may be listed (and parsed) more than once; parseEntry skips
    // definitions that are already in `definitions`.
    let parsedReadingWithFormsEntries = false;
    for (const entry of kanjiFormEntries)
      if (entry.pos_title === "\u548C\u8A9E\u306E\u6F22\u5B57\u8868\u8A18" && readingWithFormsEntries.length > 0) {
        parsedReadingWithFormsEntries = true;
        for (const ref of readingWithFormsEntries)
          parseEntry(ref, definitions, definitionMap);
      } else parseEntry(entry, definitions, definitionMap);
    if (!parsedReadingWithFormsEntries && readingWithFormsEntries.length > 0) {
      for (const ref of readingWithFormsEntries)
        parseEntry(ref, definitions, definitionMap);
    }
    for (const readingEntry of readingEntries)
      parseEntry(readingEntry, definitions, definitionMap);
    if (definitions.length > 0)
      japaneseDefinitions.push({
        wordID: word.id,
        definitions
      });
  }

  // Step 4. Conversions are awaited one at a time, as before.
  const kuroshiro = generateFurigana === true ? new Kuroshiro.default() : null;
  if (kuroshiro !== null) await kuroshiro.init(new KuromojiAnalyzer());
  const convert = kuroshiro !== null ? kuroshiro.convert.bind(kuroshiro) : null;
  for (const pair of japaneseDefinitions)
    for (const def of pair.definitions) {
      const defCount = definitionMap.get(def.definition);
      if (defCount && defCount.count > 1) def.mayNotBeAccurate = true;
      // Definitions containing "・" (U+30FB) get no furigana.
      if (convert !== null && !def.definition.includes("\u30FB"))
        def.furigana = await convert(def.definition, {
          to: "hiragana",
          mode: "furigana"
        });
    }
  return japaneseDefinitions;
}
1505
1770
  function lookupWordNote(key, notes, tags, required, fallback) {
1506
1771
  const info = noteMap.get(
1507
1772
  key.toLowerCase()
@@ -1520,7 +1785,7 @@ var wordAddNoteArray = (arr, cb) => {
1520
1785
  if (!arr) return;
1521
1786
  for (const v of arr) cb(v);
1522
1787
  };
1523
- function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath) {
1788
+ function getWord(dict, id, kanjiDic, examples, definitions, dictWord, noteTypeName, deckPath) {
1524
1789
  try {
1525
1790
  if (!dictWord && id && dict)
1526
1791
  dictWord = dict.find((entry) => entry.id === id);
@@ -1534,7 +1799,10 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1534
1799
  ...deckPath ? { deckPath } : {},
1535
1800
  tags: []
1536
1801
  };
1537
- if (dictWord.isCommon === true) word.common = true;
1802
+ if (dictWord.isCommon === true) {
1803
+ word.common = true;
1804
+ word.tags.push("word::common");
1805
+ }
1538
1806
  if (dictWord.kanjiForms)
1539
1807
  word.kanjiForms = dictWord.kanjiForms.map(
1540
1808
  (dictKanjiForm) => ({
@@ -1583,12 +1851,16 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1583
1851
  (translation) => {
1584
1852
  if (typeof translation === "string") return translation;
1585
1853
  else {
1586
- if (translation.type === "lit")
1854
+ if (translation.type === "lit") {
1587
1855
  translationTypes.push("Literal meaning");
1588
- else if (translation.type === "expl")
1856
+ word.tags.push("word::literal_meaning");
1857
+ } else if (translation.type === "expl") {
1589
1858
  translationTypes.push("Explanation");
1590
- else if (translation.type === "tm")
1859
+ word.tags.push("word::explanation");
1860
+ } else if (translation.type === "tm") {
1591
1861
  translationTypes.push("Trademark");
1862
+ word.tags.push("word::trademark");
1863
+ }
1592
1864
  return translation.translation;
1593
1865
  }
1594
1866
  }
@@ -1638,7 +1910,10 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1638
1910
  notes
1639
1911
  };
1640
1912
  });
1641
- if (dictWord.usuallyInKana === true) word.usuallyInKana = true;
1913
+ if (dictWord.usuallyInKana === true) {
1914
+ word.usuallyInKana = true;
1915
+ word.tags.push("word::usually_in_kana_for_all_senses");
1916
+ }
1642
1917
  if (kanjiDic && word.kanjiForms) {
1643
1918
  word.kanji = [];
1644
1919
  for (const kanjiForm of word.kanjiForms)
@@ -1649,35 +1924,44 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1649
1924
  (kanji) => kanji.kanji === char
1650
1925
  );
1651
1926
  if (dictKanji) {
1652
- const kanjiObj = getKanji(
1653
- dictKanji.kanji,
1654
- kanjiDic,
1655
- void 0
1656
- );
1927
+ const kanjiObj = getKanji(kanjiDic, void 0, dictKanji);
1657
1928
  word.kanji.push({
1658
1929
  kanji: kanjiObj.kanji,
1659
1930
  ...kanjiObj.meanings ? { meanings: kanjiObj.meanings } : {}
1660
1931
  });
1661
1932
  }
1662
1933
  }
1663
- if (word.kanji.length === 0) delete word.kanji;
1934
+ if (word.kanji.length === 0) {
1935
+ delete word.kanji;
1936
+ word.tags.push("word::no_kanji");
1937
+ }
1664
1938
  }
1665
1939
  if (dictWord.hasPhrases === true && examples) {
1666
1940
  const readings = new Set(
1667
1941
  word.readings.filter(
1668
- (reading) => (!reading.notes || !reading.notes.some(
1942
+ (reading) => (reading.notes === void 0 || !reading.notes.some(
1669
1943
  (note) => notSearchedForms.has(note)
1670
1944
  )) && (word.common === void 0 || reading.common === true)
1671
1945
  ).map((reading) => reading.reading)
1672
1946
  );
1947
+ const existValidKf = word.kanjiForms && word.kanjiForms.length > 0 ? word.kanjiForms.some(
1948
+ (kf) => (kf.notes === void 0 || !kf.notes.some(
1949
+ (note) => notSearchedForms.has(note)
1950
+ )) && (word.common === void 0 || kf.common === true)
1951
+ ) : void 0;
1673
1952
  const kanjiForms = word.kanjiForms && word.kanjiForms.length > 0 ? new Set(
1674
- word.kanjiForms.map(
1675
- (kanjiForm) => kanjiForm.kanjiForm
1676
- )
1953
+ word.kanjiForms.filter((kanjiForm) => {
1954
+ if (existValidKf === true)
1955
+ return (kanjiForm.notes === void 0 || !kanjiForm.notes.some(
1956
+ (note) => notSearchedForms.has(note)
1957
+ )) && (word.common === void 0 || kanjiForm.common === true);
1958
+ else return true;
1959
+ }).map((kanjiForm) => kanjiForm.kanjiForm)
1677
1960
  ) : void 0;
1678
- const kanjiFormExamples = [];
1961
+ let kanjiFormExamples = [];
1679
1962
  const readingMatchingKanjiFormExamples = [];
1680
1963
  const readingExamples = [];
1964
+ const readingMatchingKanjiForms = /* @__PURE__ */ new Set();
1681
1965
  for (const example of examples)
1682
1966
  for (let i = 0; i < example.parts.length; i++) {
1683
1967
  const part = example.parts[i];
@@ -1685,12 +1969,18 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1685
1969
  const readingAsInflectedFormMatch = part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
1686
1970
  const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
1687
1971
  if (kanjiForms && kanjiForms.has(part.baseForm) || referenceIDMatch) {
1688
- if (readingAsReadingMatch || readingAsInflectedFormMatch)
1972
+ if (readingAsReadingMatch || readingAsInflectedFormMatch) {
1689
1973
  readingMatchingKanjiFormExamples.push({
1690
1974
  ex: example,
1691
1975
  partIndex: i
1692
1976
  });
1693
- else kanjiFormExamples.push({ ex: example, partIndex: i });
1977
+ readingMatchingKanjiForms.add(part.baseForm);
1978
+ } else
1979
+ kanjiFormExamples.push({
1980
+ ex: example,
1981
+ partIndex: i,
1982
+ form: part.baseForm
1983
+ });
1694
1984
  break;
1695
1985
  }
1696
1986
  const readingAsBaseFormMatch = readings.has(part.baseForm);
@@ -1699,11 +1989,16 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1699
1989
  break;
1700
1990
  }
1701
1991
  }
1992
+ if (readingMatchingKanjiForms.size > 0)
1993
+ kanjiFormExamples = kanjiFormExamples.filter(
1994
+ (ex) => ex.form && readingMatchingKanjiForms.has(ex.form)
1995
+ );
1702
1996
  const includeKanjiFormExamples = word.kanjiForms !== void 0;
1703
1997
  let wordExamples = [
1704
1998
  ...includeKanjiFormExamples ? [...readingMatchingKanjiFormExamples, ...kanjiFormExamples] : [],
1705
1999
  ...!includeKanjiFormExamples ? readingExamples : []
1706
2000
  ];
2001
+ readingMatchingKanjiForms.clear();
1707
2002
  const glossSpecificExamples = [];
1708
2003
  const seenPhrases = /* @__PURE__ */ new Set();
1709
2004
  for (let i = 0; i < word.translations.length; i++) {
@@ -1733,13 +2028,23 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1733
2028
  (ex) => !seenPhrases.has(ex.ex.phrase)
1734
2029
  ).slice(0, 5 - glossSpecificExamples.length)
1735
2030
  ];
1736
- if (wordExamples.length > 0)
2031
+ if (wordExamples.length > 0) {
1737
2032
  word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
1738
2033
  phrase: ex.ex.furigana ?? ex.ex.phrase,
1739
2034
  translation: ex.ex.translation,
1740
2035
  originalPhrase: ex.ex.phrase,
1741
2036
  ...ex.ex.glossNumber ? { glossNumber: ex.ex.glossNumber } : {}
1742
2037
  }));
2038
+ word.tags.push("word::has_phrases");
2039
+ if (glossSpecificExamples.length > 0)
2040
+ word.tags.push("word::has_meaning-specific_phrases");
2041
+ }
2042
+ }
2043
+ if (definitions) {
2044
+ const pair = definitions.find(
2045
+ (wdp) => wdp.wordID === word.id
2046
+ );
2047
+ if (pair) word.definitions = pair.definitions;
1743
2048
  }
1744
2049
  return word;
1745
2050
  } else throw new Error(`Word${id ? ` ${id}` : ""} not found`);
@@ -1747,35 +2052,46 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1747
2052
  throw err;
1748
2053
  }
1749
2054
  }
1750
- function getKanji(kanjiChar, dict, jmDict, svgList, noteTypeName, deckPath) {
2055
+ function getKanji(dict, kanjiChar, dictKanji, jmDict, svgList, noteTypeName, deckPath) {
1751
2056
  var _a, _b, _c, _d;
1752
2057
  try {
1753
- const dictKanji = dict.find(
1754
- (entry) => entry.kanji === kanjiChar
1755
- );
2058
+ if (!dictKanji && kanjiChar)
2059
+ dictKanji = dict.find((entry) => entry.kanji === kanjiChar);
1756
2060
  if (dictKanji) {
1757
2061
  const kanji = {
1758
2062
  kanji: dictKanji.kanji,
1759
2063
  ...dictKanji.misc ? { strokes: dictKanji.misc.strokeNumber } : {},
1760
2064
  ...dictKanji.misc && dictKanji.misc.grade ? { grade: dictKanji.misc.grade } : {},
1761
- ...dictKanji.misc && dictKanji.misc.frequency ? { grade: dictKanji.misc.frequency } : {},
2065
+ ...dictKanji.misc && dictKanji.misc.frequency ? { frequency: dictKanji.misc.frequency } : {},
1762
2066
  noteID: `kanji_${dictKanji.kanji}`,
1763
2067
  ...noteTypeName ? { noteTypeName } : {},
1764
2068
  ...deckPath ? { deckPath } : {}
1765
2069
  };
1766
- for (const rm of dictKanji.readingMeaning) {
1767
- if (rm.nanori && rm.nanori.length > 0) {
1768
- if (kanji.nanori === void 0) kanji.nanori = [];
1769
- kanji.nanori.push(...rm.nanori);
1770
- }
1771
- for (const group of rm.groups) {
1772
- kanji.onyomi = group.readings.filter((reading) => reading.type === "ja_on").map((reading) => reading.reading);
1773
- kanji.kunyomi = group.readings.filter((reading) => reading.type === "ja_kun").map((reading) => reading.reading);
1774
- if (kanji.onyomi.length === 0) delete kanji.onyomi;
1775
- if (kanji.kunyomi.length === 0) delete kanji.kunyomi;
1776
- kanji.meanings = group.meanings;
1777
- if (kanji.meanings.length === 0) delete kanji.meanings;
2070
+ if (dictKanji.readingMeaning) {
2071
+ kanji.meanings = [];
2072
+ kanji.nanori = [];
2073
+ kanji.onyomi = [];
2074
+ kanji.kunyomi = [];
2075
+ for (const rm of dictKanji.readingMeaning) {
2076
+ if (rm.nanori && rm.nanori.length > 0)
2077
+ kanji.nanori.push(...rm.nanori);
2078
+ for (const group of rm.groups) {
2079
+ kanji.onyomi.push(
2080
+ ...group.readings.filter((reading) => reading.type === "ja_on").map((reading) => reading.reading)
2081
+ );
2082
+ kanji.kunyomi.push(
2083
+ ...group.readings.filter(
2084
+ (reading) => reading.type === "ja_kun"
2085
+ ).map((reading) => reading.reading)
2086
+ );
2087
+ kanji.meanings.push(...group.meanings);
2088
+ }
1778
2089
  }
2090
+ if (kanji.meanings && kanji.meanings.length === 0)
2091
+ delete kanji.meanings;
2092
+ if (kanji.nanori && kanji.nanori.length === 0) delete kanji.nanori;
2093
+ if (kanji.onyomi && kanji.onyomi.length === 0) delete kanji.onyomi;
2094
+ if (kanji.kunyomi && kanji.kunyomi.length === 0) delete kanji.kunyomi;
1779
2095
  }
1780
2096
  if (jmDict) {
1781
2097
  let kanjiWords = jmDict.filter(
@@ -1789,6 +2105,7 @@ function getKanji(kanjiChar, dict, jmDict, svgList, noteTypeName, deckPath) {
1789
2105
  void 0,
1790
2106
  void 0,
1791
2107
  void 0,
2108
+ void 0,
1792
2109
  word,
1793
2110
  void 0
1794
2111
  );
@@ -1825,6 +2142,7 @@ function getKanji(kanjiChar, dict, jmDict, svgList, noteTypeName, deckPath) {
1825
2142
  void 0,
1826
2143
  void 0,
1827
2144
  void 0,
2145
+ void 0,
1828
2146
  word,
1829
2147
  void 0
1830
2148
  );
@@ -1894,17 +2212,19 @@ function getKanji(kanjiChar, dict, jmDict, svgList, noteTypeName, deckPath) {
1894
2212
  ...kanji.svg ? ["kanji::has_svg"] : []
1895
2213
  );
1896
2214
  return kanji;
1897
- } else throw new Error(`Kanji ${kanjiChar} not found`);
2215
+ } else
2216
+ throw new Error(`Kanji not found${kanjiChar ? `: ${kanjiChar}` : ""}`);
1898
2217
  } catch (err) {
1899
2218
  throw err;
1900
2219
  }
1901
2220
  }
1902
- function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList, noteTypeName, deckPath) {
2221
+ function getKanjiExtended(info, dict, kanjiChar, dictKanji, useWords, jmDict, svgList, noteTypeName, deckPath) {
1903
2222
  var _a, _b;
1904
2223
  try {
1905
2224
  const kanji = getKanji(
1906
- kanjiChar,
1907
2225
  dict,
2226
+ kanjiChar,
2227
+ dictKanji,
1908
2228
  jmDict,
1909
2229
  svgList,
1910
2230
  noteTypeName,
@@ -1914,13 +2234,13 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
1914
2234
  kanji.components = info.components;
1915
2235
  if (info.mnemonic && info.mnemonic.length > 0)
1916
2236
  kanji.mnemonic = info.mnemonic;
1917
- if (useJpdbWords === true && info.words && info.words.length > 0)
2237
+ if (useWords === true && info.words && info.words.length > 0)
1918
2238
  kanji.words = info.words;
1919
2239
  if (kanji.tags) {
1920
2240
  kanji.tags.push(`kanji::components::${((_a = kanji.components) == null ? void 0 : _a.length) ?? 0}`);
1921
2241
  if (kanji.mnemonic && kanji.mnemonic.length > 0)
1922
2242
  kanji.tags.push("kanji::has_mnemonic");
1923
- if (useJpdbWords === true && kanji.words) {
2243
+ if (useWords === true && kanji.words) {
1924
2244
  if (!kanji.tags.some((tag, index) => {
1925
2245
  var _a2;
1926
2246
  if (tag.startsWith("kanji::words::")) {
@@ -1935,8 +2255,8 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
1935
2255
  kanji.tags.push(`kanji::words::${((_b = kanji.words) == null ? void 0 : _b.length) ?? 0}`);
1936
2256
  }
1937
2257
  }
1938
- if (kanji.fromJpdb === true && (kanji.mnemonic || kanji.components && kanji.components.length > 0 || kanji.words))
1939
- kanji.source = `https://jpdb.io/kanji/${kanji.kanji}#a`;
2258
+ if (info.fromJpdb === true && (kanji.mnemonic || kanji.components || kanji.words && useWords === true))
2259
+ kanji.source = `https://jpdb.io/kanji/${kanji.kanji}`;
1940
2260
  return kanji;
1941
2261
  } catch (err) {
1942
2262
  throw err;
@@ -1981,32 +2301,50 @@ function generateAnkiNote(entry) {
1981
2301
  if (!entry.noteID) throw new Error("Invalid note ID");
1982
2302
  const fields = [];
1983
2303
  if (isWord(entry)) {
1984
- if (!entry.translations) throw new Error(`Invalid word: ${entry.noteID}`);
2304
+ if (!entry.translations || entry.readings.length === 0)
2305
+ throw new Error(`Invalid word: ${entry.noteID}`);
2306
+ const firstReading = createEntry(
2307
+ `<span class="word word-reading">${entry.readings[0].reading}${entry.readings[0].audio !== void 0 ? `<br>[sound:${entry.readings[0].audio}]` : ""}</span>`,
2308
+ entry.readings[0].notes
2309
+ );
2310
+ const otherReadings = entry.readings.length > 1 ? `<details><summary>Show other readings</summary>${entry.readings.slice(1).map(
2311
+ (readingEntry) => createEntry(
2312
+ `<span class="word word-reading">${readingEntry.reading}${readingEntry.audio !== void 0 ? `<br>[sound:${readingEntry.audio}]` : ""}</span>`,
2313
+ readingEntry.notes
2314
+ )
2315
+ ).join("")}</details>` : void 0;
2316
+ const readingsField = [firstReading, ...otherReadings ?? []].join(
2317
+ ""
2318
+ );
2319
+ const firstKanjiForm = entry.kanjiForms ? createEntry(
2320
+ `<span class="word word-kanjiform"><ruby><rb>${entry.kanjiForms[0].kanjiForm}</rb><rt>${entry.readings[0].reading}</rt></ruby></span>`,
2321
+ entry.kanjiForms[0].notes
2322
+ ) : void 0;
2323
+ const otherKanjiForms = entry.kanjiForms && entry.kanjiForms.length > 1 ? `<details><summary>Show other kanji forms</summary>${entry.kanjiForms.slice(1).map((kanjiFormEntry) => {
2324
+ const restrictedReading = entry.readings.find(
2325
+ (r) => r.notes && r.notes.includes(
2326
+ `Reading restricted to ${kanjiFormEntry.kanjiForm}`
2327
+ )
2328
+ );
2329
+ return `${createEntry(`<span class="word word-kanjiform">${restrictedReading ? "<ruby><rb>" : ""}${kanjiFormEntry.kanjiForm}${restrictedReading ? `</rb><rt>${restrictedReading.reading}</rt></ruby>` : ""}</span>`, kanjiFormEntry.notes)}`;
2330
+ }).join("")}</details>` : void 0;
2331
+ const kanjiFormsField = firstKanjiForm ? [firstKanjiForm, ...otherKanjiForms ?? []].join("") : void 0;
2332
+ const firstThreeTranslations = entry.translations.slice(0, 3).map(
2333
+ (translationEntry, index) => `${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes, void 0, entry.phrases && entry.phrases.some((phrase, index2) => index === index2 && phrase.glossNumber && phrase.glossNumber.wordId === entry.id && phrase.glossNumber.glossNumber === index + 1) ? true : void 0)}`
2334
+ ).join("");
2335
+ const otherTranslations = entry.translations.length > 3 ? `<details><summary>Show other translations</summary>${entry.translations.map(
2336
+ (translationEntry, index) => index > 2 ? `${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes, void 0, entry.phrases && entry.phrases.some((phrase, index2) => index === index2 && phrase.glossNumber && phrase.glossNumber.wordId === entry.id && phrase.glossNumber.glossNumber === index + 1) ? true : void 0)}` : "null"
2337
+ ).filter((translation) => translation !== "null").join("")}</details>` : void 0;
2338
+ const translationsField = [
2339
+ firstThreeTranslations,
2340
+ ...otherTranslations ?? []
2341
+ ].join("");
1985
2342
  fields.push(
1986
- ...entry.kanjiForms && !entry.usuallyInKana ? [
1987
- entry.kanjiForms.map(
1988
- (kanjiFormEntry, index) => `${index > 0 ? "<details><summary>Show kanji form</summary>" : ""}${createEntry(`<span class="word word-kanjiform">${index === 0 ? "<ruby><rb>" : ""}${kanjiFormEntry.kanjiForm}${index === 0 ? `</rb><rt>${entry.readings[0].reading}</rt></ruby>` : ""}</span>`, kanjiFormEntry.notes)}${index > 0 ? "</details>" : ""}`
1989
- ).join(""),
1990
- entry.readings.map(
1991
- (readingEntry, index) => `${index > 0 ? "<details><summary>Show reading</summary>" : ""}${createEntry(`<span class="word word-reading">${readingEntry.reading}${readingEntry.audio !== void 0 ? `<br>[sound:${readingEntry.audio}]` : ""}</span>`, readingEntry.notes)}${index > 0 ? "</details>" : ""}`
1992
- ).join("")
1993
- ] : [
1994
- entry.readings.map(
1995
- (readingEntry, index) => `${index > 0 ? "<details><summary>Show reading</summary>" : ""}${createEntry(`<span class="word word-reading">${readingEntry.reading}${readingEntry.audio !== void 0 ? `<br>[sound:${readingEntry.audio}]` : ""}</span>`, readingEntry.notes)}${index > 0 ? "</details>" : ""}`
1996
- ).join(""),
1997
- entry.kanjiForms ? entry.kanjiForms.map(
1998
- (kanjiFormEntry, index) => `${index > 0 ? "<details><summary>Show kanji form</summary>" : ""}${createEntry(`<span class="word word-kanjiform">${index === 0 ? "<ruby><rb>" : ""}${kanjiFormEntry.kanjiForm}${index === 0 ? `</rb><rt>${entry.readings[0].reading}</rt></ruby>` : ""}</span>`, kanjiFormEntry.notes)}${index > 0 ? "</details>" : ""}`
1999
- ).join("") : noKanjiForms
2343
+ ...entry.kanjiForms && kanjiFormsField && !entry.usuallyInKana ? [kanjiFormsField, readingsField] : [
2344
+ readingsField,
2345
+ entry.kanjiForms && kanjiFormsField ? kanjiFormsField : noKanjiForms
2000
2346
  ],
2001
- entry.translations.map(
2002
- (translationEntry, index) => `${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes, void 0, entry.phrases && entry.phrases.some((phrase, index2) => index === index2 && phrase.glossNumber && phrase.glossNumber.wordId === entry.id && phrase.glossNumber.glossNumber === index + 1) ? true : void 0)}${index > 2 ? "</details>" : ""}`
2003
- ).join(""),
2004
- entry.kanji ? entry.kanji.map(
2005
- (kanjiEntry) => createEntry(
2006
- `<span class="word word-kanji">${kanjiEntry.kanji}${kanjiEntry.meanings === void 0 ? " (no meanings)" : ""}</span>`,
2007
- kanjiEntry.meanings
2008
- )
2009
- ).join("") : '<span class="word word-kanji">(no kanji)</span>',
2347
+ translationsField,
2010
2348
  entry.phrases ? entry.phrases.map(
2011
2349
  (phraseEntry, index) => createEntry(
2012
2350
  `<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
@@ -2017,6 +2355,17 @@ function generateAnkiNote(entry) {
2017
2355
  ) ? true : void 0
2018
2356
  )
2019
2357
  ).join("") : '<span class="word word-phrase">(no phrases) (Search on dictionaries!)</span>',
2358
+ entry.definitions ? entry.definitions.map(
2359
+ (definitionEntry) => createEntry(
2360
+ `<span class="word word-definition"><span class="word word-definition-original">${definitionEntry.definition}</span><span class="word word-definition-furigana">${definitionEntry.furigana ?? definitionEntry.definition}</span></span>`
2361
+ )
2362
+ ).join("") : '<span class="word word-definition">(no definitions) (Search on ja.wiktionary.org)</span>',
2363
+ entry.kanji ? entry.kanji.map(
2364
+ (kanjiEntry) => createEntry(
2365
+ `<span class="word word-kanji">${kanjiEntry.kanji}${kanjiEntry.meanings === void 0 ? " (no meanings)" : ""}</span>`,
2366
+ kanjiEntry.meanings
2367
+ )
2368
+ ).join("") : '<span class="word word-kanji">(no kanji)</span>',
2020
2369
  ...entry.tags && entry.tags.length > 0 ? [
2021
2370
  entry.tags.map(
2022
2371
  (tag) => tag.trim().toLowerCase().replaceAll(" ", "::")
@@ -2173,12 +2522,12 @@ function generateAnkiNotesFile(list) {
2173
2522
  if (ankiNotes.length === 0) throw new Error("Invalid list");
2174
2523
  return `${headers.join("\n")}
2175
2524
  ${ankiNotes}`;
2176
- } else console.log("No entries available for Anki notes creation");
2177
- return void 0;
2525
+ } else throw new Error("No entries available for Anki notes creation");
2178
2526
  }
2179
2527
  export {
2180
2528
  capitalizeString,
2181
2529
  convertJMdict,
2530
+ convertJawiktionary,
2182
2531
  convertKanjiDic,
2183
2532
  convertKradFile,
2184
2533
  convertRadkFile,
@@ -2188,6 +2537,7 @@ export {
2188
2537
  getKanji,
2189
2538
  getKanjiExtended,
2190
2539
  getWord,
2540
+ getWordDefinitions,
2191
2541
  isGrammar,
2192
2542
  isKana,
2193
2543
  isKanji,