henkan 2.3.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/dist/index.cjs.js +96 -74
  2. package/dist/index.cjs.js.map +2 -2
  3. package/dist/index.mjs +95 -74
  4. package/dist/index.mjs.map +2 -2
  5. package/dist/types/constants.d.ts.map +1 -1
  6. package/dist/types/types.d.ts +2 -2
  7. package/dist/types/types.d.ts.map +1 -1
  8. package/dist/types/utils.d.ts +7 -0
  9. package/dist/types/utils.d.ts.map +1 -1
  10. package/docs/api/README.md +1 -0
  11. package/docs/api/functions/convertJMdict.md +1 -1
  12. package/docs/api/functions/convertJawiktionaryAsync.md +1 -1
  13. package/docs/api/functions/convertJawiktionarySync.md +1 -1
  14. package/docs/api/functions/convertKanjiDic.md +1 -1
  15. package/docs/api/functions/convertKradFile.md +1 -1
  16. package/docs/api/functions/convertRadkFile.md +1 -1
  17. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  18. package/docs/api/functions/convertTanakaCorpusWithFurigana.md +1 -1
  19. package/docs/api/functions/createEntryMaps.md +1 -1
  20. package/docs/api/functions/generateAnkiNote.md +1 -1
  21. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  22. package/docs/api/functions/generateFurigana.md +33 -0
  23. package/docs/api/functions/getKanji.md +1 -1
  24. package/docs/api/functions/getKanjiExtended.md +1 -1
  25. package/docs/api/functions/getValidForms.md +1 -1
  26. package/docs/api/functions/getWord.md +1 -1
  27. package/docs/api/functions/getWordDefinitions.md +1 -1
  28. package/docs/api/functions/getWordDefinitionsWithFurigana.md +1 -1
  29. package/docs/api/interfaces/DefaultNoteInfo.md +4 -4
  30. package/docs/api/interfaces/Definition.md +4 -4
  31. package/docs/api/interfaces/DictKanji.md +5 -5
  32. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  33. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  34. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  35. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  36. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  37. package/docs/api/interfaces/DictMeaning.md +11 -11
  38. package/docs/api/interfaces/DictRadical.md +4 -4
  39. package/docs/api/interfaces/DictWord.md +8 -8
  40. package/docs/api/interfaces/EntryMaps.md +7 -7
  41. package/docs/api/interfaces/ExamplePart.md +7 -7
  42. package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
  43. package/docs/api/interfaces/Grammar.md +15 -15
  44. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  45. package/docs/api/interfaces/JaWiktionaryEntry.md +7 -7
  46. package/docs/api/interfaces/Kana.md +11 -11
  47. package/docs/api/interfaces/Kanji.md +23 -23
  48. package/docs/api/interfaces/KanjiComponent.md +3 -3
  49. package/docs/api/interfaces/KanjiForm.md +4 -4
  50. package/docs/api/interfaces/NoteAndTag.md +3 -3
  51. package/docs/api/interfaces/NoteHeaderKeys.md +7 -7
  52. package/docs/api/interfaces/Phrase.md +5 -5
  53. package/docs/api/interfaces/Radical.md +16 -16
  54. package/docs/api/interfaces/Reading.md +5 -5
  55. package/docs/api/interfaces/ResultEntry.md +7 -7
  56. package/docs/api/interfaces/TanakaExample.md +7 -7
  57. package/docs/api/interfaces/Translation.md +3 -3
  58. package/docs/api/interfaces/UsefulRegExps.md +8 -8
  59. package/docs/api/interfaces/Word.md +15 -15
  60. package/docs/api/interfaces/WordDefinitionPair.md +4 -4
  61. package/docs/api/type-aliases/Dict.md +1 -1
  62. package/docs/api/type-aliases/DictTranslation.md +1 -1
  63. package/docs/api/type-aliases/EntryType.md +1 -1
  64. package/docs/api/type-aliases/KanjiEntryMap.md +1 -1
  65. package/docs/api/type-aliases/KanjiSVGMap.md +1 -1
  66. package/docs/api/type-aliases/KanjiWordsMap.md +1 -1
  67. package/docs/api/type-aliases/Result.md +1 -1
  68. package/docs/api/type-aliases/WordDefinitionsMap.md +1 -1
  69. package/docs/api/type-aliases/WordExamplesMap.md +1 -1
  70. package/docs/api/type-aliases/WordIDEntryMap.md +1 -1
  71. package/package.json +1 -1
  72. package/src/constants.ts +2 -1
  73. package/src/types.ts +2 -4
  74. package/src/utils.ts +127 -100
package/dist/index.mjs CHANGED
@@ -92,8 +92,9 @@ var noteMap = /* @__PURE__ */ new Map([
92
92
  ["tsugaru-ben", ["dialect::tsugaru-ben", "Dialect: Tsugaru-ben"]],
93
93
  ["aichi dialect", ["dialect::aichi", "Dialect: Aichi"]],
94
94
  ["tochigi dialect", ["dialect::tochigi", "Dialect: Tochigi"]],
95
- ["lit", ["literal_meaning", "Literal meaning"]],
96
95
  ["expl", ["explanation", "Explanation"]],
96
+ ["fig", ["figurative", "Figurative"]],
97
+ ["lit", ["literal_meaning", "Literal meaning"]],
97
98
  ["tm", ["trademark", "Trademark"]],
98
99
  ["adjective (keiyoushi)", ["adjective::i", "\u3044-adjective", "\u5F62\u5BB9\u8A5E"]],
99
100
  ["'taru' adjective", ["adjective::taru", "\u305F\u308B-adjective", "\u5F62\u5BB9\u52D5\u8A5E"]],
@@ -1278,6 +1279,22 @@ function katakanaToHiragana(input) {
1278
1279
  }
1279
1280
  return output.join("").normalize("NFC");
1280
1281
  }
1282
+ async function generateFurigana(text, bindedFunction) {
1283
+ if (!text.includes("\u30FB"))
1284
+ return String(
1285
+ await bindedFunction(text, {
1286
+ to: "hiragana",
1287
+ mode: "furigana"
1288
+ })
1289
+ );
1290
+ else
1291
+ return (await Promise.all(
1292
+ text.split("\u30FB").map(async (t) => {
1293
+ const tFurigana = await generateFurigana(t, bindedFunction);
1294
+ return tFurigana;
1295
+ })
1296
+ )).join("");
1297
+ }
1281
1298
  function getValidForms(readings, kanjiForms, wordIsCommon) {
1282
1299
  const kanjiFormRestrictions = /* @__PURE__ */ new Set();
1283
1300
  const validReadings = readings.filter(
@@ -1373,13 +1390,16 @@ function convertJMdict(xmlString, examples) {
1373
1390
  const meaningObj = { partOfSpeech: [], translations: [] };
1374
1391
  meaningObj.partOfSpeech = meaning.pos;
1375
1392
  meaningObj.translations = [];
1376
- for (const gloss of meaning.gloss)
1377
- if (typeof gloss === "string") meaningObj.translations.push(gloss);
1378
- else if (typeof gloss === "object" && typeof gloss._ === "string" && typeof gloss.$ === "object" && (gloss.$.g_type === "lit" || gloss.$.g_type === "expl" || gloss.$.g_type === "tm"))
1393
+ for (const gloss of meaning.gloss) {
1394
+ const translation = String(gloss._ ?? gloss).trim();
1395
+ const type = typeof gloss.$ === "object" && typeof gloss.$.g_type === "string" ? gloss.$.g_type : void 0;
1396
+ if (translation.length > 0 && type !== void 0 && type.length > 0)
1379
1397
  meaningObj.translations.push({
1380
- translation: gloss._,
1381
- type: gloss.$.g_type
1398
+ translation,
1399
+ type
1382
1400
  });
1401
+ else meaningObj.translations.push(translation);
1402
+ }
1383
1403
  if (isStringArray(meaning.xref)) meaningObj.references = meaning.xref;
1384
1404
  if (isStringArray(meaning.stagk))
1385
1405
  meaningObj.kanjiFormRestrictions = meaning.stagk;
@@ -1509,7 +1529,7 @@ function convertTanakaCorpus(tanakaString) {
1509
1529
  if (a !== void 0 && b !== void 0 && a.startsWith("A: ") && b.startsWith("B: ")) {
1510
1530
  a = a.replace("A: ", "");
1511
1531
  b = b.replace("B: ", "");
1512
- const idMatch = regexps.tanakaID.exec(a)?.groups["id"]?.trim();
1532
+ const idMatch = regexps.tanakaID.exec(a)?.groups.id?.trim();
1513
1533
  const idParts = String(idMatch).split("_");
1514
1534
  const id = `${Number(idParts[0])}_${Number(idParts[1])}`;
1515
1535
  const aParts = a.replace(regexps.tanakaID, "").split(" ");
@@ -1517,17 +1537,15 @@ function convertTanakaCorpus(tanakaString) {
1517
1537
  const bParts = [];
1518
1538
  for (const part of bRawParts) {
1519
1539
  const partMatches = regexps.tanakaPart.exec(part);
1520
- const baseForm = partMatches?.groups["base"];
1540
+ const baseForm = partMatches?.groups.base;
1521
1541
  const examplePart = { baseForm };
1522
- const reading = partMatches?.groups["reading"];
1523
- const glossNumber = partMatches?.groups["glossnum"];
1524
- const inflectedForm = partMatches?.groups["inflection"];
1542
+ const reading = partMatches?.groups.reading;
1543
+ const glossNumber = partMatches?.groups.glossnum;
1544
+ const inflectedForm = partMatches?.groups.inflection;
1525
1545
  if (reading !== void 0)
1526
1546
  if (regexps.tanakaReferenceID.test(reading)) {
1527
1547
  const referenceID = regexps.tanakaReferenceID.exec(reading);
1528
- examplePart.referenceID = `${Number(
1529
- referenceID?.groups["entryid"]
1530
- )}`;
1548
+ examplePart.referenceID = `${Number(referenceID?.groups.entryid)}`;
1531
1549
  } else examplePart.reading = reading;
1532
1550
  if (glossNumber !== void 0)
1533
1551
  examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
@@ -1557,13 +1575,10 @@ async function convertTanakaCorpusWithFurigana(tanakaString) {
1557
1575
  await kuroshiro.init(new KuromojiAnalyzer());
1558
1576
  const convert = kuroshiro.convert.bind(kuroshiro);
1559
1577
  for (let i = 0; i < tanakaArray.length; i++)
1560
- if (!tanakaArray[i].phrase.includes("\u30FB"))
1561
- tanakaArray[i].furigana = String(
1562
- await convert(tanakaArray[i].phrase, {
1563
- to: "hiragana",
1564
- mode: "furigana"
1565
- })
1566
- );
1578
+ tanakaArray[i].furigana = await generateFurigana(
1579
+ tanakaArray[i].phrase,
1580
+ convert
1581
+ );
1567
1582
  return tanakaArray;
1568
1583
  }
1569
1584
  function convertRadkFile(radkBuffer, kanjiDic) {
@@ -1766,17 +1781,22 @@ function createEntryMaps(jmDict, kanjiDic, tanakaExamples, wordDefinitionPairs,
1766
1781
  };
1767
1782
  }
1768
1783
  function mapEntry(entry) {
1784
+ const senses = Array.from(
1785
+ entry.senses.filter(
1786
+ (sense) => isObjectArray(sense.form_of) && sense.form_of.every((form) => typeof form.word === "string") === true || isStringArray(sense.glosses)
1787
+ )
1788
+ );
1769
1789
  return {
1770
1790
  word: entry.word,
1771
1791
  pos_title: entry.pos_title,
1772
- senses: entry.senses.filter(
1773
- (sense) => (isObjectArray(sense.form_of) ? sense.form_of.every((form) => typeof form.word === "string") : isStringArray(sense.glosses)) === true || isStringArray(sense.glosses)
1774
- ).map((sense) => ({
1775
- ...isObjectArray(sense.form_of) ? {
1776
- form_of: sense.form_of.map((form) => String(form.word))
1777
- } : {},
1778
- glosses: sense.glosses
1779
- })),
1792
+ ...senses.length > 0 ? {
1793
+ senses: entry.senses.map((sense) => ({
1794
+ ...isObjectArray(sense.form_of) ? {
1795
+ form_of: sense.form_of.map((form) => String(form.word))
1796
+ } : {},
1797
+ glosses: sense.glosses
1798
+ }))
1799
+ } : {},
1780
1800
  ...isObjectArray(entry.forms) && entry.forms.every((form) => typeof form.form === "string") === true ? { forms: entry.forms.map((form) => String(form.form)) } : {}
1781
1801
  };
1782
1802
  }
@@ -1809,15 +1829,16 @@ async function convertJawiktionaryAsync(stream) {
1809
1829
  return entries;
1810
1830
  }
1811
1831
  function parseEntry(entry, definitions, definitionMap) {
1812
- for (const sense of entry.senses) {
1813
- const definition = sense.glosses.join("");
1814
- if (!definitions.some((def) => def.definition === definition)) {
1815
- if (!definitionMap.has(definition))
1816
- definitionMap.set(definition, { count: 1 });
1817
- else definitionMap.get(definition).count++;
1818
- definitions.push({ definition });
1832
+ if (entry.senses !== void 0)
1833
+ for (const sense of entry.senses) {
1834
+ const definition = sense.glosses.join("");
1835
+ if (!definitions.some((def) => def.definition === definition)) {
1836
+ if (!definitionMap.has(definition))
1837
+ definitionMap.set(definition, { count: 1 });
1838
+ else definitionMap.get(definition).count++;
1839
+ definitions.push({ definition });
1840
+ }
1819
1841
  }
1820
- }
1821
1842
  }
1822
1843
  function getWordDefinitions(wiktionaryEntries, jmDict) {
1823
1844
  const entries = /* @__PURE__ */ new Map();
@@ -1866,19 +1887,20 @@ function getWordDefinitions(wiktionaryEntries, jmDict) {
1866
1887
  let valid = false;
1867
1888
  if (validKanjiForms.has(entry.word)) {
1868
1889
  valid = true;
1869
- for (const sense of entry.senses) {
1870
- if (sense.form_of !== void 0 && sense.form_of.some((form) => validForms.has(form)))
1871
- validFormOfEntries.add(entry.word);
1872
- for (const gloss of sense.glosses) {
1873
- let hasForm = false;
1874
- for (const r of validForms)
1875
- if (gloss.includes(r)) {
1876
- hasForm = true;
1877
- break;
1878
- }
1879
- if (hasForm) validGlossesEntries.add(entry.word);
1890
+ if (entry.senses !== void 0)
1891
+ for (const sense of entry.senses) {
1892
+ if (sense.form_of !== void 0 && sense.form_of.some((form) => validForms.has(form)))
1893
+ validFormOfEntries.add(entry.word);
1894
+ for (const gloss of sense.glosses) {
1895
+ let hasForm = false;
1896
+ for (const r of validForms)
1897
+ if (gloss.includes(r)) {
1898
+ hasForm = true;
1899
+ break;
1900
+ }
1901
+ if (hasForm) validGlossesEntries.add(entry.word);
1902
+ }
1880
1903
  }
1881
- }
1882
1904
  if (entry.forms !== void 0) {
1883
1905
  for (const form of entry.forms)
1884
1906
  if (validForms.has(form)) validFormsEntries.add(entry.word);
@@ -2084,25 +2106,26 @@ function getWordDefinitions(wiktionaryEntries, jmDict) {
2084
2106
  const hasForms = ent.forms !== void 0 && ent.forms.some((form) => pair.forms.has(form));
2085
2107
  if (pair.kanjiForms !== void 0 && pair.kanjiForms.has(ent.word)) {
2086
2108
  kanjiFormEntries.push(ent);
2087
- for (const sense of ent.senses) {
2088
- if (hasValidFormOf && sense.form_of !== void 0) {
2089
- for (const form of sense.form_of)
2090
- if (pair.forms.has(form)) {
2091
- const elem = titleFormMap.get(form);
2092
- if (elem === void 0)
2093
- titleFormMap.set(form, /* @__PURE__ */ new Set([ent.word]));
2094
- else elem.add(ent.word);
2095
- }
2109
+ if (ent.senses !== void 0)
2110
+ for (const sense of ent.senses) {
2111
+ if (hasValidFormOf && sense.form_of !== void 0) {
2112
+ for (const form of sense.form_of)
2113
+ if (pair.forms.has(form)) {
2114
+ const elem = titleFormMap.get(form);
2115
+ if (elem === void 0)
2116
+ titleFormMap.set(form, /* @__PURE__ */ new Set([ent.word]));
2117
+ else elem.add(ent.word);
2118
+ }
2119
+ }
2120
+ for (const gloss of sense.glosses)
2121
+ for (const f of pair.forms)
2122
+ if (gloss.includes(f)) {
2123
+ const elem = refsMap.get(f);
2124
+ if (elem === void 0)
2125
+ refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
2126
+ else elem.add(ent.word);
2127
+ }
2096
2128
  }
2097
- for (const gloss of sense.glosses)
2098
- for (const f of pair.forms)
2099
- if (gloss.includes(f)) {
2100
- const elem = refsMap.get(f);
2101
- if (elem === void 0)
2102
- refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
2103
- else elem.add(ent.word);
2104
- }
2105
- }
2106
2129
  if (hasValidForms && ent.forms !== void 0) {
2107
2130
  for (const form of ent.forms)
2108
2131
  if (pair.forms.has(form)) forms.add(form);
@@ -2183,13 +2206,10 @@ async function getWordDefinitionsWithFurigana(entryList, jmDict) {
2183
2206
  for (let i = 0; i < japaneseDefinitions.length; i++) {
2184
2207
  const pair = japaneseDefinitions[i];
2185
2208
  for (let j = 0; j < pair.definitions.length; j++)
2186
- if (!pair.definitions[j].definition.includes("\u30FB"))
2187
- pair.definitions[j].furigana = String(
2188
- await convert(pair.definitions[j].definition, {
2189
- to: "hiragana",
2190
- mode: "furigana"
2191
- })
2192
- );
2209
+ pair.definitions[j].furigana = await generateFurigana(
2210
+ pair.definitions[j].definition,
2211
+ convert
2212
+ );
2193
2213
  japaneseDefinitions[i] = pair;
2194
2214
  }
2195
2215
  return japaneseDefinitions;
@@ -2933,6 +2953,7 @@ export {
2933
2953
  createEntryMaps,
2934
2954
  generateAnkiNote,
2935
2955
  generateAnkiNotesFile,
2956
+ generateFurigana,
2936
2957
  getKanji,
2937
2958
  getKanjiExtended,
2938
2959
  getValidForms,