henkan 2.3.1 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/dist/index.cjs.js +96 -74
  2. package/dist/index.cjs.js.map +2 -2
  3. package/dist/index.mjs +95 -74
  4. package/dist/index.mjs.map +2 -2
  5. package/dist/types/constants.d.ts.map +1 -1
  6. package/dist/types/types.d.ts +2 -2
  7. package/dist/types/types.d.ts.map +1 -1
  8. package/dist/types/utils.d.ts +7 -0
  9. package/dist/types/utils.d.ts.map +1 -1
  10. package/docs/api/README.md +1 -0
  11. package/docs/api/functions/convertJMdict.md +1 -1
  12. package/docs/api/functions/convertJawiktionaryAsync.md +1 -1
  13. package/docs/api/functions/convertJawiktionarySync.md +1 -1
  14. package/docs/api/functions/convertKanjiDic.md +1 -1
  15. package/docs/api/functions/convertKradFile.md +1 -1
  16. package/docs/api/functions/convertRadkFile.md +1 -1
  17. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  18. package/docs/api/functions/convertTanakaCorpusWithFurigana.md +1 -1
  19. package/docs/api/functions/createEntryMaps.md +1 -1
  20. package/docs/api/functions/generateAnkiNote.md +1 -1
  21. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  22. package/docs/api/functions/generateFurigana.md +33 -0
  23. package/docs/api/functions/getKanji.md +1 -1
  24. package/docs/api/functions/getKanjiExtended.md +1 -1
  25. package/docs/api/functions/getValidForms.md +1 -1
  26. package/docs/api/functions/getWord.md +1 -1
  27. package/docs/api/functions/getWordDefinitions.md +1 -1
  28. package/docs/api/functions/getWordDefinitionsWithFurigana.md +1 -1
  29. package/docs/api/interfaces/DefaultNoteInfo.md +4 -4
  30. package/docs/api/interfaces/Definition.md +4 -4
  31. package/docs/api/interfaces/DictKanji.md +5 -5
  32. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  33. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  34. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  35. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  36. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  37. package/docs/api/interfaces/DictMeaning.md +11 -11
  38. package/docs/api/interfaces/DictRadical.md +4 -4
  39. package/docs/api/interfaces/DictWord.md +8 -8
  40. package/docs/api/interfaces/EntryMaps.md +7 -7
  41. package/docs/api/interfaces/ExamplePart.md +7 -7
  42. package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
  43. package/docs/api/interfaces/Grammar.md +15 -15
  44. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  45. package/docs/api/interfaces/JaWiktionaryEntry.md +7 -7
  46. package/docs/api/interfaces/Kana.md +11 -11
  47. package/docs/api/interfaces/Kanji.md +23 -23
  48. package/docs/api/interfaces/KanjiComponent.md +3 -3
  49. package/docs/api/interfaces/KanjiForm.md +4 -4
  50. package/docs/api/interfaces/NoteAndTag.md +3 -3
  51. package/docs/api/interfaces/NoteHeaderKeys.md +7 -7
  52. package/docs/api/interfaces/Phrase.md +5 -5
  53. package/docs/api/interfaces/Radical.md +16 -16
  54. package/docs/api/interfaces/Reading.md +5 -5
  55. package/docs/api/interfaces/ResultEntry.md +7 -7
  56. package/docs/api/interfaces/TanakaExample.md +7 -7
  57. package/docs/api/interfaces/Translation.md +3 -3
  58. package/docs/api/interfaces/UsefulRegExps.md +8 -8
  59. package/docs/api/interfaces/Word.md +15 -15
  60. package/docs/api/interfaces/WordDefinitionPair.md +4 -4
  61. package/docs/api/type-aliases/Dict.md +1 -1
  62. package/docs/api/type-aliases/DictTranslation.md +1 -1
  63. package/docs/api/type-aliases/EntryType.md +1 -1
  64. package/docs/api/type-aliases/KanjiEntryMap.md +1 -1
  65. package/docs/api/type-aliases/KanjiSVGMap.md +1 -1
  66. package/docs/api/type-aliases/KanjiWordsMap.md +1 -1
  67. package/docs/api/type-aliases/Result.md +1 -1
  68. package/docs/api/type-aliases/WordDefinitionsMap.md +1 -1
  69. package/docs/api/type-aliases/WordExamplesMap.md +1 -1
  70. package/docs/api/type-aliases/WordIDEntryMap.md +1 -1
  71. package/package.json +1 -1
  72. package/src/constants.ts +2 -1
  73. package/src/types.ts +2 -4
  74. package/src/utils.ts +127 -100
package/dist/index.cjs.js CHANGED
@@ -42,6 +42,7 @@ __export(index_exports, {
42
42
  createEntryMaps: () => createEntryMaps,
43
43
  generateAnkiNote: () => generateAnkiNote,
44
44
  generateAnkiNotesFile: () => generateAnkiNotesFile,
45
+ generateFurigana: () => generateFurigana,
45
46
  getKanji: () => getKanji,
46
47
  getKanjiExtended: () => getKanjiExtended,
47
48
  getValidForms: () => getValidForms,
@@ -154,8 +155,9 @@ var noteMap = /* @__PURE__ */ new Map([
154
155
  ["tsugaru-ben", ["dialect::tsugaru-ben", "Dialect: Tsugaru-ben"]],
155
156
  ["aichi dialect", ["dialect::aichi", "Dialect: Aichi"]],
156
157
  ["tochigi dialect", ["dialect::tochigi", "Dialect: Tochigi"]],
157
- ["lit", ["literal_meaning", "Literal meaning"]],
158
158
  ["expl", ["explanation", "Explanation"]],
159
+ ["fig", ["figurative", "Figurative"]],
160
+ ["lit", ["literal_meaning", "Literal meaning"]],
159
161
  ["tm", ["trademark", "Trademark"]],
160
162
  ["adjective (keiyoushi)", ["adjective::i", "\u3044-adjective", "\u5F62\u5BB9\u8A5E"]],
161
163
  ["'taru' adjective", ["adjective::taru", "\u305F\u308B-adjective", "\u5F62\u5BB9\u52D5\u8A5E"]],
@@ -1340,6 +1342,22 @@ function katakanaToHiragana(input) {
1340
1342
  }
1341
1343
  return output.join("").normalize("NFC");
1342
1344
  }
1345
+ async function generateFurigana(text, bindedFunction) {
1346
+ if (!text.includes("\u30FB"))
1347
+ return String(
1348
+ await bindedFunction(text, {
1349
+ to: "hiragana",
1350
+ mode: "furigana"
1351
+ })
1352
+ );
1353
+ else
1354
+ return (await Promise.all(
1355
+ text.split("\u30FB").map(async (t) => {
1356
+ const tFurigana = await generateFurigana(t, bindedFunction);
1357
+ return tFurigana;
1358
+ })
1359
+ )).join("");
1360
+ }
1343
1361
  function getValidForms(readings, kanjiForms, wordIsCommon) {
1344
1362
  const kanjiFormRestrictions = /* @__PURE__ */ new Set();
1345
1363
  const validReadings = readings.filter(
@@ -1435,13 +1453,16 @@ function convertJMdict(xmlString, examples) {
1435
1453
  const meaningObj = { partOfSpeech: [], translations: [] };
1436
1454
  meaningObj.partOfSpeech = meaning.pos;
1437
1455
  meaningObj.translations = [];
1438
- for (const gloss of meaning.gloss)
1439
- if (typeof gloss === "string") meaningObj.translations.push(gloss);
1440
- else if (typeof gloss === "object" && typeof gloss._ === "string" && typeof gloss.$ === "object" && (gloss.$.g_type === "lit" || gloss.$.g_type === "expl" || gloss.$.g_type === "tm"))
1456
+ for (const gloss of meaning.gloss) {
1457
+ const translation = String(gloss._ ?? gloss).trim();
1458
+ const type = typeof gloss.$ === "object" && typeof gloss.$.g_type === "string" ? gloss.$.g_type : void 0;
1459
+ if (translation.length > 0 && type !== void 0 && type.length > 0)
1441
1460
  meaningObj.translations.push({
1442
- translation: gloss._,
1443
- type: gloss.$.g_type
1461
+ translation,
1462
+ type
1444
1463
  });
1464
+ else meaningObj.translations.push(translation);
1465
+ }
1445
1466
  if (isStringArray(meaning.xref)) meaningObj.references = meaning.xref;
1446
1467
  if (isStringArray(meaning.stagk))
1447
1468
  meaningObj.kanjiFormRestrictions = meaning.stagk;
@@ -1571,7 +1592,7 @@ function convertTanakaCorpus(tanakaString) {
1571
1592
  if (a !== void 0 && b !== void 0 && a.startsWith("A: ") && b.startsWith("B: ")) {
1572
1593
  a = a.replace("A: ", "");
1573
1594
  b = b.replace("B: ", "");
1574
- const idMatch = regexps.tanakaID.exec(a)?.groups["id"]?.trim();
1595
+ const idMatch = regexps.tanakaID.exec(a)?.groups.id?.trim();
1575
1596
  const idParts = String(idMatch).split("_");
1576
1597
  const id = `${Number(idParts[0])}_${Number(idParts[1])}`;
1577
1598
  const aParts = a.replace(regexps.tanakaID, "").split(" ");
@@ -1579,17 +1600,15 @@ function convertTanakaCorpus(tanakaString) {
1579
1600
  const bParts = [];
1580
1601
  for (const part of bRawParts) {
1581
1602
  const partMatches = regexps.tanakaPart.exec(part);
1582
- const baseForm = partMatches?.groups["base"];
1603
+ const baseForm = partMatches?.groups.base;
1583
1604
  const examplePart = { baseForm };
1584
- const reading = partMatches?.groups["reading"];
1585
- const glossNumber = partMatches?.groups["glossnum"];
1586
- const inflectedForm = partMatches?.groups["inflection"];
1605
+ const reading = partMatches?.groups.reading;
1606
+ const glossNumber = partMatches?.groups.glossnum;
1607
+ const inflectedForm = partMatches?.groups.inflection;
1587
1608
  if (reading !== void 0)
1588
1609
  if (regexps.tanakaReferenceID.test(reading)) {
1589
1610
  const referenceID = regexps.tanakaReferenceID.exec(reading);
1590
- examplePart.referenceID = `${Number(
1591
- referenceID?.groups["entryid"]
1592
- )}`;
1611
+ examplePart.referenceID = `${Number(referenceID?.groups.entryid)}`;
1593
1612
  } else examplePart.reading = reading;
1594
1613
  if (glossNumber !== void 0)
1595
1614
  examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
@@ -1619,13 +1638,10 @@ async function convertTanakaCorpusWithFurigana(tanakaString) {
1619
1638
  await kuroshiro.init(new KuromojiAnalyzer());
1620
1639
  const convert = kuroshiro.convert.bind(kuroshiro);
1621
1640
  for (let i = 0; i < tanakaArray.length; i++)
1622
- if (!tanakaArray[i].phrase.includes("\u30FB"))
1623
- tanakaArray[i].furigana = String(
1624
- await convert(tanakaArray[i].phrase, {
1625
- to: "hiragana",
1626
- mode: "furigana"
1627
- })
1628
- );
1641
+ tanakaArray[i].furigana = await generateFurigana(
1642
+ tanakaArray[i].phrase,
1643
+ convert
1644
+ );
1629
1645
  return tanakaArray;
1630
1646
  }
1631
1647
  function convertRadkFile(radkBuffer, kanjiDic) {
@@ -1828,17 +1844,22 @@ function createEntryMaps(jmDict, kanjiDic, tanakaExamples, wordDefinitionPairs,
1828
1844
  };
1829
1845
  }
1830
1846
  function mapEntry(entry) {
1847
+ const senses = Array.from(
1848
+ entry.senses.filter(
1849
+ (sense) => isObjectArray(sense.form_of) && sense.form_of.every((form) => typeof form.word === "string") === true || isStringArray(sense.glosses)
1850
+ )
1851
+ );
1831
1852
  return {
1832
1853
  word: entry.word,
1833
1854
  pos_title: entry.pos_title,
1834
- senses: entry.senses.filter(
1835
- (sense) => (isObjectArray(sense.form_of) ? sense.form_of.every((form) => typeof form.word === "string") : isStringArray(sense.glosses)) === true || isStringArray(sense.glosses)
1836
- ).map((sense) => ({
1837
- ...isObjectArray(sense.form_of) ? {
1838
- form_of: sense.form_of.map((form) => String(form.word))
1839
- } : {},
1840
- glosses: sense.glosses
1841
- })),
1855
+ ...senses.length > 0 ? {
1856
+ senses: entry.senses.map((sense) => ({
1857
+ ...isObjectArray(sense.form_of) ? {
1858
+ form_of: sense.form_of.map((form) => String(form.word))
1859
+ } : {},
1860
+ glosses: sense.glosses
1861
+ }))
1862
+ } : {},
1842
1863
  ...isObjectArray(entry.forms) && entry.forms.every((form) => typeof form.form === "string") === true ? { forms: entry.forms.map((form) => String(form.form)) } : {}
1843
1864
  };
1844
1865
  }
@@ -1871,15 +1892,16 @@ async function convertJawiktionaryAsync(stream) {
1871
1892
  return entries;
1872
1893
  }
1873
1894
  function parseEntry(entry, definitions, definitionMap) {
1874
- for (const sense of entry.senses) {
1875
- const definition = sense.glosses.join("");
1876
- if (!definitions.some((def) => def.definition === definition)) {
1877
- if (!definitionMap.has(definition))
1878
- definitionMap.set(definition, { count: 1 });
1879
- else definitionMap.get(definition).count++;
1880
- definitions.push({ definition });
1895
+ if (entry.senses !== void 0)
1896
+ for (const sense of entry.senses) {
1897
+ const definition = sense.glosses.join("");
1898
+ if (!definitions.some((def) => def.definition === definition)) {
1899
+ if (!definitionMap.has(definition))
1900
+ definitionMap.set(definition, { count: 1 });
1901
+ else definitionMap.get(definition).count++;
1902
+ definitions.push({ definition });
1903
+ }
1881
1904
  }
1882
- }
1883
1905
  }
1884
1906
  function getWordDefinitions(wiktionaryEntries, jmDict) {
1885
1907
  const entries = /* @__PURE__ */ new Map();
@@ -1928,19 +1950,20 @@ function getWordDefinitions(wiktionaryEntries, jmDict) {
1928
1950
  let valid = false;
1929
1951
  if (validKanjiForms.has(entry.word)) {
1930
1952
  valid = true;
1931
- for (const sense of entry.senses) {
1932
- if (sense.form_of !== void 0 && sense.form_of.some((form) => validForms.has(form)))
1933
- validFormOfEntries.add(entry.word);
1934
- for (const gloss of sense.glosses) {
1935
- let hasForm = false;
1936
- for (const r of validForms)
1937
- if (gloss.includes(r)) {
1938
- hasForm = true;
1939
- break;
1940
- }
1941
- if (hasForm) validGlossesEntries.add(entry.word);
1953
+ if (entry.senses !== void 0)
1954
+ for (const sense of entry.senses) {
1955
+ if (sense.form_of !== void 0 && sense.form_of.some((form) => validForms.has(form)))
1956
+ validFormOfEntries.add(entry.word);
1957
+ for (const gloss of sense.glosses) {
1958
+ let hasForm = false;
1959
+ for (const r of validForms)
1960
+ if (gloss.includes(r)) {
1961
+ hasForm = true;
1962
+ break;
1963
+ }
1964
+ if (hasForm) validGlossesEntries.add(entry.word);
1965
+ }
1942
1966
  }
1943
- }
1944
1967
  if (entry.forms !== void 0) {
1945
1968
  for (const form of entry.forms)
1946
1969
  if (validForms.has(form)) validFormsEntries.add(entry.word);
@@ -2146,25 +2169,26 @@ function getWordDefinitions(wiktionaryEntries, jmDict) {
2146
2169
  const hasForms = ent.forms !== void 0 && ent.forms.some((form) => pair.forms.has(form));
2147
2170
  if (pair.kanjiForms !== void 0 && pair.kanjiForms.has(ent.word)) {
2148
2171
  kanjiFormEntries.push(ent);
2149
- for (const sense of ent.senses) {
2150
- if (hasValidFormOf && sense.form_of !== void 0) {
2151
- for (const form of sense.form_of)
2152
- if (pair.forms.has(form)) {
2153
- const elem = titleFormMap.get(form);
2154
- if (elem === void 0)
2155
- titleFormMap.set(form, /* @__PURE__ */ new Set([ent.word]));
2156
- else elem.add(ent.word);
2157
- }
2172
+ if (ent.senses !== void 0)
2173
+ for (const sense of ent.senses) {
2174
+ if (hasValidFormOf && sense.form_of !== void 0) {
2175
+ for (const form of sense.form_of)
2176
+ if (pair.forms.has(form)) {
2177
+ const elem = titleFormMap.get(form);
2178
+ if (elem === void 0)
2179
+ titleFormMap.set(form, /* @__PURE__ */ new Set([ent.word]));
2180
+ else elem.add(ent.word);
2181
+ }
2182
+ }
2183
+ for (const gloss of sense.glosses)
2184
+ for (const f of pair.forms)
2185
+ if (gloss.includes(f)) {
2186
+ const elem = refsMap.get(f);
2187
+ if (elem === void 0)
2188
+ refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
2189
+ else elem.add(ent.word);
2190
+ }
2158
2191
  }
2159
- for (const gloss of sense.glosses)
2160
- for (const f of pair.forms)
2161
- if (gloss.includes(f)) {
2162
- const elem = refsMap.get(f);
2163
- if (elem === void 0)
2164
- refsMap.set(f, /* @__PURE__ */ new Set([ent.word]));
2165
- else elem.add(ent.word);
2166
- }
2167
- }
2168
2192
  if (hasValidForms && ent.forms !== void 0) {
2169
2193
  for (const form of ent.forms)
2170
2194
  if (pair.forms.has(form)) forms.add(form);
@@ -2245,13 +2269,10 @@ async function getWordDefinitionsWithFurigana(entryList, jmDict) {
2245
2269
  for (let i = 0; i < japaneseDefinitions.length; i++) {
2246
2270
  const pair = japaneseDefinitions[i];
2247
2271
  for (let j = 0; j < pair.definitions.length; j++)
2248
- if (!pair.definitions[j].definition.includes("\u30FB"))
2249
- pair.definitions[j].furigana = String(
2250
- await convert(pair.definitions[j].definition, {
2251
- to: "hiragana",
2252
- mode: "furigana"
2253
- })
2254
- );
2272
+ pair.definitions[j].furigana = await generateFurigana(
2273
+ pair.definitions[j].definition,
2274
+ convert
2275
+ );
2255
2276
  japaneseDefinitions[i] = pair;
2256
2277
  }
2257
2278
  return japaneseDefinitions;
@@ -2996,6 +3017,7 @@ ${ankiNotes}`;
2996
3017
  createEntryMaps,
2997
3018
  generateAnkiNote,
2998
3019
  generateAnkiNotesFile,
3020
+ generateFurigana,
2999
3021
  getKanji,
3000
3022
  getKanjiExtended,
3001
3023
  getValidForms,