henkan 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/dist/index.cjs.js +91 -54
  2. package/dist/index.cjs.js.map +3 -3
  3. package/dist/index.mjs +91 -54
  4. package/dist/index.mjs.map +3 -3
  5. package/dist/types/constants.d.ts.map +1 -1
  6. package/dist/types/types.d.ts +8 -0
  7. package/dist/types/types.d.ts.map +1 -1
  8. package/dist/types/utils.d.ts.map +1 -1
  9. package/docs/api/functions/capitalizeString.md +1 -1
  10. package/docs/api/functions/convertJMdict.md +1 -1
  11. package/docs/api/functions/convertKanjiDic.md +1 -1
  12. package/docs/api/functions/convertKradFile.md +1 -1
  13. package/docs/api/functions/convertRadkFile.md +1 -1
  14. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  15. package/docs/api/functions/generateAnkiNote.md +1 -1
  16. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  17. package/docs/api/functions/getKanji.md +1 -1
  18. package/docs/api/functions/getKanjiExtended.md +1 -1
  19. package/docs/api/functions/getWord.md +1 -1
  20. package/docs/api/functions/isStringArray.md +1 -1
  21. package/docs/api/functions/isValidArray.md +1 -1
  22. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  23. package/docs/api/functions/makeSSML.md +1 -1
  24. package/docs/api/functions/shuffleArray.md +1 -1
  25. package/docs/api/functions/synthesizeSpeech.md +1 -1
  26. package/docs/api/interfaces/DictKanji.md +5 -5
  27. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  28. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  29. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  30. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  31. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  32. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  33. package/docs/api/interfaces/DictMeaning.md +11 -11
  34. package/docs/api/interfaces/DictRadical.md +4 -4
  35. package/docs/api/interfaces/DictReading.md +5 -5
  36. package/docs/api/interfaces/DictWord.md +7 -7
  37. package/docs/api/interfaces/ExamplePart.md +7 -7
  38. package/docs/api/interfaces/Grammar.md +15 -15
  39. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  40. package/docs/api/interfaces/Kana.md +11 -11
  41. package/docs/api/interfaces/Kanji.md +22 -22
  42. package/docs/api/interfaces/KanjiComponent.md +3 -3
  43. package/docs/api/interfaces/KanjiForm.md +13 -3
  44. package/docs/api/interfaces/NoteAndTag.md +3 -3
  45. package/docs/api/interfaces/Phrase.md +4 -4
  46. package/docs/api/interfaces/Radical.md +16 -16
  47. package/docs/api/interfaces/Reading.md +14 -4
  48. package/docs/api/interfaces/ResultEntry.md +7 -7
  49. package/docs/api/interfaces/TanakaExample.md +5 -5
  50. package/docs/api/interfaces/Translation.md +3 -3
  51. package/docs/api/interfaces/UsefulRegExps.md +9 -9
  52. package/docs/api/interfaces/Word.md +14 -14
  53. package/docs/api/type-aliases/Dict.md +1 -1
  54. package/docs/api/type-aliases/DictName.md +1 -1
  55. package/docs/api/type-aliases/EntryType.md +1 -1
  56. package/docs/api/type-aliases/JLPT.md +1 -1
  57. package/docs/api/type-aliases/Result.md +1 -1
  58. package/package.json +8 -8
package/dist/index.mjs CHANGED
@@ -65,10 +65,21 @@ var symbolMap = {
65
65
  "&": "\u30A2\u30F3\u30C9"
66
66
  };
67
67
  var notSearchedForms = /* @__PURE__ */ new Set([
68
+ "search-only kana form",
68
69
  "Search-only kana form",
70
+ "rarely used kana form",
69
71
  "Rarely used kana form",
72
+ "out-dated or obsolete kana usage",
70
73
  "Out-dated or obsolete kana usage",
71
- "Word containing out-dated kanji or kanji usage"
74
+ "search-only kanji form",
75
+ "Search-only kanji form",
76
+ "rarely-used kanji form",
77
+ "Rarely-used kanji form",
78
+ "out-dated kanji",
79
+ "Out-dated kanji form",
80
+ "out-dated kanji or kanji usage",
81
+ "word containing out-dated kanji or kanji usage",
82
+ "Out-dated kanji or kanji usage"
72
83
  ]);
73
84
  var noteMap = /* @__PURE__ */ new Map([
74
85
  ["brazilian", ["dialect::brazilian", "Dialect: Brazilian"]],
@@ -1160,13 +1171,15 @@ function convertJMdict(xmlString, examples) {
1160
1171
  const dict = [];
1161
1172
  xml.parseString(dictParsed, (err, result) => {
1162
1173
  if (err) throw err;
1163
- let tanakaBaseParts = void 0;
1164
- if (examples)
1165
- tanakaBaseParts = new Set(
1166
- examples.map(
1167
- (example) => example.parts.map((part) => part.baseForm)
1168
- ).flat()
1169
- );
1174
+ const tanakaParts = examples && examples.length > 0 ? new Set(
1175
+ examples.map(
1176
+ (example) => example.parts.map((part) => [
1177
+ part.baseForm,
1178
+ ...part.reading ? [part.reading] : [],
1179
+ ...part.referenceID ? [part.referenceID] : []
1180
+ ])
1181
+ ).flat(2)
1182
+ ) : void 0;
1170
1183
  if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
1171
1184
  for (const entry of result.JMdict.entry) {
1172
1185
  const entryObj = {
@@ -1250,34 +1263,36 @@ function convertJMdict(xmlString, examples) {
1250
1263
  if (examples) {
1251
1264
  const readings2 = new Set(
1252
1265
  entryObj.readings.filter(
1253
- (reading) => !reading.notes || reading.notes && !reading.notes.some(
1266
+ (reading) => (!reading.notes || !reading.notes.some(
1254
1267
  (note) => notSearchedForms.has(note)
1255
- )
1268
+ )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1256
1269
  ).map((reading) => reading.reading)
1257
1270
  );
1258
1271
  const kanjiForms2 = entryObj.kanjiForms ? new Set(
1259
- entryObj.kanjiForms.map(
1260
- (kanjiForm) => kanjiForm.form
1261
- )
1272
+ entryObj.kanjiForms.filter(
1273
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1274
+ (note) => notSearchedForms.has(note)
1275
+ )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1276
+ ).map((kanjiForm) => kanjiForm.form)
1262
1277
  ) : void 0;
1263
- let kanjiFormExamples = false;
1264
- let readingExamples = false;
1265
- if (kanjiForms2 && kanjiForms2.size > 0 && tanakaBaseParts) {
1278
+ let existsExample = false;
1279
+ if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
1266
1280
  for (const kf of kanjiForms2)
1267
- if (tanakaBaseParts.has(kf)) {
1268
- kanjiFormExamples = true;
1281
+ if (tanakaParts.has(kf)) {
1282
+ existsExample = true;
1269
1283
  break;
1270
1284
  }
1271
1285
  }
1272
- if (!kanjiFormExamples && readings2.size > 0 && tanakaBaseParts) {
1286
+ if (!existsExample && readings2.size > 0 && tanakaParts) {
1273
1287
  for (const r of readings2)
1274
- if (tanakaBaseParts.has(r)) {
1275
- readingExamples = true;
1288
+ if (tanakaParts.has(r)) {
1289
+ existsExample = true;
1276
1290
  break;
1277
1291
  }
1278
1292
  }
1279
- if (kanjiFormExamples || readingExamples)
1280
- entryObj.hasPhrases = true;
1293
+ if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
1294
+ existsExample = true;
1295
+ if (existsExample) entryObj.hasPhrases = true;
1281
1296
  }
1282
1297
  if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
1283
1298
  dict.push(entryObj);
@@ -1570,7 +1585,8 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1570
1585
  );
1571
1586
  return capitalizeString(noteAndTag.note ?? note);
1572
1587
  })
1573
- } : {}
1588
+ } : {},
1589
+ ...dictKanjiForm.commonness && dictKanjiForm.commonness.length > 0 ? { common: true } : {}
1574
1590
  })
1575
1591
  );
1576
1592
  word.readings = dictWord.readings.map((dictReading) => ({
@@ -1591,7 +1607,8 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1591
1607
  return capitalizeString(noteAndTag.note ?? note);
1592
1608
  }) : []
1593
1609
  ]
1594
- } : {}
1610
+ } : {},
1611
+ ...dictReading.commonness && dictReading.commonness.length > 0 ? { common: true } : {}
1595
1612
  }));
1596
1613
  let usuallyInKanaMeanings = 0;
1597
1614
  word.translations = dictWord.meanings.map((dictMeaning) => {
@@ -1684,36 +1701,61 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1684
1701
  if (word.kanji.length === 0) delete word.kanji;
1685
1702
  }
1686
1703
  if (examples && dictWord.hasPhrases === true) {
1704
+ let pushIfUnique2 = function(ex) {
1705
+ if (!seenPhrases.has(ex.phrase)) {
1706
+ wordExamples.push(ex);
1707
+ seenPhrases.add(ex.phrase);
1708
+ }
1709
+ };
1710
+ var pushIfUnique = pushIfUnique2;
1687
1711
  const readings = new Set(
1688
1712
  word.readings.filter(
1689
- (reading) => !reading.notes || reading.notes && !reading.notes.some(
1713
+ (reading) => (!reading.notes || !reading.notes.some(
1690
1714
  (note) => notSearchedForms.has(note)
1691
- )
1715
+ )) && (word.common === void 0 || reading.common === true)
1692
1716
  ).map((reading) => reading.reading)
1693
1717
  );
1694
1718
  const kanjiForms = word.kanjiForms ? new Set(
1695
- word.kanjiForms.map(
1696
- (kanjiForm) => kanjiForm.kanjiForm
1697
- )
1719
+ word.kanjiForms.filter(
1720
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1721
+ (note) => notSearchedForms.has(note)
1722
+ )) && (word.common === void 0 || kanjiForm.common === true)
1723
+ ).map((kanjiForm) => kanjiForm.kanjiForm)
1698
1724
  ) : void 0;
1699
1725
  const kanjiFormExamples = [];
1726
+ const readingMatchingKanjiFormExamples = [];
1700
1727
  const readingExamples = [];
1701
- if (kanjiForms) {
1702
- for (const example of examples)
1703
- for (const part of example.parts)
1704
- if (kanjiForms.has(part.baseForm))
1705
- kanjiFormExamples.push(example);
1706
- }
1707
- if (kanjiFormExamples.length === 0) {
1708
- for (const example of examples)
1709
- for (const part of example.parts)
1710
- if (readings.has(part.baseForm)) readingExamples.push(example);
1711
- }
1712
- examples = [...kanjiFormExamples, ...readingExamples];
1728
+ for (const example of examples)
1729
+ for (const part of example.parts) {
1730
+ const readingMatch = part.reading && readings.has(part.reading) || readings.has(part.baseForm);
1731
+ if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1732
+ if (readingMatch) readingMatchingKanjiFormExamples.push(example);
1733
+ else kanjiFormExamples.push(example);
1734
+ break;
1735
+ }
1736
+ if (readingMatch || part.referenceID && word.id && part.referenceID === word.id) {
1737
+ readingExamples.push(example);
1738
+ break;
1739
+ }
1740
+ }
1741
+ const exampleSize = (/* @__PURE__ */ new Set([
1742
+ ...readingMatchingKanjiFormExamples,
1743
+ ...kanjiFormExamples,
1744
+ ...readingExamples
1745
+ ])).size;
1746
+ const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1747
+ const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1748
+ const seenPhrases = /* @__PURE__ */ new Set();
1749
+ let wordExamples = [];
1750
+ for (const ex of readingMatchingKanjiFormExamples) pushIfUnique2(ex);
1751
+ if (includeKanjiFormExamples)
1752
+ for (const ex of kanjiFormExamples) pushIfUnique2(ex);
1753
+ if (includeReadingExamples)
1754
+ for (const ex of readingExamples) pushIfUnique2(ex);
1713
1755
  if (word.translations) {
1714
1756
  const glossSpecificExamples = [];
1715
1757
  for (let i = 0; i < word.translations.length; i++) {
1716
- outer: for (const example of examples)
1758
+ outer: for (const example of wordExamples)
1717
1759
  for (const part of example.parts)
1718
1760
  if (part.glossNumber === i + 1) {
1719
1761
  glossSpecificExamples.push(example);
@@ -1722,24 +1764,19 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1722
1764
  if (glossSpecificExamples.length === 5) break;
1723
1765
  }
1724
1766
  if (glossSpecificExamples.length === 5)
1725
- examples = glossSpecificExamples;
1767
+ wordExamples = glossSpecificExamples;
1726
1768
  else if (glossSpecificExamples.length > 0) {
1727
- const seenPhrases = new Set(
1769
+ const seenPhrases2 = new Set(
1728
1770
  glossSpecificExamples.map((ex) => ex.phrase)
1729
1771
  );
1730
- examples = [
1772
+ wordExamples = [
1731
1773
  ...glossSpecificExamples,
1732
- ...examples.filter((ex) => !seenPhrases.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1774
+ ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1733
1775
  ];
1734
1776
  }
1735
1777
  }
1736
- examples = examples.filter(
1737
- (example, index, arr) => arr.findIndex(
1738
- (ex) => ex.phrase === example.phrase
1739
- ) === index
1740
- );
1741
- if (examples.length > 0)
1742
- word.phrases = (examples.length > 5 ? examples.slice(0, 5) : examples).map((ex) => ({
1778
+ if (wordExamples.length > 0)
1779
+ word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
1743
1780
  phrase: ex.furigana ?? ex.phrase,
1744
1781
  translation: ex.translation,
1745
1782
  originalPhrase: ex.phrase