henkan 0.4.1 → 0.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/index.cjs.js +123 -120
  2. package/dist/index.cjs.js.map +3 -3
  3. package/dist/index.mjs +119 -118
  4. package/dist/index.mjs.map +3 -3
  5. package/dist/types/types.d.ts +5 -3
  6. package/dist/types/types.d.ts.map +1 -1
  7. package/dist/types/utils.d.ts.map +1 -1
  8. package/docs/api/functions/capitalizeString.md +1 -1
  9. package/docs/api/functions/convertJMdict.md +1 -1
  10. package/docs/api/functions/convertKanjiDic.md +1 -1
  11. package/docs/api/functions/convertKradFile.md +1 -1
  12. package/docs/api/functions/convertRadkFile.md +1 -1
  13. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  14. package/docs/api/functions/generateAnkiNote.md +1 -1
  15. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  16. package/docs/api/functions/getKanji.md +1 -1
  17. package/docs/api/functions/getKanjiExtended.md +1 -1
  18. package/docs/api/functions/getWord.md +1 -1
  19. package/docs/api/functions/isStringArray.md +1 -1
  20. package/docs/api/functions/isValidArray.md +1 -1
  21. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  22. package/docs/api/functions/makeSSML.md +1 -1
  23. package/docs/api/functions/shuffleArray.md +1 -1
  24. package/docs/api/functions/synthesizeSpeech.md +1 -1
  25. package/docs/api/interfaces/DictKanji.md +5 -5
  26. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  27. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  28. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  29. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  30. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  31. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  32. package/docs/api/interfaces/DictMeaning.md +11 -11
  33. package/docs/api/interfaces/DictRadical.md +4 -4
  34. package/docs/api/interfaces/DictReading.md +5 -5
  35. package/docs/api/interfaces/DictWord.md +19 -17
  36. package/docs/api/interfaces/ExamplePart.md +7 -7
  37. package/docs/api/interfaces/Grammar.md +15 -15
  38. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  39. package/docs/api/interfaces/Kana.md +11 -11
  40. package/docs/api/interfaces/Kanji.md +22 -22
  41. package/docs/api/interfaces/KanjiComponent.md +3 -3
  42. package/docs/api/interfaces/KanjiForm.md +4 -4
  43. package/docs/api/interfaces/NoteAndTag.md +3 -3
  44. package/docs/api/interfaces/Phrase.md +4 -4
  45. package/docs/api/interfaces/Radical.md +16 -16
  46. package/docs/api/interfaces/Reading.md +5 -5
  47. package/docs/api/interfaces/ResultEntry.md +7 -7
  48. package/docs/api/interfaces/TanakaExample.md +6 -6
  49. package/docs/api/interfaces/Translation.md +3 -3
  50. package/docs/api/interfaces/UsefulRegExps.md +9 -9
  51. package/docs/api/interfaces/Word.md +16 -16
  52. package/docs/api/type-aliases/Dict.md +1 -1
  53. package/docs/api/type-aliases/DictName.md +1 -1
  54. package/docs/api/type-aliases/EntryType.md +1 -1
  55. package/docs/api/type-aliases/JLPT.md +1 -1
  56. package/docs/api/type-aliases/Result.md +1 -1
  57. package/package.json +1 -1
package/dist/index.cjs.js CHANGED
@@ -1222,10 +1222,18 @@ function convertJMdict(xmlString, examples) {
1222
1222
  noent: true,
1223
1223
  recover: false
1224
1224
  });
1225
- let dict = [];
1226
- const partMatches = /* @__PURE__ */ new Set();
1225
+ const dict = [];
1227
1226
  import_xml2js.default.parseString(dictParsed, (err, result) => {
1228
1227
  if (err) throw err;
1228
+ const tanakaParts = examples && examples.length > 0 ? new Set(
1229
+ examples.map(
1230
+ (example) => example.parts.map((part) => [
1231
+ part.baseForm,
1232
+ ...part.reading ? [part.reading] : [],
1233
+ ...part.referenceID ? [part.referenceID] : []
1234
+ ])
1235
+ ).flat(2)
1236
+ ) : void 0;
1229
1237
  if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
1230
1238
  for (const entry of result.JMdict.entry) {
1231
1239
  const entryObj = {
@@ -1317,117 +1325,43 @@ function convertJMdict(xmlString, examples) {
1317
1325
  entryObj.usuallyInKana = true;
1318
1326
  }
1319
1327
  if (examples) {
1320
- const readings2 = entryObj.readings.filter(
1321
- (reading) => (!reading.notes || !reading.notes.some(
1322
- (note) => notSearchedForms.has(note)
1323
- )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1324
- ).map((reading) => reading.reading);
1325
- const kanjiForms2 = entryObj.kanjiForms ? entryObj.kanjiForms.filter(
1326
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1327
- (note) => notSearchedForms.has(note)
1328
- )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1329
- ).map((kanjiForm) => kanjiForm.form) : void 0;
1330
- for (const reading of readings2) partMatches.add(reading);
1331
- if (kanjiForms2)
1332
- for (const kanjiForm of kanjiForms2) partMatches.add(kanjiForm);
1333
- partMatches.add(entryObj.id);
1328
+ const readings2 = new Set(
1329
+ entryObj.readings.filter(
1330
+ (reading) => (!reading.notes || !reading.notes.some(
1331
+ (note) => notSearchedForms.has(note)
1332
+ )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1333
+ ).map((reading) => reading.reading)
1334
+ );
1335
+ const kanjiForms2 = entryObj.kanjiForms ? new Set(
1336
+ entryObj.kanjiForms.filter(
1337
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1338
+ (note) => notSearchedForms.has(note)
1339
+ )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1340
+ ).map((kanjiForm) => kanjiForm.form)
1341
+ ) : void 0;
1342
+ let existsExample = false;
1343
+ if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
1344
+ for (const kf of kanjiForms2)
1345
+ if (tanakaParts.has(kf)) {
1346
+ existsExample = true;
1347
+ break;
1348
+ }
1349
+ }
1350
+ if (!existsExample && readings2.size > 0 && tanakaParts) {
1351
+ for (const r of readings2)
1352
+ if (tanakaParts.has(r)) {
1353
+ existsExample = true;
1354
+ break;
1355
+ }
1356
+ }
1357
+ if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
1358
+ existsExample = true;
1359
+ if (existsExample) entryObj.hasPhrases = true;
1334
1360
  }
1335
1361
  if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
1336
1362
  dict.push(entryObj);
1337
1363
  }
1338
1364
  });
1339
- if (examples && dict.length > 0) {
1340
- const filteredExamples = examples.filter(
1341
- (ex) => {
1342
- const parts = ex.parts.flatMap((part) => [
1343
- part.baseForm,
1344
- ...part.reading ? [part.reading] : [],
1345
- ...part.referenceID ? [part.referenceID] : []
1346
- ]);
1347
- for (const part of parts) if (partMatches.has(part)) return true;
1348
- return false;
1349
- }
1350
- );
1351
- dict = dict.map((entryObj) => {
1352
- const readings = new Set(
1353
- entryObj.readings.filter(
1354
- (reading) => (!reading.notes || !reading.notes.some(
1355
- (note) => notSearchedForms.has(note)
1356
- )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1357
- ).map((reading) => reading.reading)
1358
- );
1359
- const kanjiForms = entryObj.kanjiForms ? new Set(
1360
- entryObj.kanjiForms.filter(
1361
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1362
- (note) => notSearchedForms.has(note)
1363
- )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1364
- ).map((kanjiForm) => kanjiForm.form)
1365
- ) : void 0;
1366
- const kanjiFormExamples = [];
1367
- const readingMatchingKanjiFormExamples = [];
1368
- const readingExamples = [];
1369
- const partParts = /* @__PURE__ */ new Set();
1370
- for (const example of filteredExamples)
1371
- for (const part of example.parts) {
1372
- const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
1373
- if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1374
- if (readingAsReadingMatch) {
1375
- readingMatchingKanjiFormExamples.push(example);
1376
- partParts.add(part.baseForm).add(part.reading);
1377
- } else {
1378
- kanjiFormExamples.push(example);
1379
- partParts.add(part.baseForm);
1380
- }
1381
- break;
1382
- }
1383
- const readingAsBaseFormMatch = readings.has(part.baseForm);
1384
- const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
1385
- if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1386
- readingExamples.push(example);
1387
- if (readingAsReadingMatch) partParts.add(part.reading);
1388
- if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1389
- if (referenceIDMatch) partParts.add(part.referenceID);
1390
- break;
1391
- }
1392
- }
1393
- const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1394
- const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1395
- const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1396
- let wordExamples = [
1397
- ...readingMatchingKanjiFormExamples,
1398
- ...includeKanjiFormExamples ? kanjiFormExamples : [],
1399
- ...includeReadingExamples ? readingExamples : []
1400
- ];
1401
- const glossSpecificExamples = [];
1402
- const seenPhrases = /* @__PURE__ */ new Set();
1403
- for (let i = 0; i < entryObj.meanings.length; i++) {
1404
- outer: for (const example of wordExamples) {
1405
- if (seenPhrases.has(example.phrase)) continue;
1406
- for (const part of example.parts)
1407
- if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1408
- glossSpecificExamples.push(example);
1409
- seenPhrases.add(example.phrase);
1410
- break outer;
1411
- }
1412
- }
1413
- if (glossSpecificExamples.length === 5) break;
1414
- }
1415
- if (glossSpecificExamples.length === 5)
1416
- wordExamples = glossSpecificExamples;
1417
- else if (glossSpecificExamples.length > 0) {
1418
- const seenPhrases2 = new Set(
1419
- glossSpecificExamples.map((ex) => ex.phrase)
1420
- );
1421
- wordExamples = [
1422
- ...glossSpecificExamples,
1423
- ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1424
- ];
1425
- }
1426
- if (wordExamples.length > 0)
1427
- entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
1428
- return entryObj;
1429
- });
1430
- }
1431
1365
  return dict;
1432
1366
  } catch (err) {
1433
1367
  throw err;
@@ -1691,7 +1625,6 @@ var wordAddNoteArray = (arr, cb) => {
1691
1625
  for (const v of arr) cb(v);
1692
1626
  };
1693
1627
  function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath) {
1694
- var _a;
1695
1628
  try {
1696
1629
  if (!dictWord && id && dict)
1697
1630
  dictWord = dict.find((entry) => entry.id === id);
@@ -1712,7 +1645,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1712
1645
  kanjiForm: dictKanjiForm.form,
1713
1646
  ...dictKanjiForm.notes ? {
1714
1647
  notes: dictKanjiForm.notes.map((note) => {
1715
- var _a2;
1648
+ var _a;
1716
1649
  const noteAndTag = lookupWordNote(
1717
1650
  note,
1718
1651
  void 0,
@@ -1720,7 +1653,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1720
1653
  false,
1721
1654
  note
1722
1655
  );
1723
- return capitalizeString((_a2 = noteAndTag.note) != null ? _a2 : note);
1656
+ return capitalizeString((_a = noteAndTag.note) != null ? _a : note);
1724
1657
  })
1725
1658
  } : {},
1726
1659
  ...dictKanjiForm.commonness && dictKanjiForm.commonness.length > 0 ? { common: true } : {}
@@ -1734,7 +1667,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1734
1667
  (restriction) => `Reading restricted to ${restriction}`
1735
1668
  ) : [],
1736
1669
  ...dictReading.notes ? dictReading.notes.map((note) => {
1737
- var _a2;
1670
+ var _a;
1738
1671
  const noteAndTag = lookupWordNote(
1739
1672
  note,
1740
1673
  void 0,
@@ -1742,7 +1675,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1742
1675
  false,
1743
1676
  note
1744
1677
  );
1745
- return capitalizeString((_a2 = noteAndTag.note) != null ? _a2 : note);
1678
+ return capitalizeString((_a = noteAndTag.note) != null ? _a : note);
1746
1679
  }) : []
1747
1680
  ]
1748
1681
  } : {},
@@ -1835,16 +1768,86 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1835
1768
  }
1836
1769
  if (word.kanji.length === 0) delete word.kanji;
1837
1770
  }
1838
- if (examples && dictWord.phraseIDs && dictWord.phraseIDs.length > 0) {
1839
- word.phrases = [];
1840
- const phraseIDs = new Set(dictWord.phraseIDs);
1841
- for (const ex of examples)
1842
- if (phraseIDs.has(ex.id))
1843
- word.phrases.push({
1771
+ if (dictWord.hasPhrases === true && examples) {
1772
+ const readings = new Set(
1773
+ word.readings.filter(
1774
+ (reading) => (!reading.notes || !reading.notes.some(
1775
+ (note) => notSearchedForms.has(note)
1776
+ )) && (word.common === void 0 || reading.common === true)
1777
+ ).map((reading) => reading.reading)
1778
+ );
1779
+ const kanjiForms = word.kanjiForms ? new Set(
1780
+ word.kanjiForms.filter(
1781
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1782
+ (note) => notSearchedForms.has(note)
1783
+ )) && (word.common === void 0 || kanjiForm.common === true)
1784
+ ).map((kanjiForm) => kanjiForm.kanjiForm)
1785
+ ) : void 0;
1786
+ const kanjiFormExamples = [];
1787
+ const readingMatchingKanjiFormExamples = [];
1788
+ const readingExamples = [];
1789
+ const partParts = /* @__PURE__ */ new Set();
1790
+ for (const example of examples)
1791
+ for (const part of example.parts) {
1792
+ const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
1793
+ if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1794
+ if (readingAsReadingMatch) {
1795
+ readingMatchingKanjiFormExamples.push(example);
1796
+ partParts.add(part.baseForm).add(part.reading);
1797
+ } else {
1798
+ kanjiFormExamples.push(example);
1799
+ partParts.add(part.baseForm);
1800
+ }
1801
+ break;
1802
+ }
1803
+ const readingAsBaseFormMatch = readings.has(part.baseForm);
1804
+ const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
1805
+ if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1806
+ readingExamples.push(example);
1807
+ if (readingAsReadingMatch) partParts.add(part.reading);
1808
+ if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1809
+ if (referenceIDMatch) partParts.add(part.referenceID);
1810
+ break;
1811
+ }
1812
+ }
1813
+ const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1814
+ const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1815
+ const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1816
+ let wordExamples = [
1817
+ ...readingMatchingKanjiFormExamples,
1818
+ ...includeKanjiFormExamples ? kanjiFormExamples : [],
1819
+ ...includeReadingExamples ? readingExamples : []
1820
+ ];
1821
+ const glossSpecificExamples = [];
1822
+ const seenPhrases = /* @__PURE__ */ new Set();
1823
+ for (let i = 0; i < word.translations.length; i++) {
1824
+ outer: for (const example of wordExamples) {
1825
+ if (seenPhrases.has(example.phrase)) continue;
1826
+ for (const part of example.parts)
1827
+ if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1828
+ glossSpecificExamples.push(example);
1829
+ seenPhrases.add(example.phrase);
1830
+ break outer;
1831
+ }
1832
+ }
1833
+ if (glossSpecificExamples.length === 5) break;
1834
+ }
1835
+ if (glossSpecificExamples.length === 5)
1836
+ wordExamples = [...glossSpecificExamples];
1837
+ else if (glossSpecificExamples.length > 0)
1838
+ wordExamples = [
1839
+ ...glossSpecificExamples,
1840
+ ...wordExamples.filter((ex) => !seenPhrases.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1841
+ ];
1842
+ if (wordExamples.length > 0)
1843
+ word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => {
1844
+ var _a;
1845
+ return {
1844
1846
  phrase: (_a = ex.furigana) != null ? _a : ex.phrase,
1845
1847
  translation: ex.translation,
1846
1848
  originalPhrase: ex.phrase
1847
- });
1849
+ };
1850
+ });
1848
1851
  }
1849
1852
  return word;
1850
1853
  } else throw new Error(`Word${id ? ` ${id}` : ""} not found`);