henkan 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58):
  1. package/dist/index.cjs.js +100 -125
  2. package/dist/index.cjs.js.map +3 -3
  3. package/dist/index.mjs +98 -121
  4. package/dist/index.mjs.map +3 -3
  5. package/dist/types/types.d.ts +16 -12
  6. package/dist/types/types.d.ts.map +1 -1
  7. package/dist/types/utils.d.ts +6 -6
  8. package/dist/types/utils.d.ts.map +1 -1
  9. package/docs/api/functions/capitalizeString.md +1 -1
  10. package/docs/api/functions/convertJMdict.md +1 -1
  11. package/docs/api/functions/convertKanjiDic.md +1 -1
  12. package/docs/api/functions/convertKradFile.md +4 -4
  13. package/docs/api/functions/convertRadkFile.md +4 -4
  14. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  15. package/docs/api/functions/generateAnkiNote.md +1 -1
  16. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  17. package/docs/api/functions/getKanji.md +1 -1
  18. package/docs/api/functions/getKanjiExtended.md +1 -1
  19. package/docs/api/functions/getWord.md +1 -1
  20. package/docs/api/functions/isStringArray.md +1 -1
  21. package/docs/api/functions/isValidArray.md +1 -1
  22. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  23. package/docs/api/functions/makeSSML.md +1 -1
  24. package/docs/api/functions/shuffleArray.md +1 -1
  25. package/docs/api/functions/synthesizeSpeech.md +1 -1
  26. package/docs/api/interfaces/DictKanji.md +5 -5
  27. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  28. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  29. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  30. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  31. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  32. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  33. package/docs/api/interfaces/DictMeaning.md +11 -11
  34. package/docs/api/interfaces/DictRadical.md +4 -4
  35. package/docs/api/interfaces/DictReading.md +5 -5
  36. package/docs/api/interfaces/DictWord.md +29 -19
  37. package/docs/api/interfaces/ExamplePart.md +7 -7
  38. package/docs/api/interfaces/Grammar.md +15 -15
  39. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  40. package/docs/api/interfaces/Kana.md +11 -11
  41. package/docs/api/interfaces/Kanji.md +22 -22
  42. package/docs/api/interfaces/KanjiComponent.md +3 -3
  43. package/docs/api/interfaces/KanjiForm.md +4 -4
  44. package/docs/api/interfaces/NoteAndTag.md +3 -3
  45. package/docs/api/interfaces/Phrase.md +4 -4
  46. package/docs/api/interfaces/Radical.md +16 -16
  47. package/docs/api/interfaces/Reading.md +5 -5
  48. package/docs/api/interfaces/ResultEntry.md +7 -7
  49. package/docs/api/interfaces/TanakaExample.md +17 -7
  50. package/docs/api/interfaces/Translation.md +3 -3
  51. package/docs/api/interfaces/UsefulRegExps.md +9 -9
  52. package/docs/api/interfaces/Word.md +18 -18
  53. package/docs/api/type-aliases/Dict.md +1 -1
  54. package/docs/api/type-aliases/DictName.md +1 -1
  55. package/docs/api/type-aliases/EntryType.md +1 -1
  56. package/docs/api/type-aliases/JLPT.md +1 -1
  57. package/docs/api/type-aliases/Result.md +1 -1
  58. package/package.json +6 -6
package/dist/index.cjs.js CHANGED
@@ -68,9 +68,9 @@ var regexps = {
68
68
  kanji: new RegExp("\\p{Script=Han}+", "u"),
69
69
  scriptSplit: /([\p{sc=Han}]+|[\p{sc=Hiragana}]+|[\p{sc=Katakana}]+|[^\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}]+)/u,
70
70
  regExChars: /[-\/\\^$*+?.()|[\]{}]/,
71
- tanakaID: /#ID=\d+_\d+$/,
71
+ tanakaID: /#ID=(?<id>\d+_\d+)$/,
72
72
  tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
73
- tanakaReferenceID: /#([\d]+)/
73
+ tanakaReferenceID: /#(?<entryid>[\d]+)/
74
74
  };
75
75
  var romajiMap = {
76
76
  A: "\u30A8\u30FC",
@@ -1225,15 +1225,6 @@ function convertJMdict(xmlString, examples) {
1225
1225
  const dict = [];
1226
1226
  import_xml2js.default.parseString(dictParsed, (err, result) => {
1227
1227
  if (err) throw err;
1228
- const tanakaParts = examples && examples.length > 0 ? new Set(
1229
- examples.map(
1230
- (example) => example.parts.map((part) => [
1231
- part.baseForm,
1232
- ...part.reading ? [part.reading] : [],
1233
- ...part.referenceID ? [part.referenceID] : []
1234
- ])
1235
- ).flat(2)
1236
- ) : void 0;
1237
1228
  if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
1238
1229
  for (const entry of result.JMdict.entry) {
1239
1230
  const entryObj = {
@@ -1282,7 +1273,8 @@ function convertJMdict(xmlString, examples) {
1282
1273
  if (readingObj.reading.length > 0)
1283
1274
  entryObj.readings.push(readingObj);
1284
1275
  }
1285
- if (isValidArray(meanings))
1276
+ if (isValidArray(meanings)) {
1277
+ let usuallyInKanaMeanings = 0;
1286
1278
  for (const meaning of meanings) {
1287
1279
  const meaningObj = {};
1288
1280
  if (isStringArray(meaning.pos))
@@ -1308,12 +1300,21 @@ function convertJMdict(xmlString, examples) {
1308
1300
  if (isStringArray(meaning.field))
1309
1301
  meaningObj.fields = meaning.field;
1310
1302
  if (isStringArray(meaning.s_inf)) meaningObj.info = meaning.s_inf;
1311
- if (isStringArray(meaning.misc)) meaningObj.misc = meaning.misc;
1303
+ if (isStringArray(meaning.misc)) {
1304
+ meaningObj.misc = meaning.misc;
1305
+ if (meaningObj.misc && meaningObj.misc.includes(
1306
+ "word usually written using kana alone"
1307
+ ))
1308
+ usuallyInKanaMeanings++;
1309
+ }
1312
1310
  if (isStringArray(meaning.dial))
1313
1311
  meaningObj.dialects = meaning.dial;
1314
1312
  if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0 || meaningObj.translations && meaningObj.translations.length > 0)
1315
1313
  entryObj.meanings.push(meaningObj);
1316
1314
  }
1315
+ if (entryObj.meanings.length === usuallyInKanaMeanings)
1316
+ entryObj.usuallyInKana = true;
1317
+ }
1317
1318
  if (examples) {
1318
1319
  const readings2 = new Set(
1319
1320
  entryObj.readings.filter(
@@ -1329,24 +1330,70 @@ function convertJMdict(xmlString, examples) {
1329
1330
  )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1330
1331
  ).map((kanjiForm) => kanjiForm.form)
1331
1332
  ) : void 0;
1332
- let existsExample = false;
1333
- if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
1334
- for (const kf of kanjiForms2)
1335
- if (tanakaParts.has(kf)) {
1336
- existsExample = true;
1333
+ const kanjiFormExamples = [];
1334
+ const readingMatchingKanjiFormExamples = [];
1335
+ const readingExamples = [];
1336
+ const partParts = /* @__PURE__ */ new Set();
1337
+ for (const example of examples)
1338
+ for (const part of example.parts) {
1339
+ const readingAsReadingMatch = part.reading !== void 0 && readings2.has(part.reading);
1340
+ if (kanjiForms2 && kanjiForms2.size > 0 && kanjiForms2.has(part.baseForm)) {
1341
+ if (readingAsReadingMatch) {
1342
+ readingMatchingKanjiFormExamples.push(example);
1343
+ partParts.add(part.baseForm).add(part.reading);
1344
+ } else {
1345
+ kanjiFormExamples.push(example);
1346
+ partParts.add(part.baseForm);
1347
+ }
1337
1348
  break;
1338
1349
  }
1339
- }
1340
- if (!existsExample && readings2.size > 0 && tanakaParts) {
1341
- for (const r of readings2)
1342
- if (tanakaParts.has(r)) {
1343
- existsExample = true;
1350
+ const readingAsBaseFormMatch = readings2.has(
1351
+ part.baseForm
1352
+ );
1353
+ const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
1354
+ if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1355
+ readingExamples.push(example);
1356
+ if (readingAsReadingMatch) partParts.add(part.reading);
1357
+ if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1358
+ if (referenceIDMatch) partParts.add(part.referenceID);
1344
1359
  break;
1345
1360
  }
1361
+ }
1362
+ const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1363
+ const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1364
+ const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1365
+ let wordExamples = [
1366
+ ...readingMatchingKanjiFormExamples,
1367
+ ...includeKanjiFormExamples ? kanjiFormExamples : [],
1368
+ ...includeReadingExamples ? readingExamples : []
1369
+ ];
1370
+ const glossSpecificExamples = [];
1371
+ const seenPhrases = /* @__PURE__ */ new Set();
1372
+ for (let i = 0; i < entryObj.meanings.length; i++) {
1373
+ outer: for (const example of wordExamples) {
1374
+ if (seenPhrases.has(example.phrase)) continue;
1375
+ for (const part of example.parts)
1376
+ if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1377
+ glossSpecificExamples.push(example);
1378
+ seenPhrases.add(example.phrase);
1379
+ break outer;
1380
+ }
1381
+ }
1382
+ if (glossSpecificExamples.length === 5) break;
1383
+ }
1384
+ if (glossSpecificExamples.length === 5)
1385
+ wordExamples = [...glossSpecificExamples];
1386
+ else if (glossSpecificExamples.length > 0) {
1387
+ const seenPhrases2 = new Set(
1388
+ glossSpecificExamples.map((ex) => ex.phrase)
1389
+ );
1390
+ wordExamples = [
1391
+ ...glossSpecificExamples,
1392
+ ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1393
+ ];
1346
1394
  }
1347
- if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
1348
- existsExample = true;
1349
- if (existsExample) entryObj.hasPhrases = true;
1395
+ if (wordExamples.length > 0)
1396
+ entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
1350
1397
  }
1351
1398
  if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
1352
1399
  dict.push(entryObj);
@@ -1445,9 +1492,12 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
1445
1492
  let a = tanakaParsed[i];
1446
1493
  let b = tanakaParsed[i + 1];
1447
1494
  if (a && b && a.startsWith("A: ") && b.startsWith("B: ")) {
1448
- a = a.replace("A: ", "").replace(regexps.tanakaID, "");
1495
+ a = a.replace("A: ", "");
1449
1496
  b = b.replace("B: ", "");
1450
- const aParts = a.split(" ");
1497
+ const idMatch = regexps.tanakaID.exec(a);
1498
+ if (!idMatch || !idMatch.groups || !idMatch.groups["id"])
1499
+ throw new Error(`Invalid phrase ID for ${a}`);
1500
+ const aParts = a.replace(regexps.tanakaID, "").split(" ");
1451
1501
  const bParts = b.split(" ").filter((part) => part.trim().length !== 0).map((part) => {
1452
1502
  const partMatches = regexps.tanakaPart.exec(part);
1453
1503
  if (!partMatches || !partMatches.groups || partMatches.length === 0)
@@ -1462,9 +1512,9 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
1462
1512
  if (reading)
1463
1513
  if (regexps.tanakaReferenceID.test(reading)) {
1464
1514
  const referenceID = regexps.tanakaReferenceID.exec(reading);
1465
- if (!referenceID)
1515
+ if (!referenceID || !referenceID.groups || !referenceID.groups["entryid"])
1466
1516
  throw new Error(`Invalid reference ID: ${reading}`);
1467
- examplePart.referenceID = referenceID[0];
1517
+ examplePart.referenceID = referenceID.groups["entryid"];
1468
1518
  } else examplePart.reading = reading;
1469
1519
  if (glossNumber)
1470
1520
  examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
@@ -1485,8 +1535,9 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
1485
1535
  mode: "furigana"
1486
1536
  });
1487
1537
  tanakaArray.push({
1488
- phrase,
1489
- translation,
1538
+ id: idMatch.groups["id"].trim(),
1539
+ phrase: phrase.trim(),
1540
+ translation: translation.trim(),
1490
1541
  parts: bParts,
1491
1542
  ...furigana ? { furigana } : {}
1492
1543
  });
@@ -1611,6 +1662,7 @@ var wordAddNoteArray = (arr, cb) => {
1611
1662
  for (const v of arr) cb(v);
1612
1663
  };
1613
1664
  function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath) {
1665
+ var _a;
1614
1666
  try {
1615
1667
  if (!dictWord && id && dict)
1616
1668
  dictWord = dict.find((entry) => entry.id === id);
@@ -1631,7 +1683,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1631
1683
  kanjiForm: dictKanjiForm.form,
1632
1684
  ...dictKanjiForm.notes ? {
1633
1685
  notes: dictKanjiForm.notes.map((note) => {
1634
- var _a;
1686
+ var _a2;
1635
1687
  const noteAndTag = lookupWordNote(
1636
1688
  note,
1637
1689
  void 0,
@@ -1639,7 +1691,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1639
1691
  false,
1640
1692
  note
1641
1693
  );
1642
- return capitalizeString((_a = noteAndTag.note) != null ? _a : note);
1694
+ return capitalizeString((_a2 = noteAndTag.note) != null ? _a2 : note);
1643
1695
  })
1644
1696
  } : {},
1645
1697
  ...dictKanjiForm.commonness && dictKanjiForm.commonness.length > 0 ? { common: true } : {}
@@ -1653,7 +1705,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1653
1705
  (restriction) => `Reading restricted to ${restriction}`
1654
1706
  ) : [],
1655
1707
  ...dictReading.notes ? dictReading.notes.map((note) => {
1656
- var _a;
1708
+ var _a2;
1657
1709
  const noteAndTag = lookupWordNote(
1658
1710
  note,
1659
1711
  void 0,
@@ -1661,13 +1713,12 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1661
1713
  false,
1662
1714
  note
1663
1715
  );
1664
- return capitalizeString((_a = noteAndTag.note) != null ? _a : note);
1716
+ return capitalizeString((_a2 = noteAndTag.note) != null ? _a2 : note);
1665
1717
  }) : []
1666
1718
  ]
1667
1719
  } : {},
1668
1720
  ...dictReading.commonness && dictReading.commonness.length > 0 ? { common: true } : {}
1669
1721
  }));
1670
- let usuallyInKanaMeanings = 0;
1671
1722
  word.translations = dictWord.meanings.map((dictMeaning) => {
1672
1723
  if (!dictMeaning.translations)
1673
1724
  throw new Error(`No translations for ${dictWord.id}`);
@@ -1720,11 +1771,10 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1720
1771
  dictMeaning.info,
1721
1772
  (info) => lookupWordNote(info, notes, word.tags, false, info)
1722
1773
  );
1723
- wordAddNoteArray(dictMeaning.misc, (misc) => {
1724
- lookupWordNote(misc, notes, word.tags, false, misc);
1725
- if (misc.toLowerCase() === "word usually written using kana alone")
1726
- usuallyInKanaMeanings++;
1727
- });
1774
+ wordAddNoteArray(
1775
+ dictMeaning.misc,
1776
+ (misc) => lookupWordNote(misc, notes, word.tags, false, misc)
1777
+ );
1728
1778
  for (let i = 0; i < notes.length; i++)
1729
1779
  notes[i] = capitalizeString(notes[i]);
1730
1780
  return {
@@ -1732,8 +1782,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1732
1782
  notes
1733
1783
  };
1734
1784
  });
1735
- if (word.translations && word.translations.length === usuallyInKanaMeanings)
1736
- word.usuallyInKana = true;
1785
+ if (dictWord.usuallyInKana === true) word.usuallyInKana = true;
1737
1786
  if (kanjiDic && word.kanjiForms) {
1738
1787
  word.kanji = [];
1739
1788
  for (const kanjiForm of word.kanjiForms)
@@ -1757,90 +1806,16 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1757
1806
  }
1758
1807
  if (word.kanji.length === 0) delete word.kanji;
1759
1808
  }
1760
- if (examples && dictWord.hasPhrases === true) {
1761
- let pushIfUnique2 = function(ex) {
1762
- if (!seenPhrases.has(ex.phrase)) {
1763
- wordExamples.push(ex);
1764
- seenPhrases.add(ex.phrase);
1765
- }
1766
- };
1767
- var pushIfUnique = pushIfUnique2;
1768
- const readings = new Set(
1769
- word.readings.filter(
1770
- (reading) => (!reading.notes || !reading.notes.some(
1771
- (note) => notSearchedForms.has(note)
1772
- )) && (word.common === void 0 || reading.common === true)
1773
- ).map((reading) => reading.reading)
1774
- );
1775
- const kanjiForms = word.kanjiForms ? new Set(
1776
- word.kanjiForms.filter(
1777
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1778
- (note) => notSearchedForms.has(note)
1779
- )) && (word.common === void 0 || kanjiForm.common === true)
1780
- ).map((kanjiForm) => kanjiForm.kanjiForm)
1781
- ) : void 0;
1782
- const kanjiFormExamples = [];
1783
- const readingMatchingKanjiFormExamples = [];
1784
- const readingExamples = [];
1785
- for (const example of examples)
1786
- for (const part of example.parts) {
1787
- const readingMatch = part.reading && readings.has(part.reading) || readings.has(part.baseForm);
1788
- if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1789
- if (readingMatch) readingMatchingKanjiFormExamples.push(example);
1790
- else kanjiFormExamples.push(example);
1791
- break;
1792
- }
1793
- if (readingMatch || part.referenceID && word.id && part.referenceID === word.id) {
1794
- readingExamples.push(example);
1795
- break;
1796
- }
1797
- }
1798
- const exampleSize = (/* @__PURE__ */ new Set([
1799
- ...readingMatchingKanjiFormExamples,
1800
- ...kanjiFormExamples,
1801
- ...readingExamples
1802
- ])).size;
1803
- const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1804
- const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1805
- const seenPhrases = /* @__PURE__ */ new Set();
1806
- let wordExamples = [];
1807
- for (const ex of readingMatchingKanjiFormExamples) pushIfUnique2(ex);
1808
- if (includeKanjiFormExamples)
1809
- for (const ex of kanjiFormExamples) pushIfUnique2(ex);
1810
- if (includeReadingExamples)
1811
- for (const ex of readingExamples) pushIfUnique2(ex);
1812
- if (word.translations) {
1813
- const glossSpecificExamples = [];
1814
- for (let i = 0; i < word.translations.length; i++) {
1815
- outer: for (const example of wordExamples)
1816
- for (const part of example.parts)
1817
- if (part.glossNumber === i + 1) {
1818
- glossSpecificExamples.push(example);
1819
- break outer;
1820
- }
1821
- if (glossSpecificExamples.length === 5) break;
1822
- }
1823
- if (glossSpecificExamples.length === 5)
1824
- wordExamples = glossSpecificExamples;
1825
- else if (glossSpecificExamples.length > 0) {
1826
- const seenPhrases2 = new Set(
1827
- glossSpecificExamples.map((ex) => ex.phrase)
1828
- );
1829
- wordExamples = [
1830
- ...glossSpecificExamples,
1831
- ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1832
- ];
1833
- }
1834
- }
1835
- if (wordExamples.length > 0)
1836
- word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => {
1837
- var _a;
1838
- return {
1809
+ if (examples && dictWord.phraseIDs && dictWord.phraseIDs.length > 0) {
1810
+ word.phrases = [];
1811
+ const phraseIDs = new Set(dictWord.phraseIDs);
1812
+ for (const ex of examples)
1813
+ if (phraseIDs.has(ex.id))
1814
+ word.phrases.push({
1839
1815
  phrase: (_a = ex.furigana) != null ? _a : ex.phrase,
1840
1816
  translation: ex.translation,
1841
1817
  originalPhrase: ex.phrase
1842
- };
1843
- });
1818
+ });
1844
1819
  }
1845
1820
  return word;
1846
1821
  } else throw new Error(`Word${id ? ` ${id}` : ""} not found`);