henkan 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/index.cjs.js +123 -120
  2. package/dist/index.cjs.js.map +3 -3
  3. package/dist/index.mjs +119 -118
  4. package/dist/index.mjs.map +3 -3
  5. package/dist/types/types.d.ts +5 -3
  6. package/dist/types/types.d.ts.map +1 -1
  7. package/dist/types/utils.d.ts.map +1 -1
  8. package/docs/api/functions/capitalizeString.md +1 -1
  9. package/docs/api/functions/convertJMdict.md +1 -1
  10. package/docs/api/functions/convertKanjiDic.md +1 -1
  11. package/docs/api/functions/convertKradFile.md +1 -1
  12. package/docs/api/functions/convertRadkFile.md +1 -1
  13. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  14. package/docs/api/functions/generateAnkiNote.md +1 -1
  15. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  16. package/docs/api/functions/getKanji.md +1 -1
  17. package/docs/api/functions/getKanjiExtended.md +1 -1
  18. package/docs/api/functions/getWord.md +1 -1
  19. package/docs/api/functions/isStringArray.md +1 -1
  20. package/docs/api/functions/isValidArray.md +1 -1
  21. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  22. package/docs/api/functions/makeSSML.md +1 -1
  23. package/docs/api/functions/shuffleArray.md +1 -1
  24. package/docs/api/functions/synthesizeSpeech.md +1 -1
  25. package/docs/api/interfaces/DictKanji.md +5 -5
  26. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  27. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  28. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  29. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  30. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  31. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  32. package/docs/api/interfaces/DictMeaning.md +11 -11
  33. package/docs/api/interfaces/DictRadical.md +4 -4
  34. package/docs/api/interfaces/DictReading.md +5 -5
  35. package/docs/api/interfaces/DictWord.md +19 -17
  36. package/docs/api/interfaces/ExamplePart.md +7 -7
  37. package/docs/api/interfaces/Grammar.md +15 -15
  38. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  39. package/docs/api/interfaces/Kana.md +11 -11
  40. package/docs/api/interfaces/Kanji.md +22 -22
  41. package/docs/api/interfaces/KanjiComponent.md +3 -3
  42. package/docs/api/interfaces/KanjiForm.md +4 -4
  43. package/docs/api/interfaces/NoteAndTag.md +3 -3
  44. package/docs/api/interfaces/Phrase.md +4 -4
  45. package/docs/api/interfaces/Radical.md +16 -16
  46. package/docs/api/interfaces/Reading.md +5 -5
  47. package/docs/api/interfaces/ResultEntry.md +7 -7
  48. package/docs/api/interfaces/TanakaExample.md +6 -6
  49. package/docs/api/interfaces/Translation.md +3 -3
  50. package/docs/api/interfaces/UsefulRegExps.md +9 -9
  51. package/docs/api/interfaces/Word.md +16 -16
  52. package/docs/api/type-aliases/Dict.md +1 -1
  53. package/docs/api/type-aliases/DictName.md +1 -1
  54. package/docs/api/type-aliases/EntryType.md +1 -1
  55. package/docs/api/type-aliases/JLPT.md +1 -1
  56. package/docs/api/type-aliases/Result.md +1 -1
  57. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -1168,10 +1168,18 @@ function convertJMdict(xmlString, examples) {
1168
1168
  noent: true,
1169
1169
  recover: false
1170
1170
  });
1171
- let dict = [];
1172
- const partMatches = /* @__PURE__ */ new Set();
1171
+ const dict = [];
1173
1172
  xml.parseString(dictParsed, (err, result) => {
1174
1173
  if (err) throw err;
1174
+ const tanakaParts = examples && examples.length > 0 ? new Set(
1175
+ examples.map(
1176
+ (example) => example.parts.map((part) => [
1177
+ part.baseForm,
1178
+ ...part.reading ? [part.reading] : [],
1179
+ ...part.referenceID ? [part.referenceID] : []
1180
+ ])
1181
+ ).flat(2)
1182
+ ) : void 0;
1175
1183
  if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
1176
1184
  for (const entry of result.JMdict.entry) {
1177
1185
  const entryObj = {
@@ -1263,117 +1271,43 @@ function convertJMdict(xmlString, examples) {
1263
1271
  entryObj.usuallyInKana = true;
1264
1272
  }
1265
1273
  if (examples) {
1266
- const readings2 = entryObj.readings.filter(
1267
- (reading) => (!reading.notes || !reading.notes.some(
1268
- (note) => notSearchedForms.has(note)
1269
- )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1270
- ).map((reading) => reading.reading);
1271
- const kanjiForms2 = entryObj.kanjiForms ? entryObj.kanjiForms.filter(
1272
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1273
- (note) => notSearchedForms.has(note)
1274
- )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1275
- ).map((kanjiForm) => kanjiForm.form) : void 0;
1276
- for (const reading of readings2) partMatches.add(reading);
1277
- if (kanjiForms2)
1278
- for (const kanjiForm of kanjiForms2) partMatches.add(kanjiForm);
1279
- partMatches.add(entryObj.id);
1274
+ const readings2 = new Set(
1275
+ entryObj.readings.filter(
1276
+ (reading) => (!reading.notes || !reading.notes.some(
1277
+ (note) => notSearchedForms.has(note)
1278
+ )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1279
+ ).map((reading) => reading.reading)
1280
+ );
1281
+ const kanjiForms2 = entryObj.kanjiForms ? new Set(
1282
+ entryObj.kanjiForms.filter(
1283
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1284
+ (note) => notSearchedForms.has(note)
1285
+ )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1286
+ ).map((kanjiForm) => kanjiForm.form)
1287
+ ) : void 0;
1288
+ let existsExample = false;
1289
+ if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
1290
+ for (const kf of kanjiForms2)
1291
+ if (tanakaParts.has(kf)) {
1292
+ existsExample = true;
1293
+ break;
1294
+ }
1295
+ }
1296
+ if (!existsExample && readings2.size > 0 && tanakaParts) {
1297
+ for (const r of readings2)
1298
+ if (tanakaParts.has(r)) {
1299
+ existsExample = true;
1300
+ break;
1301
+ }
1302
+ }
1303
+ if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
1304
+ existsExample = true;
1305
+ if (existsExample) entryObj.hasPhrases = true;
1280
1306
  }
1281
1307
  if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
1282
1308
  dict.push(entryObj);
1283
1309
  }
1284
1310
  });
1285
- if (examples && dict.length > 0) {
1286
- const filteredExamples = examples.filter(
1287
- (ex) => {
1288
- const parts = ex.parts.flatMap((part) => [
1289
- part.baseForm,
1290
- ...part.reading ? [part.reading] : [],
1291
- ...part.referenceID ? [part.referenceID] : []
1292
- ]);
1293
- for (const part of parts) if (partMatches.has(part)) return true;
1294
- return false;
1295
- }
1296
- );
1297
- dict = dict.map((entryObj) => {
1298
- const readings = new Set(
1299
- entryObj.readings.filter(
1300
- (reading) => (!reading.notes || !reading.notes.some(
1301
- (note) => notSearchedForms.has(note)
1302
- )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1303
- ).map((reading) => reading.reading)
1304
- );
1305
- const kanjiForms = entryObj.kanjiForms ? new Set(
1306
- entryObj.kanjiForms.filter(
1307
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1308
- (note) => notSearchedForms.has(note)
1309
- )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1310
- ).map((kanjiForm) => kanjiForm.form)
1311
- ) : void 0;
1312
- const kanjiFormExamples = [];
1313
- const readingMatchingKanjiFormExamples = [];
1314
- const readingExamples = [];
1315
- const partParts = /* @__PURE__ */ new Set();
1316
- for (const example of filteredExamples)
1317
- for (const part of example.parts) {
1318
- const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
1319
- if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1320
- if (readingAsReadingMatch) {
1321
- readingMatchingKanjiFormExamples.push(example);
1322
- partParts.add(part.baseForm).add(part.reading);
1323
- } else {
1324
- kanjiFormExamples.push(example);
1325
- partParts.add(part.baseForm);
1326
- }
1327
- break;
1328
- }
1329
- const readingAsBaseFormMatch = readings.has(part.baseForm);
1330
- const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
1331
- if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1332
- readingExamples.push(example);
1333
- if (readingAsReadingMatch) partParts.add(part.reading);
1334
- if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1335
- if (referenceIDMatch) partParts.add(part.referenceID);
1336
- break;
1337
- }
1338
- }
1339
- const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1340
- const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1341
- const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1342
- let wordExamples = [
1343
- ...readingMatchingKanjiFormExamples,
1344
- ...includeKanjiFormExamples ? kanjiFormExamples : [],
1345
- ...includeReadingExamples ? readingExamples : []
1346
- ];
1347
- const glossSpecificExamples = [];
1348
- const seenPhrases = /* @__PURE__ */ new Set();
1349
- for (let i = 0; i < entryObj.meanings.length; i++) {
1350
- outer: for (const example of wordExamples) {
1351
- if (seenPhrases.has(example.phrase)) continue;
1352
- for (const part of example.parts)
1353
- if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1354
- glossSpecificExamples.push(example);
1355
- seenPhrases.add(example.phrase);
1356
- break outer;
1357
- }
1358
- }
1359
- if (glossSpecificExamples.length === 5) break;
1360
- }
1361
- if (glossSpecificExamples.length === 5)
1362
- wordExamples = glossSpecificExamples;
1363
- else if (glossSpecificExamples.length > 0) {
1364
- const seenPhrases2 = new Set(
1365
- glossSpecificExamples.map((ex) => ex.phrase)
1366
- );
1367
- wordExamples = [
1368
- ...glossSpecificExamples,
1369
- ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1370
- ];
1371
- }
1372
- if (wordExamples.length > 0)
1373
- entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
1374
- return entryObj;
1375
- });
1376
- }
1377
1311
  return dict;
1378
1312
  } catch (err) {
1379
1313
  throw err;
@@ -1778,16 +1712,83 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1778
1712
  }
1779
1713
  if (word.kanji.length === 0) delete word.kanji;
1780
1714
  }
1781
- if (examples && dictWord.phraseIDs && dictWord.phraseIDs.length > 0) {
1782
- word.phrases = [];
1783
- const phraseIDs = new Set(dictWord.phraseIDs);
1784
- for (const ex of examples)
1785
- if (phraseIDs.has(ex.id))
1786
- word.phrases.push({
1787
- phrase: ex.furigana ?? ex.phrase,
1788
- translation: ex.translation,
1789
- originalPhrase: ex.phrase
1790
- });
1715
+ if (dictWord.hasPhrases === true && examples) {
1716
+ const readings = new Set(
1717
+ word.readings.filter(
1718
+ (reading) => (!reading.notes || !reading.notes.some(
1719
+ (note) => notSearchedForms.has(note)
1720
+ )) && (word.common === void 0 || reading.common === true)
1721
+ ).map((reading) => reading.reading)
1722
+ );
1723
+ const kanjiForms = word.kanjiForms ? new Set(
1724
+ word.kanjiForms.filter(
1725
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1726
+ (note) => notSearchedForms.has(note)
1727
+ )) && (word.common === void 0 || kanjiForm.common === true)
1728
+ ).map((kanjiForm) => kanjiForm.kanjiForm)
1729
+ ) : void 0;
1730
+ const kanjiFormExamples = [];
1731
+ const readingMatchingKanjiFormExamples = [];
1732
+ const readingExamples = [];
1733
+ const partParts = /* @__PURE__ */ new Set();
1734
+ for (const example of examples)
1735
+ for (const part of example.parts) {
1736
+ const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
1737
+ if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1738
+ if (readingAsReadingMatch) {
1739
+ readingMatchingKanjiFormExamples.push(example);
1740
+ partParts.add(part.baseForm).add(part.reading);
1741
+ } else {
1742
+ kanjiFormExamples.push(example);
1743
+ partParts.add(part.baseForm);
1744
+ }
1745
+ break;
1746
+ }
1747
+ const readingAsBaseFormMatch = readings.has(part.baseForm);
1748
+ const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
1749
+ if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1750
+ readingExamples.push(example);
1751
+ if (readingAsReadingMatch) partParts.add(part.reading);
1752
+ if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1753
+ if (referenceIDMatch) partParts.add(part.referenceID);
1754
+ break;
1755
+ }
1756
+ }
1757
+ const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1758
+ const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1759
+ const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1760
+ let wordExamples = [
1761
+ ...readingMatchingKanjiFormExamples,
1762
+ ...includeKanjiFormExamples ? kanjiFormExamples : [],
1763
+ ...includeReadingExamples ? readingExamples : []
1764
+ ];
1765
+ const glossSpecificExamples = [];
1766
+ const seenPhrases = /* @__PURE__ */ new Set();
1767
+ for (let i = 0; i < word.translations.length; i++) {
1768
+ outer: for (const example of wordExamples) {
1769
+ if (seenPhrases.has(example.phrase)) continue;
1770
+ for (const part of example.parts)
1771
+ if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1772
+ glossSpecificExamples.push(example);
1773
+ seenPhrases.add(example.phrase);
1774
+ break outer;
1775
+ }
1776
+ }
1777
+ if (glossSpecificExamples.length === 5) break;
1778
+ }
1779
+ if (glossSpecificExamples.length === 5)
1780
+ wordExamples = [...glossSpecificExamples];
1781
+ else if (glossSpecificExamples.length > 0)
1782
+ wordExamples = [
1783
+ ...glossSpecificExamples,
1784
+ ...wordExamples.filter((ex) => !seenPhrases.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1785
+ ];
1786
+ if (wordExamples.length > 0)
1787
+ word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
1788
+ phrase: ex.furigana ?? ex.phrase,
1789
+ translation: ex.translation,
1790
+ originalPhrase: ex.phrase
1791
+ }));
1791
1792
  }
1792
1793
  return word;
1793
1794
  } else throw new Error(`Word${id ? ` ${id}` : ""} not found`);