henkan 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/dist/index.cjs.js +108 -79
  2. package/dist/index.cjs.js.map +2 -2
  3. package/dist/index.mjs +108 -79
  4. package/dist/index.mjs.map +2 -2
  5. package/dist/types/utils.d.ts +2 -2
  6. package/dist/types/utils.d.ts.map +1 -1
  7. package/docs/api/functions/capitalizeString.md +1 -1
  8. package/docs/api/functions/convertJMdict.md +3 -3
  9. package/docs/api/functions/convertKanjiDic.md +1 -1
  10. package/docs/api/functions/convertKradFile.md +1 -1
  11. package/docs/api/functions/convertRadkFile.md +1 -1
  12. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  13. package/docs/api/functions/generateAnkiNote.md +1 -1
  14. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  15. package/docs/api/functions/getKanji.md +1 -1
  16. package/docs/api/functions/getKanjiExtended.md +1 -1
  17. package/docs/api/functions/getWord.md +1 -1
  18. package/docs/api/functions/isStringArray.md +1 -1
  19. package/docs/api/functions/isValidArray.md +1 -1
  20. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  21. package/docs/api/functions/makeSSML.md +1 -1
  22. package/docs/api/functions/shuffleArray.md +1 -1
  23. package/docs/api/functions/synthesizeSpeech.md +1 -1
  24. package/docs/api/interfaces/DictKanji.md +5 -5
  25. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  26. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  27. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  28. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  29. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  30. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  31. package/docs/api/interfaces/DictMeaning.md +11 -11
  32. package/docs/api/interfaces/DictRadical.md +4 -4
  33. package/docs/api/interfaces/DictReading.md +5 -5
  34. package/docs/api/interfaces/DictWord.md +8 -8
  35. package/docs/api/interfaces/ExamplePart.md +7 -7
  36. package/docs/api/interfaces/Grammar.md +15 -15
  37. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  38. package/docs/api/interfaces/Kana.md +11 -11
  39. package/docs/api/interfaces/Kanji.md +22 -22
  40. package/docs/api/interfaces/KanjiComponent.md +3 -3
  41. package/docs/api/interfaces/KanjiForm.md +4 -4
  42. package/docs/api/interfaces/NoteAndTag.md +3 -3
  43. package/docs/api/interfaces/Phrase.md +4 -4
  44. package/docs/api/interfaces/Radical.md +16 -16
  45. package/docs/api/interfaces/Reading.md +5 -5
  46. package/docs/api/interfaces/ResultEntry.md +7 -7
  47. package/docs/api/interfaces/TanakaExample.md +6 -6
  48. package/docs/api/interfaces/Translation.md +3 -3
  49. package/docs/api/interfaces/UsefulRegExps.md +9 -9
  50. package/docs/api/interfaces/Word.md +14 -14
  51. package/docs/api/type-aliases/Dict.md +1 -1
  52. package/docs/api/type-aliases/DictName.md +1 -1
  53. package/docs/api/type-aliases/EntryType.md +1 -1
  54. package/docs/api/type-aliases/JLPT.md +1 -1
  55. package/docs/api/type-aliases/Result.md +1 -1
  56. package/package.json +2 -2
package/dist/index.cjs.js CHANGED
@@ -1222,7 +1222,8 @@ function convertJMdict(xmlString, examples) {
1222
1222
  noent: true,
1223
1223
  recover: false
1224
1224
  });
1225
- const dict = [];
1225
+ let dict = [];
1226
+ const partMatches = /* @__PURE__ */ new Set();
1226
1227
  import_xml2js.default.parseString(dictParsed, (err, result) => {
1227
1228
  if (err) throw err;
1228
1229
  if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
@@ -1316,89 +1317,117 @@ function convertJMdict(xmlString, examples) {
1316
1317
  entryObj.usuallyInKana = true;
1317
1318
  }
1318
1319
  if (examples) {
1319
- const readings2 = new Set(
1320
- entryObj.readings.filter(
1321
- (reading) => (!reading.notes || !reading.notes.some(
1322
- (note) => notSearchedForms.has(note)
1323
- )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1324
- ).map((reading) => reading.reading)
1325
- );
1326
- const kanjiForms2 = entryObj.kanjiForms ? new Set(
1327
- entryObj.kanjiForms.filter(
1328
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1329
- (note) => notSearchedForms.has(note)
1330
- )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1331
- ).map((kanjiForm) => kanjiForm.form)
1332
- ) : void 0;
1333
- const kanjiFormExamples = [];
1334
- const readingMatchingKanjiFormExamples = [];
1335
- const readingExamples = [];
1336
- const partParts = /* @__PURE__ */ new Set();
1337
- for (const example of examples)
1338
- for (const part of example.parts) {
1339
- const readingAsReadingMatch = part.reading !== void 0 && readings2.has(part.reading);
1340
- if (kanjiForms2 && kanjiForms2.size > 0 && kanjiForms2.has(part.baseForm)) {
1341
- if (readingAsReadingMatch) {
1342
- readingMatchingKanjiFormExamples.push(example);
1343
- partParts.add(part.baseForm).add(part.reading);
1344
- } else {
1345
- kanjiFormExamples.push(example);
1346
- partParts.add(part.baseForm);
1347
- }
1348
- break;
1349
- }
1350
- const readingAsBaseFormMatch = readings2.has(
1351
- part.baseForm
1352
- );
1353
- const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
1354
- if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1355
- readingExamples.push(example);
1356
- if (readingAsReadingMatch) partParts.add(part.reading);
1357
- if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1358
- if (referenceIDMatch) partParts.add(part.referenceID);
1359
- break;
1360
- }
1361
- }
1362
- const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1363
- const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1364
- const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1365
- let wordExamples = [
1366
- ...readingMatchingKanjiFormExamples,
1367
- ...includeKanjiFormExamples ? kanjiFormExamples : [],
1368
- ...includeReadingExamples ? readingExamples : []
1369
- ];
1370
- const glossSpecificExamples = [];
1371
- const seenPhrases = /* @__PURE__ */ new Set();
1372
- for (let i = 0; i < entryObj.meanings.length; i++) {
1373
- outer: for (const example of wordExamples) {
1374
- if (seenPhrases.has(example.phrase)) continue;
1375
- for (const part of example.parts)
1376
- if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1377
- glossSpecificExamples.push(example);
1378
- seenPhrases.add(example.phrase);
1379
- break outer;
1380
- }
1381
- }
1382
- if (glossSpecificExamples.length === 5) break;
1383
- }
1384
- if (glossSpecificExamples.length === 5)
1385
- wordExamples = [...glossSpecificExamples];
1386
- else if (glossSpecificExamples.length > 0) {
1387
- const seenPhrases2 = new Set(
1388
- glossSpecificExamples.map((ex) => ex.phrase)
1389
- );
1390
- wordExamples = [
1391
- ...glossSpecificExamples,
1392
- ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1393
- ];
1394
- }
1395
- if (wordExamples.length > 0)
1396
- entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
1320
+ const readings2 = entryObj.readings.filter(
1321
+ (reading) => (!reading.notes || !reading.notes.some(
1322
+ (note) => notSearchedForms.has(note)
1323
+ )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1324
+ ).map((reading) => reading.reading);
1325
+ const kanjiForms2 = entryObj.kanjiForms ? entryObj.kanjiForms.filter(
1326
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1327
+ (note) => notSearchedForms.has(note)
1328
+ )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1329
+ ).map((kanjiForm) => kanjiForm.form) : void 0;
1330
+ for (const reading of readings2) partMatches.add(reading);
1331
+ if (kanjiForms2)
1332
+ for (const kanjiForm of kanjiForms2) partMatches.add(kanjiForm);
1333
+ partMatches.add(entryObj.id);
1397
1334
  }
1398
1335
  if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
1399
1336
  dict.push(entryObj);
1400
1337
  }
1401
1338
  });
1339
+ if (examples && dict.length > 0) {
1340
+ const filteredExamples = examples.filter(
1341
+ (ex) => {
1342
+ const parts = ex.parts.flatMap((part) => [
1343
+ part.baseForm,
1344
+ ...part.reading ? [part.reading] : [],
1345
+ ...part.referenceID ? [part.referenceID] : []
1346
+ ]);
1347
+ for (const part of parts) if (partMatches.has(part)) return true;
1348
+ return false;
1349
+ }
1350
+ );
1351
+ dict = dict.map((entryObj) => {
1352
+ const readings = new Set(
1353
+ entryObj.readings.filter(
1354
+ (reading) => (!reading.notes || !reading.notes.some(
1355
+ (note) => notSearchedForms.has(note)
1356
+ )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1357
+ ).map((reading) => reading.reading)
1358
+ );
1359
+ const kanjiForms = entryObj.kanjiForms ? new Set(
1360
+ entryObj.kanjiForms.filter(
1361
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1362
+ (note) => notSearchedForms.has(note)
1363
+ )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1364
+ ).map((kanjiForm) => kanjiForm.form)
1365
+ ) : void 0;
1366
+ const kanjiFormExamples = [];
1367
+ const readingMatchingKanjiFormExamples = [];
1368
+ const readingExamples = [];
1369
+ const partParts = /* @__PURE__ */ new Set();
1370
+ for (const example of filteredExamples)
1371
+ for (const part of example.parts) {
1372
+ const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
1373
+ if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1374
+ if (readingAsReadingMatch) {
1375
+ readingMatchingKanjiFormExamples.push(example);
1376
+ partParts.add(part.baseForm).add(part.reading);
1377
+ } else {
1378
+ kanjiFormExamples.push(example);
1379
+ partParts.add(part.baseForm);
1380
+ }
1381
+ break;
1382
+ }
1383
+ const readingAsBaseFormMatch = readings.has(part.baseForm);
1384
+ const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
1385
+ if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1386
+ readingExamples.push(example);
1387
+ if (readingAsReadingMatch) partParts.add(part.reading);
1388
+ if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1389
+ if (referenceIDMatch) partParts.add(part.referenceID);
1390
+ break;
1391
+ }
1392
+ }
1393
+ const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1394
+ const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1395
+ const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1396
+ let wordExamples = [
1397
+ ...readingMatchingKanjiFormExamples,
1398
+ ...includeKanjiFormExamples ? kanjiFormExamples : [],
1399
+ ...includeReadingExamples ? readingExamples : []
1400
+ ];
1401
+ const glossSpecificExamples = [];
1402
+ const seenPhrases = /* @__PURE__ */ new Set();
1403
+ for (let i = 0; i < entryObj.meanings.length; i++) {
1404
+ outer: for (const example of wordExamples) {
1405
+ if (seenPhrases.has(example.phrase)) continue;
1406
+ for (const part of example.parts)
1407
+ if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1408
+ glossSpecificExamples.push(example);
1409
+ seenPhrases.add(example.phrase);
1410
+ break outer;
1411
+ }
1412
+ }
1413
+ if (glossSpecificExamples.length === 5) break;
1414
+ }
1415
+ if (glossSpecificExamples.length === 5)
1416
+ wordExamples = glossSpecificExamples;
1417
+ else if (glossSpecificExamples.length > 0) {
1418
+ const seenPhrases2 = new Set(
1419
+ glossSpecificExamples.map((ex) => ex.phrase)
1420
+ );
1421
+ wordExamples = [
1422
+ ...glossSpecificExamples,
1423
+ ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1424
+ ];
1425
+ }
1426
+ if (wordExamples.length > 0)
1427
+ entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
1428
+ return entryObj;
1429
+ });
1430
+ }
1402
1431
  return dict;
1403
1432
  } catch (err) {
1404
1433
  throw err;