henkan 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/dist/index.cjs.js +108 -79
  2. package/dist/index.cjs.js.map +2 -2
  3. package/dist/index.mjs +108 -79
  4. package/dist/index.mjs.map +2 -2
  5. package/dist/types/utils.d.ts +2 -2
  6. package/dist/types/utils.d.ts.map +1 -1
  7. package/docs/api/functions/capitalizeString.md +1 -1
  8. package/docs/api/functions/convertJMdict.md +3 -3
  9. package/docs/api/functions/convertKanjiDic.md +1 -1
  10. package/docs/api/functions/convertKradFile.md +1 -1
  11. package/docs/api/functions/convertRadkFile.md +1 -1
  12. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  13. package/docs/api/functions/generateAnkiNote.md +1 -1
  14. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  15. package/docs/api/functions/getKanji.md +1 -1
  16. package/docs/api/functions/getKanjiExtended.md +1 -1
  17. package/docs/api/functions/getWord.md +1 -1
  18. package/docs/api/functions/isStringArray.md +1 -1
  19. package/docs/api/functions/isValidArray.md +1 -1
  20. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  21. package/docs/api/functions/makeSSML.md +1 -1
  22. package/docs/api/functions/shuffleArray.md +1 -1
  23. package/docs/api/functions/synthesizeSpeech.md +1 -1
  24. package/docs/api/interfaces/DictKanji.md +5 -5
  25. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  26. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  27. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  28. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  29. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  30. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  31. package/docs/api/interfaces/DictMeaning.md +11 -11
  32. package/docs/api/interfaces/DictRadical.md +4 -4
  33. package/docs/api/interfaces/DictReading.md +5 -5
  34. package/docs/api/interfaces/DictWord.md +8 -8
  35. package/docs/api/interfaces/ExamplePart.md +7 -7
  36. package/docs/api/interfaces/Grammar.md +15 -15
  37. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  38. package/docs/api/interfaces/Kana.md +11 -11
  39. package/docs/api/interfaces/Kanji.md +22 -22
  40. package/docs/api/interfaces/KanjiComponent.md +3 -3
  41. package/docs/api/interfaces/KanjiForm.md +4 -4
  42. package/docs/api/interfaces/NoteAndTag.md +3 -3
  43. package/docs/api/interfaces/Phrase.md +4 -4
  44. package/docs/api/interfaces/Radical.md +16 -16
  45. package/docs/api/interfaces/Reading.md +5 -5
  46. package/docs/api/interfaces/ResultEntry.md +7 -7
  47. package/docs/api/interfaces/TanakaExample.md +6 -6
  48. package/docs/api/interfaces/Translation.md +3 -3
  49. package/docs/api/interfaces/UsefulRegExps.md +9 -9
  50. package/docs/api/interfaces/Word.md +14 -14
  51. package/docs/api/type-aliases/Dict.md +1 -1
  52. package/docs/api/type-aliases/DictName.md +1 -1
  53. package/docs/api/type-aliases/EntryType.md +1 -1
  54. package/docs/api/type-aliases/JLPT.md +1 -1
  55. package/docs/api/type-aliases/Result.md +1 -1
  56. package/package.json +2 -2
package/dist/index.mjs CHANGED
@@ -1168,7 +1168,8 @@ function convertJMdict(xmlString, examples) {
1168
1168
  noent: true,
1169
1169
  recover: false
1170
1170
  });
1171
- const dict = [];
1171
+ let dict = [];
1172
+ const partMatches = /* @__PURE__ */ new Set();
1172
1173
  xml.parseString(dictParsed, (err, result) => {
1173
1174
  if (err) throw err;
1174
1175
  if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
@@ -1262,89 +1263,117 @@ function convertJMdict(xmlString, examples) {
1262
1263
  entryObj.usuallyInKana = true;
1263
1264
  }
1264
1265
  if (examples) {
1265
- const readings2 = new Set(
1266
- entryObj.readings.filter(
1267
- (reading) => (!reading.notes || !reading.notes.some(
1268
- (note) => notSearchedForms.has(note)
1269
- )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1270
- ).map((reading) => reading.reading)
1271
- );
1272
- const kanjiForms2 = entryObj.kanjiForms ? new Set(
1273
- entryObj.kanjiForms.filter(
1274
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1275
- (note) => notSearchedForms.has(note)
1276
- )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1277
- ).map((kanjiForm) => kanjiForm.form)
1278
- ) : void 0;
1279
- const kanjiFormExamples = [];
1280
- const readingMatchingKanjiFormExamples = [];
1281
- const readingExamples = [];
1282
- const partParts = /* @__PURE__ */ new Set();
1283
- for (const example of examples)
1284
- for (const part of example.parts) {
1285
- const readingAsReadingMatch = part.reading !== void 0 && readings2.has(part.reading);
1286
- if (kanjiForms2 && kanjiForms2.size > 0 && kanjiForms2.has(part.baseForm)) {
1287
- if (readingAsReadingMatch) {
1288
- readingMatchingKanjiFormExamples.push(example);
1289
- partParts.add(part.baseForm).add(part.reading);
1290
- } else {
1291
- kanjiFormExamples.push(example);
1292
- partParts.add(part.baseForm);
1293
- }
1294
- break;
1295
- }
1296
- const readingAsBaseFormMatch = readings2.has(
1297
- part.baseForm
1298
- );
1299
- const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
1300
- if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1301
- readingExamples.push(example);
1302
- if (readingAsReadingMatch) partParts.add(part.reading);
1303
- if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1304
- if (referenceIDMatch) partParts.add(part.referenceID);
1305
- break;
1306
- }
1307
- }
1308
- const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1309
- const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1310
- const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1311
- let wordExamples = [
1312
- ...readingMatchingKanjiFormExamples,
1313
- ...includeKanjiFormExamples ? kanjiFormExamples : [],
1314
- ...includeReadingExamples ? readingExamples : []
1315
- ];
1316
- const glossSpecificExamples = [];
1317
- const seenPhrases = /* @__PURE__ */ new Set();
1318
- for (let i = 0; i < entryObj.meanings.length; i++) {
1319
- outer: for (const example of wordExamples) {
1320
- if (seenPhrases.has(example.phrase)) continue;
1321
- for (const part of example.parts)
1322
- if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1323
- glossSpecificExamples.push(example);
1324
- seenPhrases.add(example.phrase);
1325
- break outer;
1326
- }
1327
- }
1328
- if (glossSpecificExamples.length === 5) break;
1329
- }
1330
- if (glossSpecificExamples.length === 5)
1331
- wordExamples = [...glossSpecificExamples];
1332
- else if (glossSpecificExamples.length > 0) {
1333
- const seenPhrases2 = new Set(
1334
- glossSpecificExamples.map((ex) => ex.phrase)
1335
- );
1336
- wordExamples = [
1337
- ...glossSpecificExamples,
1338
- ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1339
- ];
1340
- }
1341
- if (wordExamples.length > 0)
1342
- entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
1266
+ const readings2 = entryObj.readings.filter(
1267
+ (reading) => (!reading.notes || !reading.notes.some(
1268
+ (note) => notSearchedForms.has(note)
1269
+ )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1270
+ ).map((reading) => reading.reading);
1271
+ const kanjiForms2 = entryObj.kanjiForms ? entryObj.kanjiForms.filter(
1272
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1273
+ (note) => notSearchedForms.has(note)
1274
+ )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1275
+ ).map((kanjiForm) => kanjiForm.form) : void 0;
1276
+ for (const reading of readings2) partMatches.add(reading);
1277
+ if (kanjiForms2)
1278
+ for (const kanjiForm of kanjiForms2) partMatches.add(kanjiForm);
1279
+ partMatches.add(entryObj.id);
1343
1280
  }
1344
1281
  if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
1345
1282
  dict.push(entryObj);
1346
1283
  }
1347
1284
  });
1285
+ if (examples && dict.length > 0) {
1286
+ const filteredExamples = examples.filter(
1287
+ (ex) => {
1288
+ const parts = ex.parts.flatMap((part) => [
1289
+ part.baseForm,
1290
+ ...part.reading ? [part.reading] : [],
1291
+ ...part.referenceID ? [part.referenceID] : []
1292
+ ]);
1293
+ for (const part of parts) if (partMatches.has(part)) return true;
1294
+ return false;
1295
+ }
1296
+ );
1297
+ dict = dict.map((entryObj) => {
1298
+ const readings = new Set(
1299
+ entryObj.readings.filter(
1300
+ (reading) => (!reading.notes || !reading.notes.some(
1301
+ (note) => notSearchedForms.has(note)
1302
+ )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
1303
+ ).map((reading) => reading.reading)
1304
+ );
1305
+ const kanjiForms = entryObj.kanjiForms ? new Set(
1306
+ entryObj.kanjiForms.filter(
1307
+ (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1308
+ (note) => notSearchedForms.has(note)
1309
+ )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1310
+ ).map((kanjiForm) => kanjiForm.form)
1311
+ ) : void 0;
1312
+ const kanjiFormExamples = [];
1313
+ const readingMatchingKanjiFormExamples = [];
1314
+ const readingExamples = [];
1315
+ const partParts = /* @__PURE__ */ new Set();
1316
+ for (const example of filteredExamples)
1317
+ for (const part of example.parts) {
1318
+ const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
1319
+ if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1320
+ if (readingAsReadingMatch) {
1321
+ readingMatchingKanjiFormExamples.push(example);
1322
+ partParts.add(part.baseForm).add(part.reading);
1323
+ } else {
1324
+ kanjiFormExamples.push(example);
1325
+ partParts.add(part.baseForm);
1326
+ }
1327
+ break;
1328
+ }
1329
+ const readingAsBaseFormMatch = readings.has(part.baseForm);
1330
+ const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
1331
+ if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1332
+ readingExamples.push(example);
1333
+ if (readingAsReadingMatch) partParts.add(part.reading);
1334
+ if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1335
+ if (referenceIDMatch) partParts.add(part.referenceID);
1336
+ break;
1337
+ }
1338
+ }
1339
+ const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1340
+ const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1341
+ const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1342
+ let wordExamples = [
1343
+ ...readingMatchingKanjiFormExamples,
1344
+ ...includeKanjiFormExamples ? kanjiFormExamples : [],
1345
+ ...includeReadingExamples ? readingExamples : []
1346
+ ];
1347
+ const glossSpecificExamples = [];
1348
+ const seenPhrases = /* @__PURE__ */ new Set();
1349
+ for (let i = 0; i < entryObj.meanings.length; i++) {
1350
+ outer: for (const example of wordExamples) {
1351
+ if (seenPhrases.has(example.phrase)) continue;
1352
+ for (const part of example.parts)
1353
+ if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1354
+ glossSpecificExamples.push(example);
1355
+ seenPhrases.add(example.phrase);
1356
+ break outer;
1357
+ }
1358
+ }
1359
+ if (glossSpecificExamples.length === 5) break;
1360
+ }
1361
+ if (glossSpecificExamples.length === 5)
1362
+ wordExamples = glossSpecificExamples;
1363
+ else if (glossSpecificExamples.length > 0) {
1364
+ const seenPhrases2 = new Set(
1365
+ glossSpecificExamples.map((ex) => ex.phrase)
1366
+ );
1367
+ wordExamples = [
1368
+ ...glossSpecificExamples,
1369
+ ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
1370
+ ];
1371
+ }
1372
+ if (wordExamples.length > 0)
1373
+ entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
1374
+ return entryObj;
1375
+ });
1376
+ }
1348
1377
  return dict;
1349
1378
  } catch (err) {
1350
1379
  throw err;