henkan 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +108 -79
- package/dist/index.cjs.js.map +2 -2
- package/dist/index.mjs +108 -79
- package/dist/index.mjs.map +2 -2
- package/dist/types/utils.d.ts +2 -2
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +3 -3
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/makeSSML.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +1 -1
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +4 -4
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +6 -6
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +9 -9
- package/docs/api/interfaces/Word.md +14 -14
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +2 -2
package/dist/index.cjs.js
CHANGED
|
@@ -1222,7 +1222,8 @@ function convertJMdict(xmlString, examples) {
|
|
|
1222
1222
|
noent: true,
|
|
1223
1223
|
recover: false
|
|
1224
1224
|
});
|
|
1225
|
-
|
|
1225
|
+
let dict = [];
|
|
1226
|
+
const partMatches = /* @__PURE__ */ new Set();
|
|
1226
1227
|
import_xml2js.default.parseString(dictParsed, (err, result) => {
|
|
1227
1228
|
if (err) throw err;
|
|
1228
1229
|
if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
|
|
@@ -1316,89 +1317,117 @@ function convertJMdict(xmlString, examples) {
|
|
|
1316
1317
|
entryObj.usuallyInKana = true;
|
|
1317
1318
|
}
|
|
1318
1319
|
if (examples) {
|
|
1319
|
-
const readings2 =
|
|
1320
|
-
|
|
1321
|
-
(
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
entryObj.
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
)
|
|
1333
|
-
const kanjiFormExamples = [];
|
|
1334
|
-
const readingMatchingKanjiFormExamples = [];
|
|
1335
|
-
const readingExamples = [];
|
|
1336
|
-
const partParts = /* @__PURE__ */ new Set();
|
|
1337
|
-
for (const example of examples)
|
|
1338
|
-
for (const part of example.parts) {
|
|
1339
|
-
const readingAsReadingMatch = part.reading !== void 0 && readings2.has(part.reading);
|
|
1340
|
-
if (kanjiForms2 && kanjiForms2.size > 0 && kanjiForms2.has(part.baseForm)) {
|
|
1341
|
-
if (readingAsReadingMatch) {
|
|
1342
|
-
readingMatchingKanjiFormExamples.push(example);
|
|
1343
|
-
partParts.add(part.baseForm).add(part.reading);
|
|
1344
|
-
} else {
|
|
1345
|
-
kanjiFormExamples.push(example);
|
|
1346
|
-
partParts.add(part.baseForm);
|
|
1347
|
-
}
|
|
1348
|
-
break;
|
|
1349
|
-
}
|
|
1350
|
-
const readingAsBaseFormMatch = readings2.has(
|
|
1351
|
-
part.baseForm
|
|
1352
|
-
);
|
|
1353
|
-
const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
|
|
1354
|
-
if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
|
|
1355
|
-
readingExamples.push(example);
|
|
1356
|
-
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1357
|
-
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1358
|
-
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1359
|
-
break;
|
|
1360
|
-
}
|
|
1361
|
-
}
|
|
1362
|
-
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1363
|
-
const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
|
|
1364
|
-
const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
|
|
1365
|
-
let wordExamples = [
|
|
1366
|
-
...readingMatchingKanjiFormExamples,
|
|
1367
|
-
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
1368
|
-
...includeReadingExamples ? readingExamples : []
|
|
1369
|
-
];
|
|
1370
|
-
const glossSpecificExamples = [];
|
|
1371
|
-
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1372
|
-
for (let i = 0; i < entryObj.meanings.length; i++) {
|
|
1373
|
-
outer: for (const example of wordExamples) {
|
|
1374
|
-
if (seenPhrases.has(example.phrase)) continue;
|
|
1375
|
-
for (const part of example.parts)
|
|
1376
|
-
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1377
|
-
glossSpecificExamples.push(example);
|
|
1378
|
-
seenPhrases.add(example.phrase);
|
|
1379
|
-
break outer;
|
|
1380
|
-
}
|
|
1381
|
-
}
|
|
1382
|
-
if (glossSpecificExamples.length === 5) break;
|
|
1383
|
-
}
|
|
1384
|
-
if (glossSpecificExamples.length === 5)
|
|
1385
|
-
wordExamples = [...glossSpecificExamples];
|
|
1386
|
-
else if (glossSpecificExamples.length > 0) {
|
|
1387
|
-
const seenPhrases2 = new Set(
|
|
1388
|
-
glossSpecificExamples.map((ex) => ex.phrase)
|
|
1389
|
-
);
|
|
1390
|
-
wordExamples = [
|
|
1391
|
-
...glossSpecificExamples,
|
|
1392
|
-
...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
|
|
1393
|
-
];
|
|
1394
|
-
}
|
|
1395
|
-
if (wordExamples.length > 0)
|
|
1396
|
-
entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
|
|
1320
|
+
const readings2 = entryObj.readings.filter(
|
|
1321
|
+
(reading) => (!reading.notes || !reading.notes.some(
|
|
1322
|
+
(note) => notSearchedForms.has(note)
|
|
1323
|
+
)) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
|
|
1324
|
+
).map((reading) => reading.reading);
|
|
1325
|
+
const kanjiForms2 = entryObj.kanjiForms ? entryObj.kanjiForms.filter(
|
|
1326
|
+
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1327
|
+
(note) => notSearchedForms.has(note)
|
|
1328
|
+
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1329
|
+
).map((kanjiForm) => kanjiForm.form) : void 0;
|
|
1330
|
+
for (const reading of readings2) partMatches.add(reading);
|
|
1331
|
+
if (kanjiForms2)
|
|
1332
|
+
for (const kanjiForm of kanjiForms2) partMatches.add(kanjiForm);
|
|
1333
|
+
partMatches.add(entryObj.id);
|
|
1397
1334
|
}
|
|
1398
1335
|
if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
|
|
1399
1336
|
dict.push(entryObj);
|
|
1400
1337
|
}
|
|
1401
1338
|
});
|
|
1339
|
+
if (examples && dict.length > 0) {
|
|
1340
|
+
const filteredExamples = examples.filter(
|
|
1341
|
+
(ex) => {
|
|
1342
|
+
const parts = ex.parts.flatMap((part) => [
|
|
1343
|
+
part.baseForm,
|
|
1344
|
+
...part.reading ? [part.reading] : [],
|
|
1345
|
+
...part.referenceID ? [part.referenceID] : []
|
|
1346
|
+
]);
|
|
1347
|
+
for (const part of parts) if (partMatches.has(part)) return true;
|
|
1348
|
+
return false;
|
|
1349
|
+
}
|
|
1350
|
+
);
|
|
1351
|
+
dict = dict.map((entryObj) => {
|
|
1352
|
+
const readings = new Set(
|
|
1353
|
+
entryObj.readings.filter(
|
|
1354
|
+
(reading) => (!reading.notes || !reading.notes.some(
|
|
1355
|
+
(note) => notSearchedForms.has(note)
|
|
1356
|
+
)) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
|
|
1357
|
+
).map((reading) => reading.reading)
|
|
1358
|
+
);
|
|
1359
|
+
const kanjiForms = entryObj.kanjiForms ? new Set(
|
|
1360
|
+
entryObj.kanjiForms.filter(
|
|
1361
|
+
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1362
|
+
(note) => notSearchedForms.has(note)
|
|
1363
|
+
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1364
|
+
).map((kanjiForm) => kanjiForm.form)
|
|
1365
|
+
) : void 0;
|
|
1366
|
+
const kanjiFormExamples = [];
|
|
1367
|
+
const readingMatchingKanjiFormExamples = [];
|
|
1368
|
+
const readingExamples = [];
|
|
1369
|
+
const partParts = /* @__PURE__ */ new Set();
|
|
1370
|
+
for (const example of filteredExamples)
|
|
1371
|
+
for (const part of example.parts) {
|
|
1372
|
+
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
1373
|
+
if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
|
|
1374
|
+
if (readingAsReadingMatch) {
|
|
1375
|
+
readingMatchingKanjiFormExamples.push(example);
|
|
1376
|
+
partParts.add(part.baseForm).add(part.reading);
|
|
1377
|
+
} else {
|
|
1378
|
+
kanjiFormExamples.push(example);
|
|
1379
|
+
partParts.add(part.baseForm);
|
|
1380
|
+
}
|
|
1381
|
+
break;
|
|
1382
|
+
}
|
|
1383
|
+
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
1384
|
+
const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
|
|
1385
|
+
if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
|
|
1386
|
+
readingExamples.push(example);
|
|
1387
|
+
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1388
|
+
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1389
|
+
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1390
|
+
break;
|
|
1391
|
+
}
|
|
1392
|
+
}
|
|
1393
|
+
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1394
|
+
const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
|
|
1395
|
+
const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
|
|
1396
|
+
let wordExamples = [
|
|
1397
|
+
...readingMatchingKanjiFormExamples,
|
|
1398
|
+
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
1399
|
+
...includeReadingExamples ? readingExamples : []
|
|
1400
|
+
];
|
|
1401
|
+
const glossSpecificExamples = [];
|
|
1402
|
+
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1403
|
+
for (let i = 0; i < entryObj.meanings.length; i++) {
|
|
1404
|
+
outer: for (const example of wordExamples) {
|
|
1405
|
+
if (seenPhrases.has(example.phrase)) continue;
|
|
1406
|
+
for (const part of example.parts)
|
|
1407
|
+
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1408
|
+
glossSpecificExamples.push(example);
|
|
1409
|
+
seenPhrases.add(example.phrase);
|
|
1410
|
+
break outer;
|
|
1411
|
+
}
|
|
1412
|
+
}
|
|
1413
|
+
if (glossSpecificExamples.length === 5) break;
|
|
1414
|
+
}
|
|
1415
|
+
if (glossSpecificExamples.length === 5)
|
|
1416
|
+
wordExamples = glossSpecificExamples;
|
|
1417
|
+
else if (glossSpecificExamples.length > 0) {
|
|
1418
|
+
const seenPhrases2 = new Set(
|
|
1419
|
+
glossSpecificExamples.map((ex) => ex.phrase)
|
|
1420
|
+
);
|
|
1421
|
+
wordExamples = [
|
|
1422
|
+
...glossSpecificExamples,
|
|
1423
|
+
...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
|
|
1424
|
+
];
|
|
1425
|
+
}
|
|
1426
|
+
if (wordExamples.length > 0)
|
|
1427
|
+
entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
|
|
1428
|
+
return entryObj;
|
|
1429
|
+
});
|
|
1430
|
+
}
|
|
1402
1431
|
return dict;
|
|
1403
1432
|
} catch (err) {
|
|
1404
1433
|
throw err;
|