henkan 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +108 -79
- package/dist/index.cjs.js.map +2 -2
- package/dist/index.mjs +108 -79
- package/dist/index.mjs.map +2 -2
- package/dist/types/utils.d.ts +2 -2
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +3 -3
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/makeSSML.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +1 -1
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +4 -4
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +6 -6
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +9 -9
- package/docs/api/interfaces/Word.md +14 -14
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +2 -2
package/dist/index.mjs
CHANGED
|
@@ -1168,7 +1168,8 @@ function convertJMdict(xmlString, examples) {
|
|
|
1168
1168
|
noent: true,
|
|
1169
1169
|
recover: false
|
|
1170
1170
|
});
|
|
1171
|
-
|
|
1171
|
+
let dict = [];
|
|
1172
|
+
const partMatches = /* @__PURE__ */ new Set();
|
|
1172
1173
|
xml.parseString(dictParsed, (err, result) => {
|
|
1173
1174
|
if (err) throw err;
|
|
1174
1175
|
if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
|
|
@@ -1262,89 +1263,117 @@ function convertJMdict(xmlString, examples) {
|
|
|
1262
1263
|
entryObj.usuallyInKana = true;
|
|
1263
1264
|
}
|
|
1264
1265
|
if (examples) {
|
|
1265
|
-
const readings2 =
|
|
1266
|
-
|
|
1267
|
-
(
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
entryObj.
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
)
|
|
1279
|
-
const kanjiFormExamples = [];
|
|
1280
|
-
const readingMatchingKanjiFormExamples = [];
|
|
1281
|
-
const readingExamples = [];
|
|
1282
|
-
const partParts = /* @__PURE__ */ new Set();
|
|
1283
|
-
for (const example of examples)
|
|
1284
|
-
for (const part of example.parts) {
|
|
1285
|
-
const readingAsReadingMatch = part.reading !== void 0 && readings2.has(part.reading);
|
|
1286
|
-
if (kanjiForms2 && kanjiForms2.size > 0 && kanjiForms2.has(part.baseForm)) {
|
|
1287
|
-
if (readingAsReadingMatch) {
|
|
1288
|
-
readingMatchingKanjiFormExamples.push(example);
|
|
1289
|
-
partParts.add(part.baseForm).add(part.reading);
|
|
1290
|
-
} else {
|
|
1291
|
-
kanjiFormExamples.push(example);
|
|
1292
|
-
partParts.add(part.baseForm);
|
|
1293
|
-
}
|
|
1294
|
-
break;
|
|
1295
|
-
}
|
|
1296
|
-
const readingAsBaseFormMatch = readings2.has(
|
|
1297
|
-
part.baseForm
|
|
1298
|
-
);
|
|
1299
|
-
const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
|
|
1300
|
-
if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
|
|
1301
|
-
readingExamples.push(example);
|
|
1302
|
-
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1303
|
-
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1304
|
-
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1305
|
-
break;
|
|
1306
|
-
}
|
|
1307
|
-
}
|
|
1308
|
-
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1309
|
-
const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
|
|
1310
|
-
const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
|
|
1311
|
-
let wordExamples = [
|
|
1312
|
-
...readingMatchingKanjiFormExamples,
|
|
1313
|
-
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
1314
|
-
...includeReadingExamples ? readingExamples : []
|
|
1315
|
-
];
|
|
1316
|
-
const glossSpecificExamples = [];
|
|
1317
|
-
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1318
|
-
for (let i = 0; i < entryObj.meanings.length; i++) {
|
|
1319
|
-
outer: for (const example of wordExamples) {
|
|
1320
|
-
if (seenPhrases.has(example.phrase)) continue;
|
|
1321
|
-
for (const part of example.parts)
|
|
1322
|
-
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1323
|
-
glossSpecificExamples.push(example);
|
|
1324
|
-
seenPhrases.add(example.phrase);
|
|
1325
|
-
break outer;
|
|
1326
|
-
}
|
|
1327
|
-
}
|
|
1328
|
-
if (glossSpecificExamples.length === 5) break;
|
|
1329
|
-
}
|
|
1330
|
-
if (glossSpecificExamples.length === 5)
|
|
1331
|
-
wordExamples = [...glossSpecificExamples];
|
|
1332
|
-
else if (glossSpecificExamples.length > 0) {
|
|
1333
|
-
const seenPhrases2 = new Set(
|
|
1334
|
-
glossSpecificExamples.map((ex) => ex.phrase)
|
|
1335
|
-
);
|
|
1336
|
-
wordExamples = [
|
|
1337
|
-
...glossSpecificExamples,
|
|
1338
|
-
...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
|
|
1339
|
-
];
|
|
1340
|
-
}
|
|
1341
|
-
if (wordExamples.length > 0)
|
|
1342
|
-
entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
|
|
1266
|
+
const readings2 = entryObj.readings.filter(
|
|
1267
|
+
(reading) => (!reading.notes || !reading.notes.some(
|
|
1268
|
+
(note) => notSearchedForms.has(note)
|
|
1269
|
+
)) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
|
|
1270
|
+
).map((reading) => reading.reading);
|
|
1271
|
+
const kanjiForms2 = entryObj.kanjiForms ? entryObj.kanjiForms.filter(
|
|
1272
|
+
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1273
|
+
(note) => notSearchedForms.has(note)
|
|
1274
|
+
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1275
|
+
).map((kanjiForm) => kanjiForm.form) : void 0;
|
|
1276
|
+
for (const reading of readings2) partMatches.add(reading);
|
|
1277
|
+
if (kanjiForms2)
|
|
1278
|
+
for (const kanjiForm of kanjiForms2) partMatches.add(kanjiForm);
|
|
1279
|
+
partMatches.add(entryObj.id);
|
|
1343
1280
|
}
|
|
1344
1281
|
if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
|
|
1345
1282
|
dict.push(entryObj);
|
|
1346
1283
|
}
|
|
1347
1284
|
});
|
|
1285
|
+
if (examples && dict.length > 0) {
|
|
1286
|
+
const filteredExamples = examples.filter(
|
|
1287
|
+
(ex) => {
|
|
1288
|
+
const parts = ex.parts.flatMap((part) => [
|
|
1289
|
+
part.baseForm,
|
|
1290
|
+
...part.reading ? [part.reading] : [],
|
|
1291
|
+
...part.referenceID ? [part.referenceID] : []
|
|
1292
|
+
]);
|
|
1293
|
+
for (const part of parts) if (partMatches.has(part)) return true;
|
|
1294
|
+
return false;
|
|
1295
|
+
}
|
|
1296
|
+
);
|
|
1297
|
+
dict = dict.map((entryObj) => {
|
|
1298
|
+
const readings = new Set(
|
|
1299
|
+
entryObj.readings.filter(
|
|
1300
|
+
(reading) => (!reading.notes || !reading.notes.some(
|
|
1301
|
+
(note) => notSearchedForms.has(note)
|
|
1302
|
+
)) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
|
|
1303
|
+
).map((reading) => reading.reading)
|
|
1304
|
+
);
|
|
1305
|
+
const kanjiForms = entryObj.kanjiForms ? new Set(
|
|
1306
|
+
entryObj.kanjiForms.filter(
|
|
1307
|
+
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1308
|
+
(note) => notSearchedForms.has(note)
|
|
1309
|
+
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1310
|
+
).map((kanjiForm) => kanjiForm.form)
|
|
1311
|
+
) : void 0;
|
|
1312
|
+
const kanjiFormExamples = [];
|
|
1313
|
+
const readingMatchingKanjiFormExamples = [];
|
|
1314
|
+
const readingExamples = [];
|
|
1315
|
+
const partParts = /* @__PURE__ */ new Set();
|
|
1316
|
+
for (const example of filteredExamples)
|
|
1317
|
+
for (const part of example.parts) {
|
|
1318
|
+
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
1319
|
+
if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
|
|
1320
|
+
if (readingAsReadingMatch) {
|
|
1321
|
+
readingMatchingKanjiFormExamples.push(example);
|
|
1322
|
+
partParts.add(part.baseForm).add(part.reading);
|
|
1323
|
+
} else {
|
|
1324
|
+
kanjiFormExamples.push(example);
|
|
1325
|
+
partParts.add(part.baseForm);
|
|
1326
|
+
}
|
|
1327
|
+
break;
|
|
1328
|
+
}
|
|
1329
|
+
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
1330
|
+
const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
|
|
1331
|
+
if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
|
|
1332
|
+
readingExamples.push(example);
|
|
1333
|
+
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1334
|
+
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1335
|
+
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1336
|
+
break;
|
|
1337
|
+
}
|
|
1338
|
+
}
|
|
1339
|
+
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1340
|
+
const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
|
|
1341
|
+
const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
|
|
1342
|
+
let wordExamples = [
|
|
1343
|
+
...readingMatchingKanjiFormExamples,
|
|
1344
|
+
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
1345
|
+
...includeReadingExamples ? readingExamples : []
|
|
1346
|
+
];
|
|
1347
|
+
const glossSpecificExamples = [];
|
|
1348
|
+
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1349
|
+
for (let i = 0; i < entryObj.meanings.length; i++) {
|
|
1350
|
+
outer: for (const example of wordExamples) {
|
|
1351
|
+
if (seenPhrases.has(example.phrase)) continue;
|
|
1352
|
+
for (const part of example.parts)
|
|
1353
|
+
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1354
|
+
glossSpecificExamples.push(example);
|
|
1355
|
+
seenPhrases.add(example.phrase);
|
|
1356
|
+
break outer;
|
|
1357
|
+
}
|
|
1358
|
+
}
|
|
1359
|
+
if (glossSpecificExamples.length === 5) break;
|
|
1360
|
+
}
|
|
1361
|
+
if (glossSpecificExamples.length === 5)
|
|
1362
|
+
wordExamples = glossSpecificExamples;
|
|
1363
|
+
else if (glossSpecificExamples.length > 0) {
|
|
1364
|
+
const seenPhrases2 = new Set(
|
|
1365
|
+
glossSpecificExamples.map((ex) => ex.phrase)
|
|
1366
|
+
);
|
|
1367
|
+
wordExamples = [
|
|
1368
|
+
...glossSpecificExamples,
|
|
1369
|
+
...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
|
|
1370
|
+
];
|
|
1371
|
+
}
|
|
1372
|
+
if (wordExamples.length > 0)
|
|
1373
|
+
entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
|
|
1374
|
+
return entryObj;
|
|
1375
|
+
});
|
|
1376
|
+
}
|
|
1348
1377
|
return dict;
|
|
1349
1378
|
} catch (err) {
|
|
1350
1379
|
throw err;
|