henkan 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +123 -120
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +119 -118
- package/dist/index.mjs.map +3 -3
- package/dist/types/types.d.ts +5 -3
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/makeSSML.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +1 -1
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +19 -17
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +4 -4
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +6 -6
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +9 -9
- package/docs/api/interfaces/Word.md +16 -16
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1168,10 +1168,18 @@ function convertJMdict(xmlString, examples) {
|
|
|
1168
1168
|
noent: true,
|
|
1169
1169
|
recover: false
|
|
1170
1170
|
});
|
|
1171
|
-
|
|
1172
|
-
const partMatches = /* @__PURE__ */ new Set();
|
|
1171
|
+
const dict = [];
|
|
1173
1172
|
xml.parseString(dictParsed, (err, result) => {
|
|
1174
1173
|
if (err) throw err;
|
|
1174
|
+
const tanakaParts = examples && examples.length > 0 ? new Set(
|
|
1175
|
+
examples.map(
|
|
1176
|
+
(example) => example.parts.map((part) => [
|
|
1177
|
+
part.baseForm,
|
|
1178
|
+
...part.reading ? [part.reading] : [],
|
|
1179
|
+
...part.referenceID ? [part.referenceID] : []
|
|
1180
|
+
])
|
|
1181
|
+
).flat(2)
|
|
1182
|
+
) : void 0;
|
|
1175
1183
|
if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
|
|
1176
1184
|
for (const entry of result.JMdict.entry) {
|
|
1177
1185
|
const entryObj = {
|
|
@@ -1263,117 +1271,43 @@ function convertJMdict(xmlString, examples) {
|
|
|
1263
1271
|
entryObj.usuallyInKana = true;
|
|
1264
1272
|
}
|
|
1265
1273
|
if (examples) {
|
|
1266
|
-
const readings2 =
|
|
1267
|
-
|
|
1268
|
-
(
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1274
|
+
const readings2 = new Set(
|
|
1275
|
+
entryObj.readings.filter(
|
|
1276
|
+
(reading) => (!reading.notes || !reading.notes.some(
|
|
1277
|
+
(note) => notSearchedForms.has(note)
|
|
1278
|
+
)) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
|
|
1279
|
+
).map((reading) => reading.reading)
|
|
1280
|
+
);
|
|
1281
|
+
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
1282
|
+
entryObj.kanjiForms.filter(
|
|
1283
|
+
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1284
|
+
(note) => notSearchedForms.has(note)
|
|
1285
|
+
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1286
|
+
).map((kanjiForm) => kanjiForm.form)
|
|
1287
|
+
) : void 0;
|
|
1288
|
+
let existsExample = false;
|
|
1289
|
+
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
1290
|
+
for (const kf of kanjiForms2)
|
|
1291
|
+
if (tanakaParts.has(kf)) {
|
|
1292
|
+
existsExample = true;
|
|
1293
|
+
break;
|
|
1294
|
+
}
|
|
1295
|
+
}
|
|
1296
|
+
if (!existsExample && readings2.size > 0 && tanakaParts) {
|
|
1297
|
+
for (const r of readings2)
|
|
1298
|
+
if (tanakaParts.has(r)) {
|
|
1299
|
+
existsExample = true;
|
|
1300
|
+
break;
|
|
1301
|
+
}
|
|
1302
|
+
}
|
|
1303
|
+
if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
|
|
1304
|
+
existsExample = true;
|
|
1305
|
+
if (existsExample) entryObj.hasPhrases = true;
|
|
1280
1306
|
}
|
|
1281
1307
|
if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
|
|
1282
1308
|
dict.push(entryObj);
|
|
1283
1309
|
}
|
|
1284
1310
|
});
|
|
1285
|
-
if (examples && dict.length > 0) {
|
|
1286
|
-
const filteredExamples = examples.filter(
|
|
1287
|
-
(ex) => {
|
|
1288
|
-
const parts = ex.parts.flatMap((part) => [
|
|
1289
|
-
part.baseForm,
|
|
1290
|
-
...part.reading ? [part.reading] : [],
|
|
1291
|
-
...part.referenceID ? [part.referenceID] : []
|
|
1292
|
-
]);
|
|
1293
|
-
for (const part of parts) if (partMatches.has(part)) return true;
|
|
1294
|
-
return false;
|
|
1295
|
-
}
|
|
1296
|
-
);
|
|
1297
|
-
dict = dict.map((entryObj) => {
|
|
1298
|
-
const readings = new Set(
|
|
1299
|
-
entryObj.readings.filter(
|
|
1300
|
-
(reading) => (!reading.notes || !reading.notes.some(
|
|
1301
|
-
(note) => notSearchedForms.has(note)
|
|
1302
|
-
)) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
|
|
1303
|
-
).map((reading) => reading.reading)
|
|
1304
|
-
);
|
|
1305
|
-
const kanjiForms = entryObj.kanjiForms ? new Set(
|
|
1306
|
-
entryObj.kanjiForms.filter(
|
|
1307
|
-
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1308
|
-
(note) => notSearchedForms.has(note)
|
|
1309
|
-
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1310
|
-
).map((kanjiForm) => kanjiForm.form)
|
|
1311
|
-
) : void 0;
|
|
1312
|
-
const kanjiFormExamples = [];
|
|
1313
|
-
const readingMatchingKanjiFormExamples = [];
|
|
1314
|
-
const readingExamples = [];
|
|
1315
|
-
const partParts = /* @__PURE__ */ new Set();
|
|
1316
|
-
for (const example of filteredExamples)
|
|
1317
|
-
for (const part of example.parts) {
|
|
1318
|
-
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
1319
|
-
if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
|
|
1320
|
-
if (readingAsReadingMatch) {
|
|
1321
|
-
readingMatchingKanjiFormExamples.push(example);
|
|
1322
|
-
partParts.add(part.baseForm).add(part.reading);
|
|
1323
|
-
} else {
|
|
1324
|
-
kanjiFormExamples.push(example);
|
|
1325
|
-
partParts.add(part.baseForm);
|
|
1326
|
-
}
|
|
1327
|
-
break;
|
|
1328
|
-
}
|
|
1329
|
-
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
1330
|
-
const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
|
|
1331
|
-
if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
|
|
1332
|
-
readingExamples.push(example);
|
|
1333
|
-
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1334
|
-
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1335
|
-
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1336
|
-
break;
|
|
1337
|
-
}
|
|
1338
|
-
}
|
|
1339
|
-
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1340
|
-
const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
|
|
1341
|
-
const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
|
|
1342
|
-
let wordExamples = [
|
|
1343
|
-
...readingMatchingKanjiFormExamples,
|
|
1344
|
-
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
1345
|
-
...includeReadingExamples ? readingExamples : []
|
|
1346
|
-
];
|
|
1347
|
-
const glossSpecificExamples = [];
|
|
1348
|
-
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1349
|
-
for (let i = 0; i < entryObj.meanings.length; i++) {
|
|
1350
|
-
outer: for (const example of wordExamples) {
|
|
1351
|
-
if (seenPhrases.has(example.phrase)) continue;
|
|
1352
|
-
for (const part of example.parts)
|
|
1353
|
-
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1354
|
-
glossSpecificExamples.push(example);
|
|
1355
|
-
seenPhrases.add(example.phrase);
|
|
1356
|
-
break outer;
|
|
1357
|
-
}
|
|
1358
|
-
}
|
|
1359
|
-
if (glossSpecificExamples.length === 5) break;
|
|
1360
|
-
}
|
|
1361
|
-
if (glossSpecificExamples.length === 5)
|
|
1362
|
-
wordExamples = glossSpecificExamples;
|
|
1363
|
-
else if (glossSpecificExamples.length > 0) {
|
|
1364
|
-
const seenPhrases2 = new Set(
|
|
1365
|
-
glossSpecificExamples.map((ex) => ex.phrase)
|
|
1366
|
-
);
|
|
1367
|
-
wordExamples = [
|
|
1368
|
-
...glossSpecificExamples,
|
|
1369
|
-
...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
|
|
1370
|
-
];
|
|
1371
|
-
}
|
|
1372
|
-
if (wordExamples.length > 0)
|
|
1373
|
-
entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
|
|
1374
|
-
return entryObj;
|
|
1375
|
-
});
|
|
1376
|
-
}
|
|
1377
1311
|
return dict;
|
|
1378
1312
|
} catch (err) {
|
|
1379
1313
|
throw err;
|
|
@@ -1778,16 +1712,83 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1778
1712
|
}
|
|
1779
1713
|
if (word.kanji.length === 0) delete word.kanji;
|
|
1780
1714
|
}
|
|
1781
|
-
if (
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
word.
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1715
|
+
if (dictWord.hasPhrases === true && examples) {
|
|
1716
|
+
const readings = new Set(
|
|
1717
|
+
word.readings.filter(
|
|
1718
|
+
(reading) => (!reading.notes || !reading.notes.some(
|
|
1719
|
+
(note) => notSearchedForms.has(note)
|
|
1720
|
+
)) && (word.common === void 0 || reading.common === true)
|
|
1721
|
+
).map((reading) => reading.reading)
|
|
1722
|
+
);
|
|
1723
|
+
const kanjiForms = word.kanjiForms ? new Set(
|
|
1724
|
+
word.kanjiForms.filter(
|
|
1725
|
+
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1726
|
+
(note) => notSearchedForms.has(note)
|
|
1727
|
+
)) && (word.common === void 0 || kanjiForm.common === true)
|
|
1728
|
+
).map((kanjiForm) => kanjiForm.kanjiForm)
|
|
1729
|
+
) : void 0;
|
|
1730
|
+
const kanjiFormExamples = [];
|
|
1731
|
+
const readingMatchingKanjiFormExamples = [];
|
|
1732
|
+
const readingExamples = [];
|
|
1733
|
+
const partParts = /* @__PURE__ */ new Set();
|
|
1734
|
+
for (const example of examples)
|
|
1735
|
+
for (const part of example.parts) {
|
|
1736
|
+
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
1737
|
+
if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
|
|
1738
|
+
if (readingAsReadingMatch) {
|
|
1739
|
+
readingMatchingKanjiFormExamples.push(example);
|
|
1740
|
+
partParts.add(part.baseForm).add(part.reading);
|
|
1741
|
+
} else {
|
|
1742
|
+
kanjiFormExamples.push(example);
|
|
1743
|
+
partParts.add(part.baseForm);
|
|
1744
|
+
}
|
|
1745
|
+
break;
|
|
1746
|
+
}
|
|
1747
|
+
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
1748
|
+
const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
|
|
1749
|
+
if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
|
|
1750
|
+
readingExamples.push(example);
|
|
1751
|
+
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1752
|
+
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1753
|
+
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1754
|
+
break;
|
|
1755
|
+
}
|
|
1756
|
+
}
|
|
1757
|
+
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1758
|
+
const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
|
|
1759
|
+
const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
|
|
1760
|
+
let wordExamples = [
|
|
1761
|
+
...readingMatchingKanjiFormExamples,
|
|
1762
|
+
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
1763
|
+
...includeReadingExamples ? readingExamples : []
|
|
1764
|
+
];
|
|
1765
|
+
const glossSpecificExamples = [];
|
|
1766
|
+
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1767
|
+
for (let i = 0; i < word.translations.length; i++) {
|
|
1768
|
+
outer: for (const example of wordExamples) {
|
|
1769
|
+
if (seenPhrases.has(example.phrase)) continue;
|
|
1770
|
+
for (const part of example.parts)
|
|
1771
|
+
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1772
|
+
glossSpecificExamples.push(example);
|
|
1773
|
+
seenPhrases.add(example.phrase);
|
|
1774
|
+
break outer;
|
|
1775
|
+
}
|
|
1776
|
+
}
|
|
1777
|
+
if (glossSpecificExamples.length === 5) break;
|
|
1778
|
+
}
|
|
1779
|
+
if (glossSpecificExamples.length === 5)
|
|
1780
|
+
wordExamples = [...glossSpecificExamples];
|
|
1781
|
+
else if (glossSpecificExamples.length > 0)
|
|
1782
|
+
wordExamples = [
|
|
1783
|
+
...glossSpecificExamples,
|
|
1784
|
+
...wordExamples.filter((ex) => !seenPhrases.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
|
|
1785
|
+
];
|
|
1786
|
+
if (wordExamples.length > 0)
|
|
1787
|
+
word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
|
|
1788
|
+
phrase: ex.furigana ?? ex.phrase,
|
|
1789
|
+
translation: ex.translation,
|
|
1790
|
+
originalPhrase: ex.phrase
|
|
1791
|
+
}));
|
|
1791
1792
|
}
|
|
1792
1793
|
return word;
|
|
1793
1794
|
} else throw new Error(`Word${id ? ` ${id}` : ""} not found`);
|