henkan 0.4.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs.js +127 -124
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +123 -122
- package/dist/index.mjs.map +3 -3
- package/dist/types/types.d.ts +9 -7
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/makeSSML.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +1 -1
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +7 -7
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +19 -17
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +4 -4
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +6 -6
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +9 -9
- package/docs/api/interfaces/Word.md +16 -16
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1168,10 +1168,18 @@ function convertJMdict(xmlString, examples) {
|
|
|
1168
1168
|
noent: true,
|
|
1169
1169
|
recover: false
|
|
1170
1170
|
});
|
|
1171
|
-
|
|
1172
|
-
const partMatches = /* @__PURE__ */ new Set();
|
|
1171
|
+
const dict = [];
|
|
1173
1172
|
xml.parseString(dictParsed, (err, result) => {
|
|
1174
1173
|
if (err) throw err;
|
|
1174
|
+
const tanakaParts = examples && examples.length > 0 ? new Set(
|
|
1175
|
+
examples.map(
|
|
1176
|
+
(example) => example.parts.map((part) => [
|
|
1177
|
+
part.baseForm,
|
|
1178
|
+
...part.reading ? [part.reading] : [],
|
|
1179
|
+
...part.referenceID ? [part.referenceID] : []
|
|
1180
|
+
])
|
|
1181
|
+
).flat(2)
|
|
1182
|
+
) : void 0;
|
|
1175
1183
|
if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
|
|
1176
1184
|
for (const entry of result.JMdict.entry) {
|
|
1177
1185
|
const entryObj = {
|
|
@@ -1263,117 +1271,43 @@ function convertJMdict(xmlString, examples) {
|
|
|
1263
1271
|
entryObj.usuallyInKana = true;
|
|
1264
1272
|
}
|
|
1265
1273
|
if (examples) {
|
|
1266
|
-
const readings2 =
|
|
1267
|
-
|
|
1268
|
-
(
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1274
|
+
const readings2 = new Set(
|
|
1275
|
+
entryObj.readings.filter(
|
|
1276
|
+
(reading) => (!reading.notes || !reading.notes.some(
|
|
1277
|
+
(note) => notSearchedForms.has(note)
|
|
1278
|
+
)) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
|
|
1279
|
+
).map((reading) => reading.reading)
|
|
1280
|
+
);
|
|
1281
|
+
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
1282
|
+
entryObj.kanjiForms.filter(
|
|
1283
|
+
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1284
|
+
(note) => notSearchedForms.has(note)
|
|
1285
|
+
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1286
|
+
).map((kanjiForm) => kanjiForm.form)
|
|
1287
|
+
) : void 0;
|
|
1288
|
+
let existsExample = false;
|
|
1289
|
+
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
1290
|
+
for (const kf of kanjiForms2)
|
|
1291
|
+
if (tanakaParts.has(kf)) {
|
|
1292
|
+
existsExample = true;
|
|
1293
|
+
break;
|
|
1294
|
+
}
|
|
1295
|
+
}
|
|
1296
|
+
if (!existsExample && readings2.size > 0 && tanakaParts) {
|
|
1297
|
+
for (const r of readings2)
|
|
1298
|
+
if (tanakaParts.has(r)) {
|
|
1299
|
+
existsExample = true;
|
|
1300
|
+
break;
|
|
1301
|
+
}
|
|
1302
|
+
}
|
|
1303
|
+
if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
|
|
1304
|
+
existsExample = true;
|
|
1305
|
+
if (existsExample) entryObj.hasPhrases = true;
|
|
1280
1306
|
}
|
|
1281
1307
|
if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
|
|
1282
1308
|
dict.push(entryObj);
|
|
1283
1309
|
}
|
|
1284
1310
|
});
|
|
1285
|
-
if (examples && dict.length > 0) {
|
|
1286
|
-
const filteredExamples = examples.filter(
|
|
1287
|
-
(ex) => {
|
|
1288
|
-
const parts = ex.parts.flatMap((part) => [
|
|
1289
|
-
part.baseForm,
|
|
1290
|
-
...part.reading ? [part.reading] : [],
|
|
1291
|
-
...part.referenceID ? [part.referenceID] : []
|
|
1292
|
-
]);
|
|
1293
|
-
for (const part of parts) if (partMatches.has(part)) return true;
|
|
1294
|
-
return false;
|
|
1295
|
-
}
|
|
1296
|
-
);
|
|
1297
|
-
dict = dict.map((entryObj) => {
|
|
1298
|
-
const readings = new Set(
|
|
1299
|
-
entryObj.readings.filter(
|
|
1300
|
-
(reading) => (!reading.notes || !reading.notes.some(
|
|
1301
|
-
(note) => notSearchedForms.has(note)
|
|
1302
|
-
)) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
|
|
1303
|
-
).map((reading) => reading.reading)
|
|
1304
|
-
);
|
|
1305
|
-
const kanjiForms = entryObj.kanjiForms ? new Set(
|
|
1306
|
-
entryObj.kanjiForms.filter(
|
|
1307
|
-
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1308
|
-
(note) => notSearchedForms.has(note)
|
|
1309
|
-
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1310
|
-
).map((kanjiForm) => kanjiForm.form)
|
|
1311
|
-
) : void 0;
|
|
1312
|
-
const kanjiFormExamples = [];
|
|
1313
|
-
const readingMatchingKanjiFormExamples = [];
|
|
1314
|
-
const readingExamples = [];
|
|
1315
|
-
const partParts = /* @__PURE__ */ new Set();
|
|
1316
|
-
for (const example of filteredExamples)
|
|
1317
|
-
for (const part of example.parts) {
|
|
1318
|
-
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
1319
|
-
if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
|
|
1320
|
-
if (readingAsReadingMatch) {
|
|
1321
|
-
readingMatchingKanjiFormExamples.push(example);
|
|
1322
|
-
partParts.add(part.baseForm).add(part.reading);
|
|
1323
|
-
} else {
|
|
1324
|
-
kanjiFormExamples.push(example);
|
|
1325
|
-
partParts.add(part.baseForm);
|
|
1326
|
-
}
|
|
1327
|
-
break;
|
|
1328
|
-
}
|
|
1329
|
-
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
1330
|
-
const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
|
|
1331
|
-
if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
|
|
1332
|
-
readingExamples.push(example);
|
|
1333
|
-
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1334
|
-
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1335
|
-
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1336
|
-
break;
|
|
1337
|
-
}
|
|
1338
|
-
}
|
|
1339
|
-
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1340
|
-
const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
|
|
1341
|
-
const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
|
|
1342
|
-
let wordExamples = [
|
|
1343
|
-
...readingMatchingKanjiFormExamples,
|
|
1344
|
-
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
1345
|
-
...includeReadingExamples ? readingExamples : []
|
|
1346
|
-
];
|
|
1347
|
-
const glossSpecificExamples = [];
|
|
1348
|
-
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1349
|
-
for (let i = 0; i < entryObj.meanings.length; i++) {
|
|
1350
|
-
outer: for (const example of wordExamples) {
|
|
1351
|
-
if (seenPhrases.has(example.phrase)) continue;
|
|
1352
|
-
for (const part of example.parts)
|
|
1353
|
-
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1354
|
-
glossSpecificExamples.push(example);
|
|
1355
|
-
seenPhrases.add(example.phrase);
|
|
1356
|
-
break outer;
|
|
1357
|
-
}
|
|
1358
|
-
}
|
|
1359
|
-
if (glossSpecificExamples.length === 5) break;
|
|
1360
|
-
}
|
|
1361
|
-
if (glossSpecificExamples.length === 5)
|
|
1362
|
-
wordExamples = glossSpecificExamples;
|
|
1363
|
-
else if (glossSpecificExamples.length > 0) {
|
|
1364
|
-
const seenPhrases2 = new Set(
|
|
1365
|
-
glossSpecificExamples.map((ex) => ex.phrase)
|
|
1366
|
-
);
|
|
1367
|
-
wordExamples = [
|
|
1368
|
-
...glossSpecificExamples,
|
|
1369
|
-
...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
|
|
1370
|
-
];
|
|
1371
|
-
}
|
|
1372
|
-
if (wordExamples.length > 0)
|
|
1373
|
-
entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
|
|
1374
|
-
return entryObj;
|
|
1375
|
-
});
|
|
1376
|
-
}
|
|
1377
1311
|
return dict;
|
|
1378
1312
|
} catch (err) {
|
|
1379
1313
|
throw err;
|
|
@@ -1537,27 +1471,27 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1537
1471
|
if (line.startsWith("$ ")) {
|
|
1538
1472
|
const radical = {
|
|
1539
1473
|
radical: line.charAt(2),
|
|
1540
|
-
kanji: [],
|
|
1541
1474
|
strokes: line.substring(4)
|
|
1542
1475
|
};
|
|
1543
1476
|
let j = i + 1;
|
|
1544
1477
|
let kanjiLine = fileParsed[j];
|
|
1545
1478
|
if (!kanjiLine) continue;
|
|
1479
|
+
const kanjiList = [];
|
|
1546
1480
|
while (kanjiLine && !kanjiLine.startsWith("$ ")) {
|
|
1547
1481
|
const kanjis = kanjiLine.split("");
|
|
1548
1482
|
for (const kanji of kanjis) {
|
|
1549
1483
|
const foundKanji = kanjiDic.find(
|
|
1550
1484
|
(dictKanji) => dictKanji.kanji === kanji
|
|
1551
1485
|
);
|
|
1552
|
-
if (
|
|
1553
|
-
radical.kanji.push(foundKanji);
|
|
1486
|
+
if (foundKanji) kanjiList.push(foundKanji);
|
|
1554
1487
|
}
|
|
1555
1488
|
j++;
|
|
1556
1489
|
kanjiLine = fileParsed[j];
|
|
1557
1490
|
if (!kanjiLine) continue;
|
|
1558
1491
|
if (kanjiLine.startsWith("$ ")) i = j - 1;
|
|
1559
1492
|
}
|
|
1560
|
-
if (
|
|
1493
|
+
if (kanjiList.length > 0) radical.kanji = kanjiList;
|
|
1494
|
+
if (radical.radical.length > 0 && radical.strokes.length > 0)
|
|
1561
1495
|
radicals.push(radical);
|
|
1562
1496
|
}
|
|
1563
1497
|
}
|
|
@@ -1778,16 +1712,83 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1778
1712
|
}
|
|
1779
1713
|
if (word.kanji.length === 0) delete word.kanji;
|
|
1780
1714
|
}
|
|
1781
|
-
if (
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
word.
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1715
|
+
if (dictWord.hasPhrases === true && examples) {
|
|
1716
|
+
const readings = new Set(
|
|
1717
|
+
word.readings.filter(
|
|
1718
|
+
(reading) => (!reading.notes || !reading.notes.some(
|
|
1719
|
+
(note) => notSearchedForms.has(note)
|
|
1720
|
+
)) && (word.common === void 0 || reading.common === true)
|
|
1721
|
+
).map((reading) => reading.reading)
|
|
1722
|
+
);
|
|
1723
|
+
const kanjiForms = word.kanjiForms ? new Set(
|
|
1724
|
+
word.kanjiForms.filter(
|
|
1725
|
+
(kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
|
|
1726
|
+
(note) => notSearchedForms.has(note)
|
|
1727
|
+
)) && (word.common === void 0 || kanjiForm.common === true)
|
|
1728
|
+
).map((kanjiForm) => kanjiForm.kanjiForm)
|
|
1729
|
+
) : void 0;
|
|
1730
|
+
const kanjiFormExamples = [];
|
|
1731
|
+
const readingMatchingKanjiFormExamples = [];
|
|
1732
|
+
const readingExamples = [];
|
|
1733
|
+
const partParts = /* @__PURE__ */ new Set();
|
|
1734
|
+
for (const example of examples)
|
|
1735
|
+
for (const part of example.parts) {
|
|
1736
|
+
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
1737
|
+
if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
|
|
1738
|
+
if (readingAsReadingMatch) {
|
|
1739
|
+
readingMatchingKanjiFormExamples.push(example);
|
|
1740
|
+
partParts.add(part.baseForm).add(part.reading);
|
|
1741
|
+
} else {
|
|
1742
|
+
kanjiFormExamples.push(example);
|
|
1743
|
+
partParts.add(part.baseForm);
|
|
1744
|
+
}
|
|
1745
|
+
break;
|
|
1746
|
+
}
|
|
1747
|
+
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
1748
|
+
const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
|
|
1749
|
+
if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
|
|
1750
|
+
readingExamples.push(example);
|
|
1751
|
+
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1752
|
+
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1753
|
+
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1754
|
+
break;
|
|
1755
|
+
}
|
|
1756
|
+
}
|
|
1757
|
+
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1758
|
+
const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
|
|
1759
|
+
const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
|
|
1760
|
+
let wordExamples = [
|
|
1761
|
+
...readingMatchingKanjiFormExamples,
|
|
1762
|
+
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
1763
|
+
...includeReadingExamples ? readingExamples : []
|
|
1764
|
+
];
|
|
1765
|
+
const glossSpecificExamples = [];
|
|
1766
|
+
const seenPhrases = /* @__PURE__ */ new Set();
|
|
1767
|
+
for (let i = 0; i < word.translations.length; i++) {
|
|
1768
|
+
outer: for (const example of wordExamples) {
|
|
1769
|
+
if (seenPhrases.has(example.phrase)) continue;
|
|
1770
|
+
for (const part of example.parts)
|
|
1771
|
+
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1772
|
+
glossSpecificExamples.push(example);
|
|
1773
|
+
seenPhrases.add(example.phrase);
|
|
1774
|
+
break outer;
|
|
1775
|
+
}
|
|
1776
|
+
}
|
|
1777
|
+
if (glossSpecificExamples.length === 5) break;
|
|
1778
|
+
}
|
|
1779
|
+
if (glossSpecificExamples.length === 5)
|
|
1780
|
+
wordExamples = [...glossSpecificExamples];
|
|
1781
|
+
else if (glossSpecificExamples.length > 0)
|
|
1782
|
+
wordExamples = [
|
|
1783
|
+
...glossSpecificExamples,
|
|
1784
|
+
...wordExamples.filter((ex) => !seenPhrases.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
|
|
1785
|
+
];
|
|
1786
|
+
if (wordExamples.length > 0)
|
|
1787
|
+
word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
|
|
1788
|
+
phrase: ex.furigana ?? ex.phrase,
|
|
1789
|
+
translation: ex.translation,
|
|
1790
|
+
originalPhrase: ex.phrase
|
|
1791
|
+
}));
|
|
1791
1792
|
}
|
|
1792
1793
|
return word;
|
|
1793
1794
|
} else throw new Error(`Word${id ? ` ${id}` : ""} not found`);
|