henkan 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.cjs.js +54 -33
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +54 -35
- package/dist/index.mjs.map +2 -2
- package/dist/types/types.d.ts +21 -0
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +13 -10
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +1 -0
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +3 -3
- package/docs/api/functions/convertRadkFile.md +3 -3
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/makeSSML.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +25 -13
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/GlossSpecificNumber.md +31 -0
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +16 -4
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +16 -6
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +9 -9
- package/docs/api/interfaces/Word.md +14 -14
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -36,7 +36,7 @@ pnpm add henkan
|
|
|
36
36
|
- JMdict, KANJIDIC, Tanaka Corpus, RADK and KRAD conversion
|
|
37
37
|
- User-friendly schemas for dictionary entries
|
|
38
38
|
- Anki note generation
|
|
39
|
-
- Other useful tools (
|
|
39
|
+
- Other useful tools (TTSFree.com audio generation, Japanese RegExps, array checking etc.)
|
|
40
40
|
|
|
41
41
|
---
|
|
42
42
|
|
package/dist/index.cjs.js
CHANGED
|
@@ -1187,7 +1187,7 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
1187
1187
|
var import_libxmljs2 = __toESM(require("libxmljs2"));
|
|
1188
1188
|
var import_xml2js = __toESM(require("xml2js"));
|
|
1189
1189
|
var import_iconv_lite = __toESM(require("iconv-lite"));
|
|
1190
|
-
var
|
|
1190
|
+
var import_node_fetch = __toESM(require("node-fetch"));
|
|
1191
1191
|
var Kuroshiro = require("kuroshiro");
|
|
1192
1192
|
var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
|
|
1193
1193
|
function capitalizeString(value) {
|
|
@@ -1333,11 +1333,9 @@ function convertJMdict(xmlString, examples) {
|
|
|
1333
1333
|
).map((reading) => reading.reading)
|
|
1334
1334
|
);
|
|
1335
1335
|
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
1336
|
-
entryObj.kanjiForms.
|
|
1337
|
-
(kanjiForm) =>
|
|
1338
|
-
|
|
1339
|
-
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1340
|
-
).map((kanjiForm) => kanjiForm.form)
|
|
1336
|
+
entryObj.kanjiForms.map(
|
|
1337
|
+
(kanjiForm) => kanjiForm.form
|
|
1338
|
+
)
|
|
1341
1339
|
) : void 0;
|
|
1342
1340
|
let existsExample = false;
|
|
1343
1341
|
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
@@ -1519,13 +1517,13 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1519
1517
|
try {
|
|
1520
1518
|
const fileParsed = import_iconv_lite.default.decode(radkBuffer, "euc-jp").split("\n").filter((line) => !line.startsWith("#"));
|
|
1521
1519
|
const radicals = [];
|
|
1522
|
-
for (let i = 0; i
|
|
1520
|
+
for (let i = 0; i < fileParsed.length; i++) {
|
|
1523
1521
|
const line = fileParsed[i];
|
|
1524
1522
|
if (!line) continue;
|
|
1525
1523
|
if (line.startsWith("$ ")) {
|
|
1526
1524
|
const radical = {
|
|
1527
|
-
radical: line.charAt(2),
|
|
1528
|
-
strokes: line.substring(4)
|
|
1525
|
+
radical: line.charAt(2).trim(),
|
|
1526
|
+
strokes: line.substring(4).trim()
|
|
1529
1527
|
};
|
|
1530
1528
|
let j = i + 1;
|
|
1531
1529
|
let kanjiLine = fileParsed[j];
|
|
@@ -1538,6 +1536,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1538
1536
|
(dictKanji) => dictKanji.kanji === kanji
|
|
1539
1537
|
);
|
|
1540
1538
|
if (foundKanji) kanjiList.push(foundKanji);
|
|
1539
|
+
else kanjiList.push({ kanji, readingMeaning: [] });
|
|
1541
1540
|
}
|
|
1542
1541
|
j++;
|
|
1543
1542
|
kanjiLine = fileParsed[j];
|
|
@@ -1564,7 +1563,7 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
|
|
|
1564
1563
|
const split = line.split(" : ");
|
|
1565
1564
|
const kanjiChar = split[0];
|
|
1566
1565
|
const radicalsRow = split[1];
|
|
1567
|
-
if (!kanjiChar || !radicalsRow)
|
|
1566
|
+
if (!kanjiChar || !radicalsRow) continue;
|
|
1568
1567
|
const kanji = {
|
|
1569
1568
|
...kanjiChar && radicalsRow && kanjiChar.length === 1 && radicalsRow.length > 0 ? { kanji: kanjiChar } : { kanji: "" },
|
|
1570
1569
|
radicals: []
|
|
@@ -1777,11 +1776,9 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1777
1776
|
).map((reading) => reading.reading)
|
|
1778
1777
|
);
|
|
1779
1778
|
const kanjiForms = word.kanjiForms ? new Set(
|
|
1780
|
-
word.kanjiForms.
|
|
1781
|
-
(kanjiForm) =>
|
|
1782
|
-
|
|
1783
|
-
)) && (word.common === void 0 || kanjiForm.common === true)
|
|
1784
|
-
).map((kanjiForm) => kanjiForm.kanjiForm)
|
|
1779
|
+
word.kanjiForms.map(
|
|
1780
|
+
(kanjiForm) => kanjiForm.kanjiForm
|
|
1781
|
+
)
|
|
1785
1782
|
) : void 0;
|
|
1786
1783
|
const kanjiFormExamples = [];
|
|
1787
1784
|
const readingMatchingKanjiFormExamples = [];
|
|
@@ -1789,7 +1786,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1789
1786
|
const partParts = /* @__PURE__ */ new Set();
|
|
1790
1787
|
for (const example of examples)
|
|
1791
1788
|
for (const part of example.parts) {
|
|
1792
|
-
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
1789
|
+
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading) || part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
|
|
1793
1790
|
if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
|
|
1794
1791
|
if (readingAsReadingMatch) {
|
|
1795
1792
|
readingMatchingKanjiFormExamples.push(example);
|
|
@@ -1802,17 +1799,20 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1802
1799
|
}
|
|
1803
1800
|
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
1804
1801
|
const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
|
|
1805
|
-
if (
|
|
1802
|
+
if (readingAsBaseFormMatch || referenceIDMatch) {
|
|
1806
1803
|
readingExamples.push(example);
|
|
1807
|
-
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1808
1804
|
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1809
1805
|
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1810
1806
|
break;
|
|
1811
1807
|
}
|
|
1812
1808
|
}
|
|
1813
1809
|
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1814
|
-
const
|
|
1815
|
-
|
|
1810
|
+
const includeReadingThreshold = Math.max(
|
|
1811
|
+
10,
|
|
1812
|
+
Math.round(exampleSize * 0.5)
|
|
1813
|
+
);
|
|
1814
|
+
const includeKanjiFormExamples = word.kanjiForms !== void 0;
|
|
1815
|
+
const includeReadingExamples = readingExamples.length >= includeReadingThreshold && readingExamples.length >= readingMatchingKanjiFormExamples.length && readingExamples.length >= kanjiFormExamples.length || readingExamples.length >= includeReadingThreshold && word.usuallyInKana === true || word.kanjiForms === void 0;
|
|
1816
1816
|
let wordExamples = [
|
|
1817
1817
|
...readingMatchingKanjiFormExamples,
|
|
1818
1818
|
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
@@ -1824,7 +1824,11 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1824
1824
|
outer: for (const example of wordExamples) {
|
|
1825
1825
|
if (seenPhrases.has(example.phrase)) continue;
|
|
1826
1826
|
for (const part of example.parts)
|
|
1827
|
-
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1827
|
+
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || includeReadingExamples && (part.reading && partParts.has(part.reading) || part.inflectedForm && partParts.has(part.inflectedForm) || part.referenceID && partParts.has(part.referenceID)))) {
|
|
1828
|
+
example.glossNumber = {
|
|
1829
|
+
wordId: word.id,
|
|
1830
|
+
glossNumber: i + 1
|
|
1831
|
+
};
|
|
1828
1832
|
glossSpecificExamples.push(example);
|
|
1829
1833
|
seenPhrases.add(example.phrase);
|
|
1830
1834
|
break outer;
|
|
@@ -1845,7 +1849,8 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1845
1849
|
return {
|
|
1846
1850
|
phrase: (_a = ex.furigana) != null ? _a : ex.phrase,
|
|
1847
1851
|
translation: ex.translation,
|
|
1848
|
-
originalPhrase: ex.phrase
|
|
1852
|
+
originalPhrase: ex.phrase,
|
|
1853
|
+
...ex.glossNumber ? { glossNumber: ex.glossNumber } : {}
|
|
1849
1854
|
};
|
|
1850
1855
|
});
|
|
1851
1856
|
}
|
|
@@ -2124,18 +2129,34 @@ function makeSSML(formText, fullReading) {
|
|
|
2124
2129
|
}
|
|
2125
2130
|
return ssml;
|
|
2126
2131
|
}
|
|
2127
|
-
async function synthesizeSpeech(
|
|
2132
|
+
async function synthesizeSpeech(ssmlText, apiKey, options) {
|
|
2128
2133
|
return await new Promise(
|
|
2129
2134
|
async (resolve, reject) => {
|
|
2130
2135
|
try {
|
|
2131
|
-
const
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2136
|
+
const res = await (0, import_node_fetch.default)("https://ttsfree.com/api/v1/tts", {
|
|
2137
|
+
method: "POST",
|
|
2138
|
+
body: JSON.stringify({
|
|
2139
|
+
text: ssmlText,
|
|
2140
|
+
...options
|
|
2141
|
+
}),
|
|
2142
|
+
headers: {
|
|
2143
|
+
"Content-Type": "application/json",
|
|
2144
|
+
apikey: apiKey
|
|
2145
|
+
}
|
|
2135
2146
|
});
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2147
|
+
if (!res.ok)
|
|
2148
|
+
throw new Error(
|
|
2149
|
+
`TTS request failed:
|
|
2150
|
+
${res.status}: ${res.statusText}`
|
|
2151
|
+
);
|
|
2152
|
+
const data = await res.json();
|
|
2153
|
+
if (data.status !== "success" || data.mess !== "success" || data.audioData.length === 0)
|
|
2154
|
+
throw new Error("Invalid TTS response data");
|
|
2155
|
+
const mp3Buffer = Buffer.from(
|
|
2156
|
+
data.audioData,
|
|
2157
|
+
"base64"
|
|
2158
|
+
);
|
|
2159
|
+
resolve(mp3Buffer);
|
|
2139
2160
|
} catch (err) {
|
|
2140
2161
|
reject(err);
|
|
2141
2162
|
}
|
|
@@ -2182,7 +2203,7 @@ function generateAnkiNote(entry) {
|
|
|
2182
2203
|
).join("") : noKanjiForms
|
|
2183
2204
|
],
|
|
2184
2205
|
entry.translations.map(
|
|
2185
|
-
(translationEntry, index) =>
|
|
2206
|
+
(translationEntry, index) => `<span class="word word-index${entry.phrases && entry.phrases.some((phrase, index2) => index === index2 && phrase.glossNumber && phrase.glossNumber.wordId === entry.id && phrase.glossNumber.glossNumber === index + 1) ? " gloss-specific" : ""}">${index + 1}</span>${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes)}${index > 2 ? "</details>" : ""}`
|
|
2186
2207
|
).join(""),
|
|
2187
2208
|
entry.kanji ? entry.kanji.map(
|
|
2188
2209
|
(kanjiEntry) => createEntry(
|
|
@@ -2191,11 +2212,11 @@ function generateAnkiNote(entry) {
|
|
|
2191
2212
|
)
|
|
2192
2213
|
).join("") : '<span class="word word-kanji">(no kanji)</span>',
|
|
2193
2214
|
entry.phrases ? entry.phrases.map(
|
|
2194
|
-
(phraseEntry) => createEntry(
|
|
2215
|
+
(phraseEntry, index) => `<span class="word word-index${entry.translations.some((_translation, index2) => index === index2 && phraseEntry.glossNumber && phraseEntry.glossNumber.wordId === entry.id && phraseEntry.glossNumber.glossNumber === index2 + 1) ? " gloss-specific" : ""}">${index + 1}</span>${createEntry(
|
|
2195
2216
|
`<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
|
|
2196
2217
|
[phraseEntry.translation],
|
|
2197
2218
|
true
|
|
2198
|
-
)
|
|
2219
|
+
)}`
|
|
2199
2220
|
).join("") : '<span class="word word-phrase">(no phrases) (Search on dictionaries!)</span>',
|
|
2200
2221
|
...entry.tags && entry.tags.length > 0 ? [
|
|
2201
2222
|
entry.tags.map(
|