henkan 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.cjs.js +53 -34
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +53 -36
- package/dist/index.mjs.map +2 -2
- package/dist/types/types.d.ts +21 -0
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +9 -6
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +1 -0
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/makeSSML.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +25 -13
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/GlossSpecificNumber.md +31 -0
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +16 -4
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +16 -6
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +9 -9
- package/docs/api/interfaces/Word.md +14 -14
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -36,7 +36,7 @@ pnpm add henkan
|
|
|
36
36
|
- JMdict, KANJIDIC, Tanaka Corpus, RADK and KRAD conversion
|
|
37
37
|
- User-friendly schemas for dictionary entries
|
|
38
38
|
- Anki note generation
|
|
39
|
-
- Other useful tools (
|
|
39
|
+
- Other useful tools (TTSFree.com audio generation, Japanese RegExps, array checking etc.)
|
|
40
40
|
|
|
41
41
|
---
|
|
42
42
|
|
package/dist/index.cjs.js
CHANGED
|
@@ -1187,7 +1187,7 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
1187
1187
|
var import_libxmljs2 = __toESM(require("libxmljs2"));
|
|
1188
1188
|
var import_xml2js = __toESM(require("xml2js"));
|
|
1189
1189
|
var import_iconv_lite = __toESM(require("iconv-lite"));
|
|
1190
|
-
var
|
|
1190
|
+
var import_node_fetch = __toESM(require("node-fetch"));
|
|
1191
1191
|
var Kuroshiro = require("kuroshiro");
|
|
1192
1192
|
var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
|
|
1193
1193
|
function capitalizeString(value) {
|
|
@@ -1333,11 +1333,9 @@ function convertJMdict(xmlString, examples) {
|
|
|
1333
1333
|
).map((reading) => reading.reading)
|
|
1334
1334
|
);
|
|
1335
1335
|
const kanjiForms2 = entryObj.kanjiForms ? new Set(
|
|
1336
|
-
entryObj.kanjiForms.
|
|
1337
|
-
(kanjiForm) =>
|
|
1338
|
-
|
|
1339
|
-
)) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
|
|
1340
|
-
).map((kanjiForm) => kanjiForm.form)
|
|
1336
|
+
entryObj.kanjiForms.map(
|
|
1337
|
+
(kanjiForm) => kanjiForm.form
|
|
1338
|
+
)
|
|
1341
1339
|
) : void 0;
|
|
1342
1340
|
let existsExample = false;
|
|
1343
1341
|
if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
|
|
@@ -1519,9 +1517,9 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1519
1517
|
try {
|
|
1520
1518
|
const fileParsed = import_iconv_lite.default.decode(radkBuffer, "euc-jp").split("\n").filter((line) => !line.startsWith("#"));
|
|
1521
1519
|
const radicals = [];
|
|
1522
|
-
for (let i = 0; i
|
|
1520
|
+
for (let i = 0; i < fileParsed.length; i++) {
|
|
1523
1521
|
const line = fileParsed[i];
|
|
1524
|
-
if (!line)
|
|
1522
|
+
if (!line) continue;
|
|
1525
1523
|
if (line.startsWith("$ ")) {
|
|
1526
1524
|
const radical = {
|
|
1527
1525
|
radical: line.charAt(2).trim(),
|
|
@@ -1529,7 +1527,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
|
|
|
1529
1527
|
};
|
|
1530
1528
|
let j = i + 1;
|
|
1531
1529
|
let kanjiLine = fileParsed[j];
|
|
1532
|
-
if (!kanjiLine)
|
|
1530
|
+
if (!kanjiLine) continue;
|
|
1533
1531
|
const kanjiList = [];
|
|
1534
1532
|
while (kanjiLine && !kanjiLine.startsWith("$ ")) {
|
|
1535
1533
|
const kanjis = kanjiLine.split("");
|
|
@@ -1565,8 +1563,7 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
|
|
|
1565
1563
|
const split = line.split(" : ");
|
|
1566
1564
|
const kanjiChar = split[0];
|
|
1567
1565
|
const radicalsRow = split[1];
|
|
1568
|
-
if (!kanjiChar || !radicalsRow)
|
|
1569
|
-
throw new Error("Invalid kradfile2 buffer");
|
|
1566
|
+
if (!kanjiChar || !radicalsRow) continue;
|
|
1570
1567
|
const kanji = {
|
|
1571
1568
|
...kanjiChar && radicalsRow && kanjiChar.length === 1 && radicalsRow.length > 0 ? { kanji: kanjiChar } : { kanji: "" },
|
|
1572
1569
|
radicals: []
|
|
@@ -1779,11 +1776,9 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1779
1776
|
).map((reading) => reading.reading)
|
|
1780
1777
|
);
|
|
1781
1778
|
const kanjiForms = word.kanjiForms ? new Set(
|
|
1782
|
-
word.kanjiForms.
|
|
1783
|
-
(kanjiForm) =>
|
|
1784
|
-
|
|
1785
|
-
)) && (word.common === void 0 || kanjiForm.common === true)
|
|
1786
|
-
).map((kanjiForm) => kanjiForm.kanjiForm)
|
|
1779
|
+
word.kanjiForms.map(
|
|
1780
|
+
(kanjiForm) => kanjiForm.kanjiForm
|
|
1781
|
+
)
|
|
1787
1782
|
) : void 0;
|
|
1788
1783
|
const kanjiFormExamples = [];
|
|
1789
1784
|
const readingMatchingKanjiFormExamples = [];
|
|
@@ -1791,7 +1786,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1791
1786
|
const partParts = /* @__PURE__ */ new Set();
|
|
1792
1787
|
for (const example of examples)
|
|
1793
1788
|
for (const part of example.parts) {
|
|
1794
|
-
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
|
|
1789
|
+
const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading) || part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
|
|
1795
1790
|
if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
|
|
1796
1791
|
if (readingAsReadingMatch) {
|
|
1797
1792
|
readingMatchingKanjiFormExamples.push(example);
|
|
@@ -1804,17 +1799,20 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1804
1799
|
}
|
|
1805
1800
|
const readingAsBaseFormMatch = readings.has(part.baseForm);
|
|
1806
1801
|
const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
|
|
1807
|
-
if (
|
|
1802
|
+
if (readingAsBaseFormMatch || referenceIDMatch) {
|
|
1808
1803
|
readingExamples.push(example);
|
|
1809
|
-
if (readingAsReadingMatch) partParts.add(part.reading);
|
|
1810
1804
|
if (readingAsBaseFormMatch) partParts.add(part.baseForm);
|
|
1811
1805
|
if (referenceIDMatch) partParts.add(part.referenceID);
|
|
1812
1806
|
break;
|
|
1813
1807
|
}
|
|
1814
1808
|
}
|
|
1815
1809
|
const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
|
|
1816
|
-
const
|
|
1817
|
-
|
|
1810
|
+
const includeReadingThreshold = Math.max(
|
|
1811
|
+
10,
|
|
1812
|
+
Math.round(exampleSize * 0.5)
|
|
1813
|
+
);
|
|
1814
|
+
const includeKanjiFormExamples = word.kanjiForms !== void 0;
|
|
1815
|
+
const includeReadingExamples = readingExamples.length >= includeReadingThreshold && readingExamples.length >= readingMatchingKanjiFormExamples.length && readingExamples.length >= kanjiFormExamples.length || readingExamples.length >= includeReadingThreshold && word.usuallyInKana === true || word.kanjiForms === void 0;
|
|
1818
1816
|
let wordExamples = [
|
|
1819
1817
|
...readingMatchingKanjiFormExamples,
|
|
1820
1818
|
...includeKanjiFormExamples ? kanjiFormExamples : [],
|
|
@@ -1826,7 +1824,11 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1826
1824
|
outer: for (const example of wordExamples) {
|
|
1827
1825
|
if (seenPhrases.has(example.phrase)) continue;
|
|
1828
1826
|
for (const part of example.parts)
|
|
1829
|
-
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
|
|
1827
|
+
if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || includeReadingExamples && (part.reading && partParts.has(part.reading) || part.inflectedForm && partParts.has(part.inflectedForm) || part.referenceID && partParts.has(part.referenceID)))) {
|
|
1828
|
+
example.glossNumber = {
|
|
1829
|
+
wordId: word.id,
|
|
1830
|
+
glossNumber: i + 1
|
|
1831
|
+
};
|
|
1830
1832
|
glossSpecificExamples.push(example);
|
|
1831
1833
|
seenPhrases.add(example.phrase);
|
|
1832
1834
|
break outer;
|
|
@@ -1847,7 +1849,8 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
|
|
|
1847
1849
|
return {
|
|
1848
1850
|
phrase: (_a = ex.furigana) != null ? _a : ex.phrase,
|
|
1849
1851
|
translation: ex.translation,
|
|
1850
|
-
originalPhrase: ex.phrase
|
|
1852
|
+
originalPhrase: ex.phrase,
|
|
1853
|
+
...ex.glossNumber ? { glossNumber: ex.glossNumber } : {}
|
|
1851
1854
|
};
|
|
1852
1855
|
});
|
|
1853
1856
|
}
|
|
@@ -2126,18 +2129,34 @@ function makeSSML(formText, fullReading) {
|
|
|
2126
2129
|
}
|
|
2127
2130
|
return ssml;
|
|
2128
2131
|
}
|
|
2129
|
-
async function synthesizeSpeech(
|
|
2132
|
+
async function synthesizeSpeech(ssmlText, apiKey, options) {
|
|
2130
2133
|
return await new Promise(
|
|
2131
2134
|
async (resolve, reject) => {
|
|
2132
2135
|
try {
|
|
2133
|
-
const
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
|
|
2136
|
+
const res = await (0, import_node_fetch.default)("https://ttsfree.com/api/v1/tts", {
|
|
2137
|
+
method: "POST",
|
|
2138
|
+
body: JSON.stringify({
|
|
2139
|
+
text: ssmlText,
|
|
2140
|
+
...options
|
|
2141
|
+
}),
|
|
2142
|
+
headers: {
|
|
2143
|
+
"Content-Type": "application/json",
|
|
2144
|
+
apikey: apiKey
|
|
2145
|
+
}
|
|
2137
2146
|
});
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2147
|
+
if (!res.ok)
|
|
2148
|
+
throw new Error(
|
|
2149
|
+
`TTS request failed:
|
|
2150
|
+
${res.status}: ${res.statusText}`
|
|
2151
|
+
);
|
|
2152
|
+
const data = await res.json();
|
|
2153
|
+
if (data.status !== "success" || data.mess !== "success" || data.audioData.length === 0)
|
|
2154
|
+
throw new Error("Invalid TTS response data");
|
|
2155
|
+
const mp3Buffer = Buffer.from(
|
|
2156
|
+
data.audioData,
|
|
2157
|
+
"base64"
|
|
2158
|
+
);
|
|
2159
|
+
resolve(mp3Buffer);
|
|
2141
2160
|
} catch (err) {
|
|
2142
2161
|
reject(err);
|
|
2143
2162
|
}
|
|
@@ -2184,7 +2203,7 @@ function generateAnkiNote(entry) {
|
|
|
2184
2203
|
).join("") : noKanjiForms
|
|
2185
2204
|
],
|
|
2186
2205
|
entry.translations.map(
|
|
2187
|
-
(translationEntry, index) =>
|
|
2206
|
+
(translationEntry, index) => `<span class="word word-index${entry.phrases && entry.phrases.some((phrase, index2) => index === index2 && phrase.glossNumber && phrase.glossNumber.wordId === entry.id && phrase.glossNumber.glossNumber === index + 1) ? " gloss-specific" : ""}">${index + 1}</span>${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes)}${index > 2 ? "</details>" : ""}`
|
|
2188
2207
|
).join(""),
|
|
2189
2208
|
entry.kanji ? entry.kanji.map(
|
|
2190
2209
|
(kanjiEntry) => createEntry(
|
|
@@ -2193,11 +2212,11 @@ function generateAnkiNote(entry) {
|
|
|
2193
2212
|
)
|
|
2194
2213
|
).join("") : '<span class="word word-kanji">(no kanji)</span>',
|
|
2195
2214
|
entry.phrases ? entry.phrases.map(
|
|
2196
|
-
(phraseEntry) => createEntry(
|
|
2215
|
+
(phraseEntry, index) => `<span class="word word-index${entry.translations.some((_translation, index2) => index === index2 && phraseEntry.glossNumber && phraseEntry.glossNumber.wordId === entry.id && phraseEntry.glossNumber.glossNumber === index2 + 1) ? " gloss-specific" : ""}">${index + 1}</span>${createEntry(
|
|
2197
2216
|
`<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
|
|
2198
2217
|
[phraseEntry.translation],
|
|
2199
2218
|
true
|
|
2200
|
-
)
|
|
2219
|
+
)}`
|
|
2201
2220
|
).join("") : '<span class="word word-phrase">(no phrases) (Search on dictionaries!)</span>',
|
|
2202
2221
|
...entry.tags && entry.tags.length > 0 ? [
|
|
2203
2222
|
entry.tags.map(
|