henkan 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/dist/index.cjs.js +8 -156
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +10 -152
- package/dist/index.mjs.map +2 -2
- package/dist/types/constants.d.ts +0 -3
- package/dist/types/constants.d.ts.map +1 -1
- package/dist/types/types.d.ts +0 -6
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +7 -17
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +0 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +13 -25
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +5 -5
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +7 -7
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +8 -20
- package/docs/api/interfaces/Word.md +14 -14
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +2 -2
- package/docs/api/functions/makeSSML.md +0 -33
package/README.md
CHANGED
|
@@ -36,7 +36,7 @@ pnpm add henkan
|
|
|
36
36
|
- JMdict, KANJIDIC, Tanaka Corpus, RADK and KRAD conversion
|
|
37
37
|
- User-friendly schemas for dictionary entries
|
|
38
38
|
- Anki note generation
|
|
39
|
-
- Other useful tools (
|
|
39
|
+
- Other useful tools (Amazon Polly audio generation, Japanese RegExps, array checking etc.)
|
|
40
40
|
|
|
41
41
|
---
|
|
42
42
|
|
|
@@ -55,12 +55,14 @@ const dictContent = fs.readFileSync(dictPath, 'utf-8');
|
|
|
55
55
|
|
|
56
56
|
const dictWords = convertJMdict(dictContent);
|
|
57
57
|
|
|
58
|
+
const jmDict = undefined, id = undefined, kanjiDic = undefined, tanakaCorpus = undefined;
|
|
59
|
+
|
|
58
60
|
const noteTypeName = 'Word';
|
|
59
61
|
const deckName = 'Japanese::Vocabulary::No kanji form words';
|
|
60
62
|
|
|
61
63
|
const noKanjiFormWords = dictWords
|
|
62
64
|
.filter(word => word.kanjiForms === undefined)
|
|
63
|
-
.map(word => getWord(
|
|
65
|
+
.map(word => getWord(jmDict, id, kanjiDic, tanakaCorpus, word, noteTypeName, deckName));
|
|
64
66
|
|
|
65
67
|
const ankiNotesFile = generateAnkiNotesFile(noKanjiFormWords);
|
|
66
68
|
|
package/dist/index.cjs.js
CHANGED
|
@@ -49,14 +49,10 @@ __export(index_exports, {
|
|
|
49
49
|
isValidArray: () => isValidArray,
|
|
50
50
|
isValidArrayWithFirstElement: () => isValidArrayWithFirstElement,
|
|
51
51
|
isWord: () => isWord,
|
|
52
|
-
makeSSML: () => makeSSML,
|
|
53
52
|
notSearchedForms: () => notSearchedForms,
|
|
54
53
|
noteMap: () => noteMap,
|
|
55
|
-
numberMap: () => numberMap,
|
|
56
54
|
regexps: () => regexps,
|
|
57
|
-
romajiMap: () => romajiMap,
|
|
58
55
|
shuffleArray: () => shuffleArray,
|
|
59
|
-
symbolMap: () => symbolMap,
|
|
60
56
|
synthesizeSpeech: () => synthesizeSpeech
|
|
61
57
|
});
|
|
62
58
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -66,60 +62,11 @@ var regexps = {
|
|
|
66
62
|
hiragana: /[\u{3040}-\u{309F}]/u,
|
|
67
63
|
katakana: /[\u{30A0}-\u{30FF}]/u,
|
|
68
64
|
kanji: new RegExp("\\p{Script=Han}+", "u"),
|
|
69
|
-
scriptSplit: /([\p{sc=Han}]+|[\p{sc=Hiragana}]+|[\p{sc=Katakana}]+|[^\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}]+)/u,
|
|
70
65
|
regExChars: /[-\/\\^$*+?.()|[\]{}]/,
|
|
71
66
|
tanakaID: /#ID=(?<id>\d+_\d+)$/,
|
|
72
67
|
tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
|
|
73
68
|
tanakaReferenceID: /#(?<entryid>[\d]+)/
|
|
74
69
|
};
|
|
75
|
-
var romajiMap = {
|
|
76
|
-
A: "\u30A8\u30FC",
|
|
77
|
-
B: "\u30D3\u30FC",
|
|
78
|
-
C: "\u30B7\u30FC",
|
|
79
|
-
D: "\u30C7\u30A3\u30FC",
|
|
80
|
-
E: "\u30A4\u30FC",
|
|
81
|
-
F: "\u30A8\u30D5",
|
|
82
|
-
G: "\u30B8\u30FC",
|
|
83
|
-
H: "\u30A8\u30A4\u30C1",
|
|
84
|
-
I: "\u30A2\u30A4",
|
|
85
|
-
J: "\u30B8\u30A7\u30FC",
|
|
86
|
-
K: "\u30B1\u30FC",
|
|
87
|
-
L: "\u30A8\u30EB",
|
|
88
|
-
M: "\u30A8\u30E0",
|
|
89
|
-
N: "\u30A8\u30CC",
|
|
90
|
-
O: "\u30AA\u30FC",
|
|
91
|
-
P: "\u30D4\u30FC",
|
|
92
|
-
Q: "\u30AD\u30E5\u30FC",
|
|
93
|
-
R: "\u30A2\u30FC\u30EB",
|
|
94
|
-
S: "\u30A8\u30B9",
|
|
95
|
-
T: "\u30C6\u30A3\u30FC",
|
|
96
|
-
U: "\u30E6\u30FC",
|
|
97
|
-
V: "\u30D6\u30A4",
|
|
98
|
-
W: "\u30C0\u30D6\u30EA\u30E5\u30FC",
|
|
99
|
-
X: "\u30A8\u30C3\u30AF\u30B9",
|
|
100
|
-
Y: "\u30EF\u30A4",
|
|
101
|
-
Z: "\u30BC\u30C3\u30C8"
|
|
102
|
-
};
|
|
103
|
-
var numberMap = {
|
|
104
|
-
"0": "\u30BC\u30ED",
|
|
105
|
-
"1": "\u30A4\u30C1",
|
|
106
|
-
"2": "\u30CB",
|
|
107
|
-
"3": "\u30B5\u30F3",
|
|
108
|
-
"4": "\u30E8\u30F3",
|
|
109
|
-
"5": "\u30B4",
|
|
110
|
-
"6": "\u30ED\u30AF",
|
|
111
|
-
"7": "\u30CA\u30CA",
|
|
112
|
-
"8": "\u30CF\u30C1",
|
|
113
|
-
"9": "\u30AD\u30E5\u30A6"
|
|
114
|
-
};
|
|
115
|
-
var symbolMap = {
|
|
116
|
-
"\uFF04": "\u30C9\u30EB",
|
|
117
|
-
"%": "\u30D1\u30FC\u30BB\u30F3\u30C8",
|
|
118
|
-
"\xA5": "\u30A8\u30F3",
|
|
119
|
-
"#": "\u30B7\u30E3\u30FC\u30D7",
|
|
120
|
-
"@": "\u30A2\u30C3\u30C8",
|
|
121
|
-
"&": "\u30A2\u30F3\u30C9"
|
|
122
|
-
};
|
|
123
70
|
var notSearchedForms = /* @__PURE__ */ new Set([
|
|
124
71
|
"search-only kana form",
|
|
125
72
|
"Search-only kana form",
|
|
@@ -1187,7 +1134,7 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
1187
1134
|
var import_libxmljs2 = __toESM(require("libxmljs2"));
|
|
1188
1135
|
var import_xml2js = __toESM(require("xml2js"));
|
|
1189
1136
|
var import_iconv_lite = __toESM(require("iconv-lite"));
|
|
1190
|
-
var
|
|
1137
|
+
var import_client_polly = require("@aws-sdk/client-polly");
|
|
1191
1138
|
var Kuroshiro = require("kuroshiro");
|
|
1192
1139
|
var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
|
|
1193
1140
|
function capitalizeString(value) {
|
|
@@ -2055,108 +2002,17 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
|
|
|
2055
2002
|
throw err;
|
|
2056
2003
|
}
|
|
2057
2004
|
}
|
|
2058
|
-
|
|
2059
|
-
if (regexps.kanji.test(char)) return "kanji";
|
|
2060
|
-
if (regexps.hiragana.test(char)) return "hiragana";
|
|
2061
|
-
if (regexps.katakana.test(char)) return "katakana";
|
|
2062
|
-
return "other";
|
|
2063
|
-
};
|
|
2064
|
-
var splitByScript = (text) => text.match(regexps.scriptSplit) || [];
|
|
2065
|
-
var convertToHiragana = (str) => str.replace(
|
|
2066
|
-
regexps.katakana,
|
|
2067
|
-
(c) => String.fromCharCode(c.charCodeAt(0) - 96)
|
|
2068
|
-
);
|
|
2069
|
-
var convertOtherToKatakana = (str) => str.split("").map((c) => {
|
|
2070
|
-
if (romajiMap[c.toUpperCase()]) return romajiMap[c.toUpperCase()];
|
|
2071
|
-
if (numberMap[c]) return numberMap[c];
|
|
2072
|
-
if (symbolMap[c]) return symbolMap[c];
|
|
2073
|
-
return c;
|
|
2074
|
-
}).join("");
|
|
2075
|
-
function makeSSML(formText, fullReading) {
|
|
2076
|
-
let ssml = "";
|
|
2077
|
-
const allTypes = Array.from(
|
|
2078
|
-
formText
|
|
2079
|
-
).map((c) => getCharType(c));
|
|
2080
|
-
const uniqueTypes = Array.from(new Set(allTypes));
|
|
2081
|
-
if (uniqueTypes.length === 1)
|
|
2082
|
-
switch (uniqueTypes[0]) {
|
|
2083
|
-
case "kanji":
|
|
2084
|
-
ssml = `<speak><phoneme alphabet="x-amazon-yomigana" ph="${fullReading}">${formText}</phoneme></speak>`;
|
|
2085
|
-
break;
|
|
2086
|
-
case "katakana":
|
|
2087
|
-
ssml = `<speak><phoneme alphabet="x-amazon-pron-kana" ph="${formText}">${formText}</phoneme></speak>`;
|
|
2088
|
-
break;
|
|
2089
|
-
case "hiragana":
|
|
2090
|
-
default:
|
|
2091
|
-
ssml = `<speak>${formText}</speak>`;
|
|
2092
|
-
}
|
|
2093
|
-
else {
|
|
2094
|
-
const segments = splitByScript(formText);
|
|
2095
|
-
let pureKanjiReading = convertToHiragana(fullReading);
|
|
2096
|
-
segments.forEach((seg) => {
|
|
2097
|
-
const type = getCharType(
|
|
2098
|
-
seg[0]
|
|
2099
|
-
);
|
|
2100
|
-
if (type !== "kanji") {
|
|
2101
|
-
const converted = type === "other" ? convertToHiragana(convertOtherToKatakana(seg)) : convertToHiragana(seg);
|
|
2102
|
-
pureKanjiReading = pureKanjiReading.replace(converted, "");
|
|
2103
|
-
}
|
|
2104
|
-
});
|
|
2105
|
-
const kanjiSegments = segments.filter(
|
|
2106
|
-
(seg) => getCharType(seg[0]) === "kanji"
|
|
2107
|
-
);
|
|
2108
|
-
let readingPointer = 0;
|
|
2109
|
-
const ssmlSegments = segments.map((seg) => {
|
|
2110
|
-
const type = getCharType(
|
|
2111
|
-
seg[0]
|
|
2112
|
-
);
|
|
2113
|
-
if (type === "kanji") {
|
|
2114
|
-
const expectedLength = pureKanjiReading.length / kanjiSegments.length;
|
|
2115
|
-
const allocated = pureKanjiReading.slice(
|
|
2116
|
-
readingPointer,
|
|
2117
|
-
readingPointer + Math.ceil(expectedLength)
|
|
2118
|
-
);
|
|
2119
|
-
readingPointer += allocated.length;
|
|
2120
|
-
return `<phoneme alphabet="x-amazon-yomigana" ph="${allocated}">${seg}</phoneme>`;
|
|
2121
|
-
} else if (type === "katakana")
|
|
2122
|
-
return `<phoneme alphabet="x-amazon-pron-kana" ph="${seg}">${seg}</phoneme>`;
|
|
2123
|
-
else if (type === "other") {
|
|
2124
|
-
const katakanaReading = convertOtherToKatakana(seg);
|
|
2125
|
-
return `<phoneme alphabet="x-amazon-pron-kana" ph="${katakanaReading}">${seg}</phoneme>`;
|
|
2126
|
-
} else return seg;
|
|
2127
|
-
});
|
|
2128
|
-
ssml = `<speak>${ssmlSegments.join("")}</speak>`;
|
|
2129
|
-
}
|
|
2130
|
-
return ssml;
|
|
2131
|
-
}
|
|
2132
|
-
async function synthesizeSpeech(ssmlText, apiKey, options) {
|
|
2005
|
+
async function synthesizeSpeech(client, input, options) {
|
|
2133
2006
|
return await new Promise(
|
|
2134
2007
|
async (resolve, reject) => {
|
|
2135
2008
|
try {
|
|
2136
|
-
const
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
text: ssmlText,
|
|
2140
|
-
...options
|
|
2141
|
-
}),
|
|
2142
|
-
headers: {
|
|
2143
|
-
"Content-Type": "application/json",
|
|
2144
|
-
apikey: apiKey
|
|
2145
|
-
}
|
|
2009
|
+
const command = new import_client_polly.SynthesizeSpeechCommand({
|
|
2010
|
+
Text: input,
|
|
2011
|
+
...options
|
|
2146
2012
|
});
|
|
2147
|
-
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
${res.status}: ${res.statusText}`
|
|
2151
|
-
);
|
|
2152
|
-
const data = await res.json();
|
|
2153
|
-
if (data.status !== "success" || data.mess !== "success" || data.audioData.length === 0)
|
|
2154
|
-
throw new Error("Invalid TTS response data");
|
|
2155
|
-
const mp3Buffer = Buffer.from(
|
|
2156
|
-
data.audioData,
|
|
2157
|
-
"base64"
|
|
2158
|
-
);
|
|
2159
|
-
resolve(mp3Buffer);
|
|
2013
|
+
const response = await client.send(command);
|
|
2014
|
+
const stream = response.AudioStream ? Buffer.from(await response.AudioStream.transformToByteArray()) : null;
|
|
2015
|
+
resolve(stream);
|
|
2160
2016
|
} catch (err) {
|
|
2161
2017
|
reject(err);
|
|
2162
2018
|
}
|
|
@@ -2398,14 +2254,10 @@ ${ankiNotes}`;
|
|
|
2398
2254
|
isValidArray,
|
|
2399
2255
|
isValidArrayWithFirstElement,
|
|
2400
2256
|
isWord,
|
|
2401
|
-
makeSSML,
|
|
2402
2257
|
notSearchedForms,
|
|
2403
2258
|
noteMap,
|
|
2404
|
-
numberMap,
|
|
2405
2259
|
regexps,
|
|
2406
|
-
romajiMap,
|
|
2407
2260
|
shuffleArray,
|
|
2408
|
-
symbolMap,
|
|
2409
2261
|
synthesizeSpeech
|
|
2410
2262
|
});
|
|
2411
2263
|
//# sourceMappingURL=index.cjs.js.map
|