henkan 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/dist/index.cjs.js +8 -156
- package/dist/index.cjs.js.map +3 -3
- package/dist/index.mjs +10 -152
- package/dist/index.mjs.map +2 -2
- package/dist/types/constants.d.ts +0 -3
- package/dist/types/constants.d.ts.map +1 -1
- package/dist/types/types.d.ts +0 -6
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +7 -17
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +0 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +13 -25
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +5 -5
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +7 -7
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +8 -20
- package/docs/api/interfaces/Word.md +14 -14
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +2 -2
- package/docs/api/functions/makeSSML.md +0 -33
package/dist/index.mjs
CHANGED
|
@@ -10,60 +10,11 @@ var regexps = {
|
|
|
10
10
|
hiragana: /[\u{3040}-\u{309F}]/u,
|
|
11
11
|
katakana: /[\u{30A0}-\u{30FF}]/u,
|
|
12
12
|
kanji: new RegExp("\\p{Script=Han}+", "u"),
|
|
13
|
-
scriptSplit: /([\p{sc=Han}]+|[\p{sc=Hiragana}]+|[\p{sc=Katakana}]+|[^\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}]+)/u,
|
|
14
13
|
regExChars: /[-\/\\^$*+?.()|[\]{}]/,
|
|
15
14
|
tanakaID: /#ID=(?<id>\d+_\d+)$/,
|
|
16
15
|
tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
|
|
17
16
|
tanakaReferenceID: /#(?<entryid>[\d]+)/
|
|
18
17
|
};
|
|
19
|
-
var romajiMap = {
|
|
20
|
-
A: "\u30A8\u30FC",
|
|
21
|
-
B: "\u30D3\u30FC",
|
|
22
|
-
C: "\u30B7\u30FC",
|
|
23
|
-
D: "\u30C7\u30A3\u30FC",
|
|
24
|
-
E: "\u30A4\u30FC",
|
|
25
|
-
F: "\u30A8\u30D5",
|
|
26
|
-
G: "\u30B8\u30FC",
|
|
27
|
-
H: "\u30A8\u30A4\u30C1",
|
|
28
|
-
I: "\u30A2\u30A4",
|
|
29
|
-
J: "\u30B8\u30A7\u30FC",
|
|
30
|
-
K: "\u30B1\u30FC",
|
|
31
|
-
L: "\u30A8\u30EB",
|
|
32
|
-
M: "\u30A8\u30E0",
|
|
33
|
-
N: "\u30A8\u30CC",
|
|
34
|
-
O: "\u30AA\u30FC",
|
|
35
|
-
P: "\u30D4\u30FC",
|
|
36
|
-
Q: "\u30AD\u30E5\u30FC",
|
|
37
|
-
R: "\u30A2\u30FC\u30EB",
|
|
38
|
-
S: "\u30A8\u30B9",
|
|
39
|
-
T: "\u30C6\u30A3\u30FC",
|
|
40
|
-
U: "\u30E6\u30FC",
|
|
41
|
-
V: "\u30D6\u30A4",
|
|
42
|
-
W: "\u30C0\u30D6\u30EA\u30E5\u30FC",
|
|
43
|
-
X: "\u30A8\u30C3\u30AF\u30B9",
|
|
44
|
-
Y: "\u30EF\u30A4",
|
|
45
|
-
Z: "\u30BC\u30C3\u30C8"
|
|
46
|
-
};
|
|
47
|
-
var numberMap = {
|
|
48
|
-
"0": "\u30BC\u30ED",
|
|
49
|
-
"1": "\u30A4\u30C1",
|
|
50
|
-
"2": "\u30CB",
|
|
51
|
-
"3": "\u30B5\u30F3",
|
|
52
|
-
"4": "\u30E8\u30F3",
|
|
53
|
-
"5": "\u30B4",
|
|
54
|
-
"6": "\u30ED\u30AF",
|
|
55
|
-
"7": "\u30CA\u30CA",
|
|
56
|
-
"8": "\u30CF\u30C1",
|
|
57
|
-
"9": "\u30AD\u30E5\u30A6"
|
|
58
|
-
};
|
|
59
|
-
var symbolMap = {
|
|
60
|
-
"\uFF04": "\u30C9\u30EB",
|
|
61
|
-
"%": "\u30D1\u30FC\u30BB\u30F3\u30C8",
|
|
62
|
-
"\xA5": "\u30A8\u30F3",
|
|
63
|
-
"#": "\u30B7\u30E3\u30FC\u30D7",
|
|
64
|
-
"@": "\u30A2\u30C3\u30C8",
|
|
65
|
-
"&": "\u30A2\u30F3\u30C9"
|
|
66
|
-
};
|
|
67
18
|
var notSearchedForms = /* @__PURE__ */ new Set([
|
|
68
19
|
"search-only kana form",
|
|
69
20
|
"Search-only kana form",
|
|
@@ -1131,7 +1082,9 @@ var noteMap = /* @__PURE__ */ new Map([
|
|
|
1131
1082
|
import libxml from "libxmljs2";
|
|
1132
1083
|
import xml from "xml2js";
|
|
1133
1084
|
import iconv from "iconv-lite";
|
|
1134
|
-
import
|
|
1085
|
+
import {
|
|
1086
|
+
SynthesizeSpeechCommand
|
|
1087
|
+
} from "@aws-sdk/client-polly";
|
|
1135
1088
|
var Kuroshiro = __require("kuroshiro");
|
|
1136
1089
|
var KuromojiAnalyzer = __require("kuroshiro-analyzer-kuromoji");
|
|
1137
1090
|
function capitalizeString(value) {
|
|
@@ -1994,108 +1947,17 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
|
|
|
1994
1947
|
throw err;
|
|
1995
1948
|
}
|
|
1996
1949
|
}
|
|
1997
|
-
|
|
1998
|
-
if (regexps.kanji.test(char)) return "kanji";
|
|
1999
|
-
if (regexps.hiragana.test(char)) return "hiragana";
|
|
2000
|
-
if (regexps.katakana.test(char)) return "katakana";
|
|
2001
|
-
return "other";
|
|
2002
|
-
};
|
|
2003
|
-
var splitByScript = (text) => text.match(regexps.scriptSplit) || [];
|
|
2004
|
-
var convertToHiragana = (str) => str.replace(
|
|
2005
|
-
regexps.katakana,
|
|
2006
|
-
(c) => String.fromCharCode(c.charCodeAt(0) - 96)
|
|
2007
|
-
);
|
|
2008
|
-
var convertOtherToKatakana = (str) => str.split("").map((c) => {
|
|
2009
|
-
if (romajiMap[c.toUpperCase()]) return romajiMap[c.toUpperCase()];
|
|
2010
|
-
if (numberMap[c]) return numberMap[c];
|
|
2011
|
-
if (symbolMap[c]) return symbolMap[c];
|
|
2012
|
-
return c;
|
|
2013
|
-
}).join("");
|
|
2014
|
-
function makeSSML(formText, fullReading) {
|
|
2015
|
-
let ssml = "";
|
|
2016
|
-
const allTypes = Array.from(
|
|
2017
|
-
formText
|
|
2018
|
-
).map((c) => getCharType(c));
|
|
2019
|
-
const uniqueTypes = Array.from(new Set(allTypes));
|
|
2020
|
-
if (uniqueTypes.length === 1)
|
|
2021
|
-
switch (uniqueTypes[0]) {
|
|
2022
|
-
case "kanji":
|
|
2023
|
-
ssml = `<speak><phoneme alphabet="x-amazon-yomigana" ph="${fullReading}">${formText}</phoneme></speak>`;
|
|
2024
|
-
break;
|
|
2025
|
-
case "katakana":
|
|
2026
|
-
ssml = `<speak><phoneme alphabet="x-amazon-pron-kana" ph="${formText}">${formText}</phoneme></speak>`;
|
|
2027
|
-
break;
|
|
2028
|
-
case "hiragana":
|
|
2029
|
-
default:
|
|
2030
|
-
ssml = `<speak>${formText}</speak>`;
|
|
2031
|
-
}
|
|
2032
|
-
else {
|
|
2033
|
-
const segments = splitByScript(formText);
|
|
2034
|
-
let pureKanjiReading = convertToHiragana(fullReading);
|
|
2035
|
-
segments.forEach((seg) => {
|
|
2036
|
-
const type = getCharType(
|
|
2037
|
-
seg[0]
|
|
2038
|
-
);
|
|
2039
|
-
if (type !== "kanji") {
|
|
2040
|
-
const converted = type === "other" ? convertToHiragana(convertOtherToKatakana(seg)) : convertToHiragana(seg);
|
|
2041
|
-
pureKanjiReading = pureKanjiReading.replace(converted, "");
|
|
2042
|
-
}
|
|
2043
|
-
});
|
|
2044
|
-
const kanjiSegments = segments.filter(
|
|
2045
|
-
(seg) => getCharType(seg[0]) === "kanji"
|
|
2046
|
-
);
|
|
2047
|
-
let readingPointer = 0;
|
|
2048
|
-
const ssmlSegments = segments.map((seg) => {
|
|
2049
|
-
const type = getCharType(
|
|
2050
|
-
seg[0]
|
|
2051
|
-
);
|
|
2052
|
-
if (type === "kanji") {
|
|
2053
|
-
const expectedLength = pureKanjiReading.length / kanjiSegments.length;
|
|
2054
|
-
const allocated = pureKanjiReading.slice(
|
|
2055
|
-
readingPointer,
|
|
2056
|
-
readingPointer + Math.ceil(expectedLength)
|
|
2057
|
-
);
|
|
2058
|
-
readingPointer += allocated.length;
|
|
2059
|
-
return `<phoneme alphabet="x-amazon-yomigana" ph="${allocated}">${seg}</phoneme>`;
|
|
2060
|
-
} else if (type === "katakana")
|
|
2061
|
-
return `<phoneme alphabet="x-amazon-pron-kana" ph="${seg}">${seg}</phoneme>`;
|
|
2062
|
-
else if (type === "other") {
|
|
2063
|
-
const katakanaReading = convertOtherToKatakana(seg);
|
|
2064
|
-
return `<phoneme alphabet="x-amazon-pron-kana" ph="${katakanaReading}">${seg}</phoneme>`;
|
|
2065
|
-
} else return seg;
|
|
2066
|
-
});
|
|
2067
|
-
ssml = `<speak>${ssmlSegments.join("")}</speak>`;
|
|
2068
|
-
}
|
|
2069
|
-
return ssml;
|
|
2070
|
-
}
|
|
2071
|
-
async function synthesizeSpeech(ssmlText, apiKey, options) {
|
|
1950
|
+
async function synthesizeSpeech(client, input, options) {
|
|
2072
1951
|
return await new Promise(
|
|
2073
1952
|
async (resolve, reject) => {
|
|
2074
1953
|
try {
|
|
2075
|
-
const
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
text: ssmlText,
|
|
2079
|
-
...options
|
|
2080
|
-
}),
|
|
2081
|
-
headers: {
|
|
2082
|
-
"Content-Type": "application/json",
|
|
2083
|
-
apikey: apiKey
|
|
2084
|
-
}
|
|
1954
|
+
const command = new SynthesizeSpeechCommand({
|
|
1955
|
+
Text: input,
|
|
1956
|
+
...options
|
|
2085
1957
|
});
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
${res.status}: ${res.statusText}`
|
|
2090
|
-
);
|
|
2091
|
-
const data = await res.json();
|
|
2092
|
-
if (data.status !== "success" || data.mess !== "success" || data.audioData.length === 0)
|
|
2093
|
-
throw new Error("Invalid TTS response data");
|
|
2094
|
-
const mp3Buffer = Buffer.from(
|
|
2095
|
-
data.audioData,
|
|
2096
|
-
"base64"
|
|
2097
|
-
);
|
|
2098
|
-
resolve(mp3Buffer);
|
|
1958
|
+
const response = await client.send(command);
|
|
1959
|
+
const stream = response.AudioStream ? Buffer.from(await response.AudioStream.transformToByteArray()) : null;
|
|
1960
|
+
resolve(stream);
|
|
2099
1961
|
} catch (err) {
|
|
2100
1962
|
reject(err);
|
|
2101
1963
|
}
|
|
@@ -2336,14 +2198,10 @@ export {
|
|
|
2336
2198
|
isValidArray,
|
|
2337
2199
|
isValidArrayWithFirstElement,
|
|
2338
2200
|
isWord,
|
|
2339
|
-
makeSSML,
|
|
2340
2201
|
notSearchedForms,
|
|
2341
2202
|
noteMap,
|
|
2342
|
-
numberMap,
|
|
2343
2203
|
regexps,
|
|
2344
|
-
romajiMap,
|
|
2345
2204
|
shuffleArray,
|
|
2346
|
-
symbolMap,
|
|
2347
2205
|
synthesizeSpeech
|
|
2348
2206
|
};
|
|
2349
2207
|
//# sourceMappingURL=index.mjs.map
|