henkan 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/index.cjs.js +2 -133
- package/dist/index.cjs.js.map +2 -2
- package/dist/index.mjs +2 -129
- package/dist/index.mjs.map +2 -2
- package/dist/types/constants.d.ts +0 -3
- package/dist/types/constants.d.ts.map +1 -1
- package/dist/types/types.d.ts +0 -6
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/utils.d.ts +2 -9
- package/dist/types/utils.d.ts.map +1 -1
- package/docs/api/README.md +0 -1
- package/docs/api/functions/capitalizeString.md +1 -1
- package/docs/api/functions/convertJMdict.md +1 -1
- package/docs/api/functions/convertKanjiDic.md +1 -1
- package/docs/api/functions/convertKradFile.md +1 -1
- package/docs/api/functions/convertRadkFile.md +1 -1
- package/docs/api/functions/convertTanakaCorpus.md +1 -1
- package/docs/api/functions/generateAnkiNote.md +1 -1
- package/docs/api/functions/generateAnkiNotesFile.md +1 -1
- package/docs/api/functions/getKanji.md +1 -1
- package/docs/api/functions/getKanjiExtended.md +1 -1
- package/docs/api/functions/getWord.md +1 -1
- package/docs/api/functions/isStringArray.md +1 -1
- package/docs/api/functions/isValidArray.md +1 -1
- package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
- package/docs/api/functions/shuffleArray.md +1 -1
- package/docs/api/functions/synthesizeSpeech.md +4 -4
- package/docs/api/interfaces/DictKanji.md +5 -5
- package/docs/api/interfaces/DictKanjiForm.md +4 -4
- package/docs/api/interfaces/DictKanjiMisc.md +5 -5
- package/docs/api/interfaces/DictKanjiReading.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
- package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
- package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
- package/docs/api/interfaces/DictMeaning.md +11 -11
- package/docs/api/interfaces/DictRadical.md +4 -4
- package/docs/api/interfaces/DictReading.md +5 -5
- package/docs/api/interfaces/DictWord.md +8 -8
- package/docs/api/interfaces/ExamplePart.md +7 -7
- package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
- package/docs/api/interfaces/Grammar.md +15 -15
- package/docs/api/interfaces/GrammarMeaning.md +3 -3
- package/docs/api/interfaces/Kana.md +11 -11
- package/docs/api/interfaces/Kanji.md +22 -22
- package/docs/api/interfaces/KanjiComponent.md +3 -3
- package/docs/api/interfaces/KanjiForm.md +4 -4
- package/docs/api/interfaces/NoteAndTag.md +3 -3
- package/docs/api/interfaces/Phrase.md +5 -5
- package/docs/api/interfaces/Radical.md +16 -16
- package/docs/api/interfaces/Reading.md +5 -5
- package/docs/api/interfaces/ResultEntry.md +7 -7
- package/docs/api/interfaces/TanakaExample.md +7 -7
- package/docs/api/interfaces/Translation.md +3 -3
- package/docs/api/interfaces/UsefulRegExps.md +8 -20
- package/docs/api/interfaces/Word.md +14 -14
- package/docs/api/type-aliases/Dict.md +1 -1
- package/docs/api/type-aliases/DictName.md +1 -1
- package/docs/api/type-aliases/EntryType.md +1 -1
- package/docs/api/type-aliases/JLPT.md +1 -1
- package/docs/api/type-aliases/Result.md +1 -1
- package/package.json +1 -1
- package/docs/api/functions/makeSSML.md +0 -33
package/README.md
CHANGED
|
@@ -55,12 +55,14 @@ const dictContent = fs.readFileSync(dictPath, 'utf-8');
|
|
|
55
55
|
|
|
56
56
|
const dictWords = convertJMdict(dictContent);
|
|
57
57
|
|
|
58
|
+
const jmDict = undefined, id = undefined, kanjiDic = undefined, tanakaCorpus = undefined;
|
|
59
|
+
|
|
58
60
|
const noteTypeName = 'Word';
|
|
59
61
|
const deckName = 'Japanese::Vocabulary::No kanji form words';
|
|
60
62
|
|
|
61
63
|
const noKanjiFormWords = dictWords
|
|
62
64
|
.filter(word => word.kanjiForms === undefined)
|
|
63
|
-
.map(word => getWord(
|
|
65
|
+
.map(word => getWord(jmDict, id, kanjiDic, tanakaCorpus, word, noteTypeName, deckName));
|
|
64
66
|
|
|
65
67
|
const ankiNotesFile = generateAnkiNotesFile(noKanjiFormWords);
|
|
66
68
|
|
package/dist/index.cjs.js
CHANGED
|
@@ -49,14 +49,10 @@ __export(index_exports, {
|
|
|
49
49
|
isValidArray: () => isValidArray,
|
|
50
50
|
isValidArrayWithFirstElement: () => isValidArrayWithFirstElement,
|
|
51
51
|
isWord: () => isWord,
|
|
52
|
-
makeSSML: () => makeSSML,
|
|
53
52
|
notSearchedForms: () => notSearchedForms,
|
|
54
53
|
noteMap: () => noteMap,
|
|
55
|
-
numberMap: () => numberMap,
|
|
56
54
|
regexps: () => regexps,
|
|
57
|
-
romajiMap: () => romajiMap,
|
|
58
55
|
shuffleArray: () => shuffleArray,
|
|
59
|
-
symbolMap: () => symbolMap,
|
|
60
56
|
synthesizeSpeech: () => synthesizeSpeech
|
|
61
57
|
});
|
|
62
58
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -66,60 +62,11 @@ var regexps = {
|
|
|
66
62
|
hiragana: /[\u{3040}-\u{309F}]/u,
|
|
67
63
|
katakana: /[\u{30A0}-\u{30FF}]/u,
|
|
68
64
|
kanji: new RegExp("\\p{Script=Han}+", "u"),
|
|
69
|
-
scriptSplit: /([\p{sc=Han}]+|[\p{sc=Hiragana}]+|[\p{sc=Katakana}]+|[^\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}]+)/u,
|
|
70
65
|
regExChars: /[-\/\\^$*+?.()|[\]{}]/,
|
|
71
66
|
tanakaID: /#ID=(?<id>\d+_\d+)$/,
|
|
72
67
|
tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
|
|
73
68
|
tanakaReferenceID: /#(?<entryid>[\d]+)/
|
|
74
69
|
};
|
|
75
|
-
var romajiMap = {
|
|
76
|
-
A: "\u30A8\u30FC",
|
|
77
|
-
B: "\u30D3\u30FC",
|
|
78
|
-
C: "\u30B7\u30FC",
|
|
79
|
-
D: "\u30C7\u30A3\u30FC",
|
|
80
|
-
E: "\u30A4\u30FC",
|
|
81
|
-
F: "\u30A8\u30D5",
|
|
82
|
-
G: "\u30B8\u30FC",
|
|
83
|
-
H: "\u30A8\u30A4\u30C1",
|
|
84
|
-
I: "\u30A2\u30A4",
|
|
85
|
-
J: "\u30B8\u30A7\u30FC",
|
|
86
|
-
K: "\u30B1\u30FC",
|
|
87
|
-
L: "\u30A8\u30EB",
|
|
88
|
-
M: "\u30A8\u30E0",
|
|
89
|
-
N: "\u30A8\u30CC",
|
|
90
|
-
O: "\u30AA\u30FC",
|
|
91
|
-
P: "\u30D4\u30FC",
|
|
92
|
-
Q: "\u30AD\u30E5\u30FC",
|
|
93
|
-
R: "\u30A2\u30FC\u30EB",
|
|
94
|
-
S: "\u30A8\u30B9",
|
|
95
|
-
T: "\u30C6\u30A3\u30FC",
|
|
96
|
-
U: "\u30E6\u30FC",
|
|
97
|
-
V: "\u30D6\u30A4",
|
|
98
|
-
W: "\u30C0\u30D6\u30EA\u30E5\u30FC",
|
|
99
|
-
X: "\u30A8\u30C3\u30AF\u30B9",
|
|
100
|
-
Y: "\u30EF\u30A4",
|
|
101
|
-
Z: "\u30BC\u30C3\u30C8"
|
|
102
|
-
};
|
|
103
|
-
var numberMap = {
|
|
104
|
-
"0": "\u30BC\u30ED",
|
|
105
|
-
"1": "\u30A4\u30C1",
|
|
106
|
-
"2": "\u30CB",
|
|
107
|
-
"3": "\u30B5\u30F3",
|
|
108
|
-
"4": "\u30E8\u30F3",
|
|
109
|
-
"5": "\u30B4",
|
|
110
|
-
"6": "\u30ED\u30AF",
|
|
111
|
-
"7": "\u30CA\u30CA",
|
|
112
|
-
"8": "\u30CF\u30C1",
|
|
113
|
-
"9": "\u30AD\u30E5\u30A6"
|
|
114
|
-
};
|
|
115
|
-
var symbolMap = {
|
|
116
|
-
"\uFF04": "\u30C9\u30EB",
|
|
117
|
-
"%": "\u30D1\u30FC\u30BB\u30F3\u30C8",
|
|
118
|
-
"\xA5": "\u30A8\u30F3",
|
|
119
|
-
"#": "\u30B7\u30E3\u30FC\u30D7",
|
|
120
|
-
"@": "\u30A2\u30C3\u30C8",
|
|
121
|
-
"&": "\u30A2\u30F3\u30C9"
|
|
122
|
-
};
|
|
123
70
|
var notSearchedForms = /* @__PURE__ */ new Set([
|
|
124
71
|
"search-only kana form",
|
|
125
72
|
"Search-only kana form",
|
|
@@ -2055,88 +2002,14 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
|
|
|
2055
2002
|
throw err;
|
|
2056
2003
|
}
|
|
2057
2004
|
}
|
|
2058
|
-
var getCharType = (char) => {
|
2059
|
-
if (regexps.kanji.test(char)) return "kanji";
|
|
2060
|
-
if (regexps.hiragana.test(char)) return "hiragana";
|
|
2061
|
-
if (regexps.katakana.test(char)) return "katakana";
|
|
2062
|
-
return "other";
|
|
2063
|
-
};
|
|
2064
|
-
var splitByScript = (text) => text.match(regexps.scriptSplit) || [];
|
|
2065
|
-
var convertToHiragana = (str) => str.replace(
|
|
2066
|
-
regexps.katakana,
|
|
2067
|
-
(c) => String.fromCharCode(c.charCodeAt(0) - 96)
|
|
2068
|
-
);
|
|
2069
|
-
var convertOtherToKatakana = (str) => str.split("").map((c) => {
|
|
2070
|
-
if (romajiMap[c.toUpperCase()]) return romajiMap[c.toUpperCase()];
|
|
2071
|
-
if (numberMap[c]) return numberMap[c];
|
|
2072
|
-
if (symbolMap[c]) return symbolMap[c];
|
|
2073
|
-
return c;
|
|
2074
|
-
}).join("");
|
|
2075
|
-
function makeSSML(formText, fullReading) {
|
|
2076
|
-
let ssml = "";
|
|
2077
|
-
const allTypes = Array.from(
|
|
2078
|
-
formText
|
|
2079
|
-
).map((c) => getCharType(c));
|
|
2080
|
-
const uniqueTypes = Array.from(new Set(allTypes));
|
|
2081
|
-
if (uniqueTypes.length === 1)
|
|
2082
|
-
switch (uniqueTypes[0]) {
|
|
2083
|
-
case "kanji":
|
|
2084
|
-
ssml = `<speak><phoneme alphabet="x-amazon-yomigana" ph="${fullReading}">${formText}</phoneme></speak>`;
|
|
2085
|
-
break;
|
|
2086
|
-
case "katakana":
|
|
2087
|
-
ssml = `<speak><phoneme alphabet="x-amazon-pron-kana" ph="${formText}">${formText}</phoneme></speak>`;
|
|
2088
|
-
break;
|
|
2089
|
-
case "hiragana":
|
|
2090
|
-
default:
|
|
2091
|
-
ssml = `<speak>${formText}</speak>`;
|
|
2092
|
-
}
|
|
2093
|
-
else {
|
|
2094
|
-
const segments = splitByScript(formText);
|
|
2095
|
-
let pureKanjiReading = convertToHiragana(fullReading);
|
|
2096
|
-
segments.forEach((seg) => {
|
|
2097
|
-
const type = getCharType(
|
|
2098
|
-
seg[0]
|
|
2099
|
-
);
|
|
2100
|
-
if (type !== "kanji") {
|
|
2101
|
-
const converted = type === "other" ? convertToHiragana(convertOtherToKatakana(seg)) : convertToHiragana(seg);
|
|
2102
|
-
pureKanjiReading = pureKanjiReading.replace(converted, "");
|
|
2103
|
-
}
|
|
2104
|
-
});
|
|
2105
|
-
const kanjiSegments = segments.filter(
|
|
2106
|
-
(seg) => getCharType(seg[0]) === "kanji"
|
|
2107
|
-
);
|
|
2108
|
-
let readingPointer = 0;
|
|
2109
|
-
const ssmlSegments = segments.map((seg) => {
|
|
2110
|
-
const type = getCharType(
|
|
2111
|
-
seg[0]
|
|
2112
|
-
);
|
|
2113
|
-
if (type === "kanji") {
|
|
2114
|
-
const expectedLength = pureKanjiReading.length / kanjiSegments.length;
|
|
2115
|
-
const allocated = pureKanjiReading.slice(
|
|
2116
|
-
readingPointer,
|
|
2117
|
-
readingPointer + Math.ceil(expectedLength)
|
|
2118
|
-
);
|
|
2119
|
-
readingPointer += allocated.length;
|
|
2120
|
-
return `<phoneme alphabet="x-amazon-yomigana" ph="${allocated}">${seg}</phoneme>`;
|
|
2121
|
-
} else if (type === "katakana")
|
|
2122
|
-
return `<phoneme alphabet="x-amazon-pron-kana" ph="${seg}">${seg}</phoneme>`;
|
|
2123
|
-
else if (type === "other") {
|
|
2124
|
-
const katakanaReading = convertOtherToKatakana(seg);
|
|
2125
|
-
return `<phoneme alphabet="x-amazon-pron-kana" ph="${katakanaReading}">${seg}</phoneme>`;
|
|
2126
|
-
} else return seg;
|
|
2127
|
-
});
|
|
2128
|
-
ssml = `<speak>${ssmlSegments.join("")}</speak>`;
|
|
2129
|
-
}
|
|
2130
|
-
return ssml;
|
|
2131
|
-
}
|
|
2132
|
-
async function synthesizeSpeech(ssmlText, apiKey, options) {
|
|
2005
|
+
async function synthesizeSpeech(textOrSSML, apiKey, options) {
|
|
2133
2006
|
return await new Promise(
|
|
2134
2007
|
async (resolve, reject) => {
|
|
2135
2008
|
try {
|
|
2136
2009
|
const res = await (0, import_node_fetch.default)("https://ttsfree.com/api/v1/tts", {
|
|
2137
2010
|
method: "POST",
|
|
2138
2011
|
body: JSON.stringify({
|
|
2139
|
-
text: ssmlText,
|
|
2012
|
+
text: textOrSSML,
|
|
2140
2013
|
...options
|
|
2141
2014
|
}),
|
|
2142
2015
|
headers: {
|
|
@@ -2398,14 +2271,10 @@ ${ankiNotes}`;
|
|
|
2398
2271
|
isValidArray,
|
|
2399
2272
|
isValidArrayWithFirstElement,
|
|
2400
2273
|
isWord,
|
|
2401
|
-
makeSSML,
|
|
2402
2274
|
notSearchedForms,
|
|
2403
2275
|
noteMap,
|
|
2404
|
-
numberMap,
|
|
2405
2276
|
regexps,
|
|
2406
|
-
romajiMap,
|
|
2407
2277
|
shuffleArray,
|
|
2408
|
-
symbolMap,
|
|
2409
2278
|
synthesizeSpeech
|
|
2410
2279
|
});
|
|
2411
2280
|
//# sourceMappingURL=index.cjs.js.map
|