henkan 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/README.md +4 -2
  2. package/dist/index.cjs.js +8 -156
  3. package/dist/index.cjs.js.map +3 -3
  4. package/dist/index.mjs +10 -152
  5. package/dist/index.mjs.map +2 -2
  6. package/dist/types/constants.d.ts +0 -3
  7. package/dist/types/constants.d.ts.map +1 -1
  8. package/dist/types/types.d.ts +0 -6
  9. package/dist/types/types.d.ts.map +1 -1
  10. package/dist/types/utils.d.ts +7 -17
  11. package/dist/types/utils.d.ts.map +1 -1
  12. package/docs/api/README.md +0 -1
  13. package/docs/api/functions/capitalizeString.md +1 -1
  14. package/docs/api/functions/convertJMdict.md +1 -1
  15. package/docs/api/functions/convertKanjiDic.md +1 -1
  16. package/docs/api/functions/convertKradFile.md +1 -1
  17. package/docs/api/functions/convertRadkFile.md +1 -1
  18. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  19. package/docs/api/functions/generateAnkiNote.md +1 -1
  20. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  21. package/docs/api/functions/getKanji.md +1 -1
  22. package/docs/api/functions/getKanjiExtended.md +1 -1
  23. package/docs/api/functions/getWord.md +1 -1
  24. package/docs/api/functions/isStringArray.md +1 -1
  25. package/docs/api/functions/isValidArray.md +1 -1
  26. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  27. package/docs/api/functions/shuffleArray.md +1 -1
  28. package/docs/api/functions/synthesizeSpeech.md +13 -25
  29. package/docs/api/interfaces/DictKanji.md +5 -5
  30. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  31. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  32. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  33. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  34. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  35. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  36. package/docs/api/interfaces/DictMeaning.md +11 -11
  37. package/docs/api/interfaces/DictRadical.md +4 -4
  38. package/docs/api/interfaces/DictReading.md +5 -5
  39. package/docs/api/interfaces/DictWord.md +8 -8
  40. package/docs/api/interfaces/ExamplePart.md +7 -7
  41. package/docs/api/interfaces/GlossSpecificNumber.md +3 -3
  42. package/docs/api/interfaces/Grammar.md +15 -15
  43. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  44. package/docs/api/interfaces/Kana.md +11 -11
  45. package/docs/api/interfaces/Kanji.md +22 -22
  46. package/docs/api/interfaces/KanjiComponent.md +3 -3
  47. package/docs/api/interfaces/KanjiForm.md +4 -4
  48. package/docs/api/interfaces/NoteAndTag.md +3 -3
  49. package/docs/api/interfaces/Phrase.md +5 -5
  50. package/docs/api/interfaces/Radical.md +16 -16
  51. package/docs/api/interfaces/Reading.md +5 -5
  52. package/docs/api/interfaces/ResultEntry.md +7 -7
  53. package/docs/api/interfaces/TanakaExample.md +7 -7
  54. package/docs/api/interfaces/Translation.md +3 -3
  55. package/docs/api/interfaces/UsefulRegExps.md +8 -20
  56. package/docs/api/interfaces/Word.md +14 -14
  57. package/docs/api/type-aliases/Dict.md +1 -1
  58. package/docs/api/type-aliases/DictName.md +1 -1
  59. package/docs/api/type-aliases/EntryType.md +1 -1
  60. package/docs/api/type-aliases/JLPT.md +1 -1
  61. package/docs/api/type-aliases/Result.md +1 -1
  62. package/package.json +2 -2
  63. package/docs/api/functions/makeSSML.md +0 -33
package/README.md CHANGED
@@ -36,7 +36,7 @@ pnpm add henkan
36
36
  - JMdict, KANJIDIC, Tanaka Corpus, RADK and KRAD conversion
37
37
  - User-friendly schemas for dictionary entries
38
38
  - Anki note generation
39
- - Other useful tools (TTSFree.com audio generation, Japanese RegExps, array checking etc.)
39
+ - Other useful tools (Amazon Polly audio generation, Japanese RegExps, array checking etc.)
40
40
 
41
41
  ---
42
42
 
@@ -55,12 +55,14 @@ const dictContent = fs.readFileSync(dictPath, 'utf-8');
55
55
 
56
56
  const dictWords = convertJMdict(dictContent);
57
57
 
58
+ const jmDict = undefined, id = undefined, kanjiDic = undefined, tanakaCorpus = undefined;
59
+
58
60
  const noteTypeName = 'Word';
59
61
  const deckName = 'Japanese::Vocabulary::No kanji form words';
60
62
 
61
63
  const noKanjiFormWords = dictWords
62
64
  .filter(word => word.kanjiForms === undefined)
63
- .map(word => getWord(undefined, undefined, undefined, undefined, word, noteTypeName, deckName));
65
+ .map(word => getWord(jmDict, id, kanjiDic, tanakaCorpus, word, noteTypeName, deckName));
64
66
 
65
67
  const ankiNotesFile = generateAnkiNotesFile(noKanjiFormWords);
66
68
 
package/dist/index.cjs.js CHANGED
@@ -49,14 +49,10 @@ __export(index_exports, {
49
49
  isValidArray: () => isValidArray,
50
50
  isValidArrayWithFirstElement: () => isValidArrayWithFirstElement,
51
51
  isWord: () => isWord,
52
- makeSSML: () => makeSSML,
53
52
  notSearchedForms: () => notSearchedForms,
54
53
  noteMap: () => noteMap,
55
- numberMap: () => numberMap,
56
54
  regexps: () => regexps,
57
- romajiMap: () => romajiMap,
58
55
  shuffleArray: () => shuffleArray,
59
- symbolMap: () => symbolMap,
60
56
  synthesizeSpeech: () => synthesizeSpeech
61
57
  });
62
58
  module.exports = __toCommonJS(index_exports);
@@ -66,60 +62,11 @@ var regexps = {
66
62
  hiragana: /[\u{3040}-\u{309F}]/u,
67
63
  katakana: /[\u{30A0}-\u{30FF}]/u,
68
64
  kanji: new RegExp("\\p{Script=Han}+", "u"),
69
- scriptSplit: /([\p{sc=Han}]+|[\p{sc=Hiragana}]+|[\p{sc=Katakana}]+|[^\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}]+)/u,
70
65
  regExChars: /[-\/\\^$*+?.()|[\]{}]/,
71
66
  tanakaID: /#ID=(?<id>\d+_\d+)$/,
72
67
  tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
73
68
  tanakaReferenceID: /#(?<entryid>[\d]+)/
74
69
  };
75
- var romajiMap = {
76
- A: "\u30A8\u30FC",
77
- B: "\u30D3\u30FC",
78
- C: "\u30B7\u30FC",
79
- D: "\u30C7\u30A3\u30FC",
80
- E: "\u30A4\u30FC",
81
- F: "\u30A8\u30D5",
82
- G: "\u30B8\u30FC",
83
- H: "\u30A8\u30A4\u30C1",
84
- I: "\u30A2\u30A4",
85
- J: "\u30B8\u30A7\u30FC",
86
- K: "\u30B1\u30FC",
87
- L: "\u30A8\u30EB",
88
- M: "\u30A8\u30E0",
89
- N: "\u30A8\u30CC",
90
- O: "\u30AA\u30FC",
91
- P: "\u30D4\u30FC",
92
- Q: "\u30AD\u30E5\u30FC",
93
- R: "\u30A2\u30FC\u30EB",
94
- S: "\u30A8\u30B9",
95
- T: "\u30C6\u30A3\u30FC",
96
- U: "\u30E6\u30FC",
97
- V: "\u30D6\u30A4",
98
- W: "\u30C0\u30D6\u30EA\u30E5\u30FC",
99
- X: "\u30A8\u30C3\u30AF\u30B9",
100
- Y: "\u30EF\u30A4",
101
- Z: "\u30BC\u30C3\u30C8"
102
- };
103
- var numberMap = {
104
- "0": "\u30BC\u30ED",
105
- "1": "\u30A4\u30C1",
106
- "2": "\u30CB",
107
- "3": "\u30B5\u30F3",
108
- "4": "\u30E8\u30F3",
109
- "5": "\u30B4",
110
- "6": "\u30ED\u30AF",
111
- "7": "\u30CA\u30CA",
112
- "8": "\u30CF\u30C1",
113
- "9": "\u30AD\u30E5\u30A6"
114
- };
115
- var symbolMap = {
116
- "\uFF04": "\u30C9\u30EB",
117
- "%": "\u30D1\u30FC\u30BB\u30F3\u30C8",
118
- "\xA5": "\u30A8\u30F3",
119
- "#": "\u30B7\u30E3\u30FC\u30D7",
120
- "@": "\u30A2\u30C3\u30C8",
121
- "&": "\u30A2\u30F3\u30C9"
122
- };
123
70
  var notSearchedForms = /* @__PURE__ */ new Set([
124
71
  "search-only kana form",
125
72
  "Search-only kana form",
@@ -1187,7 +1134,7 @@ var noteMap = /* @__PURE__ */ new Map([
1187
1134
  var import_libxmljs2 = __toESM(require("libxmljs2"));
1188
1135
  var import_xml2js = __toESM(require("xml2js"));
1189
1136
  var import_iconv_lite = __toESM(require("iconv-lite"));
1190
- var import_node_fetch = __toESM(require("node-fetch"));
1137
+ var import_client_polly = require("@aws-sdk/client-polly");
1191
1138
  var Kuroshiro = require("kuroshiro");
1192
1139
  var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
1193
1140
  function capitalizeString(value) {
@@ -2055,108 +2002,17 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
2055
2002
  throw err;
2056
2003
  }
2057
2004
  }
2058
- var getCharType = (char) => {
2059
- if (regexps.kanji.test(char)) return "kanji";
2060
- if (regexps.hiragana.test(char)) return "hiragana";
2061
- if (regexps.katakana.test(char)) return "katakana";
2062
- return "other";
2063
- };
2064
- var splitByScript = (text) => text.match(regexps.scriptSplit) || [];
2065
- var convertToHiragana = (str) => str.replace(
2066
- regexps.katakana,
2067
- (c) => String.fromCharCode(c.charCodeAt(0) - 96)
2068
- );
2069
- var convertOtherToKatakana = (str) => str.split("").map((c) => {
2070
- if (romajiMap[c.toUpperCase()]) return romajiMap[c.toUpperCase()];
2071
- if (numberMap[c]) return numberMap[c];
2072
- if (symbolMap[c]) return symbolMap[c];
2073
- return c;
2074
- }).join("");
2075
- function makeSSML(formText, fullReading) {
2076
- let ssml = "";
2077
- const allTypes = Array.from(
2078
- formText
2079
- ).map((c) => getCharType(c));
2080
- const uniqueTypes = Array.from(new Set(allTypes));
2081
- if (uniqueTypes.length === 1)
2082
- switch (uniqueTypes[0]) {
2083
- case "kanji":
2084
- ssml = `<speak><phoneme alphabet="x-amazon-yomigana" ph="${fullReading}">${formText}</phoneme></speak>`;
2085
- break;
2086
- case "katakana":
2087
- ssml = `<speak><phoneme alphabet="x-amazon-pron-kana" ph="${formText}">${formText}</phoneme></speak>`;
2088
- break;
2089
- case "hiragana":
2090
- default:
2091
- ssml = `<speak>${formText}</speak>`;
2092
- }
2093
- else {
2094
- const segments = splitByScript(formText);
2095
- let pureKanjiReading = convertToHiragana(fullReading);
2096
- segments.forEach((seg) => {
2097
- const type = getCharType(
2098
- seg[0]
2099
- );
2100
- if (type !== "kanji") {
2101
- const converted = type === "other" ? convertToHiragana(convertOtherToKatakana(seg)) : convertToHiragana(seg);
2102
- pureKanjiReading = pureKanjiReading.replace(converted, "");
2103
- }
2104
- });
2105
- const kanjiSegments = segments.filter(
2106
- (seg) => getCharType(seg[0]) === "kanji"
2107
- );
2108
- let readingPointer = 0;
2109
- const ssmlSegments = segments.map((seg) => {
2110
- const type = getCharType(
2111
- seg[0]
2112
- );
2113
- if (type === "kanji") {
2114
- const expectedLength = pureKanjiReading.length / kanjiSegments.length;
2115
- const allocated = pureKanjiReading.slice(
2116
- readingPointer,
2117
- readingPointer + Math.ceil(expectedLength)
2118
- );
2119
- readingPointer += allocated.length;
2120
- return `<phoneme alphabet="x-amazon-yomigana" ph="${allocated}">${seg}</phoneme>`;
2121
- } else if (type === "katakana")
2122
- return `<phoneme alphabet="x-amazon-pron-kana" ph="${seg}">${seg}</phoneme>`;
2123
- else if (type === "other") {
2124
- const katakanaReading = convertOtherToKatakana(seg);
2125
- return `<phoneme alphabet="x-amazon-pron-kana" ph="${katakanaReading}">${seg}</phoneme>`;
2126
- } else return seg;
2127
- });
2128
- ssml = `<speak>${ssmlSegments.join("")}</speak>`;
2129
- }
2130
- return ssml;
2131
- }
2132
- async function synthesizeSpeech(ssmlText, apiKey, options) {
2005
+ async function synthesizeSpeech(client, input, options) {
2133
2006
  return await new Promise(
2134
2007
  async (resolve, reject) => {
2135
2008
  try {
2136
- const res = await (0, import_node_fetch.default)("https://ttsfree.com/api/v1/tts", {
2137
- method: "POST",
2138
- body: JSON.stringify({
2139
- text: ssmlText,
2140
- ...options
2141
- }),
2142
- headers: {
2143
- "Content-Type": "application/json",
2144
- apikey: apiKey
2145
- }
2009
+ const command = new import_client_polly.SynthesizeSpeechCommand({
2010
+ Text: input,
2011
+ ...options
2146
2012
  });
2147
- if (!res.ok)
2148
- throw new Error(
2149
- `TTS request failed:
2150
- ${res.status}: ${res.statusText}`
2151
- );
2152
- const data = await res.json();
2153
- if (data.status !== "success" || data.mess !== "success" || data.audioData.length === 0)
2154
- throw new Error("Invalid TTS response data");
2155
- const mp3Buffer = Buffer.from(
2156
- data.audioData,
2157
- "base64"
2158
- );
2159
- resolve(mp3Buffer);
2013
+ const response = await client.send(command);
2014
+ const stream = response.AudioStream ? Buffer.from(await response.AudioStream.transformToByteArray()) : null;
2015
+ resolve(stream);
2160
2016
  } catch (err) {
2161
2017
  reject(err);
2162
2018
  }
@@ -2398,14 +2254,10 @@ ${ankiNotes}`;
2398
2254
  isValidArray,
2399
2255
  isValidArrayWithFirstElement,
2400
2256
  isWord,
2401
- makeSSML,
2402
2257
  notSearchedForms,
2403
2258
  noteMap,
2404
- numberMap,
2405
2259
  regexps,
2406
- romajiMap,
2407
2260
  shuffleArray,
2408
- symbolMap,
2409
2261
  synthesizeSpeech
2410
2262
  });
2411
2263
  //# sourceMappingURL=index.cjs.js.map