npm - henkan - Versions diffs - 0.6.1 → 0.8.0 - Mend

henkan 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

package/README.md +4 -2
package/dist/index.cjs.js +53 -165
package/dist/index.cjs.js.map +3 -3
package/dist/index.mjs +53 -163
package/dist/index.mjs.map +2 -2
package/dist/types/constants.d.ts +0 -3
package/dist/types/constants.d.ts.map +1 -1
package/dist/types/types.d.ts +21 -6
package/dist/types/types.d.ts.map +1 -1
package/dist/types/utils.d.ts +10 -14
package/dist/types/utils.d.ts.map +1 -1
package/docs/api/README.md +1 -1
package/docs/api/functions/capitalizeString.md +1 -1
package/docs/api/functions/convertJMdict.md +1 -1
package/docs/api/functions/convertKanjiDic.md +1 -1
package/docs/api/functions/convertKradFile.md +1 -1
package/docs/api/functions/convertRadkFile.md +1 -1
package/docs/api/functions/convertTanakaCorpus.md +1 -1
package/docs/api/functions/generateAnkiNote.md +1 -1
package/docs/api/functions/generateAnkiNotesFile.md +1 -1
package/docs/api/functions/getKanji.md +1 -1
package/docs/api/functions/getKanjiExtended.md +1 -1
package/docs/api/functions/getWord.md +1 -1
package/docs/api/functions/isStringArray.md +1 -1
package/docs/api/functions/isValidArray.md +1 -1
package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
package/docs/api/functions/shuffleArray.md +1 -1
package/docs/api/functions/synthesizeSpeech.md +25 -13
package/docs/api/interfaces/DictKanji.md +5 -5
package/docs/api/interfaces/DictKanjiForm.md +4 -4
package/docs/api/interfaces/DictKanjiMisc.md +5 -5
package/docs/api/interfaces/DictKanjiReading.md +3 -3
package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
package/docs/api/interfaces/DictMeaning.md +11 -11
package/docs/api/interfaces/DictRadical.md +4 -4
package/docs/api/interfaces/DictReading.md +5 -5
package/docs/api/interfaces/DictWord.md +8 -8
package/docs/api/interfaces/ExamplePart.md +7 -7
package/docs/api/interfaces/GlossSpecificNumber.md +31 -0
package/docs/api/interfaces/Grammar.md +15 -15
package/docs/api/interfaces/GrammarMeaning.md +3 -3
package/docs/api/interfaces/Kana.md +11 -11
package/docs/api/interfaces/Kanji.md +22 -22
package/docs/api/interfaces/KanjiComponent.md +3 -3
package/docs/api/interfaces/KanjiForm.md +4 -4
package/docs/api/interfaces/NoteAndTag.md +3 -3
package/docs/api/interfaces/Phrase.md +16 -4
package/docs/api/interfaces/Radical.md +16 -16
package/docs/api/interfaces/Reading.md +5 -5
package/docs/api/interfaces/ResultEntry.md +7 -7
package/docs/api/interfaces/TanakaExample.md +16 -6
package/docs/api/interfaces/Translation.md +3 -3
package/docs/api/interfaces/UsefulRegExps.md +8 -20
package/docs/api/interfaces/Word.md +14 -14
package/docs/api/type-aliases/Dict.md +1 -1
package/docs/api/type-aliases/DictName.md +1 -1
package/docs/api/type-aliases/EntryType.md +1 -1
package/docs/api/type-aliases/JLPT.md +1 -1
package/docs/api/type-aliases/Result.md +1 -1
package/package.json +5 -5
package/docs/api/functions/makeSSML.md +0 -33

package/README.md CHANGED

@@ -36,7 +36,7 @@ pnpm add henkan
  - JMdict, KANJIDIC, Tanaka Corpus, RADK and KRAD conversion
  - User-friendly schemas for dictionary entries
  - Anki note generation
- - Other useful tools (AWS Polly audio generation, Japanese RegExps, array checking etc.)
+ - Other useful tools (TTSFree.com audio generation, Japanese RegExps, array checking etc.)
 ---
@@ -55,12 +55,14 @@ const dictContent = fs.readFileSync(dictPath, 'utf-8');
 const dictWords = convertJMdict(dictContent);
+const jmDict = undefined, id = undefined, kanjiDic = undefined, tanakaCorpus = undefined;
 const noteTypeName = 'Word';
 const deckName = 'Japanese::Vocabulary::No kanji form words';
 const noKanjiFormWords = dictWords
     .filter(word => word.kanjiForms === undefined)
-    .map(word => getWord(undefined, undefined, undefined, undefined, word, noteTypeName, deckName));
+    .map(word => getWord(jmDict, id, kanjiDic, tanakaCorpus, word, noteTypeName, deckName));
 const ankiNotesFile = generateAnkiNotesFile(noKanjiFormWords);

package/dist/index.cjs.js CHANGED

@@ -49,14 +49,10 @@ __export(index_exports, {
   isValidArray: () => isValidArray,
   isValidArrayWithFirstElement: () => isValidArrayWithFirstElement,
   isWord: () => isWord,
-  makeSSML: () => makeSSML,
   notSearchedForms: () => notSearchedForms,
   noteMap: () => noteMap,
-  numberMap: () => numberMap,
   regexps: () => regexps,
-  romajiMap: () => romajiMap,
   shuffleArray: () => shuffleArray,
-  symbolMap: () => symbolMap,
   synthesizeSpeech: () => synthesizeSpeech
 });
 module.exports = __toCommonJS(index_exports);
@@ -66,60 +62,11 @@ var regexps = {
   hiragana: /[\u{3040}-\u{309F}]/u,
   katakana: /[\u{30A0}-\u{30FF}]/u,
   kanji: new RegExp("\\p{Script=Han}+", "u"),
-  scriptSplit: /([\p{sc=Han}]+|[\p{sc=Hiragana}]+|[\p{sc=Katakana}]+|[^\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}]+)/u,
   regExChars: /[-\/\\^$*+?.()|[\]{}]/,
   tanakaID: /#ID=(?<id>\d+_\d+)$/,
   tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
   tanakaReferenceID: /#(?<entryid>[\d]+)/
 };
-var romajiMap = {
-  A: "\u30A8\u30FC",
-  B: "\u30D3\u30FC",
-  C: "\u30B7\u30FC",
-  D: "\u30C7\u30A3\u30FC",
-  E: "\u30A4\u30FC",
-  F: "\u30A8\u30D5",
-  G: "\u30B8\u30FC",
-  H: "\u30A8\u30A4\u30C1",
-  I: "\u30A2\u30A4",
-  J: "\u30B8\u30A7\u30FC",
-  K: "\u30B1\u30FC",
-  L: "\u30A8\u30EB",
-  M: "\u30A8\u30E0",
-  N: "\u30A8\u30CC",
-  O: "\u30AA\u30FC",
-  P: "\u30D4\u30FC",
-  Q: "\u30AD\u30E5\u30FC",
-  R: "\u30A2\u30FC\u30EB",
-  S: "\u30A8\u30B9",
-  T: "\u30C6\u30A3\u30FC",
-  U: "\u30E6\u30FC",
-  V: "\u30D6\u30A4",
-  W: "\u30C0\u30D6\u30EA\u30E5\u30FC",
-  X: "\u30A8\u30C3\u30AF\u30B9",
-  Y: "\u30EF\u30A4",
-  Z: "\u30BC\u30C3\u30C8"
-};
-var numberMap = {
-  "0": "\u30BC\u30ED",
-  "1": "\u30A4\u30C1",
-  "2": "\u30CB",
-  "3": "\u30B5\u30F3",
-  "4": "\u30E8\u30F3",
-  "5": "\u30B4",
-  "6": "\u30ED\u30AF",
-  "7": "\u30CA\u30CA",
-  "8": "\u30CF\u30C1",
-  "9": "\u30AD\u30E5\u30A6"
-};
-var symbolMap = {
-  "\uFF04": "\u30C9\u30EB",
-  "%": "\u30D1\u30FC\u30BB\u30F3\u30C8",
-  "\xA5": "\u30A8\u30F3",
-  "#": "\u30B7\u30E3\u30FC\u30D7",
-  "@": "\u30A2\u30C3\u30C8",
-  "&": "\u30A2\u30F3\u30C9"
-};
 var notSearchedForms = /* @__PURE__ */ new Set([
   "search-only kana form",
   "Search-only kana form",
@@ -1187,7 +1134,7 @@ var noteMap = /* @__PURE__ */ new Map([
 var import_libxmljs2 = __toESM(require("libxmljs2"));
 var import_xml2js = __toESM(require("xml2js"));
 var import_iconv_lite = __toESM(require("iconv-lite"));
-var import_client_polly = require("@aws-sdk/client-polly");
+var import_node_fetch = __toESM(require("node-fetch"));
 var Kuroshiro = require("kuroshiro");
 var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
 function capitalizeString(value) {
@@ -1333,11 +1280,9 @@ function convertJMdict(xmlString, examples) {
               ).map((reading) => reading.reading)
             );
             const kanjiForms2 = entryObj.kanjiForms ? new Set(
-              entryObj.kanjiForms.filter(
-                (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
-                  (note) => notSearchedForms.has(note)
-                )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
-              ).map((kanjiForm) => kanjiForm.form)
+              entryObj.kanjiForms.map(
+                (kanjiForm) => kanjiForm.form
+              )
             ) : void 0;
             let existsExample = false;
             if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
@@ -1519,9 +1464,9 @@ function convertRadkFile(radkBuffer, kanjiDic) {
   try {
     const fileParsed = import_iconv_lite.default.decode(radkBuffer, "euc-jp").split("\n").filter((line) => !line.startsWith("#"));
     const radicals = [];
-    for (let i = 0; i <= fileParsed.length; i++) {
+    for (let i = 0; i < fileParsed.length; i++) {
       const line = fileParsed[i];
-      if (!line) throw new Error("Invalid radkfile2 buffer");
+      if (!line) continue;
       if (line.startsWith("$ ")) {
         const radical = {
           radical: line.charAt(2).trim(),
@@ -1529,7 +1474,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
         };
         let j = i + 1;
         let kanjiLine = fileParsed[j];
-        if (!kanjiLine) throw new Error("Invalid radkfile2 buffer");
+        if (!kanjiLine) continue;
         const kanjiList = [];
         while (kanjiLine && !kanjiLine.startsWith("$ ")) {
           const kanjis = kanjiLine.split("");
@@ -1565,8 +1510,7 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
       const split = line.split(" : ");
       const kanjiChar = split[0];
       const radicalsRow = split[1];
-      if (!kanjiChar || !radicalsRow)
-        throw new Error("Invalid kradfile2 buffer");
+      if (!kanjiChar || !radicalsRow) continue;
       const kanji = {
         ...kanjiChar && radicalsRow && kanjiChar.length === 1 && radicalsRow.length > 0 ? { kanji: kanjiChar } : { kanji: "" },
         radicals: []
@@ -1779,11 +1723,9 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
           ).map((reading) => reading.reading)
         );
         const kanjiForms = word.kanjiForms ? new Set(
-          word.kanjiForms.filter(
-            (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
-              (note) => notSearchedForms.has(note)
-            )) && (word.common === void 0 || kanjiForm.common === true)
-          ).map((kanjiForm) => kanjiForm.kanjiForm)
+          word.kanjiForms.map(
+            (kanjiForm) => kanjiForm.kanjiForm
+          )
         ) : void 0;
         const kanjiFormExamples = [];
         const readingMatchingKanjiFormExamples = [];
@@ -1791,7 +1733,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
         const partParts = /* @__PURE__ */ new Set();
         for (const example of examples)
           for (const part of example.parts) {
-            const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
+            const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading) || part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
             if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
               if (readingAsReadingMatch) {
                 readingMatchingKanjiFormExamples.push(example);
@@ -1804,17 +1746,20 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
             }
             const readingAsBaseFormMatch = readings.has(part.baseForm);
             const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
-            if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
+            if (readingAsBaseFormMatch || referenceIDMatch) {
               readingExamples.push(example);
-              if (readingAsReadingMatch) partParts.add(part.reading);
               if (readingAsBaseFormMatch) partParts.add(part.baseForm);
               if (referenceIDMatch) partParts.add(part.referenceID);
               break;
             }
           }
         const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
-        const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
-        const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
+        const includeReadingThreshold = Math.max(
+          10,
+          Math.round(exampleSize * 0.5)
+        );
+        const includeKanjiFormExamples = word.kanjiForms !== void 0;
+        const includeReadingExamples = readingExamples.length >= includeReadingThreshold && readingExamples.length >= readingMatchingKanjiFormExamples.length && readingExamples.length >= kanjiFormExamples.length || readingExamples.length >= includeReadingThreshold && word.usuallyInKana === true || word.kanjiForms === void 0;
         let wordExamples = [
           ...readingMatchingKanjiFormExamples,
           ...includeKanjiFormExamples ? kanjiFormExamples : [],
@@ -1826,7 +1771,11 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
           outer: for (const example of wordExamples) {
             if (seenPhrases.has(example.phrase)) continue;
             for (const part of example.parts)
-              if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
+              if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || includeReadingExamples && (part.reading && partParts.has(part.reading) || part.inflectedForm && partParts.has(part.inflectedForm) || part.referenceID && partParts.has(part.referenceID)))) {
+                example.glossNumber = {
+                  wordId: word.id,
+                  glossNumber: i + 1
+                };
                 glossSpecificExamples.push(example);
                 seenPhrases.add(example.phrase);
                 break outer;
@@ -1847,7 +1796,8 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
             return {
               phrase: (_a = ex.furigana) != null ? _a : ex.phrase,
               translation: ex.translation,
-              originalPhrase: ex.phrase
+              originalPhrase: ex.phrase,
+              ...ex.glossNumber ? { glossNumber: ex.glossNumber } : {}
             };
           });
       }
@@ -2052,92 +2002,34 @@ function getKanjiExtended(kanjiChar, info, dict, useJpdbWords, jmDict, svgList,
     throw err;
   }
 }
-var getCharType = (char) => {
-  if (regexps.kanji.test(char)) return "kanji";
-  if (regexps.hiragana.test(char)) return "hiragana";
-  if (regexps.katakana.test(char)) return "katakana";
-  return "other";
-};
-var splitByScript = (text) => text.match(regexps.scriptSplit) || [];
-var convertToHiragana = (str) => str.replace(
-  regexps.katakana,
-  (c) => String.fromCharCode(c.charCodeAt(0) - 96)
-);
-var convertOtherToKatakana = (str) => str.split("").map((c) => {
-  if (romajiMap[c.toUpperCase()]) return romajiMap[c.toUpperCase()];
-  if (numberMap[c]) return numberMap[c];
-  if (symbolMap[c]) return symbolMap[c];
-  return c;
-}).join("");
-function makeSSML(formText, fullReading) {
-  let ssml = "";
-  const allTypes = Array.from(
-    formText
-  ).map((c) => getCharType(c));
-  const uniqueTypes = Array.from(new Set(allTypes));
-  if (uniqueTypes.length === 1)
-    switch (uniqueTypes[0]) {
-      case "kanji":
-        ssml = `<speak><phoneme alphabet="x-amazon-yomigana" ph="${fullReading}">${formText}</phoneme></speak>`;
-        break;
-      case "katakana":
-        ssml = `<speak><phoneme alphabet="x-amazon-pron-kana" ph="${formText}">${formText}</phoneme></speak>`;
-        break;
-      case "hiragana":
-      default:
-        ssml = `<speak>${formText}</speak>`;
-    }
-  else {
-    const segments = splitByScript(formText);
-    let pureKanjiReading = convertToHiragana(fullReading);
-    segments.forEach((seg) => {
-      const type = getCharType(
-        seg[0]
-      );
-      if (type !== "kanji") {
-        const converted = type === "other" ? convertToHiragana(convertOtherToKatakana(seg)) : convertToHiragana(seg);
-        pureKanjiReading = pureKanjiReading.replace(converted, "");
-      }
-    });
-    const kanjiSegments = segments.filter(
-      (seg) => getCharType(seg[0]) === "kanji"
-    );
-    let readingPointer = 0;
-    const ssmlSegments = segments.map((seg) => {
-      const type = getCharType(
-        seg[0]
-      );
-      if (type === "kanji") {
-        const expectedLength = pureKanjiReading.length / kanjiSegments.length;
-        const allocated = pureKanjiReading.slice(
-          readingPointer,
-          readingPointer + Math.ceil(expectedLength)
-        );
-        readingPointer += allocated.length;
-        return `<phoneme alphabet="x-amazon-yomigana" ph="${allocated}">${seg}</phoneme>`;
-      } else if (type === "katakana")
-        return `<phoneme alphabet="x-amazon-pron-kana" ph="${seg}">${seg}</phoneme>`;
-      else if (type === "other") {
-        const katakanaReading = convertOtherToKatakana(seg);
-        return `<phoneme alphabet="x-amazon-pron-kana" ph="${katakanaReading}">${seg}</phoneme>`;
-      } else return seg;
-    });
-    ssml = `<speak>${ssmlSegments.join("")}</speak>`;
-  }
-  return ssml;
-}
-async function synthesizeSpeech(client, ssmlText, options) {
+async function synthesizeSpeech(textOrSSML, apiKey, options) {
   return await new Promise(
     async (resolve, reject) => {
       try {
-        const command = new import_client_polly.SynthesizeSpeechCommand({
-          Text: ssmlText,
-          TextType: "ssml",
-          ...options
+        const res = await (0, import_node_fetch.default)("https://ttsfree.com/api/v1/tts", {
+          method: "POST",
+          body: JSON.stringify({
+            text: textOrSSML,
+            ...options
+          }),
+          headers: {
+            "Content-Type": "application/json",
+            apikey: apiKey
+          }
         });
-        const response = await client.send(command);
-        const stream = response.AudioStream ? Buffer.from(await response.AudioStream.transformToByteArray()) : null;
-        resolve(stream);
+        if (!res.ok)
+          throw new Error(
+            `TTS request failed:
+${res.status}: ${res.statusText}`
+          );
+        const data = await res.json();
+        if (data.status !== "success" || data.mess !== "success" || data.audioData.length === 0)
+          throw new Error("Invalid TTS response data");
+        const mp3Buffer = Buffer.from(
+          data.audioData,
+          "base64"
+        );
+        resolve(mp3Buffer);
       } catch (err) {
         reject(err);
       }
@@ -2184,7 +2076,7 @@ function generateAnkiNote(entry) {
         ).join("") : noKanjiForms
       ],
       entry.translations.map(
-        (translationEntry, index) => `${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes)}${index > 2 ? "</details>" : ""}`
+        (translationEntry, index) => `<span class="word word-index${entry.phrases && entry.phrases.some((phrase, index2) => index === index2 && phrase.glossNumber && phrase.glossNumber.wordId === entry.id && phrase.glossNumber.glossNumber === index + 1) ? " gloss-specific" : ""}">${index + 1}</span>${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes)}${index > 2 ? "</details>" : ""}`
       ).join(""),
       entry.kanji ? entry.kanji.map(
         (kanjiEntry) => createEntry(
@@ -2193,11 +2085,11 @@ function generateAnkiNote(entry) {
         )
       ).join("") : '<span class="word word-kanji">(no kanji)</span>',
       entry.phrases ? entry.phrases.map(
-        (phraseEntry) => createEntry(
+        (phraseEntry, index) => `<span class="word word-index${entry.translations.some((_translation, index2) => index === index2 && phraseEntry.glossNumber && phraseEntry.glossNumber.wordId === entry.id && phraseEntry.glossNumber.glossNumber === index2 + 1) ? " gloss-specific" : ""}">${index + 1}</span>${createEntry(
           `<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
           [phraseEntry.translation],
           true
-        )
+        )}`
       ).join("") : '<span class="word word-phrase">(no phrases) (Search on dictionaries!)</span>',
       ...entry.tags && entry.tags.length > 0 ? [
         entry.tags.map(
@@ -2379,14 +2271,10 @@ ${ankiNotes}`;
   isValidArray,
   isValidArrayWithFirstElement,
   isWord,
-  makeSSML,
   notSearchedForms,
   noteMap,
-  numberMap,
   regexps,
-  romajiMap,
   shuffleArray,
-  symbolMap,
   synthesizeSpeech
 });
 //# sourceMappingURL=index.cjs.js.map