npm - henkan - Versions diffs - 0.3.2 → 0.4.0 - Mend

henkan 0.3.2 → 0.4.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

package/dist/index.cjs.js +100 -125
package/dist/index.cjs.js.map +3 -3
package/dist/index.mjs +98 -121
package/dist/index.mjs.map +3 -3
package/dist/types/types.d.ts +16 -12
package/dist/types/types.d.ts.map +1 -1
package/dist/types/utils.d.ts +6 -6
package/dist/types/utils.d.ts.map +1 -1
package/docs/api/functions/capitalizeString.md +1 -1
package/docs/api/functions/convertJMdict.md +1 -1
package/docs/api/functions/convertKanjiDic.md +1 -1
package/docs/api/functions/convertKradFile.md +4 -4
package/docs/api/functions/convertRadkFile.md +4 -4
package/docs/api/functions/convertTanakaCorpus.md +1 -1
package/docs/api/functions/generateAnkiNote.md +1 -1
package/docs/api/functions/generateAnkiNotesFile.md +1 -1
package/docs/api/functions/getKanji.md +1 -1
package/docs/api/functions/getKanjiExtended.md +1 -1
package/docs/api/functions/getWord.md +1 -1
package/docs/api/functions/isStringArray.md +1 -1
package/docs/api/functions/isValidArray.md +1 -1
package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
package/docs/api/functions/makeSSML.md +1 -1
package/docs/api/functions/shuffleArray.md +1 -1
package/docs/api/functions/synthesizeSpeech.md +1 -1
package/docs/api/interfaces/DictKanji.md +5 -5
package/docs/api/interfaces/DictKanjiForm.md +4 -4
package/docs/api/interfaces/DictKanjiMisc.md +5 -5
package/docs/api/interfaces/DictKanjiReading.md +3 -3
package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
package/docs/api/interfaces/DictMeaning.md +11 -11
package/docs/api/interfaces/DictRadical.md +4 -4
package/docs/api/interfaces/DictReading.md +5 -5
package/docs/api/interfaces/DictWord.md +29 -19
package/docs/api/interfaces/ExamplePart.md +7 -7
package/docs/api/interfaces/Grammar.md +15 -15
package/docs/api/interfaces/GrammarMeaning.md +3 -3
package/docs/api/interfaces/Kana.md +11 -11
package/docs/api/interfaces/Kanji.md +22 -22
package/docs/api/interfaces/KanjiComponent.md +3 -3
package/docs/api/interfaces/KanjiForm.md +4 -4
package/docs/api/interfaces/NoteAndTag.md +3 -3
package/docs/api/interfaces/Phrase.md +4 -4
package/docs/api/interfaces/Radical.md +16 -16
package/docs/api/interfaces/Reading.md +5 -5
package/docs/api/interfaces/ResultEntry.md +7 -7
package/docs/api/interfaces/TanakaExample.md +17 -7
package/docs/api/interfaces/Translation.md +3 -3
package/docs/api/interfaces/UsefulRegExps.md +9 -9
package/docs/api/interfaces/Word.md +18 -18
package/docs/api/type-aliases/Dict.md +1 -1
package/docs/api/type-aliases/DictName.md +1 -1
package/docs/api/type-aliases/EntryType.md +1 -1
package/docs/api/type-aliases/JLPT.md +1 -1
package/docs/api/type-aliases/Result.md +1 -1
package/package.json +6 -6

package/dist/index.cjs.js CHANGED Viewed

@@ -68,9 +68,9 @@ var regexps = {
   kanji: new RegExp("\\p{Script=Han}+", "u"),
   scriptSplit: /([\p{sc=Han}]+|[\p{sc=Hiragana}]+|[\p{sc=Katakana}]+|[^\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}]+)/u,
   regExChars: /[-\/\\^$*+?.()|[\]{}]/,
-  tanakaID: /#ID=\d+_\d+$/,
+  tanakaID: /#ID=(?<id>\d+_\d+)$/,
   tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
-  tanakaReferenceID: /#([\d]+)/
+  tanakaReferenceID: /#(?<entryid>[\d]+)/
 };
 var romajiMap = {
   A: "\u30A8\u30FC",
@@ -1225,15 +1225,6 @@ function convertJMdict(xmlString, examples) {
     const dict = [];
     import_xml2js.default.parseString(dictParsed, (err, result) => {
       if (err) throw err;
-      const tanakaParts = examples && examples.length > 0 ? new Set(
-        examples.map(
-          (example) => example.parts.map((part) => [
-            part.baseForm,
-            ...part.reading ? [part.reading] : [],
-            ...part.referenceID ? [part.referenceID] : []
-          ])
-        ).flat(2)
-      ) : void 0;
       if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
         for (const entry of result.JMdict.entry) {
           const entryObj = {
@@ -1282,7 +1273,8 @@ function convertJMdict(xmlString, examples) {
               if (readingObj.reading.length > 0)
                 entryObj.readings.push(readingObj);
             }
-          if (isValidArray(meanings))
+          if (isValidArray(meanings)) {
+            let usuallyInKanaMeanings = 0;
             for (const meaning of meanings) {
               const meaningObj = {};
               if (isStringArray(meaning.pos))
@@ -1308,12 +1300,21 @@ function convertJMdict(xmlString, examples) {
               if (isStringArray(meaning.field))
                 meaningObj.fields = meaning.field;
               if (isStringArray(meaning.s_inf)) meaningObj.info = meaning.s_inf;
-              if (isStringArray(meaning.misc)) meaningObj.misc = meaning.misc;
+              if (isStringArray(meaning.misc)) {
+                meaningObj.misc = meaning.misc;
+                if (meaningObj.misc && meaningObj.misc.includes(
+                  "word usually written using kana alone"
+                ))
+                  usuallyInKanaMeanings++;
+              }
               if (isStringArray(meaning.dial))
                 meaningObj.dialects = meaning.dial;
               if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0 || meaningObj.translations && meaningObj.translations.length > 0)
                 entryObj.meanings.push(meaningObj);
             }
+            if (entryObj.meanings.length === usuallyInKanaMeanings)
+              entryObj.usuallyInKana = true;
+          }
           if (examples) {
             const readings2 = new Set(
               entryObj.readings.filter(
@@ -1329,24 +1330,70 @@ function convertJMdict(xmlString, examples) {
                 )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
               ).map((kanjiForm) => kanjiForm.form)
             ) : void 0;
-            let existsExample = false;
-            if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
-              for (const kf of kanjiForms2)
-                if (tanakaParts.has(kf)) {
-                  existsExample = true;
+            const kanjiFormExamples = [];
+            const readingMatchingKanjiFormExamples = [];
+            const readingExamples = [];
+            const partParts = /* @__PURE__ */ new Set();
+            for (const example of examples)
+              for (const part of example.parts) {
+                const readingAsReadingMatch = part.reading !== void 0 && readings2.has(part.reading);
+                if (kanjiForms2 && kanjiForms2.size > 0 && kanjiForms2.has(part.baseForm)) {
+                  if (readingAsReadingMatch) {
+                    readingMatchingKanjiFormExamples.push(example);
+                    partParts.add(part.baseForm).add(part.reading);
+                  } else {
+                    kanjiFormExamples.push(example);
+                    partParts.add(part.baseForm);
+                  }
                   break;
                 }
-            }
-            if (!existsExample && readings2.size > 0 && tanakaParts) {
-              for (const r of readings2)
-                if (tanakaParts.has(r)) {
-                  existsExample = true;
+                const readingAsBaseFormMatch = readings2.has(
+                  part.baseForm
+                );
+                const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
+                if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
+                  readingExamples.push(example);
+                  if (readingAsReadingMatch) partParts.add(part.reading);
+                  if (readingAsBaseFormMatch) partParts.add(part.baseForm);
+                  if (referenceIDMatch) partParts.add(part.referenceID);
                   break;
                 }
+              }
+            const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
+            const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
+            const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
+            let wordExamples = [
+              ...readingMatchingKanjiFormExamples,
+              ...includeKanjiFormExamples ? kanjiFormExamples : [],
+              ...includeReadingExamples ? readingExamples : []
+            ];
+            const glossSpecificExamples = [];
+            const seenPhrases = /* @__PURE__ */ new Set();
+            for (let i = 0; i < entryObj.meanings.length; i++) {
+              outer: for (const example of wordExamples) {
+                if (seenPhrases.has(example.phrase)) continue;
+                for (const part of example.parts)
+                  if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
+                    glossSpecificExamples.push(example);
+                    seenPhrases.add(example.phrase);
+                    break outer;
+                  }
+              }
+              if (glossSpecificExamples.length === 5) break;
+            }
+            if (glossSpecificExamples.length === 5)
+              wordExamples = [...glossSpecificExamples];
+            else if (glossSpecificExamples.length > 0) {
+              const seenPhrases2 = new Set(
+                glossSpecificExamples.map((ex) => ex.phrase)
+              );
+              wordExamples = [
+                ...glossSpecificExamples,
+                ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
+              ];
             }
-            if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
-              existsExample = true;
-            if (existsExample) entryObj.hasPhrases = true;
+            if (wordExamples.length > 0)
+              entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
           }
           if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
             dict.push(entryObj);
@@ -1445,9 +1492,12 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
           let a = tanakaParsed[i];
           let b = tanakaParsed[i + 1];
           if (a && b && a.startsWith("A: ") && b.startsWith("B: ")) {
-            a = a.replace("A: ", "").replace(regexps.tanakaID, "");
+            a = a.replace("A: ", "");
             b = b.replace("B: ", "");
-            const aParts = a.split("	");
+            const idMatch = regexps.tanakaID.exec(a);
+            if (!idMatch || !idMatch.groups || !idMatch.groups["id"])
+              throw new Error(`Invalid phrase ID for ${a}`);
+            const aParts = a.replace(regexps.tanakaID, "").split("	");
             const bParts = b.split(" ").filter((part) => part.trim().length !== 0).map((part) => {
               const partMatches = regexps.tanakaPart.exec(part);
               if (!partMatches || !partMatches.groups || partMatches.length === 0)
@@ -1462,9 +1512,9 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
               if (reading)
                 if (regexps.tanakaReferenceID.test(reading)) {
                   const referenceID = regexps.tanakaReferenceID.exec(reading);
-                  if (!referenceID)
+                  if (!referenceID || !referenceID.groups || !referenceID.groups["entryid"])
                     throw new Error(`Invalid reference ID: ${reading}`);
-                  examplePart.referenceID = referenceID[0];
+                  examplePart.referenceID = referenceID.groups["entryid"];
                 } else examplePart.reading = reading;
               if (glossNumber)
                 examplePart.glossNumber = glossNumber.startsWith("0") ? Number.parseInt(glossNumber.substring(1)) : Number.parseInt(glossNumber);
@@ -1485,8 +1535,9 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
                   mode: "furigana"
                 });
               tanakaArray.push({
-                phrase,
-                translation,
+                id: idMatch.groups["id"].trim(),
+                phrase: phrase.trim(),
+                translation: translation.trim(),
                 parts: bParts,
                 ...furigana ? { furigana } : {}
               });
@@ -1611,6 +1662,7 @@ var wordAddNoteArray = (arr, cb) => {
   for (const v of arr) cb(v);
 };
 function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath) {
+  var _a;
   try {
     if (!dictWord && id && dict)
       dictWord = dict.find((entry) => entry.id === id);
@@ -1631,7 +1683,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
             kanjiForm: dictKanjiForm.form,
             ...dictKanjiForm.notes ? {
               notes: dictKanjiForm.notes.map((note) => {
-                var _a;
+                var _a2;
                 const noteAndTag = lookupWordNote(
                   note,
                   void 0,
@@ -1639,7 +1691,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
                   false,
                   note
                 );
-                return capitalizeString((_a = noteAndTag.note) != null ? _a : note);
+                return capitalizeString((_a2 = noteAndTag.note) != null ? _a2 : note);
               })
             } : {},
             ...dictKanjiForm.commonness && dictKanjiForm.commonness.length > 0 ? { common: true } : {}
@@ -1653,7 +1705,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
               (restriction) => `Reading restricted to ${restriction}`
             ) : [],
             ...dictReading.notes ? dictReading.notes.map((note) => {
-              var _a;
+              var _a2;
               const noteAndTag = lookupWordNote(
                 note,
                 void 0,
@@ -1661,13 +1713,12 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
                 false,
                 note
               );
-              return capitalizeString((_a = noteAndTag.note) != null ? _a : note);
+              return capitalizeString((_a2 = noteAndTag.note) != null ? _a2 : note);
             }) : []
           ]
         } : {},
         ...dictReading.commonness && dictReading.commonness.length > 0 ? { common: true } : {}
       }));
-      let usuallyInKanaMeanings = 0;
       word.translations = dictWord.meanings.map((dictMeaning) => {
         if (!dictMeaning.translations)
           throw new Error(`No translations for ${dictWord.id}`);
@@ -1720,11 +1771,10 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
           dictMeaning.info,
           (info) => lookupWordNote(info, notes, word.tags, false, info)
         );
-        wordAddNoteArray(dictMeaning.misc, (misc) => {
-          lookupWordNote(misc, notes, word.tags, false, misc);
-          if (misc.toLowerCase() === "word usually written using kana alone")
-            usuallyInKanaMeanings++;
-        });
+        wordAddNoteArray(
+          dictMeaning.misc,
+          (misc) => lookupWordNote(misc, notes, word.tags, false, misc)
+        );
         for (let i = 0; i < notes.length; i++)
           notes[i] = capitalizeString(notes[i]);
         return {
@@ -1732,8 +1782,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
           notes
         };
       });
-      if (word.translations && word.translations.length === usuallyInKanaMeanings)
-        word.usuallyInKana = true;
+      if (dictWord.usuallyInKana === true) word.usuallyInKana = true;
       if (kanjiDic && word.kanjiForms) {
         word.kanji = [];
         for (const kanjiForm of word.kanjiForms)
@@ -1757,90 +1806,16 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
           }
         if (word.kanji.length === 0) delete word.kanji;
       }
-      if (examples && dictWord.hasPhrases === true) {
-        let pushIfUnique2 = function(ex) {
-          if (!seenPhrases.has(ex.phrase)) {
-            wordExamples.push(ex);
-            seenPhrases.add(ex.phrase);
-          }
-        };
-        var pushIfUnique = pushIfUnique2;
-        const readings = new Set(
-          word.readings.filter(
-            (reading) => (!reading.notes || !reading.notes.some(
-              (note) => notSearchedForms.has(note)
-            )) && (word.common === void 0 || reading.common === true)
-          ).map((reading) => reading.reading)
-        );
-        const kanjiForms = word.kanjiForms ? new Set(
-          word.kanjiForms.filter(
-            (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
-              (note) => notSearchedForms.has(note)
-            )) && (word.common === void 0 || kanjiForm.common === true)
-          ).map((kanjiForm) => kanjiForm.kanjiForm)
-        ) : void 0;
-        const kanjiFormExamples = [];
-        const readingMatchingKanjiFormExamples = [];
-        const readingExamples = [];
-        for (const example of examples)
-          for (const part of example.parts) {
-            const readingMatch = part.reading && readings.has(part.reading) || readings.has(part.baseForm);
-            if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
-              if (readingMatch) readingMatchingKanjiFormExamples.push(example);
-              else kanjiFormExamples.push(example);
-              break;
-            }
-            if (readingMatch || part.referenceID && word.id && part.referenceID === word.id) {
-              readingExamples.push(example);
-              break;
-            }
-          }
-        const exampleSize = (/* @__PURE__ */ new Set([
-          ...readingMatchingKanjiFormExamples,
-          ...kanjiFormExamples,
-          ...readingExamples
-        ])).size;
-        const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
-        const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
-        const seenPhrases = /* @__PURE__ */ new Set();
-        let wordExamples = [];
-        for (const ex of readingMatchingKanjiFormExamples) pushIfUnique2(ex);
-        if (includeKanjiFormExamples)
-          for (const ex of kanjiFormExamples) pushIfUnique2(ex);
-        if (includeReadingExamples)
-          for (const ex of readingExamples) pushIfUnique2(ex);
-        if (word.translations) {
-          const glossSpecificExamples = [];
-          for (let i = 0; i < word.translations.length; i++) {
-            outer: for (const example of wordExamples)
-              for (const part of example.parts)
-                if (part.glossNumber === i + 1) {
-                  glossSpecificExamples.push(example);
-                  break outer;
-                }
-            if (glossSpecificExamples.length === 5) break;
-          }
-          if (glossSpecificExamples.length === 5)
-            wordExamples = glossSpecificExamples;
-          else if (glossSpecificExamples.length > 0) {
-            const seenPhrases2 = new Set(
-              glossSpecificExamples.map((ex) => ex.phrase)
-            );
-            wordExamples = [
-              ...glossSpecificExamples,
-              ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
-            ];
-          }
-        }
-        if (wordExamples.length > 0)
-          word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => {
-            var _a;
-            return {
+      if (examples && dictWord.phraseIDs && dictWord.phraseIDs.length > 0) {
+        word.phrases = [];
+        const phraseIDs = new Set(dictWord.phraseIDs);
+        for (const ex of examples)
+          if (phraseIDs.has(ex.id))
+            word.phrases.push({
               phrase: (_a = ex.furigana) != null ? _a : ex.phrase,
               translation: ex.translation,
               originalPhrase: ex.phrase
-            };
-          });
+            });
       }
       return word;
     } else throw new Error(`Word${id ? ` ${id}` : ""} not found`);