npm - henkan - Versions diffs - 0.3.3 → 0.4.1 - Mend

henkan 0.3.3 → 0.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (58)

package/dist/index.cjs.js +146 -144
package/dist/index.cjs.js.map +3 -3
package/dist/index.mjs +144 -140
package/dist/index.mjs.map +2 -2
package/dist/types/types.d.ts +16 -12
package/dist/types/types.d.ts.map +1 -1
package/dist/types/utils.d.ts +8 -8
package/dist/types/utils.d.ts.map +1 -1
package/docs/api/functions/capitalizeString.md +1 -1
package/docs/api/functions/convertJMdict.md +3 -3
package/docs/api/functions/convertKanjiDic.md +1 -1
package/docs/api/functions/convertKradFile.md +4 -4
package/docs/api/functions/convertRadkFile.md +4 -4
package/docs/api/functions/convertTanakaCorpus.md +1 -1
package/docs/api/functions/generateAnkiNote.md +1 -1
package/docs/api/functions/generateAnkiNotesFile.md +1 -1
package/docs/api/functions/getKanji.md +1 -1
package/docs/api/functions/getKanjiExtended.md +1 -1
package/docs/api/functions/getWord.md +1 -1
package/docs/api/functions/isStringArray.md +1 -1
package/docs/api/functions/isValidArray.md +1 -1
package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
package/docs/api/functions/makeSSML.md +1 -1
package/docs/api/functions/shuffleArray.md +1 -1
package/docs/api/functions/synthesizeSpeech.md +1 -1
package/docs/api/interfaces/DictKanji.md +5 -5
package/docs/api/interfaces/DictKanjiForm.md +4 -4
package/docs/api/interfaces/DictKanjiMisc.md +5 -5
package/docs/api/interfaces/DictKanjiReading.md +3 -3
package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
package/docs/api/interfaces/DictMeaning.md +11 -11
package/docs/api/interfaces/DictRadical.md +4 -4
package/docs/api/interfaces/DictReading.md +5 -5
package/docs/api/interfaces/DictWord.md +29 -19
package/docs/api/interfaces/ExamplePart.md +7 -7
package/docs/api/interfaces/Grammar.md +15 -15
package/docs/api/interfaces/GrammarMeaning.md +3 -3
package/docs/api/interfaces/Kana.md +11 -11
package/docs/api/interfaces/Kanji.md +22 -22
package/docs/api/interfaces/KanjiComponent.md +3 -3
package/docs/api/interfaces/KanjiForm.md +4 -4
package/docs/api/interfaces/NoteAndTag.md +3 -3
package/docs/api/interfaces/Phrase.md +4 -4
package/docs/api/interfaces/Radical.md +16 -16
package/docs/api/interfaces/Reading.md +5 -5
package/docs/api/interfaces/ResultEntry.md +7 -7
package/docs/api/interfaces/TanakaExample.md +17 -7
package/docs/api/interfaces/Translation.md +3 -3
package/docs/api/interfaces/UsefulRegExps.md +9 -9
package/docs/api/interfaces/Word.md +18 -18
package/docs/api/type-aliases/Dict.md +1 -1
package/docs/api/type-aliases/DictName.md +1 -1
package/docs/api/type-aliases/EntryType.md +1 -1
package/docs/api/type-aliases/JLPT.md +1 -1
package/docs/api/type-aliases/Result.md +1 -1
package/package.json +6 -6

package/dist/index.mjs — CHANGED

@@ -12,7 +12,7 @@ var regexps = {
   kanji: new RegExp("\\p{Script=Han}+", "u"),
   scriptSplit: /([\p{sc=Han}]+|[\p{sc=Hiragana}]+|[\p{sc=Katakana}]+|[^\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}]+)/u,
   regExChars: /[-\/\\^$*+?.()|[\]{}]/,
-  tanakaID: /#ID=\d+_\d+$/,
+  tanakaID: /#ID=(?<id>\d+_\d+)$/,
   tanakaPart: /(?<base>[^()\[\]\{\}\s]+)(?:\((?<reading>[\S]+)\))?(?:\[(?<glossnum>[\S]+)\])?(?:\{(?<inflection>[\S]+)\})?/,
   tanakaReferenceID: /#(?<entryid>[\d]+)/
 };
@@ -1168,18 +1168,10 @@ function convertJMdict(xmlString, examples) {
       noent: true,
       recover: false
     });
-    const dict = [];
+    let dict = [];
+    const partMatches = /* @__PURE__ */ new Set();
     xml.parseString(dictParsed, (err, result) => {
       if (err) throw err;
-      const tanakaParts = examples && examples.length > 0 ? new Set(
-        examples.map(
-          (example) => example.parts.map((part) => [
-            part.baseForm,
-            ...part.reading ? [part.reading] : [],
-            ...part.referenceID ? [part.referenceID] : []
-          ])
-        ).flat(2)
-      ) : void 0;
       if (result.JMdict && typeof result.JMdict === "object" && isValidArray(result.JMdict.entry))
         for (const entry of result.JMdict.entry) {
           const entryObj = {
@@ -1228,7 +1220,8 @@ function convertJMdict(xmlString, examples) {
               if (readingObj.reading.length > 0)
                 entryObj.readings.push(readingObj);
             }
-          if (isValidArray(meanings))
+          if (isValidArray(meanings)) {
+            let usuallyInKanaMeanings = 0;
             for (const meaning of meanings) {
               const meaningObj = {};
               if (isStringArray(meaning.pos))
@@ -1254,50 +1247,133 @@ function convertJMdict(xmlString, examples) {
               if (isStringArray(meaning.field))
                 meaningObj.fields = meaning.field;
               if (isStringArray(meaning.s_inf)) meaningObj.info = meaning.s_inf;
-              if (isStringArray(meaning.misc)) meaningObj.misc = meaning.misc;
+              if (isStringArray(meaning.misc)) {
+                meaningObj.misc = meaning.misc;
+                if (meaningObj.misc && meaningObj.misc.includes(
+                  "word usually written using kana alone"
+                ))
+                  usuallyInKanaMeanings++;
+              }
               if (isStringArray(meaning.dial))
                 meaningObj.dialects = meaning.dial;
               if (meaningObj.partOfSpeech && meaningObj.partOfSpeech.length > 0 || meaningObj.translations && meaningObj.translations.length > 0)
                 entryObj.meanings.push(meaningObj);
             }
+            if (entryObj.meanings.length === usuallyInKanaMeanings)
+              entryObj.usuallyInKana = true;
+          }
           if (examples) {
-            const readings2 = new Set(
-              entryObj.readings.filter(
-                (reading) => (!reading.notes || !reading.notes.some(
-                  (note) => notSearchedForms.has(note)
-                )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
-              ).map((reading) => reading.reading)
-            );
-            const kanjiForms2 = entryObj.kanjiForms ? new Set(
-              entryObj.kanjiForms.filter(
-                (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
-                  (note) => notSearchedForms.has(note)
-                )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
-              ).map((kanjiForm) => kanjiForm.form)
-            ) : void 0;
-            let existsExample = false;
-            if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
-              for (const kf of kanjiForms2)
-                if (tanakaParts.has(kf)) {
-                  existsExample = true;
-                  break;
-                }
-            }
-            if (!existsExample && readings2.size > 0 && tanakaParts) {
-              for (const r of readings2)
-                if (tanakaParts.has(r)) {
-                  existsExample = true;
-                  break;
-                }
-            }
-            if (!existsExample && tanakaParts && tanakaParts.has(entryObj.id))
-              existsExample = true;
-            if (existsExample) entryObj.hasPhrases = true;
+            const readings2 = entryObj.readings.filter(
+              (reading) => (!reading.notes || !reading.notes.some(
+                (note) => notSearchedForms.has(note)
+              )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
+            ).map((reading) => reading.reading);
+            const kanjiForms2 = entryObj.kanjiForms ? entryObj.kanjiForms.filter(
+              (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
+                (note) => notSearchedForms.has(note)
+              )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
+            ).map((kanjiForm) => kanjiForm.form) : void 0;
+            for (const reading of readings2) partMatches.add(reading);
+            if (kanjiForms2)
+              for (const kanjiForm of kanjiForms2) partMatches.add(kanjiForm);
+            partMatches.add(entryObj.id);
           }
           if (entryObj.id.length > 0 && entryObj.readings.length > 0 && entryObj.meanings.length > 0)
             dict.push(entryObj);
         }
     });
+    if (examples && dict.length > 0) {
+      const filteredExamples = examples.filter(
+        (ex) => {
+          const parts = ex.parts.flatMap((part) => [
+            part.baseForm,
+            ...part.reading ? [part.reading] : [],
+            ...part.referenceID ? [part.referenceID] : []
+          ]);
+          for (const part of parts) if (partMatches.has(part)) return true;
+          return false;
+        }
+      );
+      dict = dict.map((entryObj) => {
+        const readings = new Set(
+          entryObj.readings.filter(
+            (reading) => (!reading.notes || !reading.notes.some(
+              (note) => notSearchedForms.has(note)
+            )) && (entryObj.isCommon === void 0 || reading.commonness && reading.commonness.length > 0)
+          ).map((reading) => reading.reading)
+        );
+        const kanjiForms = entryObj.kanjiForms ? new Set(
+          entryObj.kanjiForms.filter(
+            (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
+              (note) => notSearchedForms.has(note)
+            )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
+          ).map((kanjiForm) => kanjiForm.form)
+        ) : void 0;
+        const kanjiFormExamples = [];
+        const readingMatchingKanjiFormExamples = [];
+        const readingExamples = [];
+        const partParts = /* @__PURE__ */ new Set();
+        for (const example of filteredExamples)
+          for (const part of example.parts) {
+            const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
+            if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
+              if (readingAsReadingMatch) {
+                readingMatchingKanjiFormExamples.push(example);
+                partParts.add(part.baseForm).add(part.reading);
+              } else {
+                kanjiFormExamples.push(example);
+                partParts.add(part.baseForm);
+              }
+              break;
+            }
+            const readingAsBaseFormMatch = readings.has(part.baseForm);
+            const referenceIDMatch = part.referenceID !== void 0 && entryObj.id !== void 0 && part.referenceID === entryObj.id;
+            if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
+              readingExamples.push(example);
+              if (readingAsReadingMatch) partParts.add(part.reading);
+              if (readingAsBaseFormMatch) partParts.add(part.baseForm);
+              if (referenceIDMatch) partParts.add(part.referenceID);
+              break;
+            }
+          }
+        const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
+        const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
+        const includeReadingExamples = entryObj.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || entryObj.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
+        let wordExamples = [
+          ...readingMatchingKanjiFormExamples,
+          ...includeKanjiFormExamples ? kanjiFormExamples : [],
+          ...includeReadingExamples ? readingExamples : []
+        ];
+        const glossSpecificExamples = [];
+        const seenPhrases = /* @__PURE__ */ new Set();
+        for (let i = 0; i < entryObj.meanings.length; i++) {
+          outer: for (const example of wordExamples) {
+            if (seenPhrases.has(example.phrase)) continue;
+            for (const part of example.parts)
+              if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
+                glossSpecificExamples.push(example);
+                seenPhrases.add(example.phrase);
+                break outer;
+              }
+          }
+          if (glossSpecificExamples.length === 5) break;
+        }
+        if (glossSpecificExamples.length === 5)
+          wordExamples = glossSpecificExamples;
+        else if (glossSpecificExamples.length > 0) {
+          const seenPhrases2 = new Set(
+            glossSpecificExamples.map((ex) => ex.phrase)
+          );
+          wordExamples = [
+            ...glossSpecificExamples,
+            ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
+          ];
+        }
+        if (wordExamples.length > 0)
+          entryObj.phraseIDs = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ex.id);
+        return entryObj;
+      });
+    }
     return dict;
   } catch (err) {
     throw err;
@@ -1391,9 +1467,12 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
           let a = tanakaParsed[i];
           let b = tanakaParsed[i + 1];
           if (a && b && a.startsWith("A: ") && b.startsWith("B: ")) {
-            a = a.replace("A: ", "").replace(regexps.tanakaID, "");
+            a = a.replace("A: ", "");
             b = b.replace("B: ", "");
-            const aParts = a.split("	");
+            const idMatch = regexps.tanakaID.exec(a);
+            if (!idMatch || !idMatch.groups || !idMatch.groups["id"])
+              throw new Error(`Invalid phrase ID for ${a}`);
+            const aParts = a.replace(regexps.tanakaID, "").split("	");
             const bParts = b.split(" ").filter((part) => part.trim().length !== 0).map((part) => {
               const partMatches = regexps.tanakaPart.exec(part);
               if (!partMatches || !partMatches.groups || partMatches.length === 0)
@@ -1431,8 +1510,9 @@ async function convertTanakaCorpus(tanakaString, generateFurigana) {
                   mode: "furigana"
                 });
               tanakaArray.push({
-                phrase,
-                translation,
+                id: idMatch.groups["id"].trim(),
+                phrase: phrase.trim(),
+                translation: translation.trim(),
                 parts: bParts,
                 ...furigana ? { furigana } : {}
               });
@@ -1611,7 +1691,6 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
         } : {},
         ...dictReading.commonness && dictReading.commonness.length > 0 ? { common: true } : {}
       }));
-      let usuallyInKanaMeanings = 0;
       word.translations = dictWord.meanings.map((dictMeaning) => {
         if (!dictMeaning.translations)
           throw new Error(`No translations for ${dictWord.id}`);
@@ -1664,11 +1743,10 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
           dictMeaning.info,
           (info) => lookupWordNote(info, notes, word.tags, false, info)
         );
-        wordAddNoteArray(dictMeaning.misc, (misc) => {
-          lookupWordNote(misc, notes, word.tags, false, misc);
-          if (misc.toLowerCase() === "word usually written using kana alone")
-            usuallyInKanaMeanings++;
-        });
+        wordAddNoteArray(
+          dictMeaning.misc,
+          (misc) => lookupWordNote(misc, notes, word.tags, false, misc)
+        );
         for (let i = 0; i < notes.length; i++)
           notes[i] = capitalizeString(notes[i]);
         return {
@@ -1676,8 +1754,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
           notes
         };
       });
-      if (word.translations && word.translations.length === usuallyInKanaMeanings)
-        word.usuallyInKana = true;
+      if (dictWord.usuallyInKana === true) word.usuallyInKana = true;
       if (kanjiDic && word.kanjiForms) {
         word.kanji = [];
         for (const kanjiForm of word.kanjiForms)
@@ -1701,89 +1778,16 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
           }
         if (word.kanji.length === 0) delete word.kanji;
       }
-      if (examples && dictWord.hasPhrases === true) {
-        const readings = new Set(
-          word.readings.filter(
-            (reading) => (!reading.notes || !reading.notes.some(
-              (note) => notSearchedForms.has(note)
-            )) && (word.common === void 0 || reading.common === true)
-          ).map((reading) => reading.reading)
-        );
-        const kanjiForms = word.kanjiForms ? new Set(
-          word.kanjiForms.filter(
-            (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
-              (note) => notSearchedForms.has(note)
-            )) && (word.common === void 0 || kanjiForm.common === true)
-          ).map((kanjiForm) => kanjiForm.kanjiForm)
-        ) : void 0;
-        const kanjiFormExamples = [];
-        const readingMatchingKanjiFormExamples = [];
-        const readingExamples = [];
-        const partParts = /* @__PURE__ */ new Set();
-        for (const example of examples)
-          for (const part of example.parts) {
-            const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
-            if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
-              if (readingAsReadingMatch) {
-                readingMatchingKanjiFormExamples.push(example);
-                partParts.add(part.baseForm).add(part.reading);
-              } else {
-                kanjiFormExamples.push(example);
-                partParts.add(part.baseForm);
-              }
-              break;
-            }
-            const readingAsBaseFormMatch = readings.has(part.baseForm);
-            const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
-            if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
-              readingExamples.push(example);
-              if (readingAsReadingMatch) partParts.add(part.reading);
-              if (readingAsBaseFormMatch) partParts.add(part.baseForm);
-              if (referenceIDMatch) partParts.add(part.referenceID);
-              break;
-            }
-          }
-        const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
-        const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
-        const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
-        let wordExamples = [
-          ...readingMatchingKanjiFormExamples,
-          ...includeKanjiFormExamples ? kanjiFormExamples : [],
-          ...includeReadingExamples ? readingExamples : []
-        ];
-        if (word.translations) {
-          const glossSpecificExamples = [];
-          const seenPhrases = /* @__PURE__ */ new Set();
-          for (let i = 0; i < word.translations.length; i++) {
-            outer: for (const example of wordExamples) {
-              if (seenPhrases.has(example.phrase)) continue;
-              for (const part of example.parts)
-                if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
-                  glossSpecificExamples.push(example);
-                  seenPhrases.add(example.phrase);
-                  break outer;
-                }
-            }
-            if (glossSpecificExamples.length === 5) break;
-          }
-          if (glossSpecificExamples.length === 5)
-            wordExamples = [...glossSpecificExamples];
-          else if (glossSpecificExamples.length > 0) {
-            const seenPhrases2 = new Set(
-              glossSpecificExamples.map((ex) => ex.phrase)
-            );
-            wordExamples = [
-              ...glossSpecificExamples,
-              ...wordExamples.filter((ex) => !seenPhrases2.has(ex.phrase)).slice(0, 5 - glossSpecificExamples.length)
-            ];
-          }
-        }
-        if (wordExamples.length > 0)
-          word.phrases = (wordExamples.length > 5 ? wordExamples.slice(0, 5) : wordExamples).map((ex) => ({
-            phrase: ex.furigana ?? ex.phrase,
-            translation: ex.translation,
-            originalPhrase: ex.phrase
-          }));
+      if (examples && dictWord.phraseIDs && dictWord.phraseIDs.length > 0) {
+        word.phrases = [];
+        const phraseIDs = new Set(dictWord.phraseIDs);
+        for (const ex of examples)
+          if (phraseIDs.has(ex.id))
+            word.phrases.push({
+              phrase: ex.furigana ?? ex.phrase,
+              translation: ex.translation,
+              originalPhrase: ex.phrase
+            });
       }
       return word;
     } else throw new Error(`Word${id ? ` ${id}` : ""} not found`);