henkan 0.6.0 → 0.7.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +1 -1
  2. package/dist/index.cjs.js +54 -33
  3. package/dist/index.cjs.js.map +3 -3
  4. package/dist/index.mjs +54 -35
  5. package/dist/index.mjs.map +2 -2
  6. package/dist/types/types.d.ts +21 -0
  7. package/dist/types/types.d.ts.map +1 -1
  8. package/dist/types/utils.d.ts +13 -10
  9. package/dist/types/utils.d.ts.map +1 -1
  10. package/docs/api/README.md +1 -0
  11. package/docs/api/functions/capitalizeString.md +1 -1
  12. package/docs/api/functions/convertJMdict.md +1 -1
  13. package/docs/api/functions/convertKanjiDic.md +1 -1
  14. package/docs/api/functions/convertKradFile.md +3 -3
  15. package/docs/api/functions/convertRadkFile.md +3 -3
  16. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  17. package/docs/api/functions/generateAnkiNote.md +1 -1
  18. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  19. package/docs/api/functions/getKanji.md +1 -1
  20. package/docs/api/functions/getKanjiExtended.md +1 -1
  21. package/docs/api/functions/getWord.md +1 -1
  22. package/docs/api/functions/isStringArray.md +1 -1
  23. package/docs/api/functions/isValidArray.md +1 -1
  24. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  25. package/docs/api/functions/makeSSML.md +1 -1
  26. package/docs/api/functions/shuffleArray.md +1 -1
  27. package/docs/api/functions/synthesizeSpeech.md +25 -13
  28. package/docs/api/interfaces/DictKanji.md +5 -5
  29. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  30. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  31. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  32. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  33. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  34. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  35. package/docs/api/interfaces/DictMeaning.md +11 -11
  36. package/docs/api/interfaces/DictRadical.md +4 -4
  37. package/docs/api/interfaces/DictReading.md +5 -5
  38. package/docs/api/interfaces/DictWord.md +8 -8
  39. package/docs/api/interfaces/ExamplePart.md +7 -7
  40. package/docs/api/interfaces/GlossSpecificNumber.md +31 -0
  41. package/docs/api/interfaces/Grammar.md +15 -15
  42. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  43. package/docs/api/interfaces/Kana.md +11 -11
  44. package/docs/api/interfaces/Kanji.md +22 -22
  45. package/docs/api/interfaces/KanjiComponent.md +3 -3
  46. package/docs/api/interfaces/KanjiForm.md +4 -4
  47. package/docs/api/interfaces/NoteAndTag.md +3 -3
  48. package/docs/api/interfaces/Phrase.md +16 -4
  49. package/docs/api/interfaces/Radical.md +16 -16
  50. package/docs/api/interfaces/Reading.md +5 -5
  51. package/docs/api/interfaces/ResultEntry.md +7 -7
  52. package/docs/api/interfaces/TanakaExample.md +16 -6
  53. package/docs/api/interfaces/Translation.md +3 -3
  54. package/docs/api/interfaces/UsefulRegExps.md +9 -9
  55. package/docs/api/interfaces/Word.md +14 -14
  56. package/docs/api/type-aliases/Dict.md +1 -1
  57. package/docs/api/type-aliases/DictName.md +1 -1
  58. package/docs/api/type-aliases/EntryType.md +1 -1
  59. package/docs/api/type-aliases/JLPT.md +1 -1
  60. package/docs/api/type-aliases/Result.md +1 -1
  61. package/package.json +5 -5
package/README.md CHANGED
@@ -36,7 +36,7 @@ pnpm add henkan
36
36
  - JMdict, KANJIDIC, Tanaka Corpus, RADK and KRAD conversion
37
37
  - User-friendly schemas for dictionary entries
38
38
  - Anki note generation
39
- - Other useful tools (AWS Polly audio generation, Japanese RegExps, array checking etc.)
39
+ - Other useful tools (TTSFree.com audio generation, Japanese RegExps, array checking etc.)
40
40
 
41
41
  ---
42
42
 
package/dist/index.cjs.js CHANGED
@@ -1187,7 +1187,7 @@ var noteMap = /* @__PURE__ */ new Map([
1187
1187
  var import_libxmljs2 = __toESM(require("libxmljs2"));
1188
1188
  var import_xml2js = __toESM(require("xml2js"));
1189
1189
  var import_iconv_lite = __toESM(require("iconv-lite"));
1190
- var import_client_polly = require("@aws-sdk/client-polly");
1190
+ var import_node_fetch = __toESM(require("node-fetch"));
1191
1191
  var Kuroshiro = require("kuroshiro");
1192
1192
  var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
1193
1193
  function capitalizeString(value) {
@@ -1333,11 +1333,9 @@ function convertJMdict(xmlString, examples) {
1333
1333
  ).map((reading) => reading.reading)
1334
1334
  );
1335
1335
  const kanjiForms2 = entryObj.kanjiForms ? new Set(
1336
- entryObj.kanjiForms.filter(
1337
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1338
- (note) => notSearchedForms.has(note)
1339
- )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1340
- ).map((kanjiForm) => kanjiForm.form)
1336
+ entryObj.kanjiForms.map(
1337
+ (kanjiForm) => kanjiForm.form
1338
+ )
1341
1339
  ) : void 0;
1342
1340
  let existsExample = false;
1343
1341
  if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
@@ -1519,13 +1517,13 @@ function convertRadkFile(radkBuffer, kanjiDic) {
1519
1517
  try {
1520
1518
  const fileParsed = import_iconv_lite.default.decode(radkBuffer, "euc-jp").split("\n").filter((line) => !line.startsWith("#"));
1521
1519
  const radicals = [];
1522
- for (let i = 0; i <= fileParsed.length; i++) {
1520
+ for (let i = 0; i < fileParsed.length; i++) {
1523
1521
  const line = fileParsed[i];
1524
1522
  if (!line) continue;
1525
1523
  if (line.startsWith("$ ")) {
1526
1524
  const radical = {
1527
- radical: line.charAt(2),
1528
- strokes: line.substring(4)
1525
+ radical: line.charAt(2).trim(),
1526
+ strokes: line.substring(4).trim()
1529
1527
  };
1530
1528
  let j = i + 1;
1531
1529
  let kanjiLine = fileParsed[j];
@@ -1538,6 +1536,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
1538
1536
  (dictKanji) => dictKanji.kanji === kanji
1539
1537
  );
1540
1538
  if (foundKanji) kanjiList.push(foundKanji);
1539
+ else kanjiList.push({ kanji, readingMeaning: [] });
1541
1540
  }
1542
1541
  j++;
1543
1542
  kanjiLine = fileParsed[j];
@@ -1564,7 +1563,7 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
1564
1563
  const split = line.split(" : ");
1565
1564
  const kanjiChar = split[0];
1566
1565
  const radicalsRow = split[1];
1567
- if (!kanjiChar || !radicalsRow) throw new Error("Invalid KRAD entry");
1566
+ if (!kanjiChar || !radicalsRow) continue;
1568
1567
  const kanji = {
1569
1568
  ...kanjiChar && radicalsRow && kanjiChar.length === 1 && radicalsRow.length > 0 ? { kanji: kanjiChar } : { kanji: "" },
1570
1569
  radicals: []
@@ -1777,11 +1776,9 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1777
1776
  ).map((reading) => reading.reading)
1778
1777
  );
1779
1778
  const kanjiForms = word.kanjiForms ? new Set(
1780
- word.kanjiForms.filter(
1781
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1782
- (note) => notSearchedForms.has(note)
1783
- )) && (word.common === void 0 || kanjiForm.common === true)
1784
- ).map((kanjiForm) => kanjiForm.kanjiForm)
1779
+ word.kanjiForms.map(
1780
+ (kanjiForm) => kanjiForm.kanjiForm
1781
+ )
1785
1782
  ) : void 0;
1786
1783
  const kanjiFormExamples = [];
1787
1784
  const readingMatchingKanjiFormExamples = [];
@@ -1789,7 +1786,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1789
1786
  const partParts = /* @__PURE__ */ new Set();
1790
1787
  for (const example of examples)
1791
1788
  for (const part of example.parts) {
1792
- const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
1789
+ const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading) || part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
1793
1790
  if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1794
1791
  if (readingAsReadingMatch) {
1795
1792
  readingMatchingKanjiFormExamples.push(example);
@@ -1802,17 +1799,20 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1802
1799
  }
1803
1800
  const readingAsBaseFormMatch = readings.has(part.baseForm);
1804
1801
  const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
1805
- if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1802
+ if (readingAsBaseFormMatch || referenceIDMatch) {
1806
1803
  readingExamples.push(example);
1807
- if (readingAsReadingMatch) partParts.add(part.reading);
1808
1804
  if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1809
1805
  if (referenceIDMatch) partParts.add(part.referenceID);
1810
1806
  break;
1811
1807
  }
1812
1808
  }
1813
1809
  const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1814
- const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1815
- const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1810
+ const includeReadingThreshold = Math.max(
1811
+ 10,
1812
+ Math.round(exampleSize * 0.5)
1813
+ );
1814
+ const includeKanjiFormExamples = word.kanjiForms !== void 0;
1815
+ const includeReadingExamples = readingExamples.length >= includeReadingThreshold && readingExamples.length >= readingMatchingKanjiFormExamples.length && readingExamples.length >= kanjiFormExamples.length || readingExamples.length >= includeReadingThreshold && word.usuallyInKana === true || word.kanjiForms === void 0;
1816
1816
  let wordExamples = [
1817
1817
  ...readingMatchingKanjiFormExamples,
1818
1818
  ...includeKanjiFormExamples ? kanjiFormExamples : [],
@@ -1824,7 +1824,11 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1824
1824
  outer: for (const example of wordExamples) {
1825
1825
  if (seenPhrases.has(example.phrase)) continue;
1826
1826
  for (const part of example.parts)
1827
- if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1827
+ if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || includeReadingExamples && (part.reading && partParts.has(part.reading) || part.inflectedForm && partParts.has(part.inflectedForm) || part.referenceID && partParts.has(part.referenceID)))) {
1828
+ example.glossNumber = {
1829
+ wordId: word.id,
1830
+ glossNumber: i + 1
1831
+ };
1828
1832
  glossSpecificExamples.push(example);
1829
1833
  seenPhrases.add(example.phrase);
1830
1834
  break outer;
@@ -1845,7 +1849,8 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1845
1849
  return {
1846
1850
  phrase: (_a = ex.furigana) != null ? _a : ex.phrase,
1847
1851
  translation: ex.translation,
1848
- originalPhrase: ex.phrase
1852
+ originalPhrase: ex.phrase,
1853
+ ...ex.glossNumber ? { glossNumber: ex.glossNumber } : {}
1849
1854
  };
1850
1855
  });
1851
1856
  }
@@ -2124,18 +2129,34 @@ function makeSSML(formText, fullReading) {
2124
2129
  }
2125
2130
  return ssml;
2126
2131
  }
2127
- async function synthesizeSpeech(client, ssmlText, options) {
2132
+ async function synthesizeSpeech(ssmlText, apiKey, options) {
2128
2133
  return await new Promise(
2129
2134
  async (resolve, reject) => {
2130
2135
  try {
2131
- const command = new import_client_polly.SynthesizeSpeechCommand({
2132
- Text: ssmlText,
2133
- TextType: "ssml",
2134
- ...options
2136
+ const res = await (0, import_node_fetch.default)("https://ttsfree.com/api/v1/tts", {
2137
+ method: "POST",
2138
+ body: JSON.stringify({
2139
+ text: ssmlText,
2140
+ ...options
2141
+ }),
2142
+ headers: {
2143
+ "Content-Type": "application/json",
2144
+ apikey: apiKey
2145
+ }
2135
2146
  });
2136
- const response = await client.send(command);
2137
- const stream = response.AudioStream ? Buffer.from(await response.AudioStream.transformToByteArray()) : null;
2138
- resolve(stream);
2147
+ if (!res.ok)
2148
+ throw new Error(
2149
+ `TTS request failed:
2150
+ ${res.status}: ${res.statusText}`
2151
+ );
2152
+ const data = await res.json();
2153
+ if (data.status !== "success" || data.mess !== "success" || data.audioData.length === 0)
2154
+ throw new Error("Invalid TTS response data");
2155
+ const mp3Buffer = Buffer.from(
2156
+ data.audioData,
2157
+ "base64"
2158
+ );
2159
+ resolve(mp3Buffer);
2139
2160
  } catch (err) {
2140
2161
  reject(err);
2141
2162
  }
@@ -2182,7 +2203,7 @@ function generateAnkiNote(entry) {
2182
2203
  ).join("") : noKanjiForms
2183
2204
  ],
2184
2205
  entry.translations.map(
2185
- (translationEntry, index) => `${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes)}${index > 2 ? "</details>" : ""}`
2206
+ (translationEntry, index) => `<span class="word word-index${entry.phrases && entry.phrases.some((phrase, index2) => index === index2 && phrase.glossNumber && phrase.glossNumber.wordId === entry.id && phrase.glossNumber.glossNumber === index + 1) ? " gloss-specific" : ""}">${index + 1}</span>${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes)}${index > 2 ? "</details>" : ""}`
2186
2207
  ).join(""),
2187
2208
  entry.kanji ? entry.kanji.map(
2188
2209
  (kanjiEntry) => createEntry(
@@ -2191,11 +2212,11 @@ function generateAnkiNote(entry) {
2191
2212
  )
2192
2213
  ).join("") : '<span class="word word-kanji">(no kanji)</span>',
2193
2214
  entry.phrases ? entry.phrases.map(
2194
- (phraseEntry) => createEntry(
2215
+ (phraseEntry, index) => `<span class="word word-index${entry.translations.some((_translation, index2) => index === index2 && phraseEntry.glossNumber && phraseEntry.glossNumber.wordId === entry.id && phraseEntry.glossNumber.glossNumber === index2 + 1) ? " gloss-specific" : ""}">${index + 1}</span>${createEntry(
2195
2216
  `<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
2196
2217
  [phraseEntry.translation],
2197
2218
  true
2198
- )
2219
+ )}`
2199
2220
  ).join("") : '<span class="word word-phrase">(no phrases) (Search on dictionaries!)</span>',
2200
2221
  ...entry.tags && entry.tags.length > 0 ? [
2201
2222
  entry.tags.map(