henkan 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +1 -1
  2. package/dist/index.cjs.js +53 -34
  3. package/dist/index.cjs.js.map +3 -3
  4. package/dist/index.mjs +53 -36
  5. package/dist/index.mjs.map +2 -2
  6. package/dist/types/types.d.ts +21 -0
  7. package/dist/types/types.d.ts.map +1 -1
  8. package/dist/types/utils.d.ts +9 -6
  9. package/dist/types/utils.d.ts.map +1 -1
  10. package/docs/api/README.md +1 -0
  11. package/docs/api/functions/capitalizeString.md +1 -1
  12. package/docs/api/functions/convertJMdict.md +1 -1
  13. package/docs/api/functions/convertKanjiDic.md +1 -1
  14. package/docs/api/functions/convertKradFile.md +1 -1
  15. package/docs/api/functions/convertRadkFile.md +1 -1
  16. package/docs/api/functions/convertTanakaCorpus.md +1 -1
  17. package/docs/api/functions/generateAnkiNote.md +1 -1
  18. package/docs/api/functions/generateAnkiNotesFile.md +1 -1
  19. package/docs/api/functions/getKanji.md +1 -1
  20. package/docs/api/functions/getKanjiExtended.md +1 -1
  21. package/docs/api/functions/getWord.md +1 -1
  22. package/docs/api/functions/isStringArray.md +1 -1
  23. package/docs/api/functions/isValidArray.md +1 -1
  24. package/docs/api/functions/isValidArrayWithFirstElement.md +1 -1
  25. package/docs/api/functions/makeSSML.md +1 -1
  26. package/docs/api/functions/shuffleArray.md +1 -1
  27. package/docs/api/functions/synthesizeSpeech.md +25 -13
  28. package/docs/api/interfaces/DictKanji.md +5 -5
  29. package/docs/api/interfaces/DictKanjiForm.md +4 -4
  30. package/docs/api/interfaces/DictKanjiMisc.md +5 -5
  31. package/docs/api/interfaces/DictKanjiReading.md +3 -3
  32. package/docs/api/interfaces/DictKanjiReadingMeaning.md +3 -3
  33. package/docs/api/interfaces/DictKanjiReadingMeaningGroup.md +3 -3
  34. package/docs/api/interfaces/DictKanjiWithRadicals.md +3 -3
  35. package/docs/api/interfaces/DictMeaning.md +11 -11
  36. package/docs/api/interfaces/DictRadical.md +4 -4
  37. package/docs/api/interfaces/DictReading.md +5 -5
  38. package/docs/api/interfaces/DictWord.md +8 -8
  39. package/docs/api/interfaces/ExamplePart.md +7 -7
  40. package/docs/api/interfaces/GlossSpecificNumber.md +31 -0
  41. package/docs/api/interfaces/Grammar.md +15 -15
  42. package/docs/api/interfaces/GrammarMeaning.md +3 -3
  43. package/docs/api/interfaces/Kana.md +11 -11
  44. package/docs/api/interfaces/Kanji.md +22 -22
  45. package/docs/api/interfaces/KanjiComponent.md +3 -3
  46. package/docs/api/interfaces/KanjiForm.md +4 -4
  47. package/docs/api/interfaces/NoteAndTag.md +3 -3
  48. package/docs/api/interfaces/Phrase.md +16 -4
  49. package/docs/api/interfaces/Radical.md +16 -16
  50. package/docs/api/interfaces/Reading.md +5 -5
  51. package/docs/api/interfaces/ResultEntry.md +7 -7
  52. package/docs/api/interfaces/TanakaExample.md +16 -6
  53. package/docs/api/interfaces/Translation.md +3 -3
  54. package/docs/api/interfaces/UsefulRegExps.md +9 -9
  55. package/docs/api/interfaces/Word.md +14 -14
  56. package/docs/api/type-aliases/Dict.md +1 -1
  57. package/docs/api/type-aliases/DictName.md +1 -1
  58. package/docs/api/type-aliases/EntryType.md +1 -1
  59. package/docs/api/type-aliases/JLPT.md +1 -1
  60. package/docs/api/type-aliases/Result.md +1 -1
  61. package/package.json +5 -5
package/README.md CHANGED
@@ -36,7 +36,7 @@ pnpm add henkan
36
36
  - JMdict, KANJIDIC, Tanaka Corpus, RADK and KRAD conversion
37
37
  - User-friendly schemas for dictionary entries
38
38
  - Anki note generation
39
- - Other useful tools (AWS Polly audio generation, Japanese RegExps, array checking etc.)
39
+ - Other useful tools (TTSFree.com audio generation, Japanese RegExps, array checking etc.)
40
40
 
41
41
  ---
42
42
 
package/dist/index.cjs.js CHANGED
@@ -1187,7 +1187,7 @@ var noteMap = /* @__PURE__ */ new Map([
1187
1187
  var import_libxmljs2 = __toESM(require("libxmljs2"));
1188
1188
  var import_xml2js = __toESM(require("xml2js"));
1189
1189
  var import_iconv_lite = __toESM(require("iconv-lite"));
1190
- var import_client_polly = require("@aws-sdk/client-polly");
1190
+ var import_node_fetch = __toESM(require("node-fetch"));
1191
1191
  var Kuroshiro = require("kuroshiro");
1192
1192
  var KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji");
1193
1193
  function capitalizeString(value) {
@@ -1333,11 +1333,9 @@ function convertJMdict(xmlString, examples) {
1333
1333
  ).map((reading) => reading.reading)
1334
1334
  );
1335
1335
  const kanjiForms2 = entryObj.kanjiForms ? new Set(
1336
- entryObj.kanjiForms.filter(
1337
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1338
- (note) => notSearchedForms.has(note)
1339
- )) && (entryObj.isCommon === void 0 || kanjiForm.commonness && kanjiForm.commonness.length > 0)
1340
- ).map((kanjiForm) => kanjiForm.form)
1336
+ entryObj.kanjiForms.map(
1337
+ (kanjiForm) => kanjiForm.form
1338
+ )
1341
1339
  ) : void 0;
1342
1340
  let existsExample = false;
1343
1341
  if (kanjiForms2 && kanjiForms2.size > 0 && tanakaParts) {
@@ -1519,9 +1517,9 @@ function convertRadkFile(radkBuffer, kanjiDic) {
1519
1517
  try {
1520
1518
  const fileParsed = import_iconv_lite.default.decode(radkBuffer, "euc-jp").split("\n").filter((line) => !line.startsWith("#"));
1521
1519
  const radicals = [];
1522
- for (let i = 0; i <= fileParsed.length; i++) {
1520
+ for (let i = 0; i < fileParsed.length; i++) {
1523
1521
  const line = fileParsed[i];
1524
- if (!line) throw new Error("Invalid radkfile2 buffer");
1522
+ if (!line) continue;
1525
1523
  if (line.startsWith("$ ")) {
1526
1524
  const radical = {
1527
1525
  radical: line.charAt(2).trim(),
@@ -1529,7 +1527,7 @@ function convertRadkFile(radkBuffer, kanjiDic) {
1529
1527
  };
1530
1528
  let j = i + 1;
1531
1529
  let kanjiLine = fileParsed[j];
1532
- if (!kanjiLine) throw new Error("Invalid radkfile2 buffer");
1530
+ if (!kanjiLine) continue;
1533
1531
  const kanjiList = [];
1534
1532
  while (kanjiLine && !kanjiLine.startsWith("$ ")) {
1535
1533
  const kanjis = kanjiLine.split("");
@@ -1565,8 +1563,7 @@ function convertKradFile(kradBuffer, kanjiDic, katakanaList) {
1565
1563
  const split = line.split(" : ");
1566
1564
  const kanjiChar = split[0];
1567
1565
  const radicalsRow = split[1];
1568
- if (!kanjiChar || !radicalsRow)
1569
- throw new Error("Invalid kradfile2 buffer");
1566
+ if (!kanjiChar || !radicalsRow) continue;
1570
1567
  const kanji = {
1571
1568
  ...kanjiChar && radicalsRow && kanjiChar.length === 1 && radicalsRow.length > 0 ? { kanji: kanjiChar } : { kanji: "" },
1572
1569
  radicals: []
@@ -1779,11 +1776,9 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1779
1776
  ).map((reading) => reading.reading)
1780
1777
  );
1781
1778
  const kanjiForms = word.kanjiForms ? new Set(
1782
- word.kanjiForms.filter(
1783
- (kanjiForm) => (!kanjiForm.notes || !kanjiForm.notes.some(
1784
- (note) => notSearchedForms.has(note)
1785
- )) && (word.common === void 0 || kanjiForm.common === true)
1786
- ).map((kanjiForm) => kanjiForm.kanjiForm)
1779
+ word.kanjiForms.map(
1780
+ (kanjiForm) => kanjiForm.kanjiForm
1781
+ )
1787
1782
  ) : void 0;
1788
1783
  const kanjiFormExamples = [];
1789
1784
  const readingMatchingKanjiFormExamples = [];
@@ -1791,7 +1786,7 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1791
1786
  const partParts = /* @__PURE__ */ new Set();
1792
1787
  for (const example of examples)
1793
1788
  for (const part of example.parts) {
1794
- const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading);
1789
+ const readingAsReadingMatch = part.reading !== void 0 && readings.has(part.reading) || part.inflectedForm !== void 0 && readings.has(part.inflectedForm);
1795
1790
  if (kanjiForms && kanjiForms.size > 0 && kanjiForms.has(part.baseForm)) {
1796
1791
  if (readingAsReadingMatch) {
1797
1792
  readingMatchingKanjiFormExamples.push(example);
@@ -1804,17 +1799,20 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1804
1799
  }
1805
1800
  const readingAsBaseFormMatch = readings.has(part.baseForm);
1806
1801
  const referenceIDMatch = part.referenceID !== void 0 && word.id !== void 0 && part.referenceID === word.id;
1807
- if (readingAsReadingMatch || readingAsBaseFormMatch || referenceIDMatch) {
1802
+ if (readingAsBaseFormMatch || referenceIDMatch) {
1808
1803
  readingExamples.push(example);
1809
- if (readingAsReadingMatch) partParts.add(part.reading);
1810
1804
  if (readingAsBaseFormMatch) partParts.add(part.baseForm);
1811
1805
  if (referenceIDMatch) partParts.add(part.referenceID);
1812
1806
  break;
1813
1807
  }
1814
1808
  }
1815
1809
  const exampleSize = readingMatchingKanjiFormExamples.length + kanjiFormExamples.length + readingExamples.length;
1816
- const includeKanjiFormExamples = readingMatchingKanjiFormExamples.length < Math.max(2, Math.round(exampleSize * 0.05));
1817
- const includeReadingExamples = word.usuallyInKana === void 0 && includeKanjiFormExamples && readingExamples.length >= Math.max(10, Math.round(exampleSize * 0.15)) || word.usuallyInKana === true && readingExamples.length >= Math.max(2, Math.round(exampleSize * 0.5));
1810
+ const includeReadingThreshold = Math.max(
1811
+ 10,
1812
+ Math.round(exampleSize * 0.5)
1813
+ );
1814
+ const includeKanjiFormExamples = word.kanjiForms !== void 0;
1815
+ const includeReadingExamples = readingExamples.length >= includeReadingThreshold && readingExamples.length >= readingMatchingKanjiFormExamples.length && readingExamples.length >= kanjiFormExamples.length || readingExamples.length >= includeReadingThreshold && word.usuallyInKana === true || word.kanjiForms === void 0;
1818
1816
  let wordExamples = [
1819
1817
  ...readingMatchingKanjiFormExamples,
1820
1818
  ...includeKanjiFormExamples ? kanjiFormExamples : [],
@@ -1826,7 +1824,11 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1826
1824
  outer: for (const example of wordExamples) {
1827
1825
  if (seenPhrases.has(example.phrase)) continue;
1828
1826
  for (const part of example.parts)
1829
- if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || part.reading && partParts.has(part.reading) || part.referenceID && partParts.has(part.referenceID))) {
1827
+ if (part.glossNumber === i + 1 && (partParts.has(part.baseForm) || includeReadingExamples && (part.reading && partParts.has(part.reading) || part.inflectedForm && partParts.has(part.inflectedForm) || part.referenceID && partParts.has(part.referenceID)))) {
1828
+ example.glossNumber = {
1829
+ wordId: word.id,
1830
+ glossNumber: i + 1
1831
+ };
1830
1832
  glossSpecificExamples.push(example);
1831
1833
  seenPhrases.add(example.phrase);
1832
1834
  break outer;
@@ -1847,7 +1849,8 @@ function getWord(dict, id, kanjiDic, examples, dictWord, noteTypeName, deckPath)
1847
1849
  return {
1848
1850
  phrase: (_a = ex.furigana) != null ? _a : ex.phrase,
1849
1851
  translation: ex.translation,
1850
- originalPhrase: ex.phrase
1852
+ originalPhrase: ex.phrase,
1853
+ ...ex.glossNumber ? { glossNumber: ex.glossNumber } : {}
1851
1854
  };
1852
1855
  });
1853
1856
  }
@@ -2126,18 +2129,34 @@ function makeSSML(formText, fullReading) {
2126
2129
  }
2127
2130
  return ssml;
2128
2131
  }
2129
- async function synthesizeSpeech(client, ssmlText, options) {
2132
+ async function synthesizeSpeech(ssmlText, apiKey, options) {
2130
2133
  return await new Promise(
2131
2134
  async (resolve, reject) => {
2132
2135
  try {
2133
- const command = new import_client_polly.SynthesizeSpeechCommand({
2134
- Text: ssmlText,
2135
- TextType: "ssml",
2136
- ...options
2136
+ const res = await (0, import_node_fetch.default)("https://ttsfree.com/api/v1/tts", {
2137
+ method: "POST",
2138
+ body: JSON.stringify({
2139
+ text: ssmlText,
2140
+ ...options
2141
+ }),
2142
+ headers: {
2143
+ "Content-Type": "application/json",
2144
+ apikey: apiKey
2145
+ }
2137
2146
  });
2138
- const response = await client.send(command);
2139
- const stream = response.AudioStream ? Buffer.from(await response.AudioStream.transformToByteArray()) : null;
2140
- resolve(stream);
2147
+ if (!res.ok)
2148
+ throw new Error(
2149
+ `TTS request failed:
2150
+ ${res.status}: ${res.statusText}`
2151
+ );
2152
+ const data = await res.json();
2153
+ if (data.status !== "success" || data.mess !== "success" || data.audioData.length === 0)
2154
+ throw new Error("Invalid TTS response data");
2155
+ const mp3Buffer = Buffer.from(
2156
+ data.audioData,
2157
+ "base64"
2158
+ );
2159
+ resolve(mp3Buffer);
2141
2160
  } catch (err) {
2142
2161
  reject(err);
2143
2162
  }
@@ -2184,7 +2203,7 @@ function generateAnkiNote(entry) {
2184
2203
  ).join("") : noKanjiForms
2185
2204
  ],
2186
2205
  entry.translations.map(
2187
- (translationEntry, index) => `${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes)}${index > 2 ? "</details>" : ""}`
2206
+ (translationEntry, index) => `<span class="word word-index${entry.phrases && entry.phrases.some((phrase, index2) => index === index2 && phrase.glossNumber && phrase.glossNumber.wordId === entry.id && phrase.glossNumber.glossNumber === index + 1) ? " gloss-specific" : ""}">${index + 1}</span>${index > 2 ? "<details><summary>Show translation</summary>" : ""}${createEntry(`<span class="word word-translation">${translationEntry.translation}</span>`, translationEntry.notes)}${index > 2 ? "</details>" : ""}`
2188
2207
  ).join(""),
2189
2208
  entry.kanji ? entry.kanji.map(
2190
2209
  (kanjiEntry) => createEntry(
@@ -2193,11 +2212,11 @@ function generateAnkiNote(entry) {
2193
2212
  )
2194
2213
  ).join("") : '<span class="word word-kanji">(no kanji)</span>',
2195
2214
  entry.phrases ? entry.phrases.map(
2196
- (phraseEntry) => createEntry(
2215
+ (phraseEntry, index) => `<span class="word word-index${entry.translations.some((_translation, index2) => index === index2 && phraseEntry.glossNumber && phraseEntry.glossNumber.wordId === entry.id && phraseEntry.glossNumber.glossNumber === index2 + 1) ? " gloss-specific" : ""}">${index + 1}</span>${createEntry(
2197
2216
  `<span class="word word-phrase"><span class="word word-phrase-original">${phraseEntry.originalPhrase}</span><span class="word word-phrase-furigana">${phraseEntry.phrase}</span></span>`,
2198
2217
  [phraseEntry.translation],
2199
2218
  true
2200
- )
2219
+ )}`
2201
2220
  ).join("") : '<span class="word word-phrase">(no phrases) (Search on dictionaries!)</span>',
2202
2221
  ...entry.tags && entry.tags.length > 0 ? [
2203
2222
  entry.tags.map(