@ducci/jarvis 1.0.87 → 1.0.89

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ducci/jarvis",
3
- "version": "1.0.87",
3
+ "version": "1.0.89",
4
4
  "description": "A fully automated agent system that lives on a server.",
5
5
  "main": "./src/index.js",
6
6
  "type": "module",
@@ -699,7 +699,13 @@ export async function startTelegramChannel(config) {
699
699
  // TTS: send audio summary if voice is enabled (config.voiceEnabled checked live, updated by /voice toggle)
700
700
  if (config.voiceEnabled && config.fishAudioApiKey) {
701
701
  try {
702
- const plain = toPlainText(displayText);
702
+ // If the response is a raw JSON blob (format_error recovery), extract the actual text
703
+ let ttsSource = displayText;
704
+ try {
705
+ const parsed = JSON.parse(displayText);
706
+ if (parsed?.response) ttsSource = parsed.response;
707
+ } catch { /* not JSON, use as-is */ }
708
+ const plain = toPlainText(ttsSource);
703
709
  if (plain) {
704
710
  const ttsText = await generateTtsSummary(plain, config);
705
711
  if (ttsText) {
@@ -8,6 +8,7 @@ import { promisify } from 'util';
8
8
  import { writeFile, readFile, unlink } from 'fs/promises';
9
9
  import { tmpdir } from 'os';
10
10
  import { join } from 'path';
11
+ import { Readable } from 'stream';
11
12
  const execAsync = promisify(exec);
12
13
 
13
14
  // System prompt for TTS summary generation.
@@ -26,7 +27,7 @@ Rules:
26
27
  - Choose emotions that fit the content: use (confident) or (calm) for informational answers,
27
28
  (excited) or (satisfied) for completed tasks, (curious) for questions, etc.
28
29
  - You may combine two tags on one sentence: (excited)(soft tone) Great news!
29
- - No markdown, no code blocks, no bullet points plain speech only.
30
+ - Plain text only — no emojis, no markdown, no code blocks, no bullet points, no special characters.
30
31
  - Keep technical jargon minimal; explain concepts simply as you would speak them.
31
32
  - Match the language of the original response.
32
33
 
@@ -89,7 +90,13 @@ export async function textToSpeech(text, config) {
89
90
  throw new Error(`fish.audio TTS ${response.status}: ${errText.slice(0, 200)}`);
90
91
  }
91
92
 
92
- return Buffer.from(await response.arrayBuffer());
93
+ // fish.audio streams audio via chunked transfer encoding — arrayBuffer() only
94
+ // returns the first chunk. Collect all chunks via a Node.js stream.
95
+ const chunks = [];
96
+ for await (const chunk of Readable.fromWeb(response.body)) {
97
+ chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
98
+ }
99
+ return Buffer.concat(chunks);
93
100
  }
94
101
 
95
102
  /**