@inceptionstack/roundhouse 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -188,7 +188,6 @@ Without a config file, defaults are used with env vars (`TELEGRAM_BOT_TOKEN`, `B
188
188
  | `chat.notifyChatIds` | Telegram chat IDs to notify on startup (env: `NOTIFY_CHAT_IDS`) |
189
189
  | `chat.adapters.telegram` | `{ "mode": "polling" \| "webhook" \| "auto" }` |
190
190
  | `voice.stt.enabled` | Enable automatic voice transcription (default: off unless configured) |
191
- | `voice.stt.autoInstall` | Auto-install whisper via pip3 if missing (default: false) |
192
191
  | `voice.stt.chain` | STT provider chain, e.g. `["whisper"]` |
193
192
  | `voice.stt.providers.whisper` | `{ "model": "small", "timeoutMs": 30000 }` |
194
193
 
@@ -307,7 +306,7 @@ Roundhouse can automatically transcribe voice messages using [OpenAI Whisper](ht
307
306
  pip install openai-whisper
308
307
  ```
309
308
 
310
- Or set `autoInstall: true` in config to have roundhouse install whisper automatically on first voice message.
309
+ If whisper/ffmpeg aren't installed when a voice message arrives, roundhouse automatically injects a prompt into the agent's turn asking it to install the missing dependencies. The user is notified that setup is in progress.
311
310
 
312
311
  **Enable in config:**
313
312
  ```json
@@ -316,7 +315,6 @@ Or set `autoInstall: true` in config to have roundhouse install whisper automati
316
315
  "stt": {
317
316
  "enabled": true,
318
317
  "mode": "on",
319
- "autoInstall": true,
320
318
  "chain": ["whisper"],
321
319
  "autoTranscribe": {
322
320
  "voiceMessages": true,
package/architecture.md CHANGED
@@ -192,7 +192,6 @@ gateway.config.json
192
192
  └── stt
193
193
  ├── enabled: true
194
194
  ├── mode: "on" | "off"
195
- ├── autoInstall: false # auto-install whisper via pip3
196
195
  ├── chain: ["whisper"] # Provider chain (try in order)
197
196
  ├── autoTranscribe
198
197
  │ ├── voiceMessages: true
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inceptionstack/roundhouse",
3
- "version": "0.5.5",
3
+ "version": "0.5.7",
4
4
  "type": "module",
5
5
  "description": "Multi-platform chat gateway that routes messages through a configured AI agent",
6
6
  "license": "MIT",
package/src/cli/cli.ts CHANGED
@@ -155,6 +155,11 @@ async function cmdUpdate() {
155
155
  return;
156
156
  }
157
157
 
158
+ if (result.action === "error") {
159
+ console.error(`[roundhouse] Update failed: ${result.error}`);
160
+ process.exit(1);
161
+ }
162
+
158
163
  console.log(`[roundhouse] Updated to v${result.latestVersion}`);
159
164
 
160
165
  const svc = getServiceManager();
@@ -34,7 +34,7 @@ export const systemChecks: DoctorCheck[] = [
34
34
  return {
35
35
  id: "pip3", category: "system", name: "pip3", summary: ver ? ver.split(" ")[1] ?? ver : "not found",
36
36
  status: ver ? "pass" : "warn",
37
- details: !ver ? ["Needed for whisper STT auto-install"] : undefined,
37
+ details: !ver ? ["Used by agent to install whisper for STT"] : undefined,
38
38
  };
39
39
  }),
40
40
 
@@ -603,8 +603,8 @@ export async function stepPostflight(logger: StepLog): Promise<void> {
603
603
 
604
604
  if (platform() === "linux" || process.env.ROUNDHOUSE_VOICE === "1") {
605
605
  if (!whichSync("whisper")) {
606
- logger.warn("whisper not found — STT will auto-install on first voice message");
607
- logger.log(" Pre-install: pip3 install openai-whisper");
606
+ logger.warn("whisper not found — agent will be prompted to install on first voice message");
607
+ logger.log(" Or pre-install manually: pip3 install --user openai-whisper");
608
608
  } else {
609
609
  logger.ok("whisper available");
610
610
  }
package/src/cli/update.ts CHANGED
@@ -10,14 +10,20 @@ import { execSync } from "node:child_process";
10
10
  import { readFileSync, writeFileSync } from "node:fs";
11
11
  import { provisionBundle } from "../provisioning/bundle";
12
12
 
13
+ const GLOBAL_PI_EXTENSION_PACKAGES = [
14
+ "@inceptionstack/pi-hard-no",
15
+ "@inceptionstack/pi-branch-enforcer",
16
+ ];
17
+
13
18
  export interface UpdateProgress {
14
19
  update(text: string): Promise<void>;
15
20
  }
16
21
 
17
22
  export interface UpdateResult {
18
- action: "already-latest" | "updated";
23
+ action: "already-latest" | "updated" | "error";
19
24
  currentVersion: string;
20
25
  latestVersion?: string;
26
+ error?: string;
21
27
  }
22
28
 
23
29
  /**
@@ -30,21 +36,57 @@ export async function performUpdate(progress: UpdateProgress): Promise<UpdateRes
30
36
  const currentVersion = pkg.default?.version ?? "unknown";
31
37
 
32
38
  // Check latest version on npm
33
- const latestVersion = execSync("npm view @inceptionstack/roundhouse version 2>/dev/null", {
34
- timeout: 30_000,
35
- encoding: "utf8",
36
- }).trim();
39
+ let latestVersion: string;
40
+ try {
41
+ latestVersion = execSync("npm view @inceptionstack/roundhouse version 2>/dev/null", {
42
+ timeout: 30_000,
43
+ encoding: "utf8",
44
+ }).trim();
45
+ } catch (e) {
46
+ // Update extensions anyway, but flag that version check failed
47
+ latestVersion = "";
48
+ console.warn("[roundhouse] npm view failed:", e instanceof Error ? e.message : e);
49
+ }
37
50
 
38
- if (!latestVersion || latestVersion === currentVersion) {
51
+ // Always update extensions (even if roundhouse is already latest)
52
+ if (!latestVersion) {
53
+ await progress.update(`⚠️ Version check failed — updating extensions only`);
54
+ }
55
+ for (const extensionPackage of GLOBAL_PI_EXTENSION_PACKAGES) {
56
+ await progress.update(`📦 Updating extension: ${extensionPackage}...`);
57
+
58
+ try {
59
+ execSync(`npm install -g ${extensionPackage}@latest 2>&1`, {
60
+ timeout: 60_000,
61
+ encoding: "utf8",
62
+ });
63
+ await progress.update(`✅ ${extensionPackage} updated`);
64
+ } catch (e) {
65
+ const msg = e instanceof Error ? e.message : String(e);
66
+ console.warn(`[roundhouse] failed to update extension ${extensionPackage}:`, msg);
67
+ await progress.update(`⚠️ Failed to update ${extensionPackage}: ${msg.slice(0, 150)}`);
68
+ }
69
+ }
70
+
71
+ if (!latestVersion) {
72
+ return { action: "error", currentVersion, error: "Version check failed (extensions updated)" };
73
+ }
74
+ if (latestVersion === currentVersion) {
39
75
  return { action: "already-latest", currentVersion };
40
76
  }
41
77
 
42
78
  await progress.update(`📦 Updating v${currentVersion} → v${latestVersion}...`);
43
79
 
44
- execSync("npm install -g @inceptionstack/roundhouse@latest 2>&1", {
45
- timeout: 120_000,
46
- encoding: "utf8",
47
- });
80
+ try {
81
+ execSync("npm install -g @inceptionstack/roundhouse@latest 2>&1", {
82
+ timeout: 120_000,
83
+ encoding: "utf8",
84
+ });
85
+ } catch (e) {
86
+ const msg = e instanceof Error ? e.message : String(e);
87
+ console.warn("[roundhouse] self-update failed:", msg);
88
+ return { action: "error", currentVersion, error: `Self-update failed: ${msg}` };
89
+ }
48
90
 
49
91
  // Provision bundle (skills sync + CLI tools + config)
50
92
  try {
@@ -75,6 +75,8 @@ export async function handleUpdate(ctx: CommandContext): Promise<void> {
75
75
  const result = await performUpdate(progress);
76
76
  if (result.action === "already-latest") {
77
77
  await progress.update(`✅ Already on latest (v${result.currentVersion})`);
78
+ } else if (result.action === "error") {
79
+ await progress.update(`⚠️ ${(result.error ?? "Update failed").slice(0, 200)}`);
78
80
  } else if (result.action === "updated") {
79
81
  await progress.update(`✅ Updated v${result.currentVersion} → v${result.latestVersion}. Restarting...`);
80
82
  console.log(`[roundhouse] updated ${result.currentVersion} -> ${result.latestVersion}, restarting`);
@@ -161,15 +163,37 @@ export async function handleStatus(ctx: CommandContext): Promise<void> {
161
163
  const nodeVer = process.version;
162
164
  const memMB = (process.memoryUsage.rss() / 1024 / 1024).toFixed(1);
163
165
 
166
+ // Check for available update (async, non-blocking)
167
+ let updateAvailable = "";
168
+ try {
169
+ const { exec } = await import("node:child_process");
170
+ const { promisify } = await import("node:util");
171
+ const execAsync = promisify(exec);
172
+ const { stdout } = await execAsync("npm view @inceptionstack/roundhouse version 2>/dev/null", { timeout: 10_000 });
173
+ const latest = stdout.trim().split("\n").pop()!.trim();
174
+ if (latest && /^\d+\.\d+\.\d+$/.test(latest) && latest !== ROUNDHOUSE_VERSION) {
175
+ // Simple semver comparison: split and compare numerically
176
+ const [lM, lm, lp] = latest.split(".").map(Number);
177
+ const [cM, cm, cp] = ROUNDHOUSE_VERSION.split(".").map(Number);
178
+ if (lM > cM || (lM === cM && lm > cm) || (lM === cM && lm === cm && lp > cp)) {
179
+ updateAvailable = latest;
180
+ }
181
+ }
182
+ } catch { /* network unavailable — skip */ }
183
+
164
184
  const info = agent.getInfo ? agent.getInfo(agentThreadId) : {};
165
185
  const agentVersion = info.version ? `v${info.version}` : "";
166
186
  const agentLabel = agentVersion ? `\`${agent.name}\` (${agentVersion})` : `\`${agent.name}\``;
167
187
 
188
+ const versionLine = updateAvailable
189
+ ? `📦 Roundhouse: v${ROUNDHOUSE_VERSION} → ⬆️ v${updateAvailable} available (/update)`
190
+ : `📦 Roundhouse: v${ROUNDHOUSE_VERSION}`;
191
+
168
192
  const lines = [
169
193
  `📊 *Roundhouse Status*`,
170
194
  ``,
171
195
  `🎫 Session: \`${agentThreadId}\``,
172
- `📦 Roundhouse: v${ROUNDHOUSE_VERSION}`,
196
+ versionLine,
173
197
  `🤖 Agent: ${agentLabel}`,
174
198
  ];
175
199
 
@@ -26,6 +26,7 @@ import { TelegramAdapter } from "../transports";
26
26
  import type { TransportAdapter } from "../transports";
27
27
  import { hostname } from "node:os";
28
28
  import { join } from "node:path";
29
+ import { injectToolsSection } from "./tools-inject";
29
30
 
30
31
  /** Bot username for command suffix validation (set during gateway init) */
31
32
  let _botUsername = "";
@@ -166,7 +167,7 @@ export class Gateway {
166
167
  };
167
168
  if (sttConfig.enabled && sttConfig.mode !== "off") {
168
169
  this.sttService = new SttService(sttConfig);
169
- console.log(`[roundhouse] STT enabled (chain: ${sttConfig.chain.join(" -> ")}, autoInstall: ${sttConfig.autoInstall ?? false})`);
170
+ console.log(`[roundhouse] STT enabled (chain: ${sttConfig.chain.join(" -> ")})`);
170
171
  // Prepare providers in background (install + warm model if needed)
171
172
  void this.sttService.prepareInBackground();
172
173
  }
@@ -371,6 +372,11 @@ export class Gateway {
371
372
  await this.enrichWithStt(thread, agentMessage);
372
373
  }
373
374
 
375
+ // Inject tools section (after STT enrichment so voice-only messages get it too)
376
+ if (agentMessage.text) {
377
+ agentMessage.text = injectToolsSection(agentMessage.text);
378
+ }
379
+
374
380
  // Let the agent adapter apply platform-specific message transforms
375
381
  if (agent.prepareMessage) {
376
382
  try {
@@ -455,19 +461,42 @@ export class Gateway {
455
461
  /**
456
462
  * Enrich audio attachments with speech-to-text transcripts.
457
463
  * Updates agentMessage.text for voice-only messages.
464
+ * If STT deps are missing, injects an install-prompt for the agent.
458
465
  */
459
466
  private async enrichWithStt(thread: any, agentMessage: AgentMessage): Promise<void> {
460
467
  if (!this.sttService || !agentMessage.attachments?.length) return;
461
468
  try {
462
469
  await enrichAttachmentsWithTranscripts(agentMessage.attachments, this.sttService, (text) => thread.post(text));
470
+
471
+ // Check if any audio attachments failed transcription
472
+ const hasFailedAudio = agentMessage.attachments.some(
473
+ (a) => a.mediaType === "audio" && a.transcript?.status === "failed",
474
+ );
475
+
463
476
  if (!agentMessage.text) {
464
477
  const transcripts = agentMessage.attachments
465
478
  .filter((a) => a.transcript?.status === "completed" && a.transcript.text)
466
479
  .map((a) => a.transcript!.text);
467
480
  if (transcripts.length > 0) {
468
481
  agentMessage.text = `Voice message transcript: ${transcripts.join(" ")}`;
469
- } else if (agentMessage.attachments.some((a) => a.mediaType === "audio")) {
470
- agentMessage.text = "Voice message attached, but automatic transcription failed.";
482
+ } else if (hasFailedAudio) {
483
+ // STT failed check if deps are missing and inject agent prompt
484
+ const missing = await this.sttService.getMissingDeps();
485
+ if (missing.length > 0) {
486
+ await thread.post(`🎤 Voice transcription not available yet (missing: ${missing.join(", ")}). Asking agent to install...`);
487
+ agentMessage.text = this.buildSttInstallPrompt(missing, agentMessage.attachments);
488
+ } else {
489
+ agentMessage.text = "Voice message attached, but automatic transcription failed.";
490
+ }
491
+ }
492
+ } else if (hasFailedAudio) {
493
+ // User sent text + audio, but audio transcription failed
494
+ const missing = await this.sttService.getMissingDeps();
495
+ if (missing.length > 0) {
496
+ await thread.post(`🎤 Voice transcription not available yet (missing: ${missing.join(", ")}). Asking agent to install...`);
497
+ agentMessage.text += "\n\n" + this.buildSttInstallPrompt(missing, agentMessage.attachments);
498
+ } else {
499
+ agentMessage.text += "\n\n(Note: a voice message was also attached but automatic transcription failed.)";
471
500
  }
472
501
  }
473
502
  } catch (err) {
@@ -475,6 +504,37 @@ export class Gateway {
475
504
  }
476
505
  }
477
506
 
507
+ /**
508
+ * Build a prompt instructing the agent to install missing STT dependencies.
509
+ */
510
+ private buildSttInstallPrompt(missing: string[], attachments: any[]): string {
511
+ const audioFile = attachments.find((a: any) => a.mediaType === "audio");
512
+ const audioPath = audioFile?.localPath ?? "(audio file path from attachment)";
513
+
514
+ const parts: string[] = [
515
+ "The user sent a voice message but speech-to-text transcription failed because dependencies are missing.",
516
+ "",
517
+ `Missing: ${missing.join(", ")}`,
518
+ "",
519
+ "Please install the missing dependencies:",
520
+ ];
521
+
522
+ if (missing.includes("ffmpeg")) {
523
+ parts.push("- ffmpeg: Install to ~/.local/bin/ffmpeg (try: curl static binary from johnvansickle.com for Linux, or `brew install ffmpeg` on macOS)");
524
+ }
525
+ if (missing.includes("whisper")) {
526
+ parts.push("- whisper: Install via `pip3 install --user openai-whisper` or `uv tool install openai-whisper`");
527
+ }
528
+
529
+ parts.push("");
530
+ parts.push("After installing, verify with `whisper --help` and `ffmpeg -version`, then transcribe the voice message:");
531
+ parts.push(` whisper ${JSON.stringify(audioPath)} --model small --language en --output_format txt --output_dir /tmp`);
532
+ parts.push("");
533
+ parts.push("Send the transcription text back to the user. If installation fails, let the user know what went wrong.");
534
+
535
+ return parts.join("\n");
536
+ }
537
+
478
538
  /**
479
539
  * Save attachments, notify skipped, and build the AgentMessage.
480
540
  * Returns null if there's nothing to send (empty text + failed attachments).
@@ -0,0 +1,45 @@
1
+ /**
2
+ * gateway/tools-inject.ts — Inject <tools> section into agent prompts
3
+ *
4
+ * Reads tools.md (bundled or user-customized) and appends it as a
5
+ * structured section so the agent knows what shell tools are available.
6
+ */
7
+
8
+ import { readFileSync } from "node:fs";
9
+ import { join, dirname } from "node:path";
10
+ import { fileURLToPath } from "node:url";
11
+ import { ROUNDHOUSE_DIR } from "../config";
12
+
13
+ let cachedToolsContent: string | null = null;
14
+
15
+ function loadToolsContent(): string {
16
+ if (cachedToolsContent !== null) return cachedToolsContent;
17
+
18
+ // Try user-customized tools.md first, then bundled
19
+ const userPath = join(ROUNDHOUSE_DIR, "tools.md");
20
+ const bundledPath = join(dirname(fileURLToPath(import.meta.url)), "tools.md");
21
+
22
+ try {
23
+ cachedToolsContent = readFileSync(userPath, "utf8");
24
+ } catch {
25
+ try {
26
+ cachedToolsContent = readFileSync(bundledPath, "utf8");
27
+ } catch {
28
+ // Don't cache failure — retry next call
29
+ return "";
30
+ }
31
+ }
32
+ return cachedToolsContent;
33
+ }
34
+
35
+ /**
36
+ * Append a <tools> section to the prompt text.
37
+ * Only injects if tools.md has content.
38
+ */
39
+ export function injectToolsSection(text: string): string {
40
+ const tools = loadToolsContent();
41
+ if (!tools) return text;
42
+ // Escape any tags that could break the XML structure
43
+ const sanitized = tools.trim().replace(/<\/?tools>/gi, (m) => m.replace(/</g, "&lt;").replace(/>/g, "&gt;"));
44
+ return `${text}\n\n<tools>\n${sanitized}\n</tools>`;
45
+ }
@@ -0,0 +1,54 @@
1
+ # Tools
2
+
3
+ Available tools that can be invoked via shell commands during agent turns.
4
+
5
+ ## roundhouse cron add
6
+
7
+ Schedule recurring or one-shot jobs. The user may ask you to "remind me", "check every X", "do Y later", or "schedule Z".
8
+
9
+ **Usage:**
10
+ ```bash
11
+ roundhouse cron add <job-id> --prompt "..." --every "6h"
12
+ roundhouse cron add <job-id> --prompt "..." --cron "0 8 * * *" --tz "America/New_York"
13
+ roundhouse cron add <job-id> --prompt "..." --at "30m"
14
+ ```
15
+
16
+ **Flags:**
17
+ - `--prompt "..."` — What the agent should do when the job fires (required)
18
+ - `--cron "..."` — Cron expression (e.g. "0 9 * * 1-5" = weekdays at 9am)
19
+ - `--every "..."` — Interval (e.g. "6h", "30m", "1d")
20
+ - `--at "..."` — One-shot timer (e.g. "30m", "2h", or ISO datetime)
21
+ - `--tz "..."` — Timezone (default: UTC)
22
+ - `--telegram "..."` — Telegram chat IDs to notify (comma-separated)
23
+ - `--description "..."` — Human-readable description
24
+ - `--timeout "..."` — Max runtime (e.g. "5m", default: 10m)
25
+
26
+ **Examples:**
27
+ ```bash
28
+ # Remind user every morning
29
+ roundhouse cron add morning-checkin --prompt "Good morning! Here's a summary of yesterday's work and today's plan." --cron "0 8 * * *" --tz "Asia/Jerusalem"
30
+
31
+ # Check something every 6 hours
32
+ roundhouse cron add monitor-deploy --prompt "Check if the deployment at https://example.com is healthy. Report any issues." --every "6h"
33
+
34
+ # One-shot reminder in 30 minutes
35
+ roundhouse cron add reminder-123 --prompt "Remind the user: 'Call the dentist'" --at "30m"
36
+ ```
37
+
38
+ **Management:**
39
+ ```bash
40
+ roundhouse cron list # Show all jobs
41
+ roundhouse cron pause <id> # Disable a job
42
+ roundhouse cron resume <id> # Re-enable a job
43
+ roundhouse cron delete <id> # Remove a job
44
+ roundhouse cron trigger <id> # Run immediately
45
+ roundhouse cron runs <id> # Show run history
46
+ ```
47
+
48
+ ## roundhouse cron (via /crons chat command)
49
+
50
+ Users can also manage jobs via Telegram:
51
+ - `/crons` — list all jobs
52
+ - `/crons trigger <id>` — run now
53
+ - `/crons pause <id>` — disable
54
+ - `/crons resume <id>` — enable
@@ -2,7 +2,7 @@
2
2
  * voice/providers/whisper.ts — Local Whisper STT provider
3
3
  *
4
4
  * Runs the whisper CLI via child_process. Auto-detects language.
5
- * Can auto-install whisper via pip3 and warm the model on first use.
5
+ * Reports missing dependencies so the agent can install them.
6
6
  */
7
7
 
8
8
  import { execFile } from "node:child_process";
@@ -21,6 +21,12 @@ const WHISPER_PATHS = [
21
21
  "/usr/bin/whisper",
22
22
  ];
23
23
 
24
+ const FFMPEG_PATHS = [
25
+ join(homedir(), ".local", "bin", "ffmpeg"),
26
+ "/usr/local/bin/ffmpeg",
27
+ "/usr/bin/ffmpeg",
28
+ ];
29
+
24
30
  let cachedBinaryPath: string | null | undefined; // undefined = not checked yet
25
31
 
26
32
  async function findWhisperBinary(): Promise<string | null> {
@@ -33,80 +39,22 @@ async function findWhisperBinary(): Promise<string | null> {
33
39
  return p;
34
40
  } catch {}
35
41
  }
36
- cachedBinaryPath = null;
42
+ // Don't cache null — allows detection after agent installs whisper
37
43
  return null;
38
44
  }
39
45
 
40
- /** Reset cached path so next findWhisperBinary() re-scans */
41
- function invalidateCache(): void {
42
- cachedBinaryPath = undefined;
43
- }
44
-
45
- // ── Auto-install ─────────────────────────────────────
46
-
47
- let pipAvailable: boolean | undefined;
48
-
49
- async function checkPip(): Promise<boolean> {
50
- if (pipAvailable !== undefined) return pipAvailable;
51
- return new Promise<boolean>((resolve) => {
52
- execFile("pip3", ["--version"], { timeout: 5000 }, (err) => {
53
- pipAvailable = !err;
54
- resolve(pipAvailable);
55
- });
56
- });
57
- }
58
-
59
- /**
60
- * Install whisper via pip3 --user. Returns the binary path or null on failure.
61
- */
62
- async function installWhisperWithPip(): Promise<string | null> {
63
- if (!(await checkPip())) {
64
- console.warn("[stt/whisper] pip3 not available — cannot auto-install whisper");
65
- return null;
46
+ async function findFfmpeg(): Promise<string | null> {
47
+ for (const p of FFMPEG_PATHS) {
48
+ try {
49
+ await access(p, constants.X_OK);
50
+ return p;
51
+ } catch {}
66
52
  }
53
+ return null;
54
+ }
67
55
 
68
- console.log("[stt/whisper] installing openai-whisper via pip3...");
69
- return new Promise<string | null>((resolve) => {
70
- execFile(
71
- "pip3",
72
- ["install", "--user", "openai-whisper"],
73
- {
74
- timeout: 300_000, // 5 min for install
75
- maxBuffer: 10 * 1024 * 1024, // 10MB for pip output
76
- env: { ...process.env },
77
- },
78
- async (err, stdout, stderr) => {
79
- if (err) {
80
- console.error("[stt/whisper] pip3 install failed:", err.message);
81
- if (stderr) console.error("[stt/whisper] stderr:", stderr.slice(0, 500));
82
- resolve(null);
83
- return;
84
- }
85
- console.log("[stt/whisper] pip3 install succeeded");
86
-
87
- // Re-discover binary
88
- invalidateCache();
89
- const binary = await findWhisperBinary();
90
- if (!binary) {
91
- console.error("[stt/whisper] installed but binary not found in expected paths");
92
- resolve(null);
93
- return;
94
- }
95
56
 
96
- // Validate with --help
97
- execFile(binary, ["--help"], { timeout: 10_000 }, (helpErr) => {
98
- if (helpErr) {
99
- console.error("[stt/whisper] binary found but --help failed:", helpErr.message);
100
- resolve(null);
101
- } else {
102
- console.log(`[stt/whisper] validated binary at ${binary}`);
103
- resolve(binary);
104
- }
105
- });
106
- },
107
- );
108
- });
109
- }
57
+ // ── Model warmup ─────────────────────────────────────
110
58
 
111
59
  /**
112
60
  * Warm the whisper model by running a tiny transcription.
@@ -171,19 +119,15 @@ async function warmWhisperModel(binary: string, model: string): Promise<boolean>
171
119
 
172
120
  // ── Provider ─────────────────────────────────────────
173
121
 
174
- /** Extended provider with install capability */
122
+ /** Extended provider that reports missing dependencies */
175
123
  export interface InstallableWhisperProvider extends SttProvider {
176
124
  ensureInstalled(): Promise<boolean>;
125
+ getMissingDeps(): Promise<string[]>;
177
126
  }
178
127
 
179
- // Singleton promises to prevent concurrent installs
180
- let installPromise: Promise<string | null> | null = null;
181
- let installFailed = false; // sticky failure to prevent retry spam
182
-
183
128
  export function createWhisperProvider(config: SttProviderConfig): InstallableWhisperProvider {
184
129
  const model = (config.model as string) ?? "small";
185
130
  const timeoutMs = config.timeoutMs ?? 30000;
186
- const autoInstall = config.autoInstall === true; // explicit opt-in only
187
131
  let modelWarmed = false;
188
132
  let warmFailed = false; // sticky failure to prevent warmup retry spam
189
133
  let warmPromise: Promise<boolean> | null = null;
@@ -191,24 +135,14 @@ export function createWhisperProvider(config: SttProviderConfig): InstallableWhi
191
135
  const WHISPER_LANGS = new Set(["af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","yue","zh"]);
192
136
 
193
137
  async function getBinary(): Promise<string | null> {
194
- // Check if already available
195
138
  const existing = await findWhisperBinary();
196
- if (existing) return existing;
197
-
198
- // Try auto-install
199
- if (!autoInstall) return null;
200
- if (installFailed) return null; // sticky failure — don't retry every message
201
-
202
- // Singleton: join existing install or start new one
203
- if (!installPromise) {
204
- installPromise = installWhisperWithPip().then((result) => {
205
- if (!result) installFailed = true;
206
- return result;
207
- }).finally(() => {
208
- installPromise = null;
209
- });
210
- }
211
- return installPromise;
139
+ if (!existing) return null;
140
+
141
+ // Also need ffmpeg
142
+ const ffmpeg = await findFfmpeg();
143
+ if (!ffmpeg) return null;
144
+
145
+ return existing;
212
146
  }
213
147
 
214
148
  return {
@@ -218,6 +152,15 @@ export function createWhisperProvider(config: SttProviderConfig): InstallableWhi
218
152
  return input.mime.startsWith("audio/");
219
153
  },
220
154
 
155
+ async getMissingDeps(): Promise<string[]> {
156
+ const missing: string[] = [];
157
+ const whisper = await findWhisperBinary();
158
+ if (!whisper) missing.push("whisper");
159
+ const ffmpeg = await findFfmpeg();
160
+ if (!ffmpeg) missing.push("ffmpeg");
161
+ return missing;
162
+ },
163
+
221
164
  async ensureInstalled(): Promise<boolean> {
222
165
  const binary = await getBinary();
223
166
  if (!binary) return false;
@@ -236,7 +179,7 @@ export function createWhisperProvider(config: SttProviderConfig): InstallableWhi
236
179
  }
237
180
  } catch {}
238
181
 
239
- // Run warmup — catch everything so it never rejects
182
+ // Run warmup
240
183
  try {
241
184
  const ok = await warmWhisperModel(binary, model);
242
185
  if (!ok) warmFailed = true;
@@ -258,7 +201,7 @@ export function createWhisperProvider(config: SttProviderConfig): InstallableWhi
258
201
  async transcribe(input: SttInput): Promise<TranscriptionResult> {
259
202
  const binary = await getBinary();
260
203
  if (!binary) {
261
- throw new Error("whisper not available and auto-install failed");
204
+ throw new Error("whisper or ffmpeg not available");
262
205
  }
263
206
 
264
207
  const outputDir = join(homedir(), ".roundhouse", "whisper-tmp", randomBytes(6).toString("hex"));
@@ -19,7 +19,6 @@ export class SttService {
19
19
  private config: SttConfig;
20
20
  private initPromise: Promise<void> | null = null;
21
21
  private activeStt: Promise<void> = Promise.resolve(); // global concurrency: 1 at a time
22
- private installNoticeSent = false;
23
22
 
24
23
  constructor(config: SttConfig) {
25
24
  this.config = config;
@@ -52,12 +51,7 @@ export class SttService {
52
51
  }
53
52
 
54
53
  try {
55
- // Pass autoInstall from service-level config into provider config
56
- const mergedProviderConfig = {
57
- ...providerConfig,
58
- autoInstall: providerConfig.autoInstall ?? this.config.autoInstall ?? false,
59
- };
60
- this.providers.push(factory(mergedProviderConfig));
54
+ this.providers.push(factory(providerConfig));
61
55
  console.log(`[stt] loaded provider: ${providerName} (${type})`);
62
56
  } catch (err) {
63
57
  console.warn(`[stt] failed to create provider "${providerName}":`, (err as Error).message);
@@ -97,6 +91,34 @@ export class SttService {
97
91
  }
98
92
  }
99
93
 
94
+ /**
95
+ * Check which STT dependencies are missing.
96
+ * Returns empty array if everything is installed, or names like ["whisper", "ffmpeg"].
97
+ * Note: returns assumed deps when no providers loaded (safe fallback for default config).
98
+ */
99
+ async getMissingDeps(): Promise<string[]> {
100
+ try {
101
+ await this.ensureInitialized();
102
+ } catch {
103
+ return ["whisper", "ffmpeg"]; // Can't initialize = assume all missing
104
+ }
105
+
106
+ if (this.providers.length === 0) {
107
+ // No providers loaded — most likely whisper not installed (default config uses whisper).
108
+ // Config typos are logged during doInit(); agent install prompt is a safe fallback.
109
+ return ["whisper", "ffmpeg"];
110
+ }
111
+
112
+ // Returns deps from first provider that supports getMissingDeps (single-provider today)
113
+ for (const provider of this.providers) {
114
+ const installable = provider as InstallableWhisperProvider;
115
+ if (installable.getMissingDeps && typeof installable.getMissingDeps === "function") {
116
+ return installable.getMissingDeps();
117
+ }
118
+ }
119
+ return [];
120
+ }
121
+
100
122
  /** Should this attachment be auto-transcribed? */
101
123
  shouldTranscribe(attachment: MessageAttachment): boolean {
102
124
  if (!this.config.enabled || this.config.mode === "off") return false;
@@ -141,7 +163,7 @@ export class SttService {
141
163
  const duration = await getAudioDuration(attachment.localPath);
142
164
  if (duration !== null && duration > maxDuration) {
143
165
  console.log(`[stt] skipping ${attachment.name}: duration ${duration.toFixed(1)}s exceeds ${maxDuration}s limit`);
144
- return null;
166
+ return { text: "", provider: "none", approximate: true as const, status: "skipped" as const, error: `Duration ${duration.toFixed(0)}s exceeds ${maxDuration}s limit` };
145
167
  }
146
168
  } catch {}
147
169
  }
@@ -169,18 +191,12 @@ export class SttService {
169
191
  for (const provider of this.providers) {
170
192
  if (!provider.canTranscribe(input)) continue;
171
193
 
172
- // Ensure provider is installed (with one-time user notification)
194
+ // Ensure provider is installed
173
195
  const installable = provider as InstallableWhisperProvider;
174
196
  if (installable.ensureInstalled && typeof installable.ensureInstalled === "function") {
175
197
  try {
176
198
  const isReady = await installable.ensureInstalled();
177
- if (!isReady) {
178
- if (!this.installNoticeSent && notify) {
179
- this.installNoticeSent = true;
180
- try { await notify("🎤 Voice transcription not available. Whisper install or model download failed."); } catch {}
181
- }
182
- continue;
183
- }
199
+ if (!isReady) continue;
184
200
  } catch {
185
201
  continue;
186
202
  }
@@ -239,6 +255,9 @@ export async function enrichAttachmentsWithTranscripts(
239
255
  const transcript = await sttService.tryTranscribe(att, undefined, notify);
240
256
  if (transcript) {
241
257
  att.transcript = transcript;
258
+ } else if (att.mediaType === "audio" && sttService.shouldTranscribe(att)) {
259
+ // Mark as failed so gateway can detect and act
260
+ att.transcript = { text: "", provider: "none", approximate: true, status: "failed", error: "No STT provider available" };
242
261
  }
243
262
  } catch (err) {
244
263
  console.error(`[stt] unexpected error transcribing ${att.name}:`, (err as Error).message);
@@ -267,7 +286,6 @@ async function getAudioDuration(filePath: string): Promise<number | null> {
267
286
  export const DEFAULT_STT_CONFIG: SttConfig = {
268
287
  enabled: true,
269
288
  mode: "on",
270
- autoInstall: true,
271
289
  chain: ["whisper"],
272
290
  autoTranscribe: {
273
291
  voiceMessages: true,
@@ -35,7 +35,7 @@ export interface AttachmentTranscript {
35
35
  language?: string;
36
36
  confidence?: number;
37
37
  approximate: true;
38
- status: "completed" | "failed";
38
+ status: "completed" | "failed" | "skipped";
39
39
  error?: string;
40
40
  durationMs?: number;
41
41
  }
@@ -45,14 +45,12 @@ export interface AttachmentTranscript {
45
45
  export interface SttProviderConfig {
46
46
  type: string;
47
47
  timeoutMs?: number;
48
- autoInstall?: boolean;
49
48
  [key: string]: unknown;
50
49
  }
51
50
 
52
51
  export interface SttConfig {
53
52
  enabled: boolean;
54
53
  mode: "on" | "off";
55
- autoInstall?: boolean;
56
54
  chain: string[];
57
55
  autoTranscribe: {
58
56
  voiceMessages: boolean;