voicecc 1.1.35 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/bin/voicecc.js +94 -1
  2. package/dashboard/dist/assets/index-DCeOdulF.js +28 -0
  3. package/dashboard/dist/index.html +1 -1
  4. package/dashboard/routes/agents.ts +28 -8
  5. package/dashboard/routes/browser-call.ts +3 -2
  6. package/dashboard/routes/chat.ts +75 -55
  7. package/dashboard/routes/providers.ts +5 -74
  8. package/dashboard/routes/twilio.ts +104 -5
  9. package/dashboard/routes/voice.ts +98 -0
  10. package/dashboard/server.ts +58 -2
  11. package/package.json +2 -3
  12. package/server/index.ts +96 -8
  13. package/server/services/device-pairing.ts +18 -2
  14. package/server/services/twilio-manager.ts +29 -10
  15. package/dashboard/dist/assets/index-C62C9Gp0.js +0 -28
  16. package/dashboard/dist/audio-processor.js +0 -126
  17. package/server/services/heartbeat.ts +0 -403
  18. package/server/voice/assets/chime.wav +0 -0
  19. package/server/voice/assets/startup.pcm +0 -0
  20. package/server/voice/audio-adapter.ts +0 -60
  21. package/server/voice/audio-inactivity.test.ts +0 -108
  22. package/server/voice/audio-inactivity.ts +0 -91
  23. package/server/voice/browser-audio-playback.test.ts +0 -149
  24. package/server/voice/browser-audio.ts +0 -147
  25. package/server/voice/browser-server.ts +0 -311
  26. package/server/voice/chat-server.ts +0 -236
  27. package/server/voice/chime.test.ts +0 -69
  28. package/server/voice/chime.ts +0 -36
  29. package/server/voice/claude-session.ts +0 -293
  30. package/server/voice/endpointing.ts +0 -163
  31. package/server/voice/mic-vpio +0 -0
  32. package/server/voice/narration.ts +0 -204
  33. package/server/voice/prompt-builder.ts +0 -108
  34. package/server/voice/session-lock.ts +0 -123
  35. package/server/voice/stt-elevenlabs.ts +0 -210
  36. package/server/voice/stt-provider.ts +0 -106
  37. package/server/voice/tts-elevenlabs-hiss.test.ts +0 -183
  38. package/server/voice/tts-elevenlabs.ts +0 -397
  39. package/server/voice/tts-provider.ts +0 -155
  40. package/server/voice/twilio-audio.ts +0 -338
  41. package/server/voice/twilio-server.ts +0 -540
  42. package/server/voice/types.ts +0 -282
  43. package/server/voice/vad.ts +0 -101
  44. package/server/voice/voice-loop-bugs.test.ts +0 -348
  45. package/server/voice/voice-server.ts +0 -129
  46. package/server/voice/voice-session.ts +0 -539
package/dashboard/server.ts CHANGED
@@ -3,6 +3,7 @@
3
3
  *
4
4
  * Thin wiring file that creates the Hono app and starts listening:
5
5
  * - Mount all API route groups under /api/*
6
+ * - Proxy Twilio media WebSocket upgrades to the Python server
6
7
  * - Serve the Vite build output as static files
7
8
  * - SPA fallback for client-side routing
8
9
  */
@@ -15,6 +16,10 @@ import { readFileSync } from "fs";
15
16
  import { access } from "fs/promises";
16
17
  import { join } from "path";
17
18
  import { homedir } from "os";
19
+ import { WebSocket as WsWebSocket, WebSocketServer } from "ws";
20
+
21
+ import type { IncomingMessage } from "http";
22
+ import type { Duplex } from "stream";
18
23
 
19
24
  import { claudeMdRoutes } from "./routes/claude-md.js";
20
25
  import { conversationRoutes } from "./routes/conversations.js";
@@ -30,6 +35,7 @@ import { providersRoutes } from "./routes/providers.js";
30
35
  import { agentsRoutes } from "./routes/agents.js";
31
36
  import { versionRoutes } from "./routes/version.js";
32
37
  import { chatRoutes } from "./routes/chat.js";
38
+ import { voiceRoutes } from "./routes/voice.js";
33
39
  import { loadDeviceTokens } from "../server/services/device-pairing.js";
34
40
 
35
41
  // ============================================================================
@@ -39,6 +45,9 @@ import { loadDeviceTokens } from "../server/services/device-pairing.js";
39
45
  const PORTS_TO_TRY = [3456, 3457, 3458, 3459, 3460];
40
46
  const USER_CLAUDE_MD_PATH = join(homedir(), ".claude", "CLAUDE.md");
41
47
 
48
+ /** Base URL for the Python FastAPI server (for WebSocket + HTTP proxy) */
49
+ const VOICE_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
50
+
42
51
  // ============================================================================
43
52
  // MAIN HANDLERS
44
53
  // ============================================================================
@@ -51,10 +60,19 @@ const USER_CLAUDE_MD_PATH = join(homedir(), ".claude", "CLAUDE.md");
51
60
  export function createApp(): Hono {
52
61
  const app = new Hono();
53
62
 
54
- // Dashboard password protection (HTTP Basic Auth)
63
+ // Dashboard password protection (HTTP Basic Auth).
64
+ // Exclude /chat and /api/chat/* -- those use device-token auth and opening
65
+ // them in a new tab can corrupt the browser's cached Basic Auth credentials.
55
66
  const dashboardPassword = process.env.DASHBOARD_PASSWORD;
56
67
  if (dashboardPassword) {
57
- app.use("*", basicAuth({ username: "admin", password: dashboardPassword }));
68
+ const auth = basicAuth({ username: "admin", password: dashboardPassword });
69
+ app.use("*", async (c, next) => {
70
+ const path = c.req.path;
71
+ if (path === "/chat" || path.startsWith("/api/chat/") || path.startsWith("/api/webrtc/") || path.startsWith("/api/voice/")) {
72
+ return next();
73
+ }
74
+ return auth(c, next);
75
+ });
58
76
  }
59
77
 
60
78
  // API route groups
@@ -72,6 +90,7 @@ export function createApp(): Hono {
72
90
  app.route("/api/agents", agentsRoutes());
73
91
  app.route("/api/version", versionRoutes());
74
92
  app.route("/api/chat", chatRoutes());
93
+ app.route("/api/voice", voiceRoutes());
75
94
 
76
95
  // Status endpoint (user CLAUDE.md conflict check)
77
96
  app.get("/api/status", async (c) => {
@@ -122,6 +141,43 @@ export async function startDashboard(): Promise<number> {
122
141
  resolve();
123
142
  });
124
143
  server.on("error", reject);
144
+
145
+ // Proxy /media/:token WebSocket upgrades to the Python server
146
+ const wss = new WebSocketServer({ noServer: true });
147
+ server.on("upgrade", (req: IncomingMessage, socket: Duplex, head: Buffer) => {
148
+ const url = req.url ?? "";
149
+ const match = url.match(/^\/media\/([a-f0-9-]+)(?:\?.*)?$/);
150
+ if (!match) return; // Not a Twilio media WebSocket -- let it fall through
151
+
152
+ const targetWsUrl = VOICE_API_URL.replace(/^http/, "ws") + url;
153
+ const upstream = new WsWebSocket(targetWsUrl);
154
+
155
+ upstream.on("open", () => {
156
+ wss.handleUpgrade(req, socket, head, (clientWs) => {
157
+ // Bidirectional message proxy
158
+ clientWs.on("message", (data) => {
159
+ if (upstream.readyState === WsWebSocket.OPEN) {
160
+ upstream.send(data);
161
+ }
162
+ });
163
+ upstream.on("message", (data) => {
164
+ if (clientWs.readyState === WsWebSocket.OPEN) {
165
+ clientWs.send(data);
166
+ }
167
+ });
168
+
169
+ clientWs.on("close", () => upstream.close());
170
+ upstream.on("close", () => clientWs.close());
171
+ clientWs.on("error", () => upstream.close());
172
+ upstream.on("error", () => clientWs.close());
173
+ });
174
+ });
175
+
176
+ upstream.on("error", (err) => {
177
+ console.error(`[dashboard] Twilio WS proxy error: ${err.message}`);
178
+ socket.destroy();
179
+ });
180
+ });
125
181
  });
126
182
 
127
183
  setDashboardPort(port);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voicecc",
3
- "version": "1.1.35",
3
+ "version": "1.2.0",
4
4
  "description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
5
5
  "repository": {
6
6
  "type": "git",
@@ -35,11 +35,10 @@
35
35
  "linux"
36
36
  ],
37
37
  "dependencies": {
38
- "@anthropic-ai/claude-agent-sdk": "^0.2.70",
38
+ "@anthropic-ai/claude-agent-sdk": "^0.1.8",
39
39
  "@anthropic-ai/sdk": "^0.39.0",
40
40
  "@hono/node-server": "^1.19.9",
41
41
  "archiver": "^7.0.1",
42
- "avr-vad": "^1.0.0",
43
42
  "cloudflared": "^0.7.1",
44
43
  "dotenv": "^16.4.0",
45
44
  "hono": "^4.12.0",
package/server/index.ts CHANGED
@@ -8,7 +8,22 @@
8
8
  * - Auto-start Twilio if enabled (requires tunnel)
9
9
  */
10
10
 
11
- import { writeFileSync, unlinkSync, mkdirSync } from "node:fs";
11
+ // Global error handlers -- must be registered before any async work to prevent
12
+ // silent crashes from unhandled promise rejections or uncaught exceptions.
13
+ process.on("uncaughtException", (err) => {
14
+ console.error("[FATAL] Uncaught exception:", err);
15
+ console.error(err.stack ?? "(no stack trace)");
16
+ });
17
+
18
+ process.on("unhandledRejection", (reason) => {
19
+ console.error("[FATAL] Unhandled rejection:", reason);
20
+ if (reason instanceof Error) {
21
+ console.error(reason.stack ?? "(no stack trace)");
22
+ }
23
+ });
24
+
25
+ import { writeFileSync, unlinkSync, mkdirSync, existsSync } from "node:fs";
26
+ import { spawn, type ChildProcess } from "node:child_process";
12
27
  import { join } from "node:path";
13
28
  import { homedir } from "node:os";
14
29
  import { config } from "dotenv";
@@ -18,8 +33,64 @@ import { startDashboard } from "../dashboard/server.js";
18
33
  import { readEnv } from "./services/env.js";
19
34
  import { startTunnel, stopTunnel, isTunnelRunning, getTunnelUrl } from "./services/tunnel.js";
20
35
  import { startTwilioServer } from "./services/twilio-manager.js";
21
- import { startHeartbeat } from "./services/heartbeat.js";
22
- import { startVoiceServer } from "./voice/voice-server.js";
36
+
37
+ /** Base URL for the Python FastAPI server (for tunnel URL notification) */
38
+ const VOICE_SERVER_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
39
+
40
+ /** Path to the Python voice server directory */
41
+ const VOICE_SERVER_DIR = join(import.meta.dirname ?? ".", "..", "voice-server");
42
+
43
+ /** Reference to the Python voice server child process */
44
+ let pythonProcess: ChildProcess | null = null;
45
+
46
+ /**
47
+ * Start the Python voice server as a child process.
48
+ * Waits for the health endpoint to respond before returning.
49
+ */
50
+ async function startPythonVoiceServer(): Promise<void> {
51
+ const venvPython = join(VOICE_SERVER_DIR, ".venv", "bin", "python");
52
+ if (!existsSync(venvPython)) {
53
+ console.warn(`Python venv not found at ${venvPython} -- voice server will not start`);
54
+ return;
55
+ }
56
+
57
+ console.log("Starting Python voice server...");
58
+ pythonProcess = spawn(venvPython, ["server.py"], {
59
+ cwd: VOICE_SERVER_DIR,
60
+ stdio: ["ignore", "inherit", "inherit"],
61
+ });
62
+
63
+ pythonProcess.on("exit", (code) => {
64
+ console.error(`Python voice server exited with code ${code}`);
65
+ pythonProcess = null;
66
+ });
67
+
68
+ // Wait for health endpoint (up to 15s)
69
+ const deadline = Date.now() + 15_000;
70
+ while (Date.now() < deadline) {
71
+ try {
72
+ const res = await fetch(`${VOICE_SERVER_API_URL}/health`);
73
+ if (res.ok) {
74
+ console.log("Python voice server is ready");
75
+ return;
76
+ }
77
+ } catch {
78
+ // Not ready yet
79
+ }
80
+ await new Promise((r) => setTimeout(r, 500));
81
+ }
82
+ console.warn("Python voice server did not become healthy within 15s -- continuing anyway");
83
+ }
84
+
85
+ /**
86
+ * Stop the Python voice server child process.
87
+ */
88
+ function stopPythonVoiceServer(): void {
89
+ if (pythonProcess) {
90
+ pythonProcess.kill("SIGTERM");
91
+ pythonProcess = null;
92
+ }
93
+ }
23
94
 
24
95
  // Use VOICECC_DIR env var if set (passed by CLI when dropping root privileges),
25
96
  // otherwise fall back to ~/.voicecc.
@@ -64,20 +135,36 @@ function cleanupStatusFile(): void {
64
135
 
65
136
  async function main(): Promise<void> {
66
137
  const dashboardPort = await startDashboard();
67
- const voicePort = await startVoiceServer(dashboardPort);
68
138
 
69
- startHeartbeat();
139
+ // Start the Python voice server (voice pipeline + text chat + heartbeat)
140
+ await startPythonVoiceServer();
70
141
 
71
142
  const envVars = await readEnv();
72
143
 
73
144
  // Write status file early so the CLI can show dashboard info while tunnel starts
74
145
  writeStatusFile(dashboardPort, null);
75
146
 
76
- // Auto-start tunnel if enabled (independent of integrations)
147
+ // Auto-start tunnel if enabled -- tunnel now points at dashboard port
148
+ // so all external traffic goes through dashboard auth
77
149
  if (envVars.TUNNEL_ENABLED === "true") {
78
150
  try {
79
- await startTunnel(voicePort);
80
- writeStatusFile(dashboardPort, getTunnelUrl());
151
+ await startTunnel(dashboardPort);
152
+ const tunnelUrl = getTunnelUrl();
153
+ writeStatusFile(dashboardPort, tunnelUrl);
154
+
155
+ // Notify Python server of the tunnel URL so it can build TwiML URLs
156
+ if (tunnelUrl) {
157
+ try {
158
+ await fetch(`${VOICE_SERVER_API_URL}/config/tunnel-url`, {
159
+ method: "POST",
160
+ headers: { "Content-Type": "application/json" },
161
+ body: JSON.stringify({ url: tunnelUrl }),
162
+ });
163
+ console.log(`Notified Python server of tunnel URL: ${tunnelUrl}`);
164
+ } catch (notifyErr) {
165
+ console.warn(`Failed to notify Python server of tunnel URL: ${notifyErr}`);
166
+ }
167
+ }
81
168
  } catch (err) {
82
169
  const errorMsg = err instanceof Error ? err.message : String(err);
83
170
  console.error(`Tunnel auto-start failed: ${errorMsg}`);
@@ -101,6 +188,7 @@ async function main(): Promise<void> {
101
188
 
102
189
  // Graceful shutdown: stop tunnel subprocess, then clean up status file
103
190
  const shutdown = () => {
191
+ stopPythonVoiceServer();
104
192
  stopTunnel();
105
193
  cleanupStatusFile();
106
194
  process.exit(0);
package/server/services/device-pairing.ts CHANGED
@@ -47,6 +47,7 @@ export interface PairingValidation {
47
47
 
48
48
  const PAIRING_CODE_TTL_MS = 5 * 60 * 1000;
49
49
  const PAIRING_MAX_ATTEMPTS = 5;
50
+ const DEVICE_TOKEN_TTL_MS = 24 * 60 * 60 * 1000;
50
51
  const DEVICE_TOKENS_PATH = join(process.cwd(), ".device-tokens.json");
51
52
 
52
53
  // ============================================================================
@@ -141,7 +142,14 @@ export function isPairingCodeActive(code: string): boolean {
141
142
  }
142
143
 
143
144
  export function isValidDeviceToken(token: string): boolean {
144
- return deviceTokens.has(token);
145
+ const info = deviceTokens.get(token);
146
+ if (!info) return false;
147
+ if (Date.now() - info.pairedAt > DEVICE_TOKEN_TTL_MS) {
148
+ deviceTokens.delete(token);
149
+ saveDeviceTokens().catch(() => {});
150
+ return false;
151
+ }
152
+ return true;
145
153
  }
146
154
 
147
155
  /**
@@ -151,9 +159,17 @@ export function isValidDeviceToken(token: string): boolean {
151
159
  export async function loadDeviceTokens(): Promise<void> {
152
160
  try {
153
161
  const data = JSON.parse(await readFile(DEVICE_TOKENS_PATH, "utf-8"));
162
+ const now = Date.now();
163
+ let pruned = false;
154
164
  for (const [token, info] of Object.entries(data)) {
155
- deviceTokens.set(token, info as DeviceTokenInfo);
165
+ const typed = info as DeviceTokenInfo;
166
+ if (now - typed.pairedAt > DEVICE_TOKEN_TTL_MS) {
167
+ pruned = true;
168
+ } else {
169
+ deviceTokens.set(token, typed);
170
+ }
156
171
  }
172
+ if (pruned) saveDeviceTokens().catch(() => {});
157
173
  } catch {
158
174
  // File doesn't exist or is invalid -- start fresh
159
175
  }
package/server/services/twilio-manager.ts CHANGED
@@ -1,14 +1,26 @@
1
1
  /**
2
2
  * Twilio integration state management.
3
3
  *
4
- * Tracks whether the Twilio integration is enabled/active and handles
5
- * Twilio-specific setup (webhook URL updates). The actual HTTP/WebSocket
6
- * handling runs in the unified voice server (voice-server.ts).
4
+ * Simplified: tracks whether Twilio is enabled and checks the Python voice
5
+ * server health. The actual Twilio call handling (WebSocket, TwiML, heartbeat)
6
+ * runs in the Python server.
7
+ *
8
+ * Responsibilities:
9
+ * - Track Twilio running state
10
+ * - Update Twilio phone number webhooks on start
11
+ * - Check Python server health via GET /health
7
12
  */
8
13
 
9
14
  import { readEnv } from "./env.js";
10
15
  import twilioSdk from "twilio";
11
16
 
17
+ // ============================================================================
18
+ // CONSTANTS
19
+ // ============================================================================
20
+
21
+ /** Base URL for the Python FastAPI server */
22
+ const VOICE_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
23
+
12
24
  // ============================================================================
13
25
  // TYPES
14
26
  // ============================================================================
@@ -22,7 +34,7 @@ export interface TwilioStatus {
22
34
  // STATE
23
35
  // ============================================================================
24
36
 
25
- /** Whether the Twilio voice server is running */
37
+ /** Whether the Twilio integration is running */
26
38
  let twilioRunning = false;
27
39
 
28
40
  // ============================================================================
@@ -31,9 +43,7 @@ let twilioRunning = false;
31
43
 
32
44
  /**
33
45
  * Start the Twilio integration.
34
- * Reads .env for TWILIO_AUTH_TOKEN. If tunnelUrl exists, updates phone number
35
- * webhooks via Twilio SDK. The voice server is already running and handles
36
- * Twilio HTTP/WebSocket requests.
46
+ * Checks Python server health, then updates phone number webhooks via Twilio SDK.
37
47
  *
38
48
  * @param _dashboardPort - Unused (kept for API compatibility)
39
49
  * @param tunnelUrl - Optional tunnel public URL for webhook configuration
@@ -43,6 +53,17 @@ export async function startTwilioServer(_dashboardPort: number, tunnelUrl?: stri
43
53
  throw new Error("Twilio is already running");
44
54
  }
45
55
 
56
+ // Check Python server health
57
+ try {
58
+ const healthRes = await fetch(`${VOICE_API_URL}/health`);
59
+ if (!healthRes.ok) {
60
+ throw new Error(`Python server returned ${healthRes.status}`);
61
+ }
62
+ } catch (err) {
63
+ const msg = err instanceof Error ? err.message : String(err);
64
+ throw new Error(`Python voice server is not reachable at ${VOICE_API_URL}: ${msg}`);
65
+ }
66
+
46
67
  const envVars = await readEnv();
47
68
 
48
69
  if (!envVars.TWILIO_AUTH_TOKEN) {
@@ -50,7 +71,7 @@ export async function startTwilioServer(_dashboardPort: number, tunnelUrl?: stri
50
71
  }
51
72
 
52
73
  const accountSid = envVars.TWILIO_ACCOUNT_SID;
53
- const webhookUrl = tunnelUrl ? `${tunnelUrl}/twilio/incoming-call` : null;
74
+ const webhookUrl = tunnelUrl ? `${tunnelUrl}/api/twilio/incoming-call` : null;
54
75
 
55
76
  if (tunnelUrl && accountSid && envVars.TWILIO_AUTH_TOKEN) {
56
77
  const client = twilioSdk(accountSid, envVars.TWILIO_AUTH_TOKEN);
@@ -80,8 +101,6 @@ export async function startTwilioServer(_dashboardPort: number, tunnelUrl?: stri
80
101
  * Stop the Twilio voice server.
81
102
  */
82
103
  export function stopTwilioServer(): void {
83
- // In-process server doesn't have a clean shutdown mechanism yet;
84
- // mark as not running so new calls are rejected.
85
104
  twilioRunning = false;
86
105
  }
87
106