voicecc 1.1.36 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/bin/voicecc.js +94 -1
  2. package/dashboard/dist/assets/index-DCeOdulF.js +28 -0
  3. package/dashboard/dist/index.html +1 -1
  4. package/dashboard/routes/agents.ts +28 -8
  5. package/dashboard/routes/browser-call.ts +3 -2
  6. package/dashboard/routes/chat.ts +75 -55
  7. package/dashboard/routes/providers.ts +5 -74
  8. package/dashboard/routes/twilio.ts +104 -5
  9. package/dashboard/routes/voice.ts +98 -0
  10. package/dashboard/server.ts +48 -1
  11. package/package.json +2 -3
  12. package/server/index.ts +96 -8
  13. package/server/services/twilio-manager.ts +29 -10
  14. package/dashboard/dist/assets/index-C62C9Gp0.js +0 -28
  15. package/dashboard/dist/audio-processor.js +0 -126
  16. package/server/services/heartbeat.ts +0 -403
  17. package/server/voice/assets/chime.wav +0 -0
  18. package/server/voice/assets/startup.pcm +0 -0
  19. package/server/voice/audio-adapter.ts +0 -60
  20. package/server/voice/audio-inactivity.test.ts +0 -108
  21. package/server/voice/audio-inactivity.ts +0 -91
  22. package/server/voice/browser-audio-playback.test.ts +0 -149
  23. package/server/voice/browser-audio.ts +0 -147
  24. package/server/voice/browser-server.ts +0 -311
  25. package/server/voice/chat-server.ts +0 -236
  26. package/server/voice/chime.test.ts +0 -69
  27. package/server/voice/chime.ts +0 -36
  28. package/server/voice/claude-session.ts +0 -293
  29. package/server/voice/endpointing.ts +0 -163
  30. package/server/voice/mic-vpio +0 -0
  31. package/server/voice/narration.ts +0 -204
  32. package/server/voice/prompt-builder.ts +0 -108
  33. package/server/voice/session-lock.ts +0 -123
  34. package/server/voice/stt-elevenlabs.ts +0 -210
  35. package/server/voice/stt-provider.ts +0 -106
  36. package/server/voice/tts-elevenlabs-hiss.test.ts +0 -183
  37. package/server/voice/tts-elevenlabs.ts +0 -397
  38. package/server/voice/tts-provider.ts +0 -155
  39. package/server/voice/twilio-audio.ts +0 -338
  40. package/server/voice/twilio-server.ts +0 -540
  41. package/server/voice/types.ts +0 -282
  42. package/server/voice/vad.ts +0 -101
  43. package/server/voice/voice-loop-bugs.test.ts +0 -348
  44. package/server/voice/voice-server.ts +0 -129
  45. package/server/voice/voice-session.ts +0 -539
package/dashboard/server.ts CHANGED
@@ -3,6 +3,7 @@
3
3
  *
4
4
  * Thin wiring file that creates the Hono app and starts listening:
5
5
  * - Mount all API route groups under /api/*
6
+ * - Proxy Twilio media WebSocket upgrades to the Python server
6
7
  * - Serve the Vite build output as static files
7
8
  * - SPA fallback for client-side routing
8
9
  */
@@ -15,6 +16,10 @@ import { readFileSync } from "fs";
15
16
  import { access } from "fs/promises";
16
17
  import { join } from "path";
17
18
  import { homedir } from "os";
19
+ import { WebSocket as WsWebSocket, WebSocketServer } from "ws";
20
+
21
+ import type { IncomingMessage } from "http";
22
+ import type { Duplex } from "stream";
18
23
 
19
24
  import { claudeMdRoutes } from "./routes/claude-md.js";
20
25
  import { conversationRoutes } from "./routes/conversations.js";
@@ -30,6 +35,7 @@ import { providersRoutes } from "./routes/providers.js";
30
35
  import { agentsRoutes } from "./routes/agents.js";
31
36
  import { versionRoutes } from "./routes/version.js";
32
37
  import { chatRoutes } from "./routes/chat.js";
38
+ import { voiceRoutes } from "./routes/voice.js";
33
39
  import { loadDeviceTokens } from "../server/services/device-pairing.js";
34
40
 
35
41
  // ============================================================================
@@ -39,6 +45,9 @@ import { loadDeviceTokens } from "../server/services/device-pairing.js";
39
45
  const PORTS_TO_TRY = [3456, 3457, 3458, 3459, 3460];
40
46
  const USER_CLAUDE_MD_PATH = join(homedir(), ".claude", "CLAUDE.md");
41
47
 
48
+ /** Base URL for the Python FastAPI server (for WebSocket + HTTP proxy) */
49
+ const VOICE_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
50
+
42
51
  // ============================================================================
43
52
  // MAIN HANDLERS
44
53
  // ============================================================================
@@ -59,7 +68,7 @@ export function createApp(): Hono {
59
68
  const auth = basicAuth({ username: "admin", password: dashboardPassword });
60
69
  app.use("*", async (c, next) => {
61
70
  const path = c.req.path;
62
- if (path === "/chat" || path.startsWith("/api/chat/") || path.startsWith("/api/webrtc/")) {
71
+ if (path === "/chat" || path.startsWith("/api/chat/") || path.startsWith("/api/webrtc/") || path.startsWith("/api/voice/")) {
63
72
  return next();
64
73
  }
65
74
  return auth(c, next);
@@ -81,6 +90,7 @@ export function createApp(): Hono {
81
90
  app.route("/api/agents", agentsRoutes());
82
91
  app.route("/api/version", versionRoutes());
83
92
  app.route("/api/chat", chatRoutes());
93
+ app.route("/api/voice", voiceRoutes());
84
94
 
85
95
  // Status endpoint (user CLAUDE.md conflict check)
86
96
  app.get("/api/status", async (c) => {
@@ -131,6 +141,43 @@ export async function startDashboard(): Promise<number> {
131
141
  resolve();
132
142
  });
133
143
  server.on("error", reject);
144
+
145
+ // Proxy /media/:token WebSocket upgrades to the Python server
146
+ const wss = new WebSocketServer({ noServer: true });
147
+ server.on("upgrade", (req: IncomingMessage, socket: Duplex, head: Buffer) => {
148
+ const url = req.url ?? "";
149
+ const match = url.match(/^\/media\/([a-f0-9-]+)(?:\?.*)?$/);
150
+ if (!match) return; // Not a Twilio media WebSocket -- let it fall through
151
+
152
+ const targetWsUrl = VOICE_API_URL.replace(/^http/, "ws") + url;
153
+ const upstream = new WsWebSocket(targetWsUrl);
154
+
155
+ upstream.on("open", () => {
156
+ wss.handleUpgrade(req, socket, head, (clientWs) => {
157
+ // Bidirectional message proxy
158
+ clientWs.on("message", (data) => {
159
+ if (upstream.readyState === WsWebSocket.OPEN) {
160
+ upstream.send(data);
161
+ }
162
+ });
163
+ upstream.on("message", (data) => {
164
+ if (clientWs.readyState === WsWebSocket.OPEN) {
165
+ clientWs.send(data);
166
+ }
167
+ });
168
+
169
+ clientWs.on("close", () => upstream.close());
170
+ upstream.on("close", () => clientWs.close());
171
+ clientWs.on("error", () => upstream.close());
172
+ upstream.on("error", () => clientWs.close());
173
+ });
174
+ });
175
+
176
+ upstream.on("error", (err) => {
177
+ console.error(`[dashboard] Twilio WS proxy error: ${err.message}`);
178
+ socket.destroy();
179
+ });
180
+ });
134
181
  });
135
182
 
136
183
  setDashboardPort(port);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voicecc",
3
- "version": "1.1.36",
3
+ "version": "1.2.0",
4
4
  "description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
5
5
  "repository": {
6
6
  "type": "git",
@@ -35,11 +35,10 @@
35
35
  "linux"
36
36
  ],
37
37
  "dependencies": {
38
- "@anthropic-ai/claude-agent-sdk": "^0.2.70",
38
+ "@anthropic-ai/claude-agent-sdk": "^0.1.8",
39
39
  "@anthropic-ai/sdk": "^0.39.0",
40
40
  "@hono/node-server": "^1.19.9",
41
41
  "archiver": "^7.0.1",
42
- "avr-vad": "^1.0.0",
43
42
  "cloudflared": "^0.7.1",
44
43
  "dotenv": "^16.4.0",
45
44
  "hono": "^4.12.0",
package/server/index.ts CHANGED
@@ -8,7 +8,22 @@
8
8
  * - Auto-start Twilio if enabled (requires tunnel)
9
9
  */
10
10
 
11
- import { writeFileSync, unlinkSync, mkdirSync } from "node:fs";
11
+ // Global error handlers -- must be registered before any async work to prevent
12
+ // silent crashes from unhandled promise rejections or uncaught exceptions.
13
+ process.on("uncaughtException", (err) => {
14
+ console.error("[FATAL] Uncaught exception:", err);
15
+ console.error(err.stack ?? "(no stack trace)");
16
+ });
17
+
18
+ process.on("unhandledRejection", (reason) => {
19
+ console.error("[FATAL] Unhandled rejection:", reason);
20
+ if (reason instanceof Error) {
21
+ console.error(reason.stack ?? "(no stack trace)");
22
+ }
23
+ });
24
+
25
+ import { writeFileSync, unlinkSync, mkdirSync, existsSync } from "node:fs";
26
+ import { spawn, type ChildProcess } from "node:child_process";
12
27
  import { join } from "node:path";
13
28
  import { homedir } from "node:os";
14
29
  import { config } from "dotenv";
@@ -18,8 +33,64 @@ import { startDashboard } from "../dashboard/server.js";
18
33
  import { readEnv } from "./services/env.js";
19
34
  import { startTunnel, stopTunnel, isTunnelRunning, getTunnelUrl } from "./services/tunnel.js";
20
35
  import { startTwilioServer } from "./services/twilio-manager.js";
21
- import { startHeartbeat } from "./services/heartbeat.js";
22
- import { startVoiceServer } from "./voice/voice-server.js";
36
+
37
+ /** Base URL for the Python FastAPI server (for tunnel URL notification) */
38
+ const VOICE_SERVER_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
39
+
40
+ /** Path to the Python voice server directory */
41
+ const VOICE_SERVER_DIR = join(import.meta.dirname ?? ".", "..", "voice-server");
42
+
43
+ /** Reference to the Python voice server child process */
44
+ let pythonProcess: ChildProcess | null = null;
45
+
46
+ /**
47
+ * Start the Python voice server as a child process.
48
+ * Waits for the health endpoint to respond before returning.
49
+ */
50
+ async function startPythonVoiceServer(): Promise<void> {
51
+ const venvPython = join(VOICE_SERVER_DIR, ".venv", "bin", "python");
52
+ if (!existsSync(venvPython)) {
53
+ console.warn(`Python venv not found at ${venvPython} -- voice server will not start`);
54
+ return;
55
+ }
56
+
57
+ console.log("Starting Python voice server...");
58
+ pythonProcess = spawn(venvPython, ["server.py"], {
59
+ cwd: VOICE_SERVER_DIR,
60
+ stdio: ["ignore", "inherit", "inherit"],
61
+ });
62
+
63
+ pythonProcess.on("exit", (code) => {
64
+ console.error(`Python voice server exited with code ${code}`);
65
+ pythonProcess = null;
66
+ });
67
+
68
+ // Wait for health endpoint (up to 15s)
69
+ const deadline = Date.now() + 15_000;
70
+ while (Date.now() < deadline) {
71
+ try {
72
+ const res = await fetch(`${VOICE_SERVER_API_URL}/health`);
73
+ if (res.ok) {
74
+ console.log("Python voice server is ready");
75
+ return;
76
+ }
77
+ } catch {
78
+ // Not ready yet
79
+ }
80
+ await new Promise((r) => setTimeout(r, 500));
81
+ }
82
+ console.warn("Python voice server did not become healthy within 15s -- continuing anyway");
83
+ }
84
+
85
+ /**
86
+ * Stop the Python voice server child process.
87
+ */
88
+ function stopPythonVoiceServer(): void {
89
+ if (pythonProcess) {
90
+ pythonProcess.kill("SIGTERM");
91
+ pythonProcess = null;
92
+ }
93
+ }
23
94
 
24
95
  // Use VOICECC_DIR env var if set (passed by CLI when dropping root privileges),
25
96
  // otherwise fall back to ~/.voicecc.
@@ -64,20 +135,36 @@ function cleanupStatusFile(): void {
64
135
 
65
136
  async function main(): Promise<void> {
66
137
  const dashboardPort = await startDashboard();
67
- const voicePort = await startVoiceServer(dashboardPort);
68
138
 
69
- startHeartbeat();
139
+ // Start the Python voice server (voice pipeline + text chat + heartbeat)
140
+ await startPythonVoiceServer();
70
141
 
71
142
  const envVars = await readEnv();
72
143
 
73
144
  // Write status file early so the CLI can show dashboard info while tunnel starts
74
145
  writeStatusFile(dashboardPort, null);
75
146
 
76
- // Auto-start tunnel if enabled (independent of integrations)
147
+ // Auto-start tunnel if enabled -- tunnel now points at dashboard port
148
+ // so all external traffic goes through dashboard auth
77
149
  if (envVars.TUNNEL_ENABLED === "true") {
78
150
  try {
79
- await startTunnel(voicePort);
80
- writeStatusFile(dashboardPort, getTunnelUrl());
151
+ await startTunnel(dashboardPort);
152
+ const tunnelUrl = getTunnelUrl();
153
+ writeStatusFile(dashboardPort, tunnelUrl);
154
+
155
+ // Notify Python server of the tunnel URL so it can build TwiML URLs
156
+ if (tunnelUrl) {
157
+ try {
158
+ await fetch(`${VOICE_SERVER_API_URL}/config/tunnel-url`, {
159
+ method: "POST",
160
+ headers: { "Content-Type": "application/json" },
161
+ body: JSON.stringify({ url: tunnelUrl }),
162
+ });
163
+ console.log(`Notified Python server of tunnel URL: ${tunnelUrl}`);
164
+ } catch (notifyErr) {
165
+ console.warn(`Failed to notify Python server of tunnel URL: ${notifyErr}`);
166
+ }
167
+ }
81
168
  } catch (err) {
82
169
  const errorMsg = err instanceof Error ? err.message : String(err);
83
170
  console.error(`Tunnel auto-start failed: ${errorMsg}`);
@@ -101,6 +188,7 @@ async function main(): Promise<void> {
101
188
 
102
189
  // Graceful shutdown: stop tunnel subprocess, then clean up status file
103
190
  const shutdown = () => {
191
+ stopPythonVoiceServer();
104
192
  stopTunnel();
105
193
  cleanupStatusFile();
106
194
  process.exit(0);
package/server/services/twilio-manager.ts CHANGED
@@ -1,14 +1,26 @@
1
1
  /**
2
2
  * Twilio integration state management.
3
3
  *
4
- * Tracks whether the Twilio integration is enabled/active and handles
5
- * Twilio-specific setup (webhook URL updates). The actual HTTP/WebSocket
6
- * handling runs in the unified voice server (voice-server.ts).
4
+ * Simplified: tracks whether Twilio is enabled and checks the Python voice
5
+ * server health. The actual Twilio call handling (WebSocket, TwiML, heartbeat)
6
+ * runs in the Python server.
7
+ *
8
+ * Responsibilities:
9
+ * - Track Twilio running state
10
+ * - Update Twilio phone number webhooks on start
11
+ * - Check Python server health via GET /health
7
12
  */
8
13
 
9
14
  import { readEnv } from "./env.js";
10
15
  import twilioSdk from "twilio";
11
16
 
17
+ // ============================================================================
18
+ // CONSTANTS
19
+ // ============================================================================
20
+
21
+ /** Base URL for the Python FastAPI server */
22
+ const VOICE_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
23
+
12
24
  // ============================================================================
13
25
  // TYPES
14
26
  // ============================================================================
@@ -22,7 +34,7 @@ export interface TwilioStatus {
22
34
  // STATE
23
35
  // ============================================================================
24
36
 
25
- /** Whether the Twilio voice server is running */
37
+ /** Whether the Twilio integration is running */
26
38
  let twilioRunning = false;
27
39
 
28
40
  // ============================================================================
@@ -31,9 +43,7 @@ let twilioRunning = false;
31
43
 
32
44
  /**
33
45
  * Start the Twilio integration.
34
- * Reads .env for TWILIO_AUTH_TOKEN. If tunnelUrl exists, updates phone number
35
- * webhooks via Twilio SDK. The voice server is already running and handles
36
- * Twilio HTTP/WebSocket requests.
46
+ * Checks Python server health, then updates phone number webhooks via Twilio SDK.
37
47
  *
38
48
  * @param _dashboardPort - Unused (kept for API compatibility)
39
49
  * @param tunnelUrl - Optional tunnel public URL for webhook configuration
@@ -43,6 +53,17 @@ export async function startTwilioServer(_dashboardPort: number, tunnelUrl?: stri
43
53
  throw new Error("Twilio is already running");
44
54
  }
45
55
 
56
+ // Check Python server health
57
+ try {
58
+ const healthRes = await fetch(`${VOICE_API_URL}/health`);
59
+ if (!healthRes.ok) {
60
+ throw new Error(`Python server returned ${healthRes.status}`);
61
+ }
62
+ } catch (err) {
63
+ const msg = err instanceof Error ? err.message : String(err);
64
+ throw new Error(`Python voice server is not reachable at ${VOICE_API_URL}: ${msg}`);
65
+ }
66
+
46
67
  const envVars = await readEnv();
47
68
 
48
69
  if (!envVars.TWILIO_AUTH_TOKEN) {
@@ -50,7 +71,7 @@ export async function startTwilioServer(_dashboardPort: number, tunnelUrl?: stri
50
71
  }
51
72
 
52
73
  const accountSid = envVars.TWILIO_ACCOUNT_SID;
53
- const webhookUrl = tunnelUrl ? `${tunnelUrl}/twilio/incoming-call` : null;
74
+ const webhookUrl = tunnelUrl ? `${tunnelUrl}/api/twilio/incoming-call` : null;
54
75
 
55
76
  if (tunnelUrl && accountSid && envVars.TWILIO_AUTH_TOKEN) {
56
77
  const client = twilioSdk(accountSid, envVars.TWILIO_AUTH_TOKEN);
@@ -80,8 +101,6 @@ export async function startTwilioServer(_dashboardPort: number, tunnelUrl?: stri
80
101
  * Stop the Twilio voice server.
81
102
  */
82
103
  export function stopTwilioServer(): void {
83
- // In-process server doesn't have a clean shutdown mechanism yet;
84
- // mark as not running so new calls are rejected.
85
104
  twilioRunning = false;
86
105
  }
87
106