voicecc 1.2.7 → 1.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/dashboard-auth.test.ts +88 -0
- package/dashboard/routes/agents.ts +41 -2
- package/dashboard/server.ts +4 -39
- package/dashboard/vite.config.ts +28 -0
- package/dashboard/ws-proxy-frames.test.ts +117 -0
- package/dashboard/ws-proxy-integration.test.ts +189 -0
- package/dashboard/ws-proxy.ts +70 -0
- package/package.json +2 -2
- package/voice-server/twilio_pipeline.py +26 -12
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests that dashboard password protection (HTTP Basic Auth) works correctly.
|
|
3
|
+
*
|
|
4
|
+
* Uses Hono's in-memory app.request() -- no real HTTP server needed.
|
|
5
|
+
*
|
|
6
|
+
* Run: npx tsx --test dashboard/dashboard-auth.test.ts
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { test, describe, beforeEach } from "node:test";
|
|
10
|
+
import { strict as assert } from "node:assert";
|
|
11
|
+
|
|
12
|
+
import { createApp } from "./server.js";
|
|
13
|
+
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// CONSTANTS
|
|
16
|
+
// ============================================================================
|
|
17
|
+
|
|
18
|
+
const TEST_PASSWORD = "test-secret-123";
|
|
19
|
+
const TEST_ENDPOINT = "/api/status";
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* Encode credentials as a Basic Auth header value.
|
|
23
|
+
*/
|
|
24
|
+
function basicAuthHeader(username: string, password: string): string {
|
|
25
|
+
return "Basic " + Buffer.from(`${username}:${password}`).toString("base64");
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// TESTS
|
|
30
|
+
// ============================================================================
|
|
31
|
+
|
|
32
|
+
describe("dashboard password protection", () => {
|
|
33
|
+
beforeEach(() => {
|
|
34
|
+
delete process.env.DASHBOARD_PASSWORD;
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
test("no password set -- requests succeed without credentials", async () => {
|
|
38
|
+
const app = createApp();
|
|
39
|
+
const res = await app.request(TEST_ENDPOINT);
|
|
40
|
+
assert.equal(res.status, 200);
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
test("password set, no credentials -- returns 401", async () => {
|
|
44
|
+
process.env.DASHBOARD_PASSWORD = TEST_PASSWORD;
|
|
45
|
+
const app = createApp();
|
|
46
|
+
const res = await app.request(TEST_ENDPOINT);
|
|
47
|
+
assert.equal(res.status, 401);
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
test("password set, wrong credentials -- returns 401", async () => {
|
|
51
|
+
process.env.DASHBOARD_PASSWORD = TEST_PASSWORD;
|
|
52
|
+
const app = createApp();
|
|
53
|
+
const res = await app.request(TEST_ENDPOINT, {
|
|
54
|
+
headers: { Authorization: basicAuthHeader("admin", "wrong-password") },
|
|
55
|
+
});
|
|
56
|
+
assert.equal(res.status, 401);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
test("password set, correct credentials -- returns 200", async () => {
|
|
60
|
+
process.env.DASHBOARD_PASSWORD = TEST_PASSWORD;
|
|
61
|
+
const app = createApp();
|
|
62
|
+
const res = await app.request(TEST_ENDPOINT, {
|
|
63
|
+
headers: { Authorization: basicAuthHeader("admin", TEST_PASSWORD) },
|
|
64
|
+
});
|
|
65
|
+
assert.equal(res.status, 200);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
test("password set, /chat bypasses basic auth", async () => {
|
|
69
|
+
process.env.DASHBOARD_PASSWORD = TEST_PASSWORD;
|
|
70
|
+
const app = createApp();
|
|
71
|
+
const res = await app.request("/chat");
|
|
72
|
+
assert.notEqual(res.status, 401);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
test("password set, /api/chat/* bypasses basic auth", async () => {
|
|
76
|
+
process.env.DASHBOARD_PASSWORD = TEST_PASSWORD;
|
|
77
|
+
const app = createApp();
|
|
78
|
+
const res = await app.request("/api/chat/send", { method: "POST" });
|
|
79
|
+
assert.notEqual(res.status, 401);
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
test("password set, /api/webrtc/* bypasses basic auth", async () => {
|
|
83
|
+
process.env.DASHBOARD_PASSWORD = TEST_PASSWORD;
|
|
84
|
+
const app = createApp();
|
|
85
|
+
const res = await app.request("/api/webrtc/validate");
|
|
86
|
+
assert.notEqual(res.status, 401);
|
|
87
|
+
});
|
|
88
|
+
});
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
import { Hono } from "hono";
|
|
14
|
+
import twilioSdk from "twilio";
|
|
14
15
|
import {
|
|
15
16
|
listAgents,
|
|
16
17
|
getAgent,
|
|
@@ -21,6 +22,8 @@ import {
|
|
|
21
22
|
importAgent,
|
|
22
23
|
} from "../../server/services/agent-store.js";
|
|
23
24
|
import type { AgentConfig } from "../../server/services/agent-store.js";
|
|
25
|
+
import { readEnv } from "../../server/services/env.js";
|
|
26
|
+
import { getTunnelUrl } from "../../server/services/tunnel.js";
|
|
24
27
|
|
|
25
28
|
/** Base URL for the Python voice server API */
|
|
26
29
|
const VOICE_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
|
|
@@ -155,11 +158,30 @@ export function agentsRoutes(): Hono {
|
|
|
155
158
|
app.post("/:id/call", async (c) => {
|
|
156
159
|
const id = c.req.param("id");
|
|
157
160
|
try {
|
|
161
|
+
const envVars = await readEnv();
|
|
162
|
+
const accountSid = envVars.TWILIO_ACCOUNT_SID;
|
|
163
|
+
const authToken = envVars.TWILIO_AUTH_TOKEN;
|
|
164
|
+
const userPhone = envVars.USER_PHONE_NUMBER;
|
|
165
|
+
const tunnelUrl = getTunnelUrl();
|
|
166
|
+
|
|
167
|
+
if (!accountSid || !authToken) {
|
|
168
|
+
return c.json({ error: "Twilio credentials not configured" }, 400);
|
|
169
|
+
}
|
|
170
|
+
if (!userPhone) {
|
|
171
|
+
return c.json({ error: "User phone number not configured" }, 400);
|
|
172
|
+
}
|
|
173
|
+
if (!tunnelUrl) {
|
|
174
|
+
return c.json({ error: "Tunnel is not running" }, 400);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
const token = crypto.randomUUID();
|
|
178
|
+
|
|
179
|
+
// Register the token with the Python voice server
|
|
158
180
|
const response = await fetch(`${VOICE_API_URL}/register-call`, {
|
|
159
181
|
method: "POST",
|
|
160
182
|
headers: { "Content-Type": "application/json" },
|
|
161
183
|
body: JSON.stringify({
|
|
162
|
-
token
|
|
184
|
+
token,
|
|
163
185
|
agent_id: id,
|
|
164
186
|
initial_prompt: "The user pressed the 'Call Me' button. Greet them and ask how you can help.",
|
|
165
187
|
}),
|
|
@@ -168,7 +190,24 @@ export function agentsRoutes(): Hono {
|
|
|
168
190
|
const data = await response.json();
|
|
169
191
|
throw new Error(data.error ?? "Voice server error");
|
|
170
192
|
}
|
|
171
|
-
|
|
193
|
+
|
|
194
|
+
// Place the actual Twilio call
|
|
195
|
+
const client = twilioSdk(accountSid, authToken);
|
|
196
|
+
const numbers = await client.incomingPhoneNumbers.list({ limit: 1 });
|
|
197
|
+
if (numbers.length === 0) {
|
|
198
|
+
return c.json({ error: "No Twilio phone numbers found on this account" }, 400);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
const tunnelHost = tunnelUrl.replace(/^https?:\/\//, "");
|
|
202
|
+
const twiml = `<Response><Connect><Stream url="wss://${tunnelHost}/media/${token}?agentId=${id}" /></Connect></Response>`;
|
|
203
|
+
|
|
204
|
+
const call = await client.calls.create({
|
|
205
|
+
to: userPhone,
|
|
206
|
+
from: numbers[0].phoneNumber,
|
|
207
|
+
twiml,
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
return c.json({ success: true, callSid: call.sid });
|
|
172
211
|
} catch (err) {
|
|
173
212
|
return c.json({ error: (err as Error).message }, 400);
|
|
174
213
|
}
|
package/dashboard/server.ts
CHANGED
|
@@ -16,10 +16,9 @@ import { readFileSync } from "fs";
|
|
|
16
16
|
import { access } from "fs/promises";
|
|
17
17
|
import { join } from "path";
|
|
18
18
|
import { homedir } from "os";
|
|
19
|
-
import {
|
|
19
|
+
import { attachMediaProxy } from "./ws-proxy.js";
|
|
20
20
|
|
|
21
|
-
import type { IncomingMessage } from "http";
|
|
22
|
-
import type { Duplex } from "stream";
|
|
21
|
+
import type http from "http";
|
|
23
22
|
|
|
24
23
|
import { claudeMdRoutes } from "./routes/claude-md.js";
|
|
25
24
|
import { conversationRoutes } from "./routes/conversations.js";
|
|
@@ -142,42 +141,8 @@ export async function startDashboard(): Promise<number> {
|
|
|
142
141
|
});
|
|
143
142
|
server.on("error", reject);
|
|
144
143
|
|
|
145
|
-
// Proxy /media/:token WebSocket upgrades to the Python server
|
|
146
|
-
|
|
147
|
-
server.on("upgrade", (req: IncomingMessage, socket: Duplex, head: Buffer) => {
|
|
148
|
-
const url = req.url ?? "";
|
|
149
|
-
const match = url.match(/^\/media\/([a-f0-9-]+)(?:\?.*)?$/);
|
|
150
|
-
if (!match) return; // Not a Twilio media WebSocket -- let it fall through
|
|
151
|
-
|
|
152
|
-
const targetWsUrl = VOICE_API_URL.replace(/^http/, "ws") + url;
|
|
153
|
-
const upstream = new WsWebSocket(targetWsUrl);
|
|
154
|
-
|
|
155
|
-
upstream.on("open", () => {
|
|
156
|
-
wss.handleUpgrade(req, socket, head, (clientWs) => {
|
|
157
|
-
// Bidirectional message proxy
|
|
158
|
-
clientWs.on("message", (data) => {
|
|
159
|
-
if (upstream.readyState === WsWebSocket.OPEN) {
|
|
160
|
-
upstream.send(data);
|
|
161
|
-
}
|
|
162
|
-
});
|
|
163
|
-
upstream.on("message", (data) => {
|
|
164
|
-
if (clientWs.readyState === WsWebSocket.OPEN) {
|
|
165
|
-
clientWs.send(data);
|
|
166
|
-
}
|
|
167
|
-
});
|
|
168
|
-
|
|
169
|
-
clientWs.on("close", () => upstream.close());
|
|
170
|
-
upstream.on("close", () => clientWs.close());
|
|
171
|
-
clientWs.on("error", () => upstream.close());
|
|
172
|
-
upstream.on("error", () => clientWs.close());
|
|
173
|
-
});
|
|
174
|
-
});
|
|
175
|
-
|
|
176
|
-
upstream.on("error", (err) => {
|
|
177
|
-
console.error(`[dashboard] Twilio WS proxy error: ${err.message}`);
|
|
178
|
-
socket.destroy();
|
|
179
|
-
});
|
|
180
|
-
});
|
|
144
|
+
// Proxy /media/:token WebSocket upgrades to the Python voice server
|
|
145
|
+
attachMediaProxy(server as unknown as http.Server, VOICE_API_URL);
|
|
181
146
|
});
|
|
182
147
|
|
|
183
148
|
setDashboardPort(port);
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vite configuration for the Claude Voice dashboard frontend.
|
|
3
|
+
*
|
|
4
|
+
* - Uses @vitejs/plugin-react for JSX transform
|
|
5
|
+
* - Proxies /api requests to the Hono backend server during development
|
|
6
|
+
* - Outputs production build to dashboard/dist/
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { defineConfig } from "vite";
|
|
10
|
+
import react from "@vitejs/plugin-react";
|
|
11
|
+
|
|
12
|
+
// ============================================================================
|
|
13
|
+
// CONFIG
|
|
14
|
+
// ============================================================================
|
|
15
|
+
|
|
16
|
+
const API_PROXY_TARGET = "http://localhost:3456";
|
|
17
|
+
|
|
18
|
+
export default defineConfig({
|
|
19
|
+
plugins: [react()],
|
|
20
|
+
server: {
|
|
21
|
+
proxy: {
|
|
22
|
+
"/api": API_PROXY_TARGET,
|
|
23
|
+
},
|
|
24
|
+
},
|
|
25
|
+
build: {
|
|
26
|
+
outDir: "dist",
|
|
27
|
+
},
|
|
28
|
+
});
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests that the WebSocket proxy preserves frame types (text vs binary).
|
|
3
|
+
*
|
|
4
|
+
* Twilio sends and expects JSON as text frames. If the proxy silently converts
|
|
5
|
+
* text→binary, Twilio ignores outbound audio and the caller hears nothing.
|
|
6
|
+
*
|
|
7
|
+
* Run: npx tsx --test dashboard/ws-proxy-frames.test.ts
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { test, describe, after } from "node:test";
|
|
11
|
+
import { strict as assert } from "node:assert";
|
|
12
|
+
import { WebSocketServer, WebSocket } from "ws";
|
|
13
|
+
import http from "node:http";
|
|
14
|
+
|
|
15
|
+
import { attachMediaProxy } from "./ws-proxy.js";
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Helpers
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
function listen(server: http.Server): Promise<number> {
|
|
22
|
+
return new Promise((resolve) => {
|
|
23
|
+
server.listen(0, "127.0.0.1", () => {
|
|
24
|
+
const addr = server.address();
|
|
25
|
+
resolve(typeof addr === "object" ? addr!.port : 0);
|
|
26
|
+
});
|
|
27
|
+
});
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function connect(url: string): Promise<WebSocket> {
|
|
31
|
+
return new Promise((resolve, reject) => {
|
|
32
|
+
const ws = new WebSocket(url);
|
|
33
|
+
ws.on("open", () => resolve(ws));
|
|
34
|
+
ws.on("error", reject);
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function nextMessage(ws: WebSocket): Promise<{ data: Buffer; isBinary: boolean }> {
|
|
39
|
+
return new Promise((resolve) => {
|
|
40
|
+
ws.once("message", (data, isBinary) => {
|
|
41
|
+
resolve({ data: data as Buffer, isBinary });
|
|
42
|
+
});
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// Tests
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
describe("WebSocket proxy frame types", () => {
|
|
51
|
+
const servers: http.Server[] = [];
|
|
52
|
+
const sockets: WebSocket[] = [];
|
|
53
|
+
|
|
54
|
+
after(() => {
|
|
55
|
+
for (const ws of sockets) ws.close();
|
|
56
|
+
for (const srv of servers) srv.close();
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
async function setup() {
|
|
60
|
+
// Mock upstream (stands in for Python voice server)
|
|
61
|
+
const upstreamServer = http.createServer();
|
|
62
|
+
const upstreamWss = new WebSocketServer({ server: upstreamServer });
|
|
63
|
+
servers.push(upstreamServer);
|
|
64
|
+
const upstreamPort = await listen(upstreamServer);
|
|
65
|
+
|
|
66
|
+
// Proxy server using real attachMediaProxy from application code
|
|
67
|
+
const proxyServer = http.createServer();
|
|
68
|
+
attachMediaProxy(proxyServer, `http://127.0.0.1:${upstreamPort}`);
|
|
69
|
+
servers.push(proxyServer);
|
|
70
|
+
const proxyPort = await listen(proxyServer);
|
|
71
|
+
|
|
72
|
+
// Wait for both sides to connect
|
|
73
|
+
const upstreamClientP = new Promise<WebSocket>((resolve) => {
|
|
74
|
+
upstreamWss.once("connection", resolve);
|
|
75
|
+
});
|
|
76
|
+
const client = await connect(`ws://127.0.0.1:${proxyPort}/media/aabbccdd-1234-5678-9900-aabbccddeeff`);
|
|
77
|
+
const upstreamClient = await upstreamClientP;
|
|
78
|
+
sockets.push(client, upstreamClient);
|
|
79
|
+
|
|
80
|
+
return { client, upstreamClient };
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
test("client→upstream: text frame arrives as text, not binary", async () => {
|
|
84
|
+
const { client, upstreamClient } = await setup();
|
|
85
|
+
const msg = JSON.stringify({ event: "media", streamSid: "MZ123", media: { payload: "AQID" } });
|
|
86
|
+
|
|
87
|
+
const received = nextMessage(upstreamClient);
|
|
88
|
+
client.send(msg); // string → text frame
|
|
89
|
+
const { data, isBinary } = await received;
|
|
90
|
+
|
|
91
|
+
assert.equal(isBinary, false, "upstream should receive a text frame");
|
|
92
|
+
assert.equal(data.toString("utf-8"), msg);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
test("upstream→client: text frame arrives as text, not binary", async () => {
|
|
96
|
+
const { client, upstreamClient } = await setup();
|
|
97
|
+
const msg = JSON.stringify({ event: "media", streamSid: "MZ123", media: { payload: "AQID" } });
|
|
98
|
+
|
|
99
|
+
const received = nextMessage(client);
|
|
100
|
+
upstreamClient.send(msg); // string → text frame
|
|
101
|
+
const { data, isBinary } = await received;
|
|
102
|
+
|
|
103
|
+
assert.equal(isBinary, false, "client should receive a text frame");
|
|
104
|
+
assert.equal(data.toString("utf-8"), msg);
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
test("binary frames stay binary through the proxy", async () => {
|
|
108
|
+
const { client, upstreamClient } = await setup();
|
|
109
|
+
const buf = Buffer.from([0x01, 0x02, 0x03, 0x04]);
|
|
110
|
+
|
|
111
|
+
const received = nextMessage(upstreamClient);
|
|
112
|
+
client.send(buf); // Buffer → binary frame
|
|
113
|
+
const { isBinary } = await received;
|
|
114
|
+
|
|
115
|
+
assert.equal(isBinary, true, "upstream should receive a binary frame");
|
|
116
|
+
});
|
|
117
|
+
});
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration test for the Twilio ↔ Python WebSocket media proxy.
|
|
3
|
+
*
|
|
4
|
+
* Spins up a mock "Python voice server", attaches the real attachMediaProxy
|
|
5
|
+
* from ws-proxy.ts to an HTTP server, then connects a mock "Twilio" client.
|
|
6
|
+
* Verifies that Twilio media JSON round-trips correctly in both directions
|
|
7
|
+
* and that connections tear down cleanly.
|
|
8
|
+
*
|
|
9
|
+
* Run: npx tsx --test dashboard/ws-proxy-integration.test.ts
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { test, describe, after } from "node:test";
|
|
13
|
+
import { strict as assert } from "node:assert";
|
|
14
|
+
import { WebSocketServer, WebSocket } from "ws";
|
|
15
|
+
import http from "node:http";
|
|
16
|
+
|
|
17
|
+
import { attachMediaProxy } from "./ws-proxy.js";
|
|
18
|
+
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Helpers
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
function listen(server: http.Server): Promise<number> {
|
|
24
|
+
return new Promise((resolve) => {
|
|
25
|
+
server.listen(0, "127.0.0.1", () => {
|
|
26
|
+
const addr = server.address();
|
|
27
|
+
resolve(typeof addr === "object" ? addr!.port : 0);
|
|
28
|
+
});
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
function connect(url: string): Promise<WebSocket> {
|
|
33
|
+
return new Promise((resolve, reject) => {
|
|
34
|
+
const ws = new WebSocket(url);
|
|
35
|
+
ws.on("open", () => resolve(ws));
|
|
36
|
+
ws.on("error", reject);
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
function nextMessage(ws: WebSocket): Promise<{ data: Buffer; isBinary: boolean }> {
|
|
41
|
+
return new Promise((resolve) => {
|
|
42
|
+
ws.once("message", (data, isBinary) => {
|
|
43
|
+
resolve({ data: data as Buffer, isBinary });
|
|
44
|
+
});
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
function waitForClose(ws: WebSocket): Promise<void> {
|
|
49
|
+
return new Promise((resolve) => {
|
|
50
|
+
if (ws.readyState === WebSocket.CLOSED) return resolve();
|
|
51
|
+
ws.once("close", () => resolve());
|
|
52
|
+
});
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// A realistic Twilio media message
|
|
56
|
+
const TWILIO_MEDIA_MSG = JSON.stringify({
|
|
57
|
+
event: "media",
|
|
58
|
+
streamSid: "MZ0b4ca5d9cfd2658e5b0934ed835c66d8",
|
|
59
|
+
media: { payload: "PCUgJCIdJlGsrL9ELiooJSY12a+s" },
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
const TWILIO_START_MSG = JSON.stringify({
|
|
63
|
+
event: "start",
|
|
64
|
+
streamSid: "MZ0b4ca5d9cfd2658e5b0934ed835c66d8",
|
|
65
|
+
start: { callSid: "CA123", streamSid: "MZ0b4ca5d9cfd2658e5b0934ed835c66d8" },
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Tests
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
describe("Twilio media proxy (integration)", () => {
|
|
73
|
+
const servers: http.Server[] = [];
|
|
74
|
+
const sockets: WebSocket[] = [];
|
|
75
|
+
|
|
76
|
+
after(() => {
|
|
77
|
+
for (const ws of sockets) ws.close();
|
|
78
|
+
for (const srv of servers) srv.close();
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
/** Set up mock Python server + proxy + mock Twilio client */
|
|
82
|
+
async function setup() {
|
|
83
|
+
// 1. Mock Python voice server (accepts WebSocket connections)
|
|
84
|
+
const pythonServer = http.createServer();
|
|
85
|
+
const pythonWss = new WebSocketServer({ server: pythonServer });
|
|
86
|
+
servers.push(pythonServer);
|
|
87
|
+
const pythonPort = await listen(pythonServer);
|
|
88
|
+
|
|
89
|
+
// 2. Proxy server using the real attachMediaProxy
|
|
90
|
+
const proxyServer = http.createServer();
|
|
91
|
+
attachMediaProxy(proxyServer, `http://127.0.0.1:${pythonPort}`);
|
|
92
|
+
servers.push(proxyServer);
|
|
93
|
+
const proxyPort = await listen(proxyServer);
|
|
94
|
+
|
|
95
|
+
// 3. Connect mock Twilio client to the proxy (must match /media/<token>)
|
|
96
|
+
const pythonClientP = new Promise<WebSocket>((resolve) => {
|
|
97
|
+
pythonWss.once("connection", resolve);
|
|
98
|
+
});
|
|
99
|
+
const twilioClient = await connect(`ws://127.0.0.1:${proxyPort}/media/aabbccdd-1234-5678-9900-aabbccddeeff`);
|
|
100
|
+
const pythonClient = await pythonClientP;
|
|
101
|
+
sockets.push(twilioClient, pythonClient);
|
|
102
|
+
|
|
103
|
+
return { twilioClient, pythonClient };
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
test("Twilio start event reaches Python as parseable JSON text", async () => {
|
|
107
|
+
const { twilioClient, pythonClient } = await setup();
|
|
108
|
+
|
|
109
|
+
const received = nextMessage(pythonClient);
|
|
110
|
+
twilioClient.send(TWILIO_START_MSG);
|
|
111
|
+
const { data, isBinary } = await received;
|
|
112
|
+
|
|
113
|
+
assert.equal(isBinary, false, "start event must arrive as text frame");
|
|
114
|
+
const parsed = JSON.parse(data.toString("utf-8"));
|
|
115
|
+
assert.equal(parsed.event, "start");
|
|
116
|
+
assert.equal(parsed.start.callSid, "CA123");
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
test("Twilio media reaches Python as JSON text with intact payload", async () => {
|
|
120
|
+
const { twilioClient, pythonClient } = await setup();
|
|
121
|
+
|
|
122
|
+
const received = nextMessage(pythonClient);
|
|
123
|
+
twilioClient.send(TWILIO_MEDIA_MSG);
|
|
124
|
+
const { data, isBinary } = await received;
|
|
125
|
+
|
|
126
|
+
assert.equal(isBinary, false, "media must arrive as text frame");
|
|
127
|
+
const parsed = JSON.parse(data.toString("utf-8"));
|
|
128
|
+
assert.equal(parsed.event, "media");
|
|
129
|
+
assert.equal(parsed.media.payload, "PCUgJCIdJlGsrL9ELiooJSY12a+s");
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
test("Python response media reaches Twilio as JSON text", async () => {
|
|
133
|
+
const { twilioClient, pythonClient } = await setup();
|
|
134
|
+
const responseMsg = JSON.stringify({
|
|
135
|
+
event: "media",
|
|
136
|
+
streamSid: "MZ0b4ca5d9cfd2658e5b0934ed835c66d8",
|
|
137
|
+
media: { payload: "Yu/d7OlyenN0+mrt+ubecGdbYO7f" },
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
const received = nextMessage(twilioClient);
|
|
141
|
+
pythonClient.send(responseMsg);
|
|
142
|
+
const { data, isBinary } = await received;
|
|
143
|
+
|
|
144
|
+
assert.equal(isBinary, false, "response media must arrive as text frame");
|
|
145
|
+
const parsed = JSON.parse(data.toString("utf-8"));
|
|
146
|
+
assert.equal(parsed.event, "media");
|
|
147
|
+
assert.equal(parsed.media.payload, "Yu/d7OlyenN0+mrt+ubecGdbYO7f");
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
test("closing Twilio side tears down Python connection", async () => {
|
|
151
|
+
const { twilioClient, pythonClient } = await setup();
|
|
152
|
+
|
|
153
|
+
const pythonClosed = waitForClose(pythonClient);
|
|
154
|
+
twilioClient.close();
|
|
155
|
+
await pythonClosed;
|
|
156
|
+
|
|
157
|
+
assert.equal(pythonClient.readyState, WebSocket.CLOSED);
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
test("multiple messages round-trip without corruption", async () => {
|
|
161
|
+
const { twilioClient, pythonClient } = await setup();
|
|
162
|
+
const messages = Array.from({ length: 10 }, (_, i) =>
|
|
163
|
+
JSON.stringify({
|
|
164
|
+
event: "media",
|
|
165
|
+
streamSid: "MZ123",
|
|
166
|
+
media: { payload: `chunk${i}_` + "A".repeat(200) },
|
|
167
|
+
})
|
|
168
|
+
);
|
|
169
|
+
|
|
170
|
+
// Send all from Twilio, collect on Python side
|
|
171
|
+
const allReceived: string[] = [];
|
|
172
|
+
const done = new Promise<void>((resolve) => {
|
|
173
|
+
pythonClient.on("message", (data) => {
|
|
174
|
+
allReceived.push((data as Buffer).toString("utf-8"));
|
|
175
|
+
if (allReceived.length === messages.length) resolve();
|
|
176
|
+
});
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
for (const msg of messages) {
|
|
180
|
+
twilioClient.send(msg);
|
|
181
|
+
}
|
|
182
|
+
await done;
|
|
183
|
+
|
|
184
|
+
assert.equal(allReceived.length, messages.length);
|
|
185
|
+
for (let i = 0; i < messages.length; i++) {
|
|
186
|
+
assert.equal(allReceived[i], messages[i], `message ${i} should match`);
|
|
187
|
+
}
|
|
188
|
+
});
|
|
189
|
+
});
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Twilio ↔ Python WebSocket proxy utilities.
|
|
3
|
+
*
|
|
4
|
+
* The `ws` library always delivers message data as a Buffer, even for text
|
|
5
|
+
* frames. If you forward that Buffer via ws.send(Buffer), it emits a binary
|
|
6
|
+
* frame — silently changing the frame type. Twilio ignores binary frames for
|
|
7
|
+
* JSON media messages, so the caller hears nothing.
|
|
8
|
+
*
|
|
9
|
+
* wsForward checks the isBinary flag and converts to string when needed
|
|
10
|
+
* so that text frames stay text and binary frames stay binary.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { WebSocket, WebSocketServer } from "ws";
|
|
14
|
+
import type http from "node:http";
|
|
15
|
+
import type { Duplex } from "node:stream";
|
|
16
|
+
|
|
17
|
+
export function wsForward(
|
|
18
|
+
dest: WebSocket,
|
|
19
|
+
data: Buffer | ArrayBuffer | Buffer[],
|
|
20
|
+
isBinary: boolean,
|
|
21
|
+
): void {
|
|
22
|
+
if (dest.readyState !== WebSocket.OPEN) return;
|
|
23
|
+
dest.send(isBinary ? data : (data as Buffer).toString("utf-8"));
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Attach a Twilio media WebSocket proxy to an HTTP server.
|
|
28
|
+
*
|
|
29
|
+
* Intercepts upgrade requests matching /media/<token> and proxies them
|
|
30
|
+
* bidirectionally to the given upstream URL, preserving frame types.
|
|
31
|
+
*/
|
|
32
|
+
export function attachMediaProxy(
|
|
33
|
+
server: http.Server,
|
|
34
|
+
upstreamBaseUrl: string,
|
|
35
|
+
): void {
|
|
36
|
+
const wss = new WebSocketServer({ noServer: true });
|
|
37
|
+
|
|
38
|
+
server.on("upgrade", (req: http.IncomingMessage, socket: Duplex, head: Buffer) => {
|
|
39
|
+
const url = req.url ?? "";
|
|
40
|
+
const match = url.match(/^\/media\/([a-f0-9-]+)(?:\?.*)?$/);
|
|
41
|
+
if (!match) return;
|
|
42
|
+
|
|
43
|
+
const targetWsUrl = upstreamBaseUrl.replace(/^http/, "ws") + url;
|
|
44
|
+
const upstream = new WebSocket(targetWsUrl);
|
|
45
|
+
|
|
46
|
+
let proxyMsgCount = { fromClient: 0, fromUpstream: 0 };
|
|
47
|
+
upstream.on("open", () => {
|
|
48
|
+
wss.handleUpgrade(req, socket, head, (clientWs) => {
|
|
49
|
+
clientWs.on("message", (data, isBinary) => {
|
|
50
|
+
proxyMsgCount.fromClient++;
|
|
51
|
+
wsForward(upstream, data as Buffer, isBinary);
|
|
52
|
+
});
|
|
53
|
+
upstream.on("message", (data, isBinary) => {
|
|
54
|
+
proxyMsgCount.fromUpstream++;
|
|
55
|
+
wsForward(clientWs, data as Buffer, isBinary);
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
clientWs.on("close", () => upstream.close());
|
|
59
|
+
upstream.on("close", () => clientWs.close());
|
|
60
|
+
clientWs.on("error", (e) => { console.error(`[ws-proxy] Twilio WS error: ${e.message}`); upstream.close(); });
|
|
61
|
+
upstream.on("error", (e) => { console.error(`[ws-proxy] Python WS error: ${e.message}`); clientWs.close(); });
|
|
62
|
+
});
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
upstream.on("error", (err) => {
|
|
66
|
+
console.error(`[dashboard] Twilio WS proxy error: ${err.message}`);
|
|
67
|
+
socket.destroy();
|
|
68
|
+
});
|
|
69
|
+
});
|
|
70
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "voicecc",
|
|
3
|
-
"version": "1.2.7",
|
|
3
|
+
"version": "1.2.9",
|
|
4
4
|
"description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -25,8 +25,8 @@
|
|
|
25
25
|
"bin/",
|
|
26
26
|
"server/",
|
|
27
27
|
"voice-server/",
|
|
28
|
+
"dashboard/*.ts",
|
|
28
29
|
"dashboard/dist/",
|
|
29
|
-
"dashboard/server.ts",
|
|
30
30
|
"dashboard/routes/",
|
|
31
31
|
"init/",
|
|
32
32
|
"tsconfig.json"
|
|
@@ -21,10 +21,16 @@ import os
|
|
|
21
21
|
import aiohttp
|
|
22
22
|
from fastapi import WebSocket
|
|
23
23
|
|
|
24
|
+
from pipecat.frames.frames import LLMFullResponseEndFrame, LLMFullResponseStartFrame
|
|
24
25
|
from pipecat.pipeline.pipeline import Pipeline
|
|
25
26
|
from pipecat.pipeline.runner import PipelineRunner
|
|
26
27
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
27
|
-
from pipecat.
|
|
28
|
+
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
29
|
+
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
30
|
+
from pipecat.processors.aggregators.llm_response_universal import (
|
|
31
|
+
LLMContextAggregatorPair,
|
|
32
|
+
LLMUserAggregatorParams,
|
|
33
|
+
)
|
|
28
34
|
from pipecat.serializers.twilio import TwilioFrameSerializer
|
|
29
35
|
from pipecat.services.elevenlabs.stt import ElevenLabsSTTService
|
|
30
36
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
|
@@ -74,14 +80,18 @@ async def handle_twilio_websocket(websocket: WebSocket, call_token: str) -> None
|
|
|
74
80
|
while True:
|
|
75
81
|
message = await websocket.receive()
|
|
76
82
|
|
|
77
|
-
# Skip binary frames (early audio before start)
|
|
78
83
|
if message.get("type") == "websocket.disconnect":
|
|
79
84
|
logger.warning("[twilio] WebSocket disconnected before start event")
|
|
80
85
|
return
|
|
81
|
-
|
|
86
|
+
|
|
87
|
+
# Twilio may send frames as text or binary
|
|
88
|
+
raw = message.get("text") or (
|
|
89
|
+
message.get("bytes", b"").decode("utf-8") if message.get("bytes") else None
|
|
90
|
+
)
|
|
91
|
+
if not raw:
|
|
82
92
|
continue
|
|
83
93
|
|
|
84
|
-
msg = json.loads(
|
|
94
|
+
msg = json.loads(raw)
|
|
85
95
|
|
|
86
96
|
if msg.get("event") == "start":
|
|
87
97
|
start_data = msg.get("start", {})
|
|
@@ -224,8 +234,13 @@ async def _run_twilio_pipeline(
|
|
|
224
234
|
narration = NarrationProcessor()
|
|
225
235
|
|
|
226
236
|
# Context aggregator
|
|
227
|
-
context =
|
|
228
|
-
context_aggregator =
|
|
237
|
+
context = LLMContext()
|
|
238
|
+
context_aggregator = LLMContextAggregatorPair(
|
|
239
|
+
context,
|
|
240
|
+
user_params=LLMUserAggregatorParams(
|
|
241
|
+
vad_analyzer=SileroVADAnalyzer(),
|
|
242
|
+
),
|
|
243
|
+
)
|
|
229
244
|
|
|
230
245
|
# Pipeline
|
|
231
246
|
pipeline = Pipeline(
|
|
@@ -246,16 +261,15 @@ async def _run_twilio_pipeline(
|
|
|
246
261
|
params=PipelineParams(allow_interruptions=True),
|
|
247
262
|
)
|
|
248
263
|
|
|
249
|
-
#
|
|
250
|
-
|
|
251
|
-
async def _send_initial_prompt():
|
|
252
|
-
await asyncio.sleep(1) # Let the pipeline fully initialize
|
|
264
|
+
# Send initial prompt once the pipeline is fully ready
|
|
265
|
+
@task.event_handler("on_pipeline_started")
|
|
266
|
+
async def on_pipeline_started(task_ref, *args):
|
|
253
267
|
if llm_config.initial_prompt and not claude_llm._initial_prompt_sent:
|
|
254
268
|
claude_llm._initial_prompt_sent = True
|
|
255
269
|
await claude_llm._ensure_client()
|
|
270
|
+
await claude_llm.push_frame(LLMFullResponseStartFrame())
|
|
256
271
|
await claude_llm._send_to_claude(llm_config.initial_prompt)
|
|
257
|
-
|
|
258
|
-
asyncio.create_task(_send_initial_prompt())
|
|
272
|
+
await claude_llm.push_frame(LLMFullResponseEndFrame())
|
|
259
273
|
|
|
260
274
|
runner = PipelineRunner()
|
|
261
275
|
await runner.run(task)
|