npm - voicecc - Versions diffs - 1.2.7 → 1.2.8 - Mend

voicecc 1.2.7 → 1.2.8

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dashboard/routes/agents.ts +41 -2
package/dashboard/server.ts +4 -39
package/package.json +1 -1
package/voice-server/twilio_pipeline.py +26 -12

package/dashboard/routes/agents.ts CHANGED Viewed

@@ -11,6 +11,7 @@
  */
 import { Hono } from "hono";
+import twilioSdk from "twilio";
 import {
   listAgents,
   getAgent,
@@ -21,6 +22,8 @@ import {
   importAgent,
 } from "../../server/services/agent-store.js";
 import type { AgentConfig } from "../../server/services/agent-store.js";
+import { readEnv } from "../../server/services/env.js";
+import { getTunnelUrl } from "../../server/services/tunnel.js";
 /** Base URL for the Python voice server API */
 const VOICE_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
@@ -155,11 +158,30 @@ export function agentsRoutes(): Hono {
   app.post("/:id/call", async (c) => {
     const id = c.req.param("id");
     try {
+      const envVars = await readEnv();
+      const accountSid = envVars.TWILIO_ACCOUNT_SID;
+      const authToken = envVars.TWILIO_AUTH_TOKEN;
+      const userPhone = envVars.USER_PHONE_NUMBER;
+      const tunnelUrl = getTunnelUrl();
+      if (!accountSid || !authToken) {
+        return c.json({ error: "Twilio credentials not configured" }, 400);
+      }
+      if (!userPhone) {
+        return c.json({ error: "User phone number not configured" }, 400);
+      }
+      if (!tunnelUrl) {
+        return c.json({ error: "Tunnel is not running" }, 400);
+      }
+      const token = crypto.randomUUID();
+      // Register the token with the Python voice server
       const response = await fetch(`${VOICE_API_URL}/register-call`, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body: JSON.stringify({
-          token: crypto.randomUUID(),
+          token,
           agent_id: id,
           initial_prompt: "The user pressed the 'Call Me' button. Greet them and ask how you can help.",
         }),
@@ -168,7 +190,24 @@ export function agentsRoutes(): Hono {
         const data = await response.json();
         throw new Error(data.error ?? "Voice server error");
       }
-      return c.json({ success: true });
+      // Place the actual Twilio call
+      const client = twilioSdk(accountSid, authToken);
+      const numbers = await client.incomingPhoneNumbers.list({ limit: 1 });
+      if (numbers.length === 0) {
+        return c.json({ error: "No Twilio phone numbers found on this account" }, 400);
+      }
+      const tunnelHost = tunnelUrl.replace(/^https?:\/\//, "");
+      const twiml = `<Response><Connect><Stream url="wss://${tunnelHost}/media/${token}?agentId=${id}" /></Connect></Response>`;
+      const call = await client.calls.create({
+        to: userPhone,
+        from: numbers[0].phoneNumber,
+        twiml,
+      });
+      return c.json({ success: true, callSid: call.sid });
     } catch (err) {
       return c.json({ error: (err as Error).message }, 400);
     }

package/dashboard/server.ts CHANGED Viewed

@@ -16,10 +16,9 @@ import { readFileSync } from "fs";
 import { access } from "fs/promises";
 import { join } from "path";
 import { homedir } from "os";
-import { WebSocket as WsWebSocket, WebSocketServer } from "ws";
+import { attachMediaProxy } from "./ws-proxy.js";
-import type { IncomingMessage } from "http";
-import type { Duplex } from "stream";
+import type http from "http";
 import { claudeMdRoutes } from "./routes/claude-md.js";
 import { conversationRoutes } from "./routes/conversations.js";
@@ -142,42 +141,8 @@ export async function startDashboard(): Promise<number> {
         });
         server.on("error", reject);
-        // Proxy /media/:token WebSocket upgrades to the Python server
-        const wss = new WebSocketServer({ noServer: true });
-        server.on("upgrade", (req: IncomingMessage, socket: Duplex, head: Buffer) => {
-          const url = req.url ?? "";
-          const match = url.match(/^\/media\/([a-f0-9-]+)(?:\?.*)?$/);
-          if (!match) return; // Not a Twilio media WebSocket -- let it fall through
-          const targetWsUrl = VOICE_API_URL.replace(/^http/, "ws") + url;
-          const upstream = new WsWebSocket(targetWsUrl);
-          upstream.on("open", () => {
-            wss.handleUpgrade(req, socket, head, (clientWs) => {
-              // Bidirectional message proxy
-              clientWs.on("message", (data) => {
-                if (upstream.readyState === WsWebSocket.OPEN) {
-                  upstream.send(data);
-                }
-              });
-              upstream.on("message", (data) => {
-                if (clientWs.readyState === WsWebSocket.OPEN) {
-                  clientWs.send(data);
-                }
-              });
-              clientWs.on("close", () => upstream.close());
-              upstream.on("close", () => clientWs.close());
-              clientWs.on("error", () => upstream.close());
-              upstream.on("error", () => clientWs.close());
-            });
-          });
-          upstream.on("error", (err) => {
-            console.error(`[dashboard] Twilio WS proxy error: ${err.message}`);
-            socket.destroy();
-          });
-        });
+        // Proxy /media/:token WebSocket upgrades to the Python voice server
+        attachMediaProxy(server as unknown as http.Server, VOICE_API_URL);
       });
       setDashboardPort(port);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "voicecc",
-  "version": "1.2.7",
+  "version": "1.2.8",
   "description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
   "repository": {
     "type": "git",

package/voice-server/twilio_pipeline.py CHANGED Viewed

@@ -21,10 +21,16 @@ import os
 import aiohttp
 from fastapi import WebSocket
+from pipecat.frames.frames import LLMFullResponseEndFrame, LLMFullResponseStartFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.llm_response_universal import (
+    LLMContextAggregatorPair,
+    LLMUserAggregatorParams,
+)
 from pipecat.serializers.twilio import TwilioFrameSerializer
 from pipecat.services.elevenlabs.stt import ElevenLabsSTTService
 from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
@@ -74,14 +80,18 @@ async def handle_twilio_websocket(websocket: WebSocket, call_token: str) -> None
         while True:
             message = await websocket.receive()
-            # Skip binary frames (early audio before start)
             if message.get("type") == "websocket.disconnect":
                 logger.warning("[twilio] WebSocket disconnected before start event")
                 return
-            if "text" not in message:
+            # Twilio may send frames as text or binary
+            raw = message.get("text") or (
+                message.get("bytes", b"").decode("utf-8") if message.get("bytes") else None
+            )
+            if not raw:
                 continue
-            msg = json.loads(message["text"])
+            msg = json.loads(raw)
             if msg.get("event") == "start":
                 start_data = msg.get("start", {})
@@ -224,8 +234,13 @@ async def _run_twilio_pipeline(
         narration = NarrationProcessor()
         # Context aggregator
-        context = OpenAILLMContext(messages=[], tools=[])
-        context_aggregator = claude_llm.create_context_aggregator(context)
+        context = LLMContext()
+        context_aggregator = LLMContextAggregatorPair(
+            context,
+            user_params=LLMUserAggregatorParams(
+                vad_analyzer=SileroVADAnalyzer(),
+            ),
+        )
         # Pipeline
         pipeline = Pipeline(
@@ -246,16 +261,15 @@ async def _run_twilio_pipeline(
             params=PipelineParams(allow_interruptions=True),
         )
-        # For Twilio, the WebSocket is already connected, so send the
-        # initial prompt shortly after the pipeline starts.
-        async def _send_initial_prompt():
-            await asyncio.sleep(1)  # Let the pipeline fully initialize
+        # Send initial prompt once the pipeline is fully ready
+        @task.event_handler("on_pipeline_started")
+        async def on_pipeline_started(task_ref, *args):
             if llm_config.initial_prompt and not claude_llm._initial_prompt_sent:
                 claude_llm._initial_prompt_sent = True
                 await claude_llm._ensure_client()
+                await claude_llm.push_frame(LLMFullResponseStartFrame())
                 await claude_llm._send_to_claude(llm_config.initial_prompt)
-        asyncio.create_task(_send_initial_prompt())
+                await claude_llm.push_frame(LLMFullResponseEndFrame())
         runner = PipelineRunner()
         await runner.run(task)