voicecc 1.2.6 → 1.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -8
- package/bin/voicecc.js +10 -2
- package/dashboard/routes/agents.ts +41 -2
- package/dashboard/server.ts +4 -39
- package/package.json +1 -1
- package/voice-server/claude_llm_service.py +2 -6
- package/voice-server/heartbeat.py +1 -1
- package/voice-server/initial_prompt_test.py +150 -0
- package/voice-server/twilio_pipeline.py +43 -5
- package/voice-server/voice_pipeline.py +16 -1
package/README.md
CHANGED
|
@@ -9,10 +9,10 @@ A Voice Agent Platform running on Claude Code. Create, manage, and deploy conver
|
|
|
9
9
|
## Project Structure
|
|
10
10
|
|
|
11
11
|
```
|
|
12
|
-
server/
|
|
13
|
-
|
|
14
|
-
services/
|
|
15
|
-
index.ts Entry point (
|
|
12
|
+
voice-server/ Python FastAPI: real-time audio pipeline (VAD, STT, TTS, Claude sessions)
|
|
13
|
+
server/ Node.js orchestration: boots dashboard + voice server, manages integrations
|
|
14
|
+
services/ Tunnel, Twilio, browser calls, agents, device pairing
|
|
15
|
+
index.ts Entry point (spawns voice-server + dashboard, auto-starts integrations)
|
|
16
16
|
dashboard/ Web UI (Vite + React) + API routes (Hono)
|
|
17
17
|
lander/ Static landing page
|
|
18
18
|
init/ Default prompt templates for new agents
|
|
@@ -25,6 +25,7 @@ bin/ CLI entry point (voicecc command)
|
|
|
25
25
|
|
|
26
26
|
- macOS or Linux
|
|
27
27
|
- Node.js 18+
|
|
28
|
+
- Python 3.12+ with `venv`
|
|
28
29
|
- An ElevenLabs API key
|
|
29
30
|
|
|
30
31
|
### Terminal
|
|
@@ -41,11 +42,13 @@ voicecc
|
|
|
41
42
|
|
|
42
43
|
## How It Works
|
|
43
44
|
|
|
44
|
-
|
|
45
|
+
The platform runs two servers: a **Node.js orchestrator** (dashboard, integrations, CLI) and a **Python voice server** (real-time audio pipeline via Pipecat).
|
|
46
|
+
|
|
47
|
+
1. **Mic capture**: Browser captures audio via WebRTC, connected to the Python voice server
|
|
45
48
|
2. **Voice activity detection**: Silero VAD v5 detects speech segments
|
|
46
|
-
3. **Speech-to-text**: ElevenLabs Scribe
|
|
49
|
+
3. **Speech-to-text**: ElevenLabs Scribe transcribes audio
|
|
47
50
|
4. **Endpointing**: VAD silence-based turn detection
|
|
48
51
|
5. **Claude inference**: Transcript sent to Claude Agent SDK session with streaming response
|
|
49
52
|
6. **Narration**: Claude's response stripped of markdown and split into sentences
|
|
50
|
-
7. **Text-to-speech**: ElevenLabs streaming TTS
|
|
51
|
-
8. **Speaker playback**: Audio
|
|
53
|
+
7. **Text-to-speech**: ElevenLabs streaming TTS generates audio
|
|
54
|
+
8. **Speaker playback**: Audio streamed back through WebRTC
|
package/bin/voicecc.js
CHANGED
|
@@ -129,7 +129,11 @@ function ensurePython() {
|
|
|
129
129
|
|
|
130
130
|
if (process.platform !== "linux") {
|
|
131
131
|
console.error("ERROR: Python 3.12+ is required but not found.");
|
|
132
|
-
|
|
132
|
+
if (process.platform === "darwin") {
|
|
133
|
+
console.error("Install it with Homebrew: brew install python@3.12");
|
|
134
|
+
} else {
|
|
135
|
+
console.error("Install Python 3.12+ and run 'voicecc' again.");
|
|
136
|
+
}
|
|
133
137
|
process.exit(1);
|
|
134
138
|
}
|
|
135
139
|
|
|
@@ -157,7 +161,11 @@ function ensureVenvModule(systemPython) {
|
|
|
157
161
|
|
|
158
162
|
if (process.platform !== "linux") {
|
|
159
163
|
console.error("ERROR: Python venv module is missing.");
|
|
160
|
-
|
|
164
|
+
if (process.platform === "darwin") {
|
|
165
|
+
console.error("Reinstall Python with Homebrew: brew install python@3.12");
|
|
166
|
+
} else {
|
|
167
|
+
console.error("Install the venv module and run 'voicecc' again.");
|
|
168
|
+
}
|
|
161
169
|
process.exit(1);
|
|
162
170
|
}
|
|
163
171
|
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
import { Hono } from "hono";
|
|
14
|
+
import twilioSdk from "twilio";
|
|
14
15
|
import {
|
|
15
16
|
listAgents,
|
|
16
17
|
getAgent,
|
|
@@ -21,6 +22,8 @@ import {
|
|
|
21
22
|
importAgent,
|
|
22
23
|
} from "../../server/services/agent-store.js";
|
|
23
24
|
import type { AgentConfig } from "../../server/services/agent-store.js";
|
|
25
|
+
import { readEnv } from "../../server/services/env.js";
|
|
26
|
+
import { getTunnelUrl } from "../../server/services/tunnel.js";
|
|
24
27
|
|
|
25
28
|
/** Base URL for the Python voice server API */
|
|
26
29
|
const VOICE_API_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";
|
|
@@ -155,11 +158,30 @@ export function agentsRoutes(): Hono {
|
|
|
155
158
|
app.post("/:id/call", async (c) => {
|
|
156
159
|
const id = c.req.param("id");
|
|
157
160
|
try {
|
|
161
|
+
const envVars = await readEnv();
|
|
162
|
+
const accountSid = envVars.TWILIO_ACCOUNT_SID;
|
|
163
|
+
const authToken = envVars.TWILIO_AUTH_TOKEN;
|
|
164
|
+
const userPhone = envVars.USER_PHONE_NUMBER;
|
|
165
|
+
const tunnelUrl = getTunnelUrl();
|
|
166
|
+
|
|
167
|
+
if (!accountSid || !authToken) {
|
|
168
|
+
return c.json({ error: "Twilio credentials not configured" }, 400);
|
|
169
|
+
}
|
|
170
|
+
if (!userPhone) {
|
|
171
|
+
return c.json({ error: "User phone number not configured" }, 400);
|
|
172
|
+
}
|
|
173
|
+
if (!tunnelUrl) {
|
|
174
|
+
return c.json({ error: "Tunnel is not running" }, 400);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
const token = crypto.randomUUID();
|
|
178
|
+
|
|
179
|
+
// Register the token with the Python voice server
|
|
158
180
|
const response = await fetch(`${VOICE_API_URL}/register-call`, {
|
|
159
181
|
method: "POST",
|
|
160
182
|
headers: { "Content-Type": "application/json" },
|
|
161
183
|
body: JSON.stringify({
|
|
162
|
-
token
|
|
184
|
+
token,
|
|
163
185
|
agent_id: id,
|
|
164
186
|
initial_prompt: "The user pressed the 'Call Me' button. Greet them and ask how you can help.",
|
|
165
187
|
}),
|
|
@@ -168,7 +190,24 @@ export function agentsRoutes(): Hono {
|
|
|
168
190
|
const data = await response.json();
|
|
169
191
|
throw new Error(data.error ?? "Voice server error");
|
|
170
192
|
}
|
|
171
|
-
|
|
193
|
+
|
|
194
|
+
// Place the actual Twilio call
|
|
195
|
+
const client = twilioSdk(accountSid, authToken);
|
|
196
|
+
const numbers = await client.incomingPhoneNumbers.list({ limit: 1 });
|
|
197
|
+
if (numbers.length === 0) {
|
|
198
|
+
return c.json({ error: "No Twilio phone numbers found on this account" }, 400);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
const tunnelHost = tunnelUrl.replace(/^https?:\/\//, "");
|
|
202
|
+
const twiml = `<Response><Connect><Stream url="wss://${tunnelHost}/media/${token}?agentId=${id}" /></Connect></Response>`;
|
|
203
|
+
|
|
204
|
+
const call = await client.calls.create({
|
|
205
|
+
to: userPhone,
|
|
206
|
+
from: numbers[0].phoneNumber,
|
|
207
|
+
twiml,
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
return c.json({ success: true, callSid: call.sid });
|
|
172
211
|
} catch (err) {
|
|
173
212
|
return c.json({ error: (err as Error).message }, 400);
|
|
174
213
|
}
|
package/dashboard/server.ts
CHANGED
|
@@ -16,10 +16,9 @@ import { readFileSync } from "fs";
|
|
|
16
16
|
import { access } from "fs/promises";
|
|
17
17
|
import { join } from "path";
|
|
18
18
|
import { homedir } from "os";
|
|
19
|
-
import {
|
|
19
|
+
import { attachMediaProxy } from "./ws-proxy.js";
|
|
20
20
|
|
|
21
|
-
import type
|
|
22
|
-
import type { Duplex } from "stream";
|
|
21
|
+
import type http from "http";
|
|
23
22
|
|
|
24
23
|
import { claudeMdRoutes } from "./routes/claude-md.js";
|
|
25
24
|
import { conversationRoutes } from "./routes/conversations.js";
|
|
@@ -142,42 +141,8 @@ export async function startDashboard(): Promise<number> {
|
|
|
142
141
|
});
|
|
143
142
|
server.on("error", reject);
|
|
144
143
|
|
|
145
|
-
// Proxy /media/:token WebSocket upgrades to the Python server
|
|
146
|
-
|
|
147
|
-
server.on("upgrade", (req: IncomingMessage, socket: Duplex, head: Buffer) => {
|
|
148
|
-
const url = req.url ?? "";
|
|
149
|
-
const match = url.match(/^\/media\/([a-f0-9-]+)(?:\?.*)?$/);
|
|
150
|
-
if (!match) return; // Not a Twilio media WebSocket -- let it fall through
|
|
151
|
-
|
|
152
|
-
const targetWsUrl = VOICE_API_URL.replace(/^http/, "ws") + url;
|
|
153
|
-
const upstream = new WsWebSocket(targetWsUrl);
|
|
154
|
-
|
|
155
|
-
upstream.on("open", () => {
|
|
156
|
-
wss.handleUpgrade(req, socket, head, (clientWs) => {
|
|
157
|
-
// Bidirectional message proxy
|
|
158
|
-
clientWs.on("message", (data) => {
|
|
159
|
-
if (upstream.readyState === WsWebSocket.OPEN) {
|
|
160
|
-
upstream.send(data);
|
|
161
|
-
}
|
|
162
|
-
});
|
|
163
|
-
upstream.on("message", (data) => {
|
|
164
|
-
if (clientWs.readyState === WsWebSocket.OPEN) {
|
|
165
|
-
clientWs.send(data);
|
|
166
|
-
}
|
|
167
|
-
});
|
|
168
|
-
|
|
169
|
-
clientWs.on("close", () => upstream.close());
|
|
170
|
-
upstream.on("close", () => clientWs.close());
|
|
171
|
-
clientWs.on("error", () => upstream.close());
|
|
172
|
-
upstream.on("error", () => clientWs.close());
|
|
173
|
-
});
|
|
174
|
-
});
|
|
175
|
-
|
|
176
|
-
upstream.on("error", (err) => {
|
|
177
|
-
console.error(`[dashboard] Twilio WS proxy error: ${err.message}`);
|
|
178
|
-
socket.destroy();
|
|
179
|
-
});
|
|
180
|
-
});
|
|
144
|
+
// Proxy /media/:token WebSocket upgrades to the Python voice server
|
|
145
|
+
attachMediaProxy(server as unknown as http.Server, VOICE_API_URL);
|
|
181
146
|
});
|
|
182
147
|
|
|
183
148
|
setDashboardPort(port);
|
package/package.json
CHANGED
|
@@ -108,12 +108,8 @@ class ClaudeLLMService(LLMService):
|
|
|
108
108
|
self._settings.user_turn_completion_config = None
|
|
109
109
|
|
|
110
110
|
async def start(self, frame: StartFrame):
|
|
111
|
-
"""Handle pipeline start.
|
|
111
|
+
"""Handle pipeline start."""
|
|
112
112
|
await super().start(frame)
|
|
113
|
-
if self._config.initial_prompt and not self._initial_prompt_sent:
|
|
114
|
-
self._initial_prompt_sent = True
|
|
115
|
-
await self._ensure_client()
|
|
116
|
-
await self._send_to_claude(self._config.initial_prompt)
|
|
117
113
|
|
|
118
114
|
async def stop(self, frame: EndFrame):
|
|
119
115
|
"""Handle pipeline stop. Disconnects the Claude session."""
|
|
@@ -237,7 +233,7 @@ class ClaudeLLMService(LLMService):
|
|
|
237
233
|
allowed_tools=self._config.allowed_tools or [],
|
|
238
234
|
permission_mode="bypassPermissions",
|
|
239
235
|
include_partial_messages=True,
|
|
240
|
-
max_thinking_tokens=
|
|
236
|
+
max_thinking_tokens=0,
|
|
241
237
|
)
|
|
242
238
|
self._client = ClaudeSDKClient(options=options)
|
|
243
239
|
|
|
@@ -314,7 +314,7 @@ async def _run_heartbeat_session(
|
|
|
314
314
|
allowed_tools=[],
|
|
315
315
|
permission_mode="bypassPermissions",
|
|
316
316
|
include_partial_messages=True,
|
|
317
|
-
max_thinking_tokens=
|
|
317
|
+
max_thinking_tokens=0,
|
|
318
318
|
)
|
|
319
319
|
client = ClaudeSDKClient(options=options)
|
|
320
320
|
await client.connect()
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Tests for agent-speaks-first behavior.
|
|
2
|
+
|
|
3
|
+
Verifies that when a call starts with an initial_prompt configured,
|
|
4
|
+
the agent produces a greeting (text output wrapped in response frames)
|
|
5
|
+
without any user input.
|
|
6
|
+
|
|
7
|
+
Run: cd voice-server && .venv/bin/python -m pytest initial_prompt_test.py -v
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
from unittest.mock import AsyncMock
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
|
|
15
|
+
from claude_agent_sdk import AssistantMessage, ResultMessage, TextBlock
|
|
16
|
+
from pipecat.frames.frames import (
|
|
17
|
+
LLMFullResponseEndFrame,
|
|
18
|
+
LLMFullResponseStartFrame,
|
|
19
|
+
LLMTextFrame,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from claude_llm_service import ClaudeLLMService, ClaudeLLMServiceConfig
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ============================================================================
|
|
26
|
+
# HELPERS
|
|
27
|
+
# ============================================================================
|
|
28
|
+
|
|
29
|
+
def _make_fake_client(response_text: str = "Hello! How can I help?"):
|
|
30
|
+
"""Create a mock ClaudeSDKClient that returns a canned text response."""
|
|
31
|
+
client = AsyncMock()
|
|
32
|
+
client.connect = AsyncMock()
|
|
33
|
+
client.disconnect = AsyncMock()
|
|
34
|
+
client.query = AsyncMock()
|
|
35
|
+
|
|
36
|
+
async def fake_receive():
|
|
37
|
+
yield AssistantMessage(
|
|
38
|
+
content=[TextBlock(text=response_text)],
|
|
39
|
+
model="test",
|
|
40
|
+
)
|
|
41
|
+
yield ResultMessage(
|
|
42
|
+
subtype="success",
|
|
43
|
+
is_error=False,
|
|
44
|
+
duration_ms=0,
|
|
45
|
+
duration_api_ms=0,
|
|
46
|
+
num_turns=1,
|
|
47
|
+
session_id="test",
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
client.receive_response = fake_receive
|
|
51
|
+
return client
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _collect_frames(service: ClaudeLLMService) -> list:
|
|
55
|
+
"""Patch push_frame on a service to collect all output frames."""
|
|
56
|
+
frames = []
|
|
57
|
+
|
|
58
|
+
async def capture(frame, *args, **kwargs):
|
|
59
|
+
frames.append(frame)
|
|
60
|
+
|
|
61
|
+
service.push_frame = capture
|
|
62
|
+
return frames
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
async def _trigger_initial_prompt(service: ClaudeLLMService, prompt: str):
|
|
66
|
+
"""Reproduce what the pipeline's on_pipeline_started handler does."""
|
|
67
|
+
await service._ensure_client()
|
|
68
|
+
await service.push_frame(LLMFullResponseStartFrame())
|
|
69
|
+
await service._send_to_claude(prompt)
|
|
70
|
+
await service.push_frame(LLMFullResponseEndFrame())
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ============================================================================
|
|
74
|
+
# TESTS
|
|
75
|
+
# ============================================================================
|
|
76
|
+
|
|
77
|
+
@pytest.mark.asyncio
|
|
78
|
+
async def test_agent_greets_user_on_call_start():
|
|
79
|
+
"""When a call starts with an initial_prompt, the agent should produce
|
|
80
|
+
a spoken greeting — text frames wrapped in response start/end frames —
|
|
81
|
+
without any user input."""
|
|
82
|
+
client = _make_fake_client("Hey there! Welcome to the call.")
|
|
83
|
+
config = ClaudeLLMServiceConfig(
|
|
84
|
+
cwd="/tmp",
|
|
85
|
+
system_prompt="You are a test agent.",
|
|
86
|
+
initial_prompt="Greet the user briefly.",
|
|
87
|
+
existing_client=client,
|
|
88
|
+
)
|
|
89
|
+
service = ClaudeLLMService(config=config)
|
|
90
|
+
frames = _collect_frames(service)
|
|
91
|
+
|
|
92
|
+
await _trigger_initial_prompt(service, config.initial_prompt)
|
|
93
|
+
|
|
94
|
+
# The agent should have produced spoken output
|
|
95
|
+
text_frames = [f for f in frames if isinstance(f, LLMTextFrame)]
|
|
96
|
+
assert len(text_frames) >= 1, "Agent did not produce any spoken output"
|
|
97
|
+
full_text = " ".join(f.text for f in text_frames)
|
|
98
|
+
assert len(full_text) > 0, "Agent greeting was empty"
|
|
99
|
+
|
|
100
|
+
# The prompt should have been sent to Claude
|
|
101
|
+
client.query.assert_awaited_once_with("Greet the user briefly.")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@pytest.mark.asyncio
|
|
105
|
+
async def test_greeting_is_wrapped_for_tts():
|
|
106
|
+
"""The greeting must be wrapped in response start/end frames so TTS
|
|
107
|
+
treats it as a single utterance (no gaps, no dropped last sentence)."""
|
|
108
|
+
config = ClaudeLLMServiceConfig(
|
|
109
|
+
cwd="/tmp",
|
|
110
|
+
system_prompt="You are a test agent.",
|
|
111
|
+
initial_prompt="Say hello.",
|
|
112
|
+
existing_client=_make_fake_client("Hi! Nice to meet you."),
|
|
113
|
+
)
|
|
114
|
+
service = ClaudeLLMService(config=config)
|
|
115
|
+
frames = _collect_frames(service)
|
|
116
|
+
|
|
117
|
+
await _trigger_initial_prompt(service, config.initial_prompt)
|
|
118
|
+
|
|
119
|
+
frame_types = [type(f) for f in frames]
|
|
120
|
+
|
|
121
|
+
# Must have: start, then text(s), then end
|
|
122
|
+
assert LLMFullResponseStartFrame in frame_types, "Missing response start"
|
|
123
|
+
assert LLMFullResponseEndFrame in frame_types, "Missing response end"
|
|
124
|
+
|
|
125
|
+
start_idx = frame_types.index(LLMFullResponseStartFrame)
|
|
126
|
+
end_idx = frame_types.index(LLMFullResponseEndFrame)
|
|
127
|
+
text_indices = [i for i, t in enumerate(frame_types) if t == LLMTextFrame]
|
|
128
|
+
|
|
129
|
+
assert text_indices, "No text frames between start and end"
|
|
130
|
+
assert all(start_idx < i < end_idx for i in text_indices), (
|
|
131
|
+
"Text frames must appear between start and end for TTS to work correctly"
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@pytest.mark.asyncio
|
|
136
|
+
async def test_no_greeting_without_initial_prompt():
|
|
137
|
+
"""Without an initial_prompt, the agent should stay silent on call start."""
|
|
138
|
+
config = ClaudeLLMServiceConfig(
|
|
139
|
+
cwd="/tmp",
|
|
140
|
+
system_prompt="You are a test agent.",
|
|
141
|
+
initial_prompt=None,
|
|
142
|
+
existing_client=_make_fake_client(),
|
|
143
|
+
)
|
|
144
|
+
service = ClaudeLLMService(config=config)
|
|
145
|
+
frames = _collect_frames(service)
|
|
146
|
+
|
|
147
|
+
# No trigger — the pipeline would not call _trigger_initial_prompt
|
|
148
|
+
# because initial_prompt is None. Verify that's the guard.
|
|
149
|
+
assert config.initial_prompt is None
|
|
150
|
+
assert len(frames) == 0, "Agent should stay silent without initial_prompt"
|
|
@@ -21,10 +21,16 @@ import os
|
|
|
21
21
|
import aiohttp
|
|
22
22
|
from fastapi import WebSocket
|
|
23
23
|
|
|
24
|
+
from pipecat.frames.frames import LLMFullResponseEndFrame, LLMFullResponseStartFrame
|
|
24
25
|
from pipecat.pipeline.pipeline import Pipeline
|
|
25
26
|
from pipecat.pipeline.runner import PipelineRunner
|
|
26
27
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
27
|
-
from pipecat.
|
|
28
|
+
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
29
|
+
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
30
|
+
from pipecat.processors.aggregators.llm_response_universal import (
|
|
31
|
+
LLMContextAggregatorPair,
|
|
32
|
+
LLMUserAggregatorParams,
|
|
33
|
+
)
|
|
28
34
|
from pipecat.serializers.twilio import TwilioFrameSerializer
|
|
29
35
|
from pipecat.services.elevenlabs.stt import ElevenLabsSTTService
|
|
30
36
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
|
@@ -72,7 +78,19 @@ async def handle_twilio_websocket(websocket: WebSocket, call_token: str) -> None
|
|
|
72
78
|
try:
|
|
73
79
|
# Read messages until we get the "start" event
|
|
74
80
|
while True:
|
|
75
|
-
|
|
81
|
+
message = await websocket.receive()
|
|
82
|
+
|
|
83
|
+
if message.get("type") == "websocket.disconnect":
|
|
84
|
+
logger.warning("[twilio] WebSocket disconnected before start event")
|
|
85
|
+
return
|
|
86
|
+
|
|
87
|
+
# Twilio may send frames as text or binary
|
|
88
|
+
raw = message.get("text") or (
|
|
89
|
+
message.get("bytes", b"").decode("utf-8") if message.get("bytes") else None
|
|
90
|
+
)
|
|
91
|
+
if not raw:
|
|
92
|
+
continue
|
|
93
|
+
|
|
76
94
|
msg = json.loads(raw)
|
|
77
95
|
|
|
78
96
|
if msg.get("event") == "start":
|
|
@@ -173,7 +191,12 @@ async def _run_twilio_pipeline(
|
|
|
173
191
|
llm_config: Claude LLM service configuration
|
|
174
192
|
voice_id: ElevenLabs voice ID
|
|
175
193
|
"""
|
|
176
|
-
serializer = TwilioFrameSerializer(
|
|
194
|
+
serializer = TwilioFrameSerializer(
|
|
195
|
+
stream_sid=stream_sid,
|
|
196
|
+
call_sid=call_sid,
|
|
197
|
+
account_sid=config.twilio_account_sid,
|
|
198
|
+
auth_token=config.twilio_auth_token,
|
|
199
|
+
)
|
|
177
200
|
|
|
178
201
|
transport = FastAPIWebsocketTransport(
|
|
179
202
|
websocket=websocket,
|
|
@@ -211,8 +234,13 @@ async def _run_twilio_pipeline(
|
|
|
211
234
|
narration = NarrationProcessor()
|
|
212
235
|
|
|
213
236
|
# Context aggregator
|
|
214
|
-
context =
|
|
215
|
-
context_aggregator =
|
|
237
|
+
context = LLMContext()
|
|
238
|
+
context_aggregator = LLMContextAggregatorPair(
|
|
239
|
+
context,
|
|
240
|
+
user_params=LLMUserAggregatorParams(
|
|
241
|
+
vad_analyzer=SileroVADAnalyzer(),
|
|
242
|
+
),
|
|
243
|
+
)
|
|
216
244
|
|
|
217
245
|
# Pipeline
|
|
218
246
|
pipeline = Pipeline(
|
|
@@ -233,5 +261,15 @@ async def _run_twilio_pipeline(
|
|
|
233
261
|
params=PipelineParams(allow_interruptions=True),
|
|
234
262
|
)
|
|
235
263
|
|
|
264
|
+
# Send initial prompt once the pipeline is fully ready
|
|
265
|
+
@task.event_handler("on_pipeline_started")
|
|
266
|
+
async def on_pipeline_started(task_ref, *args):
|
|
267
|
+
if llm_config.initial_prompt and not claude_llm._initial_prompt_sent:
|
|
268
|
+
claude_llm._initial_prompt_sent = True
|
|
269
|
+
await claude_llm._ensure_client()
|
|
270
|
+
await claude_llm.push_frame(LLMFullResponseStartFrame())
|
|
271
|
+
await claude_llm._send_to_claude(llm_config.initial_prompt)
|
|
272
|
+
await claude_llm.push_frame(LLMFullResponseEndFrame())
|
|
273
|
+
|
|
236
274
|
runner = PipelineRunner()
|
|
237
275
|
await runner.run(task)
|
|
@@ -19,7 +19,11 @@ Responsibilities:
|
|
|
19
19
|
import aiohttp
|
|
20
20
|
import logging
|
|
21
21
|
|
|
22
|
-
from pipecat.frames.frames import
|
|
22
|
+
from pipecat.frames.frames import (
|
|
23
|
+
LLMFullResponseEndFrame,
|
|
24
|
+
LLMFullResponseStartFrame,
|
|
25
|
+
LLMMessagesFrame,
|
|
26
|
+
)
|
|
23
27
|
from pipecat.pipeline.pipeline import Pipeline
|
|
24
28
|
from pipecat.pipeline.runner import PipelineRunner
|
|
25
29
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
@@ -97,6 +101,7 @@ async def bot(runner_args: SmallWebRTCRunnerArguments):
|
|
|
97
101
|
claude_config = ClaudeLLMServiceConfig(
|
|
98
102
|
cwd=config.default_cwd,
|
|
99
103
|
system_prompt=system_prompt,
|
|
104
|
+
initial_prompt="The user just joined the call. Greet them briefly.",
|
|
100
105
|
)
|
|
101
106
|
claude_llm = ClaudeLLMService(config=claude_config)
|
|
102
107
|
|
|
@@ -135,6 +140,16 @@ async def bot(runner_args: SmallWebRTCRunnerArguments):
|
|
|
135
140
|
params=PipelineParams(allow_interruptions=True),
|
|
136
141
|
)
|
|
137
142
|
|
|
143
|
+
# Send initial prompt once the pipeline is fully ready
|
|
144
|
+
@task.event_handler("on_pipeline_started")
|
|
145
|
+
async def on_pipeline_started(task_ref, *args):
|
|
146
|
+
if claude_config.initial_prompt and not claude_llm._initial_prompt_sent:
|
|
147
|
+
claude_llm._initial_prompt_sent = True
|
|
148
|
+
await claude_llm._ensure_client()
|
|
149
|
+
await claude_llm.push_frame(LLMFullResponseStartFrame())
|
|
150
|
+
await claude_llm._send_to_claude(claude_config.initial_prompt)
|
|
151
|
+
await claude_llm.push_frame(LLMFullResponseEndFrame())
|
|
152
|
+
|
|
138
153
|
runner = PipelineRunner(handle_sigint=False)
|
|
139
154
|
await runner.run(task)
|
|
140
155
|
|