voicecc 1.1.35 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/voicecc.js +94 -1
- package/dashboard/dist/assets/index-DCeOdulF.js +28 -0
- package/dashboard/dist/index.html +1 -1
- package/dashboard/routes/agents.ts +28 -8
- package/dashboard/routes/browser-call.ts +3 -2
- package/dashboard/routes/chat.ts +75 -55
- package/dashboard/routes/providers.ts +5 -74
- package/dashboard/routes/twilio.ts +104 -5
- package/dashboard/routes/voice.ts +98 -0
- package/dashboard/server.ts +58 -2
- package/package.json +2 -3
- package/server/index.ts +96 -8
- package/server/services/device-pairing.ts +18 -2
- package/server/services/twilio-manager.ts +29 -10
- package/dashboard/dist/assets/index-C62C9Gp0.js +0 -28
- package/dashboard/dist/audio-processor.js +0 -126
- package/server/services/heartbeat.ts +0 -403
- package/server/voice/assets/chime.wav +0 -0
- package/server/voice/assets/startup.pcm +0 -0
- package/server/voice/audio-adapter.ts +0 -60
- package/server/voice/audio-inactivity.test.ts +0 -108
- package/server/voice/audio-inactivity.ts +0 -91
- package/server/voice/browser-audio-playback.test.ts +0 -149
- package/server/voice/browser-audio.ts +0 -147
- package/server/voice/browser-server.ts +0 -311
- package/server/voice/chat-server.ts +0 -236
- package/server/voice/chime.test.ts +0 -69
- package/server/voice/chime.ts +0 -36
- package/server/voice/claude-session.ts +0 -293
- package/server/voice/endpointing.ts +0 -163
- package/server/voice/mic-vpio +0 -0
- package/server/voice/narration.ts +0 -204
- package/server/voice/prompt-builder.ts +0 -108
- package/server/voice/session-lock.ts +0 -123
- package/server/voice/stt-elevenlabs.ts +0 -210
- package/server/voice/stt-provider.ts +0 -106
- package/server/voice/tts-elevenlabs-hiss.test.ts +0 -183
- package/server/voice/tts-elevenlabs.ts +0 -397
- package/server/voice/tts-provider.ts +0 -155
- package/server/voice/twilio-audio.ts +0 -338
- package/server/voice/twilio-server.ts +0 -540
- package/server/voice/types.ts +0 -282
- package/server/voice/vad.ts +0 -101
- package/server/voice/voice-loop-bugs.test.ts +0 -348
- package/server/voice/voice-server.ts +0 -129
- package/server/voice/voice-session.ts +0 -539
|
@@ -1,126 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* AudioWorklet processor for browser voice calls.
|
|
3
|
-
*
|
|
4
|
-
* Runs in the browser's audio rendering thread. Handles two jobs:
|
|
5
|
-
* - Mic capture: accumulates input samples into chunks, posts them to main thread
|
|
6
|
-
* - Speaker playback: reads from a chunk queue fed by main thread, writes to output
|
|
7
|
-
*
|
|
8
|
-
* Responsibilities:
|
|
9
|
-
* - Buffer incoming mic audio and emit fixed-size chunks to main thread
|
|
10
|
-
* - Accept playback audio from main thread and enqueue into chunk queue
|
|
11
|
-
* - Provide "clear" support to flush the chunk queue on interruption
|
|
12
|
-
*
|
|
13
|
-
* Must be plain JavaScript -- AudioWorklet modules cannot be bundled by Vite.
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
// ============================================================================
|
|
17
|
-
// CONSTANTS
|
|
18
|
-
// ============================================================================
|
|
19
|
-
|
|
20
|
-
/** Number of mic samples to accumulate before posting a chunk to the main thread. */
const CHUNK_SIZE = 512;

// ============================================================================
// PROCESSOR
// ============================================================================

/**
 * AudioWorklet processor that captures mic input and plays back queued audio.
 *
 * Messages accepted on `port`:
 * - `{ type: "playback", samples }` appends `samples` to the playback queue
 * - `{ type: "clear" }` drops everything queued (used on interruption)
 *
 * Messages posted on `port`:
 * - `{ type: "audio", samples }` carrying a copy of each full CHUNK_SIZE mic chunk
 */
class AudioProcessor extends AudioWorkletProcessor {
  constructor() {
    super();

    // Mic capture buffer
    this._micBuffer = new Float32Array(CHUNK_SIZE);
    this._micBufferIndex = 0;

    // Speaker playback chunk queue
    this._chunks = [];
    this._chunkIndex = 0; // read position within the chunk at the head of the queue

    // Handle messages from main thread
    this.port.onmessage = (event) => {
      const { type, samples } = event.data;

      if (type === "playback") {
        // Reject anything that is not a non-empty array-like. A queued value
        // without a numeric length can never be consumed by _readFromQueue,
        // which would then loop forever and stall the audio rendering thread.
        if (AudioProcessor._isPlayableChunk(samples)) {
          this._chunks.push(samples);
        }
      } else if (type === "clear") {
        this._chunks.length = 0;
        this._chunkIndex = 0;
      }
    };
  }

  /**
   * Decide whether a value received in a "playback" message can be queued.
   *
   * @param {unknown} samples - Value taken from the message payload
   * @returns {boolean} true for objects with a positive integer `length`
   */
  static _isPlayableChunk(samples) {
    return (
      typeof samples === "object" &&
      samples !== null &&
      Number.isInteger(samples.length) &&
      samples.length > 0
    );
  }

  /**
   * Read samples from the chunk queue into the output array.
   * Writes silence (0) for whatever part of the output the queue cannot fill.
   *
   * @param {Float32Array} output - Destination array to fill
   */
  _readFromQueue(output) {
    let written = 0;

    while (written < output.length) {
      if (this._chunks.length === 0) {
        // Queue empty -- fill remaining with silence
        output.fill(0, written);
        return;
      }

      const chunk = this._chunks[0];
      const available = chunk.length - this._chunkIndex;
      const needed = output.length - written;
      const toCopy = Math.min(available, needed);

      for (let i = 0; i < toCopy; i++) {
        output[written++] = chunk[this._chunkIndex++];
      }

      // Head chunk fully consumed -- move on to the next one
      if (this._chunkIndex >= chunk.length) {
        this._chunks.shift();
        this._chunkIndex = 0;
      }
    }
  }

  /**
   * Called by the audio rendering thread for each render quantum.
   *
   * @param {Float32Array[][]} inputs - Input audio channels (mic); only the first channel of the first input is read
   * @param {Float32Array[][]} outputs - Output audio channels (speaker); every channel of the first output is written
   * @param {Record<string, Float32Array>} parameters - AudioParam values (unused)
   * @returns {boolean} true to keep the processor alive
   */
  process(inputs, outputs, parameters) {
    // -- Mic capture: accumulate input samples and post chunks --
    const input = inputs[0];
    if (input && input[0]) {
      const inputChannel = input[0];
      for (let i = 0; i < inputChannel.length; i++) {
        this._micBuffer[this._micBufferIndex++] = inputChannel[i];

        if (this._micBufferIndex >= CHUNK_SIZE) {
          // Post a copy so the buffer can be reused for the next chunk
          this.port.postMessage({
            type: "audio",
            samples: this._micBuffer.slice(),
          });
          this._micBufferIndex = 0;
        }
      }
    }

    // -- Speaker playback: read from chunk queue into output (mono -> all channels) --
    const output = outputs[0];
    if (output && output[0]) {
      this._readFromQueue(output[0]);
      for (let ch = 1; ch < output.length; ch++) {
        output[ch].set(output[0]);
      }
    }

    return true;
  }
}

registerProcessor("audio-processor", AudioProcessor);
|
|
@@ -1,403 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Interval-based heartbeat scheduler for agent check-ins.
|
|
3
|
-
*
|
|
4
|
-
* Creates a persistent Claude Code session per heartbeat check so the agent can
|
|
5
|
-
* execute whatever HEARTBEAT.md instructs (check email, calendar, APIs, etc.).
|
|
6
|
-
* When a heartbeat determines the user should be contacted, initiates an
|
|
7
|
-
* outbound Twilio call and hands the live Claude session to the voice session
|
|
8
|
-
* so it retains full context of what it checked.
|
|
9
|
-
*
|
|
10
|
-
* - Start/stop a 60-second global interval that checks all enabled agents
|
|
11
|
-
* - Track per-agent check intervals and concurrent-check guards
|
|
12
|
-
* - Create persistent Claude sessions with full tool access
|
|
13
|
-
* - Parse JSON heartbeat responses and initiate outbound calls
|
|
14
|
-
* - Pass live Claude sessions to the Twilio server for voice call continuity
|
|
15
|
-
* - Expose last heartbeat results for the API
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
|
-
import { randomUUID } from "crypto";
|
|
19
|
-
import { readFileSync } from "fs";
|
|
20
|
-
import { dirname, join } from "path";
|
|
21
|
-
import { fileURLToPath } from "url";
|
|
22
|
-
|
|
23
|
-
import twilio from "twilio";
|
|
24
|
-
import { createClaudeSession, type ClaudeSession } from "../voice/claude-session.js";
|
|
25
|
-
import { buildAgentPrompt } from "../voice/prompt-builder.js";
|
|
26
|
-
import { listAgents, getAgent, AGENTS_DIR, type Agent } from "./agent-store.js";
|
|
27
|
-
import { readEnv } from "./env.js";
|
|
28
|
-
import { getTunnelUrl, isTunnelRunning } from "./tunnel.js";
|
|
29
|
-
import { isRunning as isTwilioRunning } from "./twilio-manager.js";
|
|
30
|
-
import { setCallClaudeSession } from "../voice/twilio-server.js";
|
|
31
|
-
|
|
32
|
-
// ============================================================================
|
|
33
|
-
// CONSTANTS
|
|
34
|
-
// ============================================================================
|
|
35
|
-
|
|
36
|
-
// Directory containing this module; ES modules have no built-in __dirname,
// so it is derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));

/** Period of the global scheduler timer, in milliseconds (60 seconds). */
const CHECK_INTERVAL_MS = 60_000;

/** Fallback maximum duration of a single heartbeat Claude session (5 minutes). */
const DEFAULT_HEARTBEAT_TIMEOUT_MS = 5 * 60_000;

/**
 * Message sent to the heartbeat Claude session.
 * Read synchronously when this module is loaded, so a missing
 * init/defaults/system-heartbeat.md makes the import itself throw.
 */
const HEARTBEAT_PROMPT = readFileSync(join(__dirname, "..", "..", "init", "defaults", "system-heartbeat.md"), "utf-8").trim();
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
// ============================================================================
|
|
49
|
-
// TYPES
|
|
50
|
-
// ============================================================================
|
|
51
|
-
|
|
52
|
-
/** Result of a single agent heartbeat check */
export interface HeartbeatResult {
  /** Identifier of the agent the check was run for */
  agentId: string;
  /** Whether the check concluded that the user should be called */
  shouldCall: boolean;
  /** Explanation taken from the heartbeat response, or a fixed message for fail-safe results */
  reason: string;
  /** Date.now() value captured when the result object was created */
  timestamp: number;
}
|
|
59
|
-
|
|
60
|
-
// ============================================================================
|
|
61
|
-
// STATE
|
|
62
|
-
// ============================================================================
|
|
63
|
-
|
|
64
|
-
/** Global setInterval handle; null while the scheduler is stopped */
let intervalTimer: ReturnType<typeof setInterval> | null = null;

/** Last heartbeat result per agent, keyed by agent ID */
let lastResults: Record<string, HeartbeatResult> = {};

/** Timestamp (Date.now()) of the most recent check start per agent, keyed by agent ID */
let lastCheckTimes: Record<string, number> = {};

/** IDs of agents whose check is currently running (guards against concurrent checks) */
const inFlightChecks = new Set<string>();
|
|
75
|
-
|
|
76
|
-
// ============================================================================
|
|
77
|
-
// MAIN HANDLERS
|
|
78
|
-
// ============================================================================
|
|
79
|
-
|
|
80
|
-
/**
 * Launch the global heartbeat scheduler.
 *
 * Installs one repeating timer that runs checkAllAgents every
 * CHECK_INTERVAL_MS. Calling this while the scheduler is already running does
 * nothing. The first check happens one full interval after the call.
 */
export function startHeartbeat(): void {
  if (intervalTimer !== null) {
    return;
  }

  // Rejections are logged here so a failed pass never becomes an unhandled rejection.
  const runScheduledCheck = (): void => {
    checkAllAgents().catch((err) => {
      console.error("[heartbeat] checkAllAgents error:", err);
    });
  };

  intervalTimer = setInterval(runScheduledCheck, CHECK_INTERVAL_MS);
  console.log("[heartbeat] scheduler started (60s interval)");
}
|
|
95
|
-
|
|
96
|
-
/**
 * Halt the heartbeat scheduler.
 *
 * Cancels the global timer and resets the handle so startHeartbeat can be
 * called again. Does nothing when the scheduler is not running. Checks that
 * are already in flight are not cancelled.
 */
export function stopHeartbeat(): void {
  if (intervalTimer === null) {
    return;
  }

  clearInterval(intervalTimer);
  intervalTimer = null;
}
|
|
106
|
-
|
|
107
|
-
/**
 * Get the last heartbeat result per agent.
 * Used by the API to expose heartbeat status.
 *
 * Returns a shallow snapshot rather than the module's own map, so callers
 * cannot add or remove entries in the scheduler's state. The HeartbeatResult
 * objects themselves are shared, not cloned.
 *
 * @returns Record of agent ID to last HeartbeatResult
 */
export function getHeartbeatStatus(): Record<string, HeartbeatResult> {
  return { ...lastResults };
}
|
|
116
|
-
|
|
117
|
-
/**
 * Initiate an outbound Twilio call to the user on behalf of an agent.
 * Used by both the heartbeat scheduler and the API "Call Me" route.
 *
 * Flow:
 * 1. Check preconditions (tunnel + Twilio server running)
 * 2. Read Twilio credentials, port and the user's phone number from the env file
 * 3. Resolve the tunnel host and the caller ID (first incoming number on the account)
 * 4. Generate a UUID token and register it with the Twilio server
 * 5. Optionally attach a live Claude session for voice call continuity
 * 6. Place the outbound call via the Twilio SDK with TwiML streaming to our WebSocket
 *
 * The token is registered only after every validation and lookup has
 * succeeded, so a configuration problem does not leave a registered token
 * (or an attached session) behind for a call that is never placed.
 *
 * @param agent - Agent to call on behalf of; only `agent.id` is read here
 * @param opts - Optional call settings
 * @param opts.claudeSession - Live Claude session to hand off to the voice call
 * @param opts.initialPrompt - Initial prompt forwarded when registering the token
 * @returns The Twilio call SID
 * @throws Error when the tunnel or Twilio server is not running, required
 *   settings are missing, the tunnel URL is unavailable, the account has no
 *   phone number, or token registration / call placement fails
 */
export async function initiateAgentCall(agent: Agent, opts?: { claudeSession?: ClaudeSession; initialPrompt?: string }): Promise<string> {
  // Check preconditions
  if (!isTunnelRunning()) {
    throw new Error("Tunnel is not running. Cannot place outbound call.");
  }
  if (!isTwilioRunning()) {
    throw new Error("Twilio server is not running. Cannot place outbound call.");
  }

  const envVars = await readEnv();

  // `||` (not `??`) so an empty TWILIO_PORT entry also falls back to the default
  const twilioPort = Number.parseInt(envVars.TWILIO_PORT || "8080", 10);
  const accountSid = envVars.TWILIO_ACCOUNT_SID;
  const authToken = envVars.TWILIO_AUTH_TOKEN;
  const userPhoneNumber = envVars.USER_PHONE_NUMBER;

  if (!accountSid || !authToken) {
    throw new Error("TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN must be set in .env");
  }
  if (!userPhoneNumber) {
    throw new Error("USER_PHONE_NUMBER must be set in Settings > General");
  }

  // Get the tunnel host: strip the protocol and any trailing slash so the
  // WebSocket URL below does not end up with "//media".
  const fullTunnelUrl = getTunnelUrl();
  if (!fullTunnelUrl) {
    throw new Error("Tunnel URL is not available. Cannot place outbound call.");
  }
  const tunnelHost = fullTunnelUrl.replace(/^https?:\/\//, "").replace(/\/+$/, "");

  // Get the first Twilio phone number on the account
  const client = twilio(accountSid, authToken);
  const numbers = await client.incomingPhoneNumbers.list({ limit: 1 });
  if (numbers.length === 0) {
    throw new Error("No Twilio phone numbers found on the account");
  }
  const fromNumber = numbers[0].phoneNumber;

  // Register the call token with the Twilio server (with optional initial prompt)
  const token = randomUUID();
  await registerCallToken(twilioPort, token, agent.id, opts?.initialPrompt);

  // Attach the live Claude session if provided (heartbeat-initiated calls)
  if (opts?.claudeSession) {
    setCallClaudeSession(token, opts.claudeSession);
  }

  // Build TwiML with WebSocket stream. The agent ID is percent-encoded so it
  // cannot break out of the URL or the XML attribute.
  // NOTE(review): Twilio's <Stream> documentation says the url attribute does
  // not support query string parameters -- confirm the server takes the agent
  // from the registered token rather than from `agentId`.
  const twiml = `<Response><Connect><Stream url="wss://${tunnelHost}/media/${token}?agentId=${encodeURIComponent(agent.id)}" /></Connect></Response>`;

  // Place the outbound call
  const call = await client.calls.create({
    to: userPhoneNumber,
    from: fromNumber,
    twiml,
  });

  console.log(`[heartbeat] outbound call placed to ${userPhoneNumber} (callSid=${call.sid})`);
  return call.sid;
}
|
|
188
|
-
|
|
189
|
-
// ============================================================================
|
|
190
|
-
// HELPER FUNCTIONS
|
|
191
|
-
// ============================================================================
|
|
192
|
-
|
|
193
|
-
/**
 * Check all enabled agents and spawn heartbeat sessions for those that are due.
 * Skips agents whose configured interval has not elapsed and agents with
 * in-flight checks (concurrent guard).
 *
 * A failure to load one agent is logged and that agent is skipped; the
 * remaining agents are still processed. Because the agent's last-check time
 * is only recorded once its check starts, a skipped agent is retried on the
 * next scheduler tick.
 */
async function checkAllAgents(): Promise<void> {
  const summaries = await listAgents();
  const enabledSummaries = summaries.filter((s) => s.enabled);

  if (enabledSummaries.length === 0) return;

  const now = Date.now();

  for (const summary of enabledSummaries) {
    // Skip if interval has not elapsed
    const lastCheck = lastCheckTimes[summary.id] ?? 0;
    const intervalMs = summary.heartbeatIntervalMinutes * 60_000;
    if (now - lastCheck < intervalMs) continue;

    // Skip if already checking this agent
    if (inFlightChecks.has(summary.id)) continue;

    // Load full agent data; isolate failures so one broken agent cannot
    // abort the pass for every agent after it.
    let agent: Agent;
    try {
      agent = await getAgent(summary.id);
    } catch (err) {
      console.error(`[heartbeat] failed to load agent "${summary.id}":`, err);
      continue;
    }

    // Spawn the check without awaiting it (fire-and-forget)
    checkSingleAgent(agent).catch((err) => {
      console.error(`[heartbeat] check failed for agent "${agent.id}":`, err);
    });
  }
}
|
|
222
|
-
|
|
223
|
-
/**
 * Run a heartbeat check for a single agent using a persistent Claude session.
 * If the check determines shouldCall, keeps the session alive and passes it
 * to the outbound call so the voice session continues with full context.
 *
 * The agent is marked in-flight for the duration of the check. The mark is
 * always removed afterwards, even when closing the session fails; otherwise
 * the agent would be skipped by every later scheduler pass.
 *
 * @param agent - Full agent data; `agent.id` and `agent.config.heartbeatTimeoutMinutes` are read here
 * @returns The heartbeat result, which is also stored as the agent's last result
 */
async function checkSingleAgent(agent: Agent): Promise<HeartbeatResult> {
  inFlightChecks.add(agent.id);
  lastCheckTimes[agent.id] = Date.now();

  let session: ClaudeSession | null = null;

  try {
    // Use the configured timeout only when it is a positive finite number of
    // minutes; anything else (unset, 0, negative, NaN) falls back to the default.
    const configuredMinutes = Number(agent.config.heartbeatTimeoutMinutes);
    const timeoutMs =
      Number.isFinite(configuredMinutes) && configuredMinutes > 0
        ? configuredMinutes * 60_000
        : DEFAULT_HEARTBEAT_TIMEOUT_MS;

    const { result, claudeSession } = await runHeartbeatSession(agent, timeoutMs);
    session = claudeSession;
    lastResults[agent.id] = result;

    console.log(
      `[heartbeat] agent "${agent.id}": shouldCall=${result.shouldCall}, reason="${result.reason}"`,
    );

    if (result.shouldCall) {
      try {
        // Pass the live session to the call — it will be handed to the voice session
        await initiateAgentCall(agent, { claudeSession: session });
        session = null; // Don't close — voice session owns it now
      } catch (err) {
        console.error(`[heartbeat] failed to call agent "${agent.id}":`, err);
      }
    }

    return result;
  } finally {
    try {
      // Close the session if we still own it (shouldCall was false, or call failed)
      if (session) {
        await session.close();
      }
    } catch (err) {
      console.error(`[heartbeat] failed to close session for agent "${agent.id}":`, err);
    } finally {
      inFlightChecks.delete(agent.id);
    }
  }
}
|
|
265
|
-
|
|
266
|
-
/**
 * Execute one heartbeat check inside a fresh Claude session.
 *
 * Builds the agent's "voice" prompt, opens a session rooted in the agent's
 * directory, sends HEARTBEAT_PROMPT and concatenates the streamed text deltas
 * until a result event arrives. The collected text is then parsed as a
 * heartbeat JSON response.
 *
 * The session is interrupted once `timeoutMs` elapses. On timeout, empty
 * output, or a stream error the fail-safe result is returned. In every case
 * the session is returned still open; the caller decides whether to close it
 * or hand it to a voice call.
 *
 * @param agent - Full agent data
 * @param timeoutMs - Maximum time to wait before interrupting the session
 * @returns The heartbeat result and the live Claude session
 */
async function runHeartbeatSession(agent: Agent, timeoutMs: number): Promise<{ result: HeartbeatResult; claudeSession: ClaudeSession }> {
  const workingDir = join(AGENTS_DIR, agent.id);
  // The "voice" prompt is used so the same session can continue as a voice call
  const voicePrompt = await buildAgentPrompt(agent.id, "voice");

  const claudeSession = await createClaudeSession({
    allowedTools: [],
    permissionMode: "bypassPermissions",
    systemPrompt: "",
    customSystemPrompt: voicePrompt,
    cwd: workingDir,
  });

  // Watchdog: flag the timeout first, then interrupt the running session
  let deadlineHit = false;
  const watchdog = setTimeout(() => {
    deadlineHit = true;
    claudeSession.interrupt();
  }, timeoutMs);

  const failSafe = () => ({ result: failSafeResult(agent.id), claudeSession });

  try {
    // Gather the assistant's text until the stream reports its result
    let collected = "";

    for await (const event of claudeSession.sendMessage(HEARTBEAT_PROMPT)) {
      if (event.type === "text_delta") {
        collected += event.content;
      } else if (event.type === "result") {
        break;
      }
    }

    if (deadlineHit) {
      console.error(`[heartbeat] session timed out for agent "${agent.id}"`);
      return failSafe();
    }

    if (!collected) {
      console.error(`[heartbeat] no response text for agent "${agent.id}"`);
      return failSafe();
    }

    return { result: parseHeartbeatResponse(agent.id, collected), claudeSession };
  } catch (err) {
    if (!deadlineHit) {
      console.error(`[heartbeat] session error for agent "${agent.id}":`, err);
    } else {
      console.error(`[heartbeat] session timed out for agent "${agent.id}"`);
    }
    return failSafe();
  } finally {
    clearTimeout(watchdog);
  }
}
|
|
333
|
-
|
|
334
|
-
/**
 * Decide whether a parsed `shouldCall` value means "place the call".
 * Only the boolean `true` or the string "true" (any case, surrounding
 * whitespace ignored) count; everything else, including the string "false",
 * does not.
 *
 * @param value - Raw `shouldCall` value from the parsed JSON
 * @returns true only for an explicit affirmative
 */
function isAffirmativeShouldCall(value: unknown): boolean {
  return value === true || (typeof value === "string" && value.trim().toLowerCase() === "true");
}

/**
 * Parse a heartbeat JSON response string into a HeartbeatResult.
 * Expects a JSON object with shouldCall (boolean) and reason (string),
 * optionally surrounded by other text.
 *
 * Two candidates are tried in order: the widest `{ ... }` span that contains
 * "shouldCall", then the tightest span around the first "shouldCall" key.
 * The second one rescues replies whose surrounding prose contains braces.
 * Returns a fail-safe result (no call) if no candidate parses.
 *
 * @param agentId - Agent identifier for the result
 * @param text - Raw text from the assistant response
 * @returns Parsed HeartbeatResult
 */
function parseHeartbeatResponse(agentId: string, text: string): HeartbeatResult {
  // Extract JSON from the text (may contain surrounding text)
  const jsonMatch = text.match(/\{[\s\S]*"shouldCall"[\s\S]*\}/);
  if (!jsonMatch) {
    console.error(`[heartbeat] no JSON found in response for agent "${agentId}": ${text}`);
    return failSafeResult(agentId);
  }

  // The match starts with "{" and ends with "}", so both searches below
  // always find a brace inside it.
  const widest = jsonMatch[0];
  const keyIndex = widest.indexOf('"shouldCall"');
  const tightest = widest.slice(widest.lastIndexOf("{", keyIndex), widest.indexOf("}", keyIndex) + 1);
  const candidates = tightest === widest ? [widest] : [widest, tightest];

  let lastError: unknown;
  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate);
      return {
        agentId,
        shouldCall: isAffirmativeShouldCall(parsed.shouldCall),
        reason: String(parsed.reason || ""),
        timestamp: Date.now(),
      };
    } catch (err) {
      lastError = err;
    }
  }

  console.error(`[heartbeat] JSON parse error for agent "${agentId}":`, lastError);
  return failSafeResult(agentId);
}
|
|
364
|
-
|
|
365
|
-
/**
 * Build the HeartbeatResult used whenever a check cannot produce a trustworthy
 * answer (session error, timeout, empty or unparseable output). It never
 * requests a call.
 *
 * @param agentId - Agent identifier
 * @returns HeartbeatResult with shouldCall: false, stamped with the current time
 */
function failSafeResult(agentId: string): HeartbeatResult {
  const noCall: HeartbeatResult = {
    agentId,
    shouldCall: false,
    reason: "heartbeat check failed or timed out",
    timestamp: Date.now(),
  };
  return noCall;
}
|
|
380
|
-
|
|
381
|
-
/**
 * Announce a call token to the Twilio server with an HTTP POST to
 * /register-call on localhost.
 *
 * The HTTP endpoint is used even though the Twilio server runs in-process,
 * because the WebSocket upgrade path validates tokens against the activeCalls
 * map that this endpoint populates.
 *
 * @param port - Twilio server port
 * @param token - UUID token for the call
 * @param agentId - Agent identifier to associate with the call
 * @param initialPrompt - Optional initial prompt; sent only when non-empty
 * @throws Error when the server answers with a non-2xx status
 */
async function registerCallToken(port: number, token: string, agentId: string, initialPrompt?: string): Promise<void> {
  const payload: Record<string, string> = { token, agentId };
  if (initialPrompt) {
    payload.initialPrompt = initialPrompt;
  }

  const endpoint = `http://localhost:${port}/register-call`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (response.ok) {
    return;
  }
  throw new Error(`Failed to register call token: ${response.status} ${response.statusText}`);
}
|
|
Binary file
|
|
Binary file
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* AudioAdapter interface for abstracting audio I/O in voice sessions.
|
|
3
|
-
*
|
|
4
|
-
* Any audio transport (local mic, Twilio, WhatsApp) implements this interface
|
|
5
|
-
* so the voice session logic remains transport-agnostic.
|
|
6
|
-
*
|
|
7
|
-
* Responsibilities:
|
|
8
|
-
* - Define a common contract for audio input (microphone) and output (speaker)
|
|
9
|
-
* - Support playback interruption and resumption
|
|
10
|
-
* - Provide a ready chime signal
|
|
11
|
-
* - Clean up resources on destroy
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
// ============================================================================
|
|
15
|
-
// INTERFACES
|
|
16
|
-
// ============================================================================
|
|
17
|
-
|
|
18
|
-
/**
 * Abstraction over audio I/O for the voice session.
 * Implemented by browser-audio.ts (WebSocket) and twilio-audio.ts (WebSocket).
 *
 * Every member is a function-typed property, so implementations can be plain
 * object literals as well as classes.
 */
export interface AudioAdapter {
  /**
   * Subscribe to incoming audio chunks from the microphone.
   * The callback receives Float32Array samples (16kHz, normalized -1.0 to 1.0).
   * The callback is synchronous -- the consumer wraps async work internally.
   *
   * @param callback - Called with each audio chunk as Float32Array
   */
  onAudio: (callback: (samples: Float32Array) => void) => void;

  /**
   * Write PCM audio to the speaker output.
   * Audio format: 16-bit signed, 24kHz mono.
   *
   * @param pcm - Raw PCM buffer to play
   * @returns Resolves when the write completes (backpressure)
   */
  writeSpeaker: (pcm: Buffer) => Promise<void>;

  /**
   * Clear the output audio buffer immediately (user interruption).
   */
  interrupt: () => void;

  /**
   * Resume output after an interrupt. Must be called before writing new audio.
   */
  resume: () => void;

  /**
   * Play the ready chime through the output.
   */
  playChime: () => void;

  /**
   * Clean up all resources (kill processes, close connections).
   * NOTE(review): presumably the adapter must not be used after this call -- confirm with implementations.
   */
  destroy: () => void;
}
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tests for the audio inactivity watchdog.
|
|
3
|
-
*
|
|
4
|
-
* Verifies that the watchdog fires a callback when audio frames stop arriving,
|
|
5
|
-
* and that ongoing audio keeps the connection alive.
|
|
6
|
-
*
|
|
7
|
-
* Run: npx tsx --test server/voice/audio-inactivity.test.ts
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
|
-
import { test } from "node:test";
|
|
11
|
-
import { strict as assert } from "node:assert";
|
|
12
|
-
|
|
13
|
-
import { createAudioInactivityWatchdog } from "./audio-inactivity.js";
|
|
14
|
-
|
|
15
|
-
// ============================================================================
|
|
16
|
-
// HELPERS
|
|
17
|
-
// ============================================================================
|
|
18
|
-
|
|
19
|
-
/**
 * Pause the calling async function for roughly the given duration.
 *
 * @param ms - Delay in milliseconds
 * @returns Promise that resolves (with no value) once the timer fires
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
28
|
-
|
|
29
|
-
// ============================================================================
|
|
30
|
-
// TESTS
|
|
31
|
-
// ============================================================================
|
|
32
|
-
|
|
33
|
-
/**
 * Core scenario: the caller hangs up and audio stops. With no ping() at all,
 * onTimeout must have been invoked once the timeout window has passed.
 */
test("fires callback when no audio arrives within timeout", async () => {
  let timeoutCalls = 0;

  const watchdog = createAudioInactivityWatchdog({
    timeoutMs: 100,
    checkIntervalMs: 30,
    onTimeout: () => {
      timeoutCalls += 1;
    },
  });

  try {
    // Wait twice the timeout without sending any audio
    await sleep(200);
    assert.ok(timeoutCalls > 0, "onTimeout should have fired after 100ms of silence");
  } finally {
    watchdog.dispose();
  }
});
|
|
53
|
-
|
|
54
|
-
/**
 * Healthy active call: as long as ping() keeps being called more often than
 * the timeout, onTimeout must never be invoked.
 */
test("does not fire callback while audio keeps arriving", async () => {
  let timeoutCalls = 0;

  const watchdog = createAudioInactivityWatchdog({
    timeoutMs: 100,
    checkIntervalMs: 30,
    onTimeout: () => {
      timeoutCalls += 1;
    },
  });

  try {
    // 8 pings spaced 30ms apart: ~240ms of activity, well past the 100ms timeout
    let remainingPings = 8;
    while (remainingPings > 0) {
      watchdog.ping();
      await sleep(30);
      remainingPings -= 1;
    }

    assert.ok(timeoutCalls === 0, "onTimeout should not fire while audio is arriving");
  } finally {
    watchdog.dispose();
  }
});
|
|
79
|
-
|
|
80
|
-
/**
 * Call that is active for a while and then goes silent (caller hangs up):
 * onTimeout must stay quiet during the active phase and be invoked afterwards.
 */
test("fires callback when audio stops after period of activity", async () => {
  let timeoutCalls = 0;

  const watchdog = createAudioInactivityWatchdog({
    timeoutMs: 100,
    checkIntervalMs: 30,
    onTimeout: () => {
      timeoutCalls += 1;
    },
  });

  try {
    // Active phase: 5 pings spaced 30ms apart (~150ms)
    let remainingPings = 5;
    while (remainingPings > 0) {
      watchdog.ping();
      await sleep(30);
      remainingPings -= 1;
    }

    assert.ok(timeoutCalls === 0, "should not have fired during active audio");

    // Silent phase: no more pings for twice the timeout
    await sleep(200);
    assert.ok(timeoutCalls > 0, "onTimeout should fire after audio stops");
  } finally {
    watchdog.dispose();
  }
});
|