voicecc 1.2.13 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/dist/assets/index-CVP_3PYo.js +28 -0
- package/dashboard/dist/index.html +1 -1
- package/dashboard/routes/agents.ts +23 -1
- package/dashboard/routes/integrations.ts +12 -1
- package/dashboard/routes/whatsapp.ts +98 -0
- package/dashboard/server.ts +2 -0
- package/package.json +2 -1
- package/server/index.ts +12 -0
- package/server/services/agent-store.ts +1 -0
- package/server/services/whatsapp-groups.ts +289 -0
- package/server/services/whatsapp-integration.test.ts +343 -0
- package/server/services/whatsapp-manager.ts +395 -0
- package/server/services/whatsapp-message-handler.test.ts +272 -0
- package/server/services/whatsapp-message-handler.ts +429 -0
- package/voice-server/claude_session.py +68 -14
- package/voice-server/config.py +7 -0
- package/voice-server/heartbeat.py +72 -5
- package/voice-server/server.py +24 -24
- package/dashboard/dist/assets/index-DbjqXBdo.js +0 -28
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Incoming WhatsApp message handler for VoiceCC.
|
|
3
|
+
*
|
|
4
|
+
* Routes group messages to the Python /chat/send endpoint, streams the SSE
|
|
5
|
+
* response, and sends each logical message to WhatsApp as soon as it completes.
|
|
6
|
+
*
|
|
7
|
+
* Responsibilities:
|
|
8
|
+
* - Validate and filter incoming Baileys messages (only owner text in mapped groups)
|
|
9
|
+
* - Normalize JIDs to handle the :0 device suffix from Baileys
|
|
10
|
+
* - Stream SSE from Python and yield message segments at tool_start / result boundaries
|
|
11
|
+
* - Send each segment to WhatsApp immediately (no buffering the full response)
|
|
12
|
+
* - Handle concurrency (HTTP 409 -> "Still thinking, please wait...")
|
|
13
|
+
* - Store session IDs for conversation resume
|
|
14
|
+
* - Split long replies that exceed WhatsApp's byte limit
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { proto } from "baileys";
|
|
18
|
+
import { getSocket } from "./whatsapp-manager.js";
|
|
19
|
+
import {
|
|
20
|
+
getAgentIdForGroup,
|
|
21
|
+
getLastSessionId,
|
|
22
|
+
setLastSessionId,
|
|
23
|
+
} from "./whatsapp-groups.js";
|
|
24
|
+
|
|
25
|
+
// ============================================================================
|
|
26
|
+
// CONSTANTS
|
|
27
|
+
// ============================================================================
|
|
28
|
+
|
|
29
|
+
/**
 * Where the Python FastAPI voice server listens. Taken from the
 * VOICE_SERVER_URL environment variable when it is set, otherwise the
 * local default below is used.
 */
const VOICE_SERVER_URL = process.env.VOICE_SERVER_URL ?? "http://localhost:7861";

/** Upper bound, in bytes, used when splitting a reply into WhatsApp messages. */
const MAX_MESSAGE_BYTES = 64 * 1024;

/** Sent to the group when the Python server answers HTTP 409 (a reply is already in flight). */
const ALREADY_STREAMING_REPLY = "Still thinking, please wait...";

/** Sent to the group when the SSE stream reported an error and produced no text. */
const SSE_ERROR_REPLY = "Sorry, something went wrong while generating a response. Please try again.";
|
|
40
|
+
|
|
41
|
+
// ============================================================================
|
|
42
|
+
// TYPES
|
|
43
|
+
// ============================================================================
|
|
44
|
+
|
|
45
|
+
/**
 * An incoming WhatsApp message that passed filtering and is ready to be
 * forwarded to the agent.
 */
export interface WhatsAppIncomingMessage {
  /** JID of the group the message was posted in. */
  groupJid: string;
  /** JID of the participant who sent the message (as reported by Baileys, not normalized). */
  senderJid: string;
  /** Plain text body of the message. */
  text: string;
  /** Message key ID, or an empty string when the key carried none. */
  messageId: string;
}
|
|
52
|
+
|
|
53
|
+
/**
 * One unit of output produced by the SSE stream generator. Each segment
 * corresponds to one logical message to deliver to WhatsApp.
 */
export interface SseSegment {
  /** Text accumulated for this segment (may be empty). */
  text: string;
  /** Session ID reported by the "result" event; null on every other segment. */
  sessionId: string | null;
  /** True when the server answered HTTP 409, i.e. a reply is already being streamed. */
  isAlreadyStreaming: boolean;
  /** True when the segment carries the generic error reply instead of agent text. */
  isError: boolean;
}
|
|
60
|
+
|
|
61
|
+
/**
 * JSON payload carried by a single SSE event from Python /chat/send.
 * Keys are camelCase, mirroring what the Python side's to_dict() emits.
 */
interface SseEventPayload {
  /** Event discriminator, e.g. "text_delta", "tool_start", "result" or "error". */
  type: string;
  /** Text content (or error text) attached to the event, when present. */
  content?: string;
  /** Session ID used for conversation resume, when present. */
  sessionId?: string;
  /** Tool name, when present. */
  toolName?: string;
  /** Error detail, when present. */
  error?: string;
}
|
|
69
|
+
|
|
70
|
+
// ============================================================================
|
|
71
|
+
// MAIN HANDLERS
|
|
72
|
+
// ============================================================================
|
|
73
|
+
|
|
74
|
+
/**
 * Decide whether a raw Baileys message should be handled and, if so, extract
 * the fields the handler needs.
 *
 * The message is ignored (null is returned) as soon as one of these checks
 * fails, evaluated in this order:
 * 1. it has a remote JID
 * 2. it is not a status broadcast ("status@broadcast")
 * 3. it is a group message (remote JID ends with "@g.us")
 * 4. it has a participant (sender) JID
 * 5. the sender is not ownJid (compared after stripping the device suffix)
 * 6. key.fromMe is not set -- skipping these avoids echo loops, because
 *    messages sent by the linked account cannot be told apart from the
 *    replies this service sends itself
 * 7. it carries text content (plain or extended text)
 * 8. the group is mapped to an agent
 *
 * NOTE(review): this function does NOT restrict handling to the account
 * owner. Any other participant of a mapped group passes the filter; the
 * design assumes mapped groups contain nobody but the owner. Confirm that
 * assumption holds wherever groups are created or mapped.
 *
 * @param msg - Raw Baileys message
 * @param ownJid - The connected account's own JID (from sock.user.id)
 * @returns Parsed message, or null if it should be ignored
 */
export function shouldHandleMessage(
  msg: proto.IWebMessageInfo,
  ownJid: string
): WhatsAppIncomingMessage | null {
  const remoteJid = msg.key?.remoteJid;
  if (!remoteJid) {
    return null;
  }

  if (remoteJid === "status@broadcast") {
    return null;
  }

  // Group JIDs end with @g.us; everything else is a direct chat.
  if (!remoteJid.endsWith("@g.us")) {
    return null;
  }

  // In groups the actual sender is carried in key.participant.
  const senderJid = msg.key?.participant;
  if (!senderJid) {
    return null;
  }

  // Compare without the ":N" device suffix so the same account matches
  // regardless of which device produced the JID.
  if (normalizeJid(senderJid) === normalizeJid(ownJid)) {
    return null;
  }

  // Anything flagged fromMe was sent by the linked account (phone or this
  // service); handling it would make the bot answer its own replies.
  if (msg.key?.fromMe) {
    return null;
  }

  const text = extractTextContent(msg);
  if (!text) {
    return null;
  }

  // Only groups explicitly mapped to an agent are served.
  if (!getAgentIdForGroup(remoteJid)) {
    return null;
  }

  return {
    groupJid: remoteJid,
    senderJid,
    text,
    messageId: msg.key?.id ?? "",
  };
}
|
|
188
|
+
|
|
189
|
+
/**
 * Stream SSE from Python /chat/send and yield message segments at boundaries.
 *
 * Text from "text_delta" events is accumulated. A segment is yielded:
 * - at every "tool_start" event, if any text has accumulated;
 * - at the "result" event (always, together with the session ID), after
 *   which the generator finishes;
 * - when the stream ends without a "result" event: the accumulated text if
 *   there is any, otherwise the generic error reply if an "error" event was
 *   seen.
 *
 * HTTP 409 yields a single segment with isAlreadyStreaming=true.
 *
 * Events may be delimited by LF or CRLF blank lines. An event still sitting
 * in the buffer when the stream closes is processed rather than dropped.
 * Events that carry no "data:" field or whose data is not a JSON object with
 * a string "type" are skipped.
 *
 * The body reader is always cancelled and released when the generator
 * finishes, including when the consumer stops iterating early or throws.
 *
 * @param response - The fetch Response from Python /chat/send
 * @yields SseSegment for each completed message boundary
 * @throws Error on non-2xx responses (except 409)
 */
export async function* streamSseSegments(response: Response): AsyncGenerator<SseSegment> {
  // HTTP 409 means the session is already streaming
  if (response.status === 409) {
    yield { text: "", sessionId: null, isAlreadyStreaming: true, isError: false };
    return;
  }

  // Any other non-2xx is an error
  if (!response.ok) {
    const errorBody = await response.text().catch(() => "Unknown error");
    throw new Error(`Python /chat/send returned HTTP ${response.status}: ${errorBody}`);
  }

  const body = response.body;
  if (!body) {
    return;
  }

  /** Extract and validate the JSON payload of one raw SSE event block. */
  const parsePayload = (rawEvent: string): SseEventPayload | null => {
    const dataLines: string[] = [];
    for (const line of rawEvent.split("\n")) {
      if (!line.startsWith("data:")) continue; // comments, "event:", "id:", ...
      const value = line.slice(5);
      // A single space after the colon is part of the framing, not the data
      dataLines.push(value.startsWith(" ") ? value.slice(1) : value);
    }
    if (dataLines.length === 0) return null;

    let parsed: unknown;
    try {
      parsed = JSON.parse(dataLines.join("\n"));
    } catch {
      return null; // Skip malformed events
    }
    if (typeof parsed !== "object" || parsed === null) return null;
    if (typeof (parsed as { type?: unknown }).type !== "string") return null;
    return parsed as SseEventPayload;
  };

  let accumulatedText = "";
  let hasError = false;
  let buffer = "";

  const reader = body.getReader();
  const decoder = new TextDecoder();

  try {
    let streamDone = false;
    while (!streamDone) {
      const { done, value } = await reader.read();
      streamDone = done;

      // On the final read, flush any bytes the decoder is still holding
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
      // Normalize CRLF so both "\n\n" and "\r\n\r\n" delimit events. A lone
      // trailing "\r" stays in the buffer until its "\n" arrives.
      buffer = buffer.replace(/\r\n/g, "\n");

      const events = buffer.split("\n\n");
      // While streaming, the last piece may be incomplete and is kept for the
      // next read; once the stream is closed, everything left is processed.
      buffer = done ? "" : (events.pop() ?? "");

      for (const rawEvent of events) {
        const payload = parsePayload(rawEvent);
        if (!payload) continue;

        if (payload.type === "text_delta" && payload.content) {
          accumulatedText += payload.content;
        } else if (payload.type === "tool_start") {
          // Tool boundary: yield accumulated text as a separate message
          if (accumulatedText) {
            yield { text: accumulatedText, sessionId: null, isAlreadyStreaming: false, isError: false };
            accumulatedText = "";
          }
        } else if (payload.type === "result") {
          // Final event: yield remaining text with session ID
          yield {
            text: accumulatedText,
            sessionId: payload.sessionId ?? null,
            isAlreadyStreaming: false,
            isError: false,
          };
          return;
        } else if (payload.type === "error") {
          hasError = true;
        }
      }
    }

    // Stream closed without a result event (error path) -- yield whatever we have
    if (hasError && !accumulatedText) {
      yield { text: SSE_ERROR_REPLY, sessionId: null, isAlreadyStreaming: false, isError: true };
    } else if (accumulatedText) {
      yield { text: accumulatedText, sessionId: null, isAlreadyStreaming: false, isError: false };
    }
  } finally {
    // Runs on normal completion, early return, and consumer abort alike, so
    // the underlying connection is never left open with a locked reader.
    await reader.cancel().catch(() => undefined);
    reader.releaseLock();
  }
}
|
|
279
|
+
|
|
280
|
+
/**
 * Handle an incoming WhatsApp message end-to-end.
 *
 * Resolves the agent mapped to the group, POSTs the text to Python
 * /chat/send (passing the stored session ID so the conversation resumes),
 * and forwards each streamed segment to the group as soon as it is
 * produced. A session ID reported by the stream is stored for the group.
 *
 * Every reply is prefixed with "[voicecc] ". Long segments are split so that
 * each outgoing message, prefix included, stays within MAX_MESSAGE_BYTES.
 *
 * @param msg - The parsed incoming message
 * @throws Error when the socket is not connected, when no agent is mapped to
 *   the group, or when the Python server answers with a non-2xx status other
 *   than 409
 */
export async function handleIncomingMessage(msg: WhatsAppIncomingMessage): Promise<void> {
  const sock = getSocket();
  if (!sock) {
    throw new Error("WhatsApp socket is not connected");
  }

  // Resolve agent for this group
  const agentId = getAgentIdForGroup(msg.groupJid);
  if (!agentId) {
    throw new Error(`No agent mapped to group "${msg.groupJid}"`);
  }

  // Get stored session ID for conversation resume
  const resumeSessionId = getLastSessionId(msg.groupJid);

  // Call Python /chat/send directly (no dashboard proxy, no device token)
  const sessionKey = `wa:${msg.groupJid}`;
  const response = await fetch(`${VOICE_SERVER_URL}/chat/send`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      session_key: sessionKey,
      agent_id: agentId,
      text: msg.text,
      resume_session_id: resumeSessionId,
    }),
  });

  // The prefix is added after splitting, so its bytes must be reserved up
  // front; otherwise a full-size chunk would exceed the limit once prefixed.
  const replyPrefix = "[voicecc] ";
  const chunkBudget = MAX_MESSAGE_BYTES - new TextEncoder().encode(replyPrefix).byteLength;

  // Stream segments and send each to WhatsApp as soon as it's ready
  for await (const segment of streamSseSegments(response)) {
    if (segment.isAlreadyStreaming) {
      await sock.sendMessage(msg.groupJid, { text: ALREADY_STREAMING_REPLY });
      return;
    }

    // Store session ID when present (only on the final segment)
    if (segment.sessionId) {
      await setLastSessionId(msg.groupJid, segment.sessionId);
    }

    // Send text, splitting if it exceeds the byte limit
    if (segment.text) {
      const chunks = splitByByteLength(segment.text, chunkBudget);
      for (const chunk of chunks) {
        await sock.sendMessage(msg.groupJid, { text: `${replyPrefix}${chunk}` });
      }
    }
  }
}
|
|
336
|
+
|
|
337
|
+
// ============================================================================
|
|
338
|
+
// HELPER FUNCTIONS
|
|
339
|
+
// ============================================================================
|
|
340
|
+
|
|
341
|
+
/**
 * Strip the numeric device suffix (":0", ":12", ...) that Baileys may place
 * in front of the "@" of a WhatsApp JID.
 *
 * Example: "1234567890:0@s.whatsapp.net" -> "1234567890@s.whatsapp.net".
 * A JID without such a suffix is returned unchanged.
 *
 * @param jid - The raw JID from Baileys
 * @returns The JID with the first ":<digits>@" occurrence collapsed to "@"
 */
export function normalizeJid(jid: string): string {
  const deviceSuffix = /:\d+@/;
  const withoutDevice = jid.replace(deviceSuffix, "@");
  return withoutDevice;
}
|
|
351
|
+
|
|
352
|
+
/**
 * Pull the text body out of a raw Baileys message.
 *
 * Looks at the plain `conversation` field first and falls back to
 * `extendedTextMessage.text` (used for messages with link previews,
 * formatting, etc.). Empty strings count as "no text".
 *
 * @param msg - Raw Baileys message
 * @returns The text content, or null if the message carries none
 */
function extractTextContent(msg: proto.IWebMessageInfo): string | null {
  const content = msg.message;
  if (!content) {
    return null;
  }

  // First truthy candidate wins; null when neither field holds text
  return content.conversation || content.extendedTextMessage?.text || null;
}
|
|
375
|
+
|
|
376
|
+
/**
 * Split a string into chunks whose UTF-8 encoding each fits within a byte limit.
 *
 * Guarantees:
 * - every returned chunk encodes to at most `maxBytes` bytes;
 * - concatenating the chunks yields the original text;
 * - a chunk never ends in the middle of a UTF-16 surrogate pair, so
 *   characters outside the BMP (e.g. emoji) are never torn apart;
 * - when a newline lies in the second half of the portion that fits, the
 *   split is made right after it for readability;
 * - a remainder that already fits is emitted whole.
 *
 * @param text - The text to split
 * @param maxBytes - Maximum UTF-8 byte length per chunk
 * @returns Array of text chunks (a single-element array when no split is needed)
 * @throws Error when a single character does not fit within `maxBytes`
 */
function splitByByteLength(text: string, maxBytes: number): string[] {
  const encoder = new TextEncoder();

  // No split needed
  if (encoder.encode(text).byteLength <= maxBytes) {
    return [text];
  }

  const chunks: string[] = [];
  let remaining = text;

  while (remaining.length > 0) {
    // Binary search for the largest UTF-16 prefix length that fits in maxBytes
    let low = 0;
    let high = remaining.length;

    while (low < high) {
      const mid = Math.ceil((low + high) / 2);
      const byteLen = encoder.encode(remaining.slice(0, mid)).byteLength;
      if (byteLen <= maxBytes) {
        low = mid;
      } else {
        high = mid - 1;
      }
    }

    // Everything left fits: emit it whole instead of splitting at a newline
    if (low === remaining.length) {
      chunks.push(remaining);
      break;
    }

    // Do not cut between the two halves of a surrogate pair
    if (low > 0) {
      const lastUnit = remaining.charCodeAt(low - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        low -= 1;
      }
    }

    if (low === 0) {
      // Single character exceeds limit (shouldn't happen with 64KB limit)
      throw new Error("Single character exceeds maximum byte length");
    }

    // Prefer splitting right after a newline. The search starts at low - 1
    // so that only characters inside the fitting prefix are considered.
    let splitAt = low;
    const lastNewline = remaining.lastIndexOf("\n", low - 1);
    if (lastNewline > 0 && lastNewline > low * 0.5) {
      splitAt = lastNewline + 1;
    }

    chunks.push(remaining.slice(0, splitAt));
    remaining = remaining.slice(splitAt);
  }

  return chunks;
}
|
|
@@ -50,16 +50,20 @@ class ChatSseEvent:
|
|
|
50
50
|
type: Event type ("text_delta", "tool_start", "tool_end", "result", "error")
|
|
51
51
|
content: Text content or error message
|
|
52
52
|
tool_name: Tool name (only for tool_start events)
|
|
53
|
+
session_id: Claude session ID (only for "result" events, used for session resume)
|
|
53
54
|
"""
|
|
54
55
|
type: str
|
|
55
56
|
content: str
|
|
56
57
|
tool_name: str | None = None
|
|
58
|
+
session_id: str | None = None
|
|
57
59
|
|
|
58
60
|
def to_dict(self) -> dict:
|
|
59
61
|
"""Serialize to a JSON-safe dict, omitting None fields."""
|
|
60
62
|
d: dict = {"type": self.type, "content": self.content}
|
|
61
63
|
if self.tool_name is not None:
|
|
62
64
|
d["toolName"] = self.tool_name
|
|
65
|
+
if self.session_id is not None:
|
|
66
|
+
d["sessionId"] = self.session_id
|
|
63
67
|
return d
|
|
64
68
|
|
|
65
69
|
|
|
@@ -93,16 +97,25 @@ _cleanup_task: asyncio.Task | None = None
|
|
|
93
97
|
# MAIN HANDLERS
|
|
94
98
|
# ============================================================================
|
|
95
99
|
|
|
96
|
-
async def get_or_create_session(
|
|
100
|
+
async def get_or_create_session(
|
|
101
|
+
session_key: str,
|
|
102
|
+
agent_id: str | None = None,
|
|
103
|
+
resume_session_id: str | None = None,
|
|
104
|
+
) -> ChatSession:
|
|
97
105
|
"""Get an existing chat session or create a new one.
|
|
98
106
|
|
|
99
107
|
On first call for a session_key, creates a ClaudeSDKClient with the
|
|
100
108
|
appropriate system prompt. Subsequent calls return the existing session.
|
|
101
109
|
Enforces max concurrent sessions from config.
|
|
102
110
|
|
|
111
|
+
If resume_session_id is provided and no existing session exists, creates
|
|
112
|
+
the session with resume=resume_session_id so Claude reloads conversation
|
|
113
|
+
history. Falls back to a fresh session if resume fails.
|
|
114
|
+
|
|
103
115
|
Args:
|
|
104
116
|
session_key: Device token to key the session on
|
|
105
117
|
agent_id: Optional agent ID for agent-specific prompts
|
|
118
|
+
resume_session_id: Optional Claude session ID to resume from
|
|
106
119
|
|
|
107
120
|
Returns:
|
|
108
121
|
The active ChatSession
|
|
@@ -131,18 +144,7 @@ async def get_or_create_session(session_key: str, agent_id: str | None = None) -
|
|
|
131
144
|
if os.path.isdir(agent_dir):
|
|
132
145
|
cwd = agent_dir
|
|
133
146
|
|
|
134
|
-
|
|
135
|
-
system_prompt=system_prompt,
|
|
136
|
-
cwd=cwd,
|
|
137
|
-
allowed_tools=[],
|
|
138
|
-
permission_mode="bypassPermissions",
|
|
139
|
-
include_partial_messages=True,
|
|
140
|
-
max_thinking_tokens=10000,
|
|
141
|
-
setting_sources=["user", "project", "local"],
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
client = ClaudeSDKClient(options=options)
|
|
145
|
-
await client.connect()
|
|
147
|
+
client = await _create_client(system_prompt, cwd, resume_session_id)
|
|
146
148
|
|
|
147
149
|
session = ChatSession(
|
|
148
150
|
session_key=session_key,
|
|
@@ -184,6 +186,8 @@ async def stream_message(session_key: str, text: str):
|
|
|
184
186
|
try:
|
|
185
187
|
await session.client.query(text)
|
|
186
188
|
|
|
189
|
+
captured_session_id: str | None = None
|
|
190
|
+
|
|
187
191
|
async for msg in session.client.receive_response():
|
|
188
192
|
if isinstance(msg, AssistantMessage):
|
|
189
193
|
for block in msg.content:
|
|
@@ -195,13 +199,14 @@ async def stream_message(session_key: str, text: str):
|
|
|
195
199
|
)
|
|
196
200
|
|
|
197
201
|
elif isinstance(msg, ResultMessage):
|
|
202
|
+
captured_session_id = msg.session_id
|
|
198
203
|
if msg.is_error:
|
|
199
204
|
yield ChatSseEvent(
|
|
200
205
|
type="error", content=msg.subtype or "Unknown error"
|
|
201
206
|
)
|
|
202
207
|
break
|
|
203
208
|
|
|
204
|
-
yield ChatSseEvent(type="result", content="")
|
|
209
|
+
yield ChatSseEvent(type="result", content="", session_id=captured_session_id)
|
|
205
210
|
|
|
206
211
|
except Exception as e:
|
|
207
212
|
logger.error(f"[chat] Stream error for {session_key}: {e}")
|
|
@@ -287,6 +292,55 @@ async def cleanup_inactive() -> None:
|
|
|
287
292
|
# HELPER FUNCTIONS
|
|
288
293
|
# ============================================================================
|
|
289
294
|
|
|
295
|
+
async def _create_client(
    system_prompt: str,
    cwd: str,
    resume_session_id: str | None,
) -> ClaudeSDKClient:
    """Build a ClaudeSDKClient, connect it, and return it.

    When resume_session_id is given, a client configured with
    resume=resume_session_id is tried first. If creating or connecting that
    client raises, a warning is logged and a fresh (non-resumed) client is
    created instead.

    Args:
        system_prompt: System prompt for the Claude session
        cwd: Working directory for the session
        resume_session_id: Optional session ID to resume from

    Returns:
        A connected ClaudeSDKClient
    """
    # Options shared by the resumed and the fresh client
    shared_options = {
        "system_prompt": system_prompt,
        "cwd": cwd,
        "allowed_tools": [],
        "permission_mode": "bypassPermissions",
        "include_partial_messages": True,
        "max_thinking_tokens": 10000,
        "setting_sources": ["user", "project", "local"],
    }

    if resume_session_id:
        try:
            resumed = ClaudeSDKClient(
                options=ClaudeAgentOptions(**shared_options, resume=resume_session_id)
            )
            await resumed.connect()
            logger.info(f"[chat] Resumed session: {resume_session_id}")
            return resumed
        except Exception as e:
            # Resume is best-effort: fall through to a fresh session
            logger.warning(
                f"[chat] Failed to resume session {resume_session_id}, "
                f"creating fresh session: {e}"
            )

    fresh = ClaudeSDKClient(options=ClaudeAgentOptions(**shared_options))
    await fresh.connect()
    return fresh
|
|
342
|
+
|
|
343
|
+
|
|
290
344
|
async def _cleanup_loop() -> None:
|
|
291
345
|
"""Background loop that runs cleanup_inactive every 60 seconds."""
|
|
292
346
|
while True:
|
package/voice-server/config.py
CHANGED
|
@@ -62,6 +62,7 @@ class AgentConfig:
|
|
|
62
62
|
heartbeat_timeout_minutes: int | None = None
|
|
63
63
|
enabled: bool = True
|
|
64
64
|
voice: AgentVoiceConfig | None = None
|
|
65
|
+
outbound_channel: str = "call" # "call" or "whatsapp"
|
|
65
66
|
|
|
66
67
|
|
|
67
68
|
@dataclass
|
|
@@ -334,9 +335,15 @@ def _read_agent_config(config_path: str) -> AgentConfig:
|
|
|
334
335
|
elevenlabs = VoicePreference(id=el["id"], name=el["name"])
|
|
335
336
|
voice_config = AgentVoiceConfig(elevenlabs=elevenlabs)
|
|
336
337
|
|
|
338
|
+
# Validate outbound channel value
|
|
339
|
+
outbound_channel = raw.get("outboundChannel", "call")
|
|
340
|
+
if outbound_channel not in ("call", "whatsapp"):
|
|
341
|
+
raise ValueError(f'Invalid outboundChannel "{outbound_channel}". Must be "call" or "whatsapp".')
|
|
342
|
+
|
|
337
343
|
return AgentConfig(
|
|
338
344
|
heartbeat_interval_minutes=raw.get("heartbeatIntervalMinutes", 10),
|
|
339
345
|
heartbeat_timeout_minutes=raw.get("heartbeatTimeoutMinutes"),
|
|
340
346
|
enabled=raw.get("enabled", True),
|
|
341
347
|
voice=voice_config,
|
|
348
|
+
outbound_channel=outbound_channel,
|
|
342
349
|
)
|
|
@@ -26,6 +26,9 @@ import time
|
|
|
26
26
|
from dataclasses import dataclass, field
|
|
27
27
|
from uuid import uuid4
|
|
28
28
|
|
|
29
|
+
import urllib.request
|
|
30
|
+
import urllib.error
|
|
31
|
+
|
|
29
32
|
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient, AssistantMessage, ResultMessage, TextBlock
|
|
30
33
|
|
|
31
34
|
from config import (
|
|
@@ -274,11 +277,18 @@ async def check_single_agent(agent: Agent) -> HeartbeatResult:
|
|
|
274
277
|
)
|
|
275
278
|
|
|
276
279
|
if result.should_call:
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
280
|
+
outbound = agent.config.outbound_channel
|
|
281
|
+
|
|
282
|
+
if outbound == "whatsapp":
|
|
283
|
+
# Send via WhatsApp -- no session handoff needed
|
|
284
|
+
await send_whatsapp_message(agent.id, result.reason)
|
|
285
|
+
else:
|
|
286
|
+
# Default: place a Twilio call with session handoff
|
|
287
|
+
try:
|
|
288
|
+
await initiate_agent_call(agent, client)
|
|
289
|
+
client = None # Don't close -- voice session owns it now
|
|
290
|
+
except Exception as e:
|
|
291
|
+
logger.error(f'[heartbeat] failed to call agent "{agent.id}": {e}')
|
|
282
292
|
|
|
283
293
|
return result
|
|
284
294
|
finally:
|
|
@@ -444,6 +454,63 @@ async def initiate_agent_call(agent: Agent, client: ClaudeSDKClient) -> str:
|
|
|
444
454
|
return call.sid or ""
|
|
445
455
|
|
|
446
456
|
|
|
457
|
+
async def send_whatsapp_message(agent_id: str, text: str) -> None:
    """Send a WhatsApp message to an agent's group via the dashboard API.

    POSTs { agentId, text } to the dashboard's POST /api/whatsapp/send endpoint
    on localhost, using the port returned by _get_dashboard_port(). The
    blocking HTTP call runs in a worker thread with a 10 second timeout.

    Logs errors but does NOT raise -- the caller should not retry or fall back.

    Args:
        agent_id: Agent identifier
        text: Message text to send
    """
    dashboard_port = _get_dashboard_port()
    if not dashboard_port:
        logger.error("[heartbeat] Cannot send WhatsApp message: dashboard port unknown")
        return

    url = f"http://localhost:{dashboard_port}/api/whatsapp/send"

    def _post(req: urllib.request.Request) -> int:
        # Close the response inside the worker thread so the connection is
        # released even though only the status code is needed.
        with urllib.request.urlopen(req, timeout=10) as response:
            return response.status

    try:
        payload = json.dumps({"agentId": agent_id, "text": text}).encode("utf-8")
        req = urllib.request.Request(
            url,
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        status = await asyncio.to_thread(_post, req)
        if status == 200:
            logger.info(f'[heartbeat] WhatsApp message sent for agent "{agent_id}"')
        else:
            logger.error(
                f'[heartbeat] WhatsApp send failed for agent "{agent_id}": '
                f"HTTP {status}"
            )
    except urllib.error.HTTPError as e:
        # Reading the error body can itself fail; that must not escape, since
        # this function promises not to raise.
        try:
            detail = e.read().decode("utf-8", errors="replace")
        except Exception:
            detail = "<unreadable response body>"
        finally:
            e.close()
        logger.error(
            f'[heartbeat] WhatsApp send failed for agent "{agent_id}": '
            f"HTTP {e.code} -- {detail}"
        )
    except Exception as e:
        logger.error(f'[heartbeat] WhatsApp send error for agent "{agent_id}": {e}')
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _get_dashboard_port() -> int | None:
|
|
499
|
+
"""Read the dashboard port from ~/.voicecc/status.json.
|
|
500
|
+
|
|
501
|
+
Returns:
|
|
502
|
+
The dashboard port number, or None if the status file is unreadable
|
|
503
|
+
"""
|
|
504
|
+
voicecc_dir = os.environ.get("VOICECC_DIR", os.path.join(os.path.expanduser("~"), ".voicecc"))
|
|
505
|
+
status_path = os.path.join(voicecc_dir, "status.json")
|
|
506
|
+
try:
|
|
507
|
+
with open(status_path, "r", encoding="utf-8") as f:
|
|
508
|
+
status = json.load(f)
|
|
509
|
+
return int(status["dashboardPort"])
|
|
510
|
+
except Exception:
|
|
511
|
+
return None
|
|
512
|
+
|
|
513
|
+
|
|
447
514
|
async def _cleanup_pending_call(token: str) -> None:
|
|
448
515
|
"""Clean up a pending call after PENDING_CALL_TIMEOUT_S if not claimed.
|
|
449
516
|
|