@conversionpros/aiva 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -0
- package/auto-deploy.js +190 -0
- package/bin/aiva.js +81 -0
- package/cli-sync.js +126 -0
- package/d2a-prompt-template.txt +106 -0
- package/diagnostics-api.js +304 -0
- package/docs/ara-dedup-fix-scope.md +112 -0
- package/docs/ara-fix-round2-scope.md +61 -0
- package/docs/ara-greeting-fix-scope.md +70 -0
- package/docs/calendar-date-fix-scope.md +28 -0
- package/docs/getting-started.md +115 -0
- package/docs/network-architecture-rollout-scope.md +43 -0
- package/docs/scope-google-oauth-integration.md +351 -0
- package/docs/settings-page-scope.md +50 -0
- package/docs/xai-imagine-scope.md +116 -0
- package/docs/xai-voice-integration-scope.md +115 -0
- package/docs/xai-voice-tools-scope.md +165 -0
- package/email-router.js +512 -0
- package/follow-up-handler.js +606 -0
- package/gateway-monitor.js +158 -0
- package/google-email.js +379 -0
- package/google-oauth.js +310 -0
- package/grok-imagine.js +97 -0
- package/health-reporter.js +287 -0
- package/invisible-prefix-base.txt +206 -0
- package/invisible-prefix-owner.txt +26 -0
- package/invisible-prefix-slim.txt +10 -0
- package/invisible-prefix.txt +43 -0
- package/knowledge-base.js +472 -0
- package/lib/cli.js +19 -0
- package/lib/config.js +124 -0
- package/lib/health.js +57 -0
- package/lib/process.js +207 -0
- package/lib/server.js +42 -0
- package/lib/setup.js +472 -0
- package/meta-capi.js +206 -0
- package/meta-leads.js +411 -0
- package/notion-oauth.js +323 -0
- package/package.json +61 -0
- package/public/agent-config.html +241 -0
- package/public/aiva-avatar-anime.png +0 -0
- package/public/css/docs.css.bak +688 -0
- package/public/css/onboarding.css +543 -0
- package/public/diagrams/claude-subscription-pool.html +329 -0
- package/public/diagrams/claude-subscription-pool.png +0 -0
- package/public/docs-icon.png +0 -0
- package/public/escalation.html +237 -0
- package/public/group-config.html +300 -0
- package/public/icon-192.png +0 -0
- package/public/icon-512.png +0 -0
- package/public/icons/agents.svg +1 -0
- package/public/icons/attach.svg +1 -0
- package/public/icons/characters.svg +1 -0
- package/public/icons/chat.svg +1 -0
- package/public/icons/docs.svg +1 -0
- package/public/icons/heartbeat.svg +1 -0
- package/public/icons/messages.svg +1 -0
- package/public/icons/mic.svg +1 -0
- package/public/icons/notes.svg +1 -0
- package/public/icons/settings.svg +1 -0
- package/public/icons/tasks.svg +1 -0
- package/public/images/onboarding/p0-communication-layer.png +0 -0
- package/public/images/onboarding/p0-infinite-surface.png +0 -0
- package/public/images/onboarding/p0-learning-model.png +0 -0
- package/public/images/onboarding/p0-meet-aiva.png +0 -0
- package/public/images/onboarding/p4-contact-intelligence.png +0 -0
- package/public/images/onboarding/p4-context-compounds.png +0 -0
- package/public/images/onboarding/p4-message-router.png +0 -0
- package/public/images/onboarding/p4-per-contact-rules.png +0 -0
- package/public/images/onboarding/p4-send-messages.png +0 -0
- package/public/images/onboarding/p6-be-precise.png +0 -0
- package/public/images/onboarding/p6-review-escalations.png +0 -0
- package/public/images/onboarding/p6-voice-input.png +0 -0
- package/public/images/onboarding/p7-completion.png +0 -0
- package/public/index.html +11594 -0
- package/public/js/onboarding.js +699 -0
- package/public/manifest.json +24 -0
- package/public/messages-v2.html +2824 -0
- package/public/permission-approve.html.bak +107 -0
- package/public/permissions.html +150 -0
- package/public/styles/design-system.css +68 -0
- package/router-db.js +604 -0
- package/router-utils.js +28 -0
- package/router-v2/adapters/imessage.js +191 -0
- package/router-v2/adapters/quo.js +82 -0
- package/router-v2/adapters/whatsapp.js +192 -0
- package/router-v2/contact-manager.js +234 -0
- package/router-v2/conversation-engine.js +498 -0
- package/router-v2/data/knowledge-base.json +176 -0
- package/router-v2/data/router-v2.db +0 -0
- package/router-v2/data/router-v2.db-shm +0 -0
- package/router-v2/data/router-v2.db-wal +0 -0
- package/router-v2/data/router.db +0 -0
- package/router-v2/db.js +457 -0
- package/router-v2/escalation-bridge.js +540 -0
- package/router-v2/follow-up-engine.js +347 -0
- package/router-v2/index.js +441 -0
- package/router-v2/ingestion.js +213 -0
- package/router-v2/knowledge-base.js +231 -0
- package/router-v2/lead-qualifier.js +152 -0
- package/router-v2/learning-loop.js +202 -0
- package/router-v2/outbound-sender.js +160 -0
- package/router-v2/package.json +13 -0
- package/router-v2/permission-gate.js +86 -0
- package/router-v2/playbook.js +177 -0
- package/router-v2/prompts/base.js +52 -0
- package/router-v2/prompts/first-contact.js +38 -0
- package/router-v2/prompts/lead-qualification.js +37 -0
- package/router-v2/prompts/scheduling.js +72 -0
- package/router-v2/prompts/style-overrides.js +22 -0
- package/router-v2/scheduler.js +301 -0
- package/router-v2/scripts/migrate-v1-to-v2.js +215 -0
- package/router-v2/scripts/seed-faq.js +67 -0
- package/router-v2/seed-knowledge-base.js +39 -0
- package/router-v2/utils/ai.js +129 -0
- package/router-v2/utils/phone.js +52 -0
- package/router-v2/utils/response-validator.js +98 -0
- package/router-v2/utils/sanitize.js +222 -0
- package/router.js +5005 -0
- package/routes/google-calendar.js +186 -0
- package/scripts/deploy.sh +62 -0
- package/scripts/macos-calendar.sh +232 -0
- package/scripts/onboard-device.sh +466 -0
- package/server.js +5131 -0
- package/start.sh +24 -0
- package/templates/AGENTS.md +548 -0
- package/templates/IDENTITY.md +15 -0
- package/templates/docs-agents.html +132 -0
- package/templates/docs-app.html +130 -0
- package/templates/docs-home.html +83 -0
- package/templates/docs-imessage.html +121 -0
- package/templates/docs-tasks.html +123 -0
- package/templates/docs-tips.html +175 -0
- package/templates/getting-started.html +809 -0
- package/templates/invisible-prefix-base.txt +171 -0
- package/templates/invisible-prefix-owner.txt +282 -0
- package/templates/invisible-prefix.txt +338 -0
- package/templates/manifest.json +61 -0
- package/templates/memory-org/clients.md +7 -0
- package/templates/memory-org/credentials.md +9 -0
- package/templates/memory-org/devices.md +7 -0
- package/templates/updates.html +464 -0
- package/templates/workspace/AGENTS.md.tmpl +161 -0
- package/templates/workspace/HEARTBEAT.md.tmpl +17 -0
- package/templates/workspace/IDENTITY.md.tmpl +15 -0
- package/templates/workspace/MEMORY.md.tmpl +16 -0
- package/templates/workspace/SOUL.md.tmpl +51 -0
- package/templates/workspace/USER.md.tmpl +25 -0
- package/tts-proxy.js +96 -0
- package/voice-call-local.js +731 -0
- package/voice-call.js +732 -0
- package/wa-listener.js +354 -0
|
@@ -0,0 +1,731 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local Voice Call Module for AIVA — Routes through Main Agent + Local TTS
|
|
3
|
+
*
|
|
4
|
+
* Architecture:
|
|
5
|
+
* 1. Client sends PCM16 audio chunks
|
|
6
|
+
* 2. Server accumulates and transcribes via OpenAI Whisper
|
|
7
|
+
* 3. Transcription is sent to OpenClaw main agent (claude-sonnet-4-5)
|
|
8
|
+
* 4. Agent's text response is converted to speech via local TTS server
|
|
9
|
+
* 5. Audio is streamed back to client
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const WebSocket = require('ws');
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
const { exec: execCb, execSync } = require('child_process');
|
|
16
|
+
const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args));
|
|
17
|
+
|
|
18
|
+
// Configuration
|
|
19
|
+
const OPENCLAW_API = 'http://127.0.0.1:18789/v1/chat/completions';
|
|
20
|
+
const OPENCLAW_AUTH = loadOpenClawAuth();
|
|
21
|
+
const LOCAL_TTS_API = 'http://127.0.0.1:3851/tts';
|
|
22
|
+
const CONTEXT_API = 'http://localhost:3847/api/context/voice';
|
|
23
|
+
// STT: Local Whisper CLI (free, no API key needed)
|
|
24
|
+
// LLM: OpenClaw proxy (free, existing subscription)
|
|
25
|
+
// TTS: Kokoro TTS (free, local)
|
|
26
|
+
|
|
27
|
+
// Active call sessions
|
|
28
|
+
const activeCalls = new Map();
|
|
29
|
+
|
|
30
|
+
/**
 * Read the OpenClaw gateway password from ~/.openclaw/openclaw.json.
 *
 * @returns {string} The configured gateway password, or '' when the config
 *   file is missing, unreadable, malformed, or has no password set.
 */
function loadOpenClawAuth() {
  try {
    // Fall back to USERPROFILE when HOME is unset (Windows, some daemon
    // environments) — otherwise path.join(undefined, ...) throws before we
    // even attempt to read the config.
    const home = process.env.HOME || process.env.USERPROFILE || '';
    const configPath = path.join(home, '.openclaw', 'openclaw.json');
    const config = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
    return config.gateway?.auth?.password || '';
  } catch (e) {
    // Best-effort: callers treat '' as "no auth configured".
    console.error('[voice-call-local] Failed to load OpenClaw auth:', e.message);
    return '';
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Fetch the voice-call context payload (tasks, chat, calendar) from the
 * local context API.
 *
 * @returns {Promise<object|null>} Parsed JSON context, or null on any
 *   network/HTTP/parse failure (the call is best-effort).
 */
async function fetchContext() {
  try {
    const response = await fetch(CONTEXT_API);
    if (response.ok) {
      return await response.json();
    }
    throw new Error(`Context API ${response.status}`);
  } catch (err) {
    console.error('[voice-call-local] Failed to fetch context:', err.message);
    return null;
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Build the voice-call system prompt, optionally enriched with dynamic
 * context (active tasks, recent chat, upcoming calendar events).
 *
 * @param {object|null} ctx - Context payload from fetchContext(); may have
 *   `activeTasks`, `recentChat`, and `calendar` arrays. Null/undefined is
 *   fine — the static prompt is returned alone.
 * @returns {string} Complete system prompt text.
 */
function buildSystemPrompt(ctx) {
  const now = new Date();
  // Human-readable timestamp pinned to Pacific Time for the prompt header.
  const timeStr = now.toLocaleString('en-US', {
    timeZone: 'America/Los_Angeles',
    weekday: 'long',
    year: 'numeric',
    month: 'long',
    day: 'numeric',
    hour: 'numeric',
    minute: '2-digit',
    hour12: true
  });

  let prompt = `You are AIVA (AI-VA), Brandon Burgan's AI assistant. This is a voice conversation through the AIVA app.

CURRENT DATE AND TIME: ${timeStr} (Pacific Time)

VOICE CONVERSATION RULES (CRITICAL):
- Keep responses SHORT and CONVERSATIONAL (1-3 sentences max)
- Use contractions and casual language
- NO markdown, bullet points, or formatting — everything is spoken aloud
- Start with a brief greeting then WAIT for Brandon to tell you what he needs
- Don't volunteer information (calendar, tasks) unless he asks
- Be helpful and concise

TASK RULES:
- Unless explicitly asked, don't mention finished/done tasks
- When listing tasks, default to non-done tasks only

PERSONALITY:
You are warm, efficient, and proactive. You understand Brandon's needs before he asks.`;

  // Append dynamic context if available
  if (ctx) {
    if (ctx.activeTasks?.length) {
      const tasks = ctx.activeTasks.slice(0, 10).map(t => `- ${t.title} (${t.status})`).join('\n');
      prompt += `\n\nACTIVE TASKS (reference only — do NOT mention unless asked):\n${tasks}`;
    }
    if (ctx.recentChat?.length) {
      // Last 10 messages, each truncated to 100 chars to bound prompt size.
      const recent = ctx.recentChat.slice(-10).map(m => `${m.from}: ${m.text?.slice(0, 100)}`).join('\n');
      prompt += `\n\nRECENT CHAT (reference only):\n${recent}`;
    }
    const calArray = Array.isArray(ctx.calendar) ? ctx.calendar : [];
    if (calArray.length) {
      const nowTime = new Date();
      const futureEvents = calArray.filter(e => {
        const eventTime = e.time || e.start || '';
        if (!eventTime) return true; // no timestamp — keep, can't tell if past
        const eventDate = new Date(eventTime);
        // BUGFIX: an unparseable timestamp yields Invalid Date, and
        // `InvalidDate > now` is false, which silently dropped those events.
        // Keep them instead — matching the intent of the no-timestamp branch.
        if (Number.isNaN(eventDate.getTime())) return true;
        return eventDate > nowTime;
      });
      if (futureEvents.length) {
        const events = futureEvents.slice(0, 5).map(e => `- ${e.title || e.summary} (${e.time || e.start || ''})`).join('\n');
        prompt += `\n\nUPCOMING CALENDAR (reference only — do NOT mention unless asked):\n${events}`;
      }
    }
  }

  return prompt;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Transcribe a raw PCM16 audio buffer using the LOCAL Whisper CLI
 * (free, no API key — not the OpenAI Whisper API, despite the history).
 *
 * @param {Buffer} audioBuffer - Raw PCM16 samples (24kHz, mono, 16-bit).
 * @returns {Promise<string>} Transcript text ('' when nothing recognized).
 * @throws When ffmpeg/whisper execution fails or times out.
 */
async function transcribeAudio(audioBuffer) {
  // NOTE: removed a leftover `await import('form-data')` from the old
  // API-based implementation — it was never used and would throw if the
  // package wasn't installed.

  // Wrap raw PCM16 in a proper WAV header (24kHz, 16-bit, mono)
  const sampleRate = 24000;
  const numChannels = 1;
  const bitsPerSample = 16;
  const dataSize = audioBuffer.length;
  const wavHeader = Buffer.alloc(44);
  wavHeader.write('RIFF', 0);
  wavHeader.writeUInt32LE(36 + dataSize, 4);
  wavHeader.write('WAVE', 8);
  wavHeader.write('fmt ', 12);
  wavHeader.writeUInt32LE(16, 16);
  wavHeader.writeUInt16LE(1, 20); // PCM
  wavHeader.writeUInt16LE(numChannels, 22);
  wavHeader.writeUInt32LE(sampleRate, 24);
  wavHeader.writeUInt32LE(sampleRate * numChannels * bitsPerSample / 8, 28);
  wavHeader.writeUInt16LE(numChannels * bitsPerSample / 8, 32);
  wavHeader.writeUInt16LE(bitsPerSample, 34);
  wavHeader.write('data', 36);
  wavHeader.writeUInt32LE(dataSize, 40);

  const tempPath = path.join('/tmp', `voice-${Date.now()}.wav`);
  const convertedPath = tempPath.replace('.wav', '-converted.wav');
  fs.writeFileSync(tempPath, Buffer.concat([wavHeader, audioBuffer]));

  try {
    // Resample to 16kHz mono PCM for Whisper; paths are internally
    // generated (no user input), so shell interpolation is safe here.
    try {
      execSync(`ffmpeg -y -i "${tempPath}" -ar 16000 -ac 1 -acodec pcm_s16le "${convertedPath}" 2>/dev/null`);
      fs.unlinkSync(tempPath);
    } catch (e) {
      // If conversion fails, use the original WAV as-is
      fs.renameSync(tempPath, convertedPath);
    }

    // Use LOCAL Whisper CLI (free, no API key needed)
    const result = execSync(
      `whisper "${convertedPath}" --model tiny --language en --output_format txt --output_dir /tmp 2>/dev/null`,
      { timeout: 30000 }
    ).toString().trim();

    // Whisper writes <basename>.txt next to --output_dir; fall back to
    // stdout when the file is absent (some versions print instead).
    const txtPath = convertedPath.replace('.wav', '.txt');
    if (fs.existsSync(txtPath)) {
      const transcript = fs.readFileSync(txtPath, 'utf-8').trim();
      return transcript;
    }
    return result;
  } catch (e) {
    console.error('[voice-call-local] Transcription error:', e.message);
    throw e;
  } finally {
    // BUGFIX: always remove temp artifacts — the original leaked
    // convertedPath (and any whisper output files) on the error path.
    for (const f of [tempPath, convertedPath]) {
      try { if (fs.existsSync(f)) fs.unlinkSync(f); } catch { /* best-effort */ }
    }
    for (const ext of ['.txt', '.vtt', '.srt', '.tsv', '.json']) {
      const f = convertedPath.replace('.wav', ext);
      try { if (fs.existsSync(f)) fs.unlinkSync(f); } catch { /* best-effort */ }
    }
  }
}
|
|
188
|
+
|
|
189
|
+
/**
 * Remove tool/command artifacts (code fences, inline code, warning/tool
 * emoji lines) from an LLM reply so nothing unspeakable reaches TTS.
 * Text with no markup is returned untouched; if stripping leaves almost
 * nothing, a friendly canned apology is substituted.
 *
 * @param {string} text - Candidate response text.
 * @returns {string} Speakable text.
 */
function stripToolMarkup(text) {
  const toolMarkers = ['Exec:', '```', 'curl ', '🛠️'];
  if (!toolMarkers.some((marker) => text.includes(marker))) {
    return text;
  }

  console.warn('[voice-call-local] Stripping tool markup from response');
  const cleaned = text
    .replace(/⚠️.*$/gm, '')
    .replace(/🛠️.*$/gm, '')
    .replace(/```[\s\S]*?```/g, '')
    .replace(/`[^`]+`/g, '')
    .trim();

  // Too little left over? The reply was essentially all markup.
  if (!cleaned || cleaned.length < 5) {
    return "Hey! Sorry, I had a hiccup there. What were you saying?";
  }
  return cleaned;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Stream LLM response with sentence-level TTS chunking.
 * Calls onSentence(sentenceText, audioBase64) for each complete sentence.
 * Returns the full assembled response text.
 *
 * Retry strategy: up to 3 attempts with 0ms / 3s / 8s pre-delays. Only the
 * first attempt uses SSE streaming; retries fall back to a plain
 * (non-streaming) completion request.
 */
async function queryMainAgentStreaming(messages, systemPrompt, { onSentence, onDone } = {}) {
  // Base OpenAI-compatible payload for the OpenClaw proxy; the voice
  // constraint is appended to the caller-provided system prompt.
  const basePayload = {
    model: 'claude-sonnet-4-5',
    messages: [
      { role: 'system', content: systemPrompt + '\n\nCRITICAL: You are in a VOICE conversation. Respond with plain spoken text ONLY. Do NOT use any tools, commands, code blocks, markdown, or special formatting. Just talk naturally.' },
      ...messages
    ],
    max_tokens: 500, // voice replies are intentionally short
    temperature: 0.7,
    user: 'voice-call-brandon'
  };

  // Pre-attempt backoff delays in ms; index 0 fires immediately.
  const delays = [0, 3000, 8000];
  for (let attempt = 0; attempt < delays.length; attempt++) {
    if (delays[attempt] > 0) await new Promise(r => setTimeout(r, delays[attempt]));
    const useStream = attempt === 0; // fallback to non-streaming on retries
    try {
      const payload = { ...basePayload, ...(useStream ? { stream: true } : {}) };
      const response = await fetch(OPENCLAW_API, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${OPENCLAW_AUTH}`,
          'x-openclaw-agent-id': 'voice',
          'x-openclaw-session-key': 'agent:voice:openai-user:voice-call-brandon'
        },
        body: JSON.stringify(payload)
      });
      if (!response.ok) {
        // Non-2xx: log a truncated body and fall through to the next attempt.
        const err = await response.text();
        console.error(`[voice-call-local] Proxy attempt ${attempt+1} failed: ${response.status} ${err.substring(0, 100)}`);
        continue;
      }

      if (!useStream) {
        // Non-streaming fallback
        const result = await response.json();
        let text = stripToolMarkup(result.choices?.[0]?.message?.content || '');
        // Empty or proxy-error sentinel text counts as a failed attempt.
        if (!text || text.includes('No response from OpenClaw')) continue;
        console.log('[voice-call-local] Got response via OpenClaw proxy (non-streaming fallback)');
        if (onSentence) {
          // Single TTS pass over the whole reply; TTS failure is logged,
          // not fatal — the text response is still returned.
          try {
            const audio = await generateSpeech(text);
            onSentence(text, audio.toString('base64'));
          } catch (e) { console.error('[voice-call-local] TTS error in fallback:', e.message); }
        }
        if (onDone) onDone();
        return text;
      }

      // Streaming path: parse SSE
      console.log('[voice-call-local] Streaming response from proxy...');
      let fullText = '';
      let sentenceBuffer = '';
      const ttsPromises = []; // track in-flight TTS to await at end

      // Slice completed sentences off sentenceBuffer and kick off one TTS
      // job per sentence; with flush=true, also emit any trailing partial.
      const processCompleteSentences = async (flush = false) => {
        // Find sentence boundaries
        const sentenceEndRegex = /([.!?])\s+/g;
        let match;
        let lastEnd = 0;
        const sentences = [];

        while ((match = sentenceEndRegex.exec(sentenceBuffer)) !== null) {
          sentences.push(sentenceBuffer.substring(lastEnd, match.index + 1));
          lastEnd = match.index + match[0].length;
        }

        if (sentences.length > 0) {
          // Keep only the unterminated tail for the next delta.
          sentenceBuffer = sentenceBuffer.substring(lastEnd);
          for (const sentence of sentences) {
            const cleaned = stripToolMarkup(sentence.trim());
            if (cleaned.length < 2) continue;
            if (onSentence) {
              // Fire-and-track: TTS jobs run concurrently per sentence and
              // are awaited in bulk after the stream ends.
              const p = (async () => {
                try {
                  const audio = await generateSpeech(cleaned);
                  onSentence(cleaned, audio.toString('base64'));
                } catch (e) { console.error('[voice-call-local] TTS chunk error:', e.message); }
              })();
              ttsPromises.push(p);
            }
          }
        }

        if (flush && sentenceBuffer.trim().length > 1) {
          const cleaned = stripToolMarkup(sentenceBuffer.trim());
          sentenceBuffer = '';
          if (cleaned.length >= 2 && onSentence) {
            const p = (async () => {
              try {
                const audio = await generateSpeech(cleaned);
                onSentence(cleaned, audio.toString('base64'));
              } catch (e) { console.error('[voice-call-local] TTS flush error:', e.message); }
            })();
            ttsPromises.push(p);
          }
        }
      };

      // Read SSE stream
      const reader = response.body;
      let remainder = ''; // carries a partial SSE line across chunk boundaries

      await new Promise((resolve, reject) => {
        reader.on('data', (chunk) => {
          const text = remainder + chunk.toString();
          const lines = text.split('\n');
          remainder = lines.pop() || '';

          for (const line of lines) {
            if (!line.startsWith('data: ')) continue;
            const data = line.slice(6).trim();
            if (data === '[DONE]') continue;
            try {
              const parsed = JSON.parse(data);
              const delta = parsed.choices?.[0]?.delta?.content || '';
              if (delta) {
                fullText += delta;
                sentenceBuffer += delta;
                // NOTE(review): intentionally not awaited — the function is
                // synchronous up to the TTS kickoff, and TTS completion is
                // awaited via ttsPromises below.
                processCompleteSentences(false);
              }
            } catch (e) { /* ignore parse errors on SSE lines */ }
          }
        });
        reader.on('end', resolve);
        reader.on('error', reject);
      });

      // Flush remaining text
      await processCompleteSentences(true);
      // Wait for all TTS chunks to finish
      await Promise.all(ttsPromises);

      fullText = stripToolMarkup(fullText);
      if (!fullText || fullText.includes('No response from OpenClaw')) continue;

      console.log('[voice-call-local] Streaming response complete:', fullText.substring(0, 100));
      if (onDone) onDone();
      return fullText;

    } catch (e) {
      console.error(`[voice-call-local] Proxy attempt ${attempt+1} error:`, e.message);
    }
  }

  throw new Error('All proxy attempts failed');
}
|
|
353
|
+
|
|
354
|
+
/**
 * Legacy non-streaming wrapper kept for callers that predate the
 * streaming API. Delegates with no sentence callbacks, so only the final
 * assembled text is produced.
 *
 * @param {Array<object>} messages - OpenAI-format conversation history.
 * @param {string} systemPrompt - System prompt to prepend.
 * @returns {Promise<string>} Full response text.
 */
async function queryMainAgent(messages, systemPrompt) {
  const fullText = await queryMainAgentStreaming(messages, systemPrompt);
  return fullText;
}
|
|
358
|
+
|
|
359
|
+
/**
 * Apply a linear fade-in over the first `durationMs` of a WAV buffer to
 * prevent an audible pop/click when playback starts. Assumes 24kHz 16-bit
 * mono samples after a standard 44-byte header (Kokoro TTS output).
 *
 * @param {Buffer} audioBuffer - WAV audio; the caller's buffer is not mutated.
 * @param {number} [durationMs=20] - Fade length in milliseconds.
 * @returns {Buffer} A faded copy of the input.
 */
function addFadeIn(audioBuffer, durationMs = 20) {
  const WAV_HEADER_BYTES = 44;
  const SAMPLE_RATE = 24000; // Kokoro default
  const totalFadeSamples = Math.floor((SAMPLE_RATE * durationMs) / 1000);
  const faded = Buffer.from(audioBuffer); // operate on a copy

  for (let n = 0; n < totalFadeSamples; n++) {
    const byteOffset = WAV_HEADER_BYTES + n * 2;
    if (byteOffset + 1 >= faded.length) break; // short buffer — stop early
    const ramp = n / totalFadeSamples; // linear gain 0.0 → 1.0
    faded.writeInt16LE(Math.round(faded.readInt16LE(byteOffset) * ramp), byteOffset);
  }
  return faded;
}
|
|
374
|
+
|
|
375
|
+
/**
 * Synthesize speech for `text` via the local Kokoro TTS server.
 *
 * @param {string} text - Plain spoken text (no markup).
 * @returns {Promise<Buffer>} WAV audio with a short fade-in applied.
 * @throws When the TTS server is unreachable or returns a non-2xx status.
 */
async function generateSpeech(text) {
  try {
    const res = await fetch(LOCAL_TTS_API, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text, rate: 200 })
    });

    if (!res.ok) {
      throw new Error(`TTS error: ${await res.text()}`);
    }

    const audioBytes = Buffer.from(await res.arrayBuffer());
    return addFadeIn(audioBytes);
  } catch (e) {
    console.error('[voice-call-local] TTS error:', e.message);
    throw e;
  }
}
|
|
399
|
+
|
|
400
|
+
function setupVoiceCall(io) {
|
|
401
|
+
io.on('connection', (socket) => {
|
|
402
|
+
|
|
403
|
+
socket.on('voice-call-start', async (data) => {
|
|
404
|
+
console.log('[voice-call-local] Call started by:', data?.userId || 'unknown');
|
|
405
|
+
|
|
406
|
+
// Fetch context for system prompt
|
|
407
|
+
const ctx = await fetchContext().catch(() => null);
|
|
408
|
+
const systemPrompt = buildSystemPrompt(ctx);
|
|
409
|
+
|
|
410
|
+
const callSession = {
|
|
411
|
+
id: Date.now().toString(),
|
|
412
|
+
userId: data?.userId || 'brandon',
|
|
413
|
+
startedAt: new Date().toISOString(),
|
|
414
|
+
transcript: [], // Full conversation history
|
|
415
|
+
messages: [], // OpenAI-format messages for context
|
|
416
|
+
systemPrompt,
|
|
417
|
+
audioBuffer: null, // Accumulator for incoming audio chunks
|
|
418
|
+
isRecording: false,
|
|
419
|
+
processingResponse: false
|
|
420
|
+
};
|
|
421
|
+
|
|
422
|
+
activeCalls.set(socket.id, callSession);
|
|
423
|
+
|
|
424
|
+
// Auto-start recording (frontend sends continuous audio stream)
|
|
425
|
+
callSession.isRecording = true;
|
|
426
|
+
callSession.audioBuffer = Buffer.alloc(0);
|
|
427
|
+
|
|
428
|
+
// Set up silence detection — process audio after 2s of silence
|
|
429
|
+
callSession.silenceTimer = null;
|
|
430
|
+
callSession.lastAudioTime = Date.now();
|
|
431
|
+
|
|
432
|
+
// Send ready signal
|
|
433
|
+
socket.emit('voice-call-ready', { callId: callSession.id });
|
|
434
|
+
socket.emit('voice-call-status', { status: 'listening' });
|
|
435
|
+
console.log('[voice-call-local] Call ready:', callSession.id);
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
// User starts speaking (push-to-talk or VAD trigger)
|
|
439
|
+
socket.on('voice-call-start-recording', () => {
|
|
440
|
+
const session = activeCalls.get(socket.id);
|
|
441
|
+
if (!session) return;
|
|
442
|
+
|
|
443
|
+
session.isRecording = true;
|
|
444
|
+
session.audioBuffer = Buffer.alloc(0);
|
|
445
|
+
socket.emit('voice-call-status', { status: 'recording' });
|
|
446
|
+
console.log('[voice-call-local] Recording started');
|
|
447
|
+
});
|
|
448
|
+
|
|
449
|
+
// Receive audio chunks (continuous stream with silence detection)
|
|
450
|
+
let chunkCount = 0;
|
|
451
|
+
socket.on('voice-call-audio-chunk', (data) => {
|
|
452
|
+
const session = activeCalls.get(socket.id);
|
|
453
|
+
if (!session || !session.isRecording || session.processingResponse) return;
|
|
454
|
+
|
|
455
|
+
if (data?.audio) {
|
|
456
|
+
const chunk = Buffer.from(data.audio, 'base64');
|
|
457
|
+
|
|
458
|
+
// Check if this chunk has meaningful audio (not silence)
|
|
459
|
+
const samples = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.length / 2);
|
|
460
|
+
let rms = 0;
|
|
461
|
+
for (let i = 0; i < samples.length; i++) rms += samples[i] * samples[i];
|
|
462
|
+
rms = Math.sqrt(rms / samples.length);
|
|
463
|
+
|
|
464
|
+
// Barge-in: user speaks while agent is playing TTS
|
|
465
|
+
if (session.isPlaying && rms > 1000) {
|
|
466
|
+
console.log('[voice-call-local] Barge-in detected! Stopping playback.');
|
|
467
|
+
session.isPlaying = false;
|
|
468
|
+
session.audioBuffer = Buffer.alloc(0);
|
|
469
|
+
session.hasVoice = true;
|
|
470
|
+
session.lastAudioTime = Date.now();
|
|
471
|
+
if (session.silenceTimer) { clearTimeout(session.silenceTimer); session.silenceTimer = null; }
|
|
472
|
+
socket.emit('voice-call-barge-in', {});
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
// Don't accumulate audio while TTS is playing (unless barge-in just triggered above)
|
|
476
|
+
if (session.isPlaying) return;
|
|
477
|
+
|
|
478
|
+
session.audioBuffer = Buffer.concat([session.audioBuffer, chunk]);
|
|
479
|
+
chunkCount++;
|
|
480
|
+
if (chunkCount % 50 === 1) console.log(`[voice-call-local] Chunk #${chunkCount}, buffer: ${session.audioBuffer.length} bytes`);
|
|
481
|
+
|
|
482
|
+
if (chunkCount % 50 === 1) console.log(`[voice-call-local] RMS: ${Math.round(rms)}, hasVoice: ${!!session.hasVoice}`);
|
|
483
|
+
if (rms > 1000) { // Voice detected
|
|
484
|
+
session.lastAudioTime = Date.now();
|
|
485
|
+
if (!session.hasVoice) {
|
|
486
|
+
session.hasVoice = true;
|
|
487
|
+
console.log('[voice-call-local] Voice detected! RMS:', Math.round(rms));
|
|
488
|
+
socket.emit('voice-call-status', { status: 'recording' });
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
// If voice is active again, cancel the silence timer (they're still talking)
|
|
493
|
+
if (rms > 1000) {
|
|
494
|
+
if (session.silenceTimer) { clearTimeout(session.silenceTimer); session.silenceTimer = null; }
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
// After voice was detected, if we're now in silence, start the timer (only once)
|
|
498
|
+
if (session.hasVoice && rms <= 1000 && !session.silenceTimer) {
|
|
499
|
+
session.silenceTimer = setTimeout(async () => {
|
|
500
|
+
session.silenceTimer = null; // Clear reference so new timer can start
|
|
501
|
+
if (!session.isRecording || session.processingResponse) return;
|
|
502
|
+
if (session.audioBuffer.length < 16000) return; // Too short
|
|
503
|
+
|
|
504
|
+
session.processingResponse = true;
|
|
505
|
+
session.hasVoice = false;
|
|
506
|
+
console.log('[voice-call-local] Silence detected, processing...');
|
|
507
|
+
socket.emit('voice-call-status', { status: 'processing' });
|
|
508
|
+
|
|
509
|
+
try {
|
|
510
|
+
console.log('[voice-call-local] Transcribing audio...', session.audioBuffer.length, 'bytes');
|
|
511
|
+
const transcript = await transcribeAudio(session.audioBuffer);
|
|
512
|
+
|
|
513
|
+
if (!transcript) {
|
|
514
|
+
socket.emit('voice-call-status', { status: 'listening' });
|
|
515
|
+
session.audioBuffer = Buffer.alloc(0);
|
|
516
|
+
session.processingResponse = false;
|
|
517
|
+
return;
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
// Filter Whisper hallucinations
|
|
521
|
+
const hallucinations = ['thank you', 'thanks for watching', 'thank you for watching', 'bye', 'you', 'the end', "i'm sorry", 'goodbye', 'well congratulations', 'thanks', 'okay'];
|
|
522
|
+
const normalized = transcript.toLowerCase().replace(/[.,!?]/g, '').trim();
|
|
523
|
+
if ((hallucinations.includes(normalized) || normalized.length < 3) && session.audioBuffer.length < 144000) {
|
|
524
|
+
console.log('[voice-call-local] Filtered likely hallucination:', transcript);
|
|
525
|
+
session.audioBuffer = Buffer.alloc(0);
|
|
526
|
+
session.processingResponse = false;
|
|
527
|
+
socket.emit('voice-call-status', { status: 'listening' });
|
|
528
|
+
return;
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
console.log('[voice-call-local] User said:', transcript);
|
|
532
|
+
socket.emit('voice-call-user-transcript', { text: transcript });
|
|
533
|
+
|
|
534
|
+
// Get AI response
|
|
535
|
+
session.messages.push({ role: 'user', content: transcript });
|
|
536
|
+
session.transcript.push({ role: 'user', text: transcript, ts: new Date().toISOString() });
|
|
537
|
+
|
|
538
|
+
session.isPlaying = true;
|
|
539
|
+
const aiResponse = await queryMainAgentStreaming(session.messages, session.systemPrompt, {
|
|
540
|
+
onSentence: (text, audioBase64) => {
|
|
541
|
+
socket.emit('voice-call-transcript-delta', { text });
|
|
542
|
+
socket.emit('voice-call-audio-delta', { audio: audioBase64 });
|
|
543
|
+
},
|
|
544
|
+
onDone: () => { session.isPlaying = false; socket.emit('voice-call-response-done', {}); }
|
|
545
|
+
});
|
|
546
|
+
console.log('[voice-call-local] Agent response:', aiResponse?.substring(0, 100));
|
|
547
|
+
|
|
548
|
+
if (aiResponse) {
|
|
549
|
+
session.messages.push({ role: 'assistant', content: aiResponse });
|
|
550
|
+
session.transcript.push({ role: 'assistant', text: aiResponse, ts: new Date().toISOString() });
|
|
551
|
+
}
|
|
552
|
+
} catch (e) {
|
|
553
|
+
console.error('[voice-call-local] Processing error:', e.message);
|
|
554
|
+
socket.emit('voice-call-error', { error: e.message });
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
session.audioBuffer = Buffer.alloc(0);
|
|
558
|
+
session.processingResponse = false;
|
|
559
|
+
socket.emit('voice-call-status', { status: 'listening' });
|
|
560
|
+
}, 1500); // Allow natural pauses between sentences
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
});
|
|
564
|
+
|
|
565
|
+
// User stops speaking: transcribe the captured audio, run it through the
// hallucination filter, then stream the main agent's reply back to the client.
// Emits: voice-call-status, voice-call-error, voice-call-user-transcript,
// voice-call-transcript-delta, voice-call-audio-delta,
// voice-call-audio-response (legacy), voice-call-response-done.
socket.on('voice-call-stop-recording', async () => {
  const session = activeCalls.get(socket.id);
  if (!session || !session.isRecording) return;

  session.isRecording = false;
  socket.emit('voice-call-status', { status: 'processing' });

  // FIX: consume the audio buffer up front and mark the session busy.
  // Previously the buffer was cleared only on the hallucination path, so
  // stale audio leaked into the next utterance, and processingResponse was
  // set only after transcription finished, which let the silence-check
  // interval (guarded solely by processingResponse) fire concurrently
  // while transcribeAudio() was still awaiting.
  const audioChunk = session.audioBuffer;
  session.audioBuffer = Buffer.alloc(0);
  session.processingResponse = true;

  try {
    // Transcribe audio
    console.log('[voice-call-local] Transcribing audio...');
    const transcript = await transcribeAudio(audioChunk);

    if (!transcript) {
      socket.emit('voice-call-error', { error: 'Could not understand audio' });
      return;
    }

    // Filter Whisper hallucinations: stock phrases / very short transcripts
    // on clips under 144000 bytes are treated as noise, not real speech.
    const hallucinations2 = ['thank you', 'thanks for watching', 'thank you for watching', 'bye', 'you', 'the end', "i'm sorry", 'goodbye', 'well congratulations', 'thanks', 'okay'];
    const normalized2 = transcript.toLowerCase().replace(/[.,!?]/g, '').trim();
    if ((hallucinations2.includes(normalized2) || normalized2.length < 3) && audioChunk.length < 144000) {
      console.log('[voice-call-local] Filtered likely hallucination:', transcript);
      return;
    }

    console.log('[voice-call-local] User said:', transcript);
    session.transcript.push({ role: 'user', text: transcript });
    session.messages.push({ role: 'user', content: transcript });
    socket.emit('voice-call-user-transcript', { text: transcript });

    // Query main agent with streaming: each completed sentence arrives with
    // its synthesized audio so playback can start before the full reply.
    console.log('[voice-call-local] Querying main agent (streaming)...');
    socket.emit('voice-call-status', { status: 'thinking' });

    let firstChunkSent = false;
    session.isPlaying = true;
    const responseText = await queryMainAgentStreaming(session.messages, session.systemPrompt, {
      onSentence: (text, audioBase64) => {
        if (!firstChunkSent) {
          // Flip the UI to "speaking" as soon as the first chunk lands.
          socket.emit('voice-call-status', { status: 'speaking' });
          firstChunkSent = true;
        }
        socket.emit('voice-call-transcript-delta', { text });
        socket.emit('voice-call-audio-delta', { audio: audioBase64 });
        // Also emit legacy event for compatibility
        socket.emit('voice-call-audio-response', { audio: audioBase64, text });
      },
      onDone: () => { session.isPlaying = false; socket.emit('voice-call-response-done', { text: '' }); }
    });

    if (!responseText) {
      throw new Error('No response from agent');
    }

    console.log('[voice-call-local] Agent responded:', responseText.substring(0, 100));
    session.transcript.push({ role: 'assistant', text: responseText });
    session.messages.push({ role: 'assistant', content: responseText });

    console.log('[voice-call-local] Response sent (streamed)');
  } catch (e) {
    console.error('[voice-call-local] Processing error:', e.message);
    socket.emit('voice-call-error', { error: e.message });
  } finally {
    // Every exit path (success, filtered, error) returns the call to the
    // listening state and releases the busy flag.
    session.processingResponse = false;
    socket.emit('voice-call-status', { status: 'listening' });
  }
});
|
|
640
|
+
|
|
641
|
+
// Explicit hang-up from the client: tear down the active call, if any.
socket.on('voice-call-end', () => {
  const activeSession = activeCalls.get(socket.id);
  if (activeSession) {
    cleanupCall(socket.id, activeSession);
  }
});
|
|
646
|
+
|
|
647
|
+
// Transport dropped: treat it like a hang-up for any in-flight call.
socket.on('disconnect', () => {
  const session = activeCalls.get(socket.id);
  if (!session) return;
  console.log('[voice-call-local] Socket disconnected, cleaning up call');
  cleanupCall(socket.id, session);
});
|
|
654
|
+
});
|
|
655
|
+
|
|
656
|
+
// Finalize a voice call: persist the transcript to disk, notify the main
// agent over the local tools endpoint, and remove the session from the
// active-call map.
//
// socketId — socket.io id, used as the key in activeCalls.
// session  — per-call state object; reads .transcript and .startedAt.
function cleanupCall(socketId, session) {
  console.log(`[voice-call-local] Call ended (${session.transcript.length} turns)`);

  // Save transcript if there was conversation
  if (session.transcript.length > 0) {
    try {
      const logDir = path.join(process.env.HOME || '', '.openclaw', 'workspace', 'memory', 'call-logs');
      fs.mkdirSync(logDir, { recursive: true });
      // Colons/dots are invalid or awkward in filenames; swap for dashes.
      const ts = new Date().toISOString().replace(/[:.]/g, '-');
      const pendingFile = path.join(logDir, `pending_local_${ts}.json`);

      // Calculate duration
      // NOTE(review): assumes session.startedAt is a parseable timestamp;
      // if it is missing/invalid the duration fields become NaN — confirm
      // it is always set where the call session is created.
      const startTime = new Date(session.startedAt).getTime();
      const endTime = Date.now();
      const durationSeconds = Math.round((endTime - startTime) / 1000);
      const durationMin = Math.floor(durationSeconds / 60);
      const durationSec = durationSeconds % 60;

      const logData = {
        type: 'local-voice-call',
        timestamp: new Date().toISOString(),
        duration: durationSeconds,
        transcript: session.transcript,
      };

      fs.writeFileSync(pendingFile, JSON.stringify(logData, null, 2));
      console.log(`[voice-call-local] Transcript saved: ${pendingFile}`);

      // Fire wake hook to main agent with full transcript
      const readableTranscript = session.transcript.map(t =>
        `${t.role === 'user' ? 'Brandon' : 'AIVA'}: ${t.text}`
      ).join('\n');

      const wakeText = `[VOICE-CALL-COMPLETE] Voice call with Brandon just ended. Duration: ${durationMin}m ${durationSec}s.\n\nFull transcript:\n${readableTranscript}\n\nProcess any action items from this conversation immediately. If Brandon asked for something to be done, delegate it. If there are insights about Brandon's preferences or decisions, log them to memory.`;

      // Send transcript directly to main agent via sessions_send
      const invokeData = JSON.stringify({
        tool: 'sessions_send',
        args: {
          sessionKey: 'agent:main:main',
          message: wakeText
        }
      });

      // POST to the local tools endpoint; fire-and-forget with logging only,
      // so a failed notification never blocks session cleanup below.
      const invokeReq = require('http').request({
        hostname: '127.0.0.1',
        port: 18789,
        path: '/tools/invoke',
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${OPENCLAW_AUTH}`
        }
      }, (res) => {
        let body = '';
        res.on('data', d => body += d);
        res.on('end', () => {
          console.log(`[voice-call-local] sessions_send result: ${res.statusCode}`);
        });
      });
      invokeReq.on('error', e => console.error('[voice-call-local] sessions_send failed:', e.message));
      invokeReq.write(invokeData);
      invokeReq.end();

    } catch (e) {
      // Best-effort persistence: log and continue so the map entry is freed.
      console.error('[voice-call-local] Failed to save transcript:', e.message);
    }
  }

  activeCalls.delete(socketId);
}
|
|
727
|
+
|
|
728
|
+
console.log('[voice-call-local] Voice call handler initialized (Local TTS + Main Agent)');
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
// Public API: wires the local voice-call socket handlers onto an io instance.
module.exports = { setupVoiceCall };
|