@inetafrica/open-claudia 2.6.44 → 2.6.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bot-agent.js CHANGED
@@ -767,25 +767,9 @@ function transcribeAudio(oggPath) {
767
767
  }
768
768
 
769
769
  // ── Text-to-Speech ────────────────────────────────────────────────
770
+ // Shared with direct mode: ElevenLabs natural voice, falling back to `say`.
770
771
 
771
- const TTS_CMD = process.platform === "darwin" ? "say" : null;
772
-
773
- function textToVoice(text) {
774
- if (!TTS_CMD || !FFMPEG) return null;
775
- try {
776
- const clean = text.replace(/[*_`#>\[\]()]/g, "").replace(/\n{2,}/g, ". ").replace(/\n/g, " ").trim();
777
- if (!clean) return null;
778
- const aiffPath = path.join(TEMP_DIR, `tts-${Date.now()}.aiff`);
779
- const oggPath = aiffPath.replace(".aiff", ".ogg");
780
- execSync(`${TTS_CMD} ${JSON.stringify(clean)} -o "${aiffPath}"`, { timeout: 30000 });
781
- execSync(`"${FFMPEG}" -i "${aiffPath}" -c:a libopus -y "${oggPath}" 2>/dev/null`, { timeout: 30000 });
782
- try { fs.unlinkSync(aiffPath); } catch (e) {}
783
- return oggPath;
784
- } catch (e) {
785
- console.error("TTS error:", e.message);
786
- return null;
787
- }
788
- }
772
+ const { textToVoice } = require("./core/media");
789
773
 
790
774
  async function sendVoice(oggPath) {
791
775
  try {
@@ -1420,11 +1404,16 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
1420
1404
  }
1421
1405
  if (code !== 0 && code !== null) await send(`Exit code: ${code}`);
1422
1406
 
1423
- // Send voice reply if input was a voice note
1424
- if (lastInputWasVoice && TTS_CMD) {
1407
+ // Spoken auto-replies on voice input are off by default on chat
1408
+ // transports (they were unwanted noise on Telegram). Opt in with
1409
+ // VOICE_REPLY_ON_VOICE=1. The hands-free voice channel speaks back
1410
+ // regardless via its own path.
1411
+ if (lastInputWasVoice) {
1425
1412
  lastInputWasVoice = false;
1426
- const voicePath = textToVoice(finalText);
1427
- if (voicePath) await sendVoice(voicePath);
1413
+ if (process.env.VOICE_REPLY_ON_VOICE === "1") {
1414
+ const voicePath = await textToVoice(finalText);
1415
+ if (voicePath) await sendVoice(voicePath);
1416
+ }
1428
1417
  }
1429
1418
  } catch (e) {
1430
1419
  console.error("Final message delivery failed:", e.message);
@@ -0,0 +1,431 @@
1
+ // VoiceAdapter — an official Open Claudia channel for the companion app.
2
+ //
3
+ // Runs a LAN HTTP + WebSocket server the Expo client connects to (over
4
+ // WireGuard / local network). Inbound: push-to-talk audio, typed text, and
5
+ // file/image uploads. Outbound: assistant text (with live edits), spoken
6
+ // replies (ElevenLabs ogg), and files — pushed to the client over the
7
+ // WebSocket. Everything routes through the same core handlers as Telegram
8
+ // and Kazee, so the voice channel has the full agent capability set.
9
+ //
10
+ // Security: single-owner channel. Every HTTP request and WS upgrade must
11
+ // carry the bearer token (VOICE_BRIDGE_TOKEN). The owner identity is fixed
12
+ // (VOICE_OWNER_USER_ID), so access.js authorizes it as the bot owner.
13
+
14
+ const http = require("http");
15
+ const fs = require("fs");
16
+ const path = require("path");
17
+ const crypto = require("crypto");
18
+ const { WebSocketServer } = require("ws");
19
+ const { TEMP_DIR, FILES_DIR } = require("../../core/config");
20
+ const { canonicalForChannel } = require("../../core/identity");
21
+ const { inlineKeyboardToPortable } = require("../types");
22
+ const { parseMultipart } = require("./multipart");
23
+
24
+ const MEDIA_TTL_MS = 10 * 60 * 1000; // outbound media stays fetchable 10 min
25
+ const MAX_UPLOAD = 25 * 1024 * 1024;
26
+
27
/**
 * VoiceAdapter — LAN HTTP + WebSocket channel for the companion app.
 *
 * Inbound traffic (typed text, audio, file/image uploads) arrives over HTTP
 * or the WebSocket and is re-emitted to registered "message" listeners as an
 * envelope. Outbound traffic (messages, edits, voice notes, files) is
 * broadcast to every connected WebSocket client; binary payloads are exposed
 * through short-lived `/v1/media/<id>` URLs.
 *
 * Every route except `/`, `/health` and CORS preflight requires the bearer
 * token, either in the Authorization header or a `token` query parameter.
 */
class VoiceAdapter {
  /**
   * @param {object} [options]
   * @param {string} [options.id="voice"] adapter id reported to clients
   * @param {string} [options.host] bind address, defaults to "0.0.0.0"
   * @param {string|number} [options.port] listen port, defaults to 8787
   * @param {string} [options.token] bearer token; start() refuses to run without it
   * @param {string} [options.ownerUserId] fixed owner identity, defaults to "voice-owner"
   */
  constructor({ id = "voice", host, port, token, ownerUserId } = {}) {
    this.id = id;
    this.type = "voice";
    this.host = host || "0.0.0.0";
    this.port = parseInt(port, 10) || 8787;
    this.token = token || "";
    this.ownerUserId = String(ownerUserId || "voice-owner");
    // Single conversation: the channel id is the owner identity.
    this.channelId = this.ownerUserId;
    this._listeners = { message: new Set(), action: new Set() };
    this._server = null;
    this._wss = null;
    this._clients = new Set();
    this._media = new Map(); // id -> { path, mime, fileName, keep, expires }
    this._commands = [];
    this._sweepTimer = null;
  }

  /**
   * Register a listener for "message" or "action".
   * @returns {Function} unsubscribe function (a no-op for unknown events)
   */
  on(event, fn) {
    if (!this._listeners[event]) return () => {};
    this._listeners[event].add(fn);
    return () => this._listeners[event].delete(fn);
  }

  // Invoke every listener; sync throws and async rejections are logged, never propagated.
  _emit(event, envelope) {
    for (const fn of this._listeners[event] || []) {
      try {
        Promise.resolve(fn(envelope)).catch((e) => console.error(`voice ${event} handler:`, e.message));
      } catch (e) {
        console.error(`voice ${event} handler:`, e.message);
      }
    }
  }

  // ── lifecycle ───────────────────────────────────────────────────

  /**
   * Start the HTTP server and attach the WebSocket upgrade handler.
   * @throws {Error} when no token is configured, or when listen() fails
   */
  async start() {
    if (!this.token) {
      console.error("Voice adapter: VOICE_BRIDGE_TOKEN is unset — refusing to start an unauthenticated LAN server.");
      throw new Error("VOICE_BRIDGE_TOKEN required");
    }
    this._server = http.createServer((req, res) => this._handleHttp(req, res));
    this._wss = new WebSocketServer({ noServer: true });
    this._server.on("upgrade", (req, socket, head) => this._handleUpgrade(req, socket, head));
    this._wss.on("connection", (ws) => this._handleWsConnection(ws));

    await new Promise((resolve, reject) => {
      this._server.once("error", reject);
      this._server.listen(this.port, this.host, () => {
        this._server.removeListener("error", reject);
        resolve();
      });
    });
    this._sweepTimer = setInterval(() => this._sweepMedia(), 60 * 1000);
    if (this._sweepTimer.unref) this._sweepTimer.unref();
    console.log(`Voice channel listening on http://${this.host}:${this.port} (ws + http)`);
  }

  /**
   * Close all clients and the server. Safe to call when start() never ran
   * or when stop() was already called.
   */
  async stop() {
    if (this._sweepTimer) {
      clearInterval(this._sweepTimer);
      this._sweepTimer = null;
    }
    for (const ws of this._clients) { try { ws.close(); } catch (e) {} }
    this._clients.clear();
    try { this._wss?.close(); } catch (e) {}
    const server = this._server;
    if (server) {
      // Without a server there is nothing to invoke the close callback, so
      // only wait when one exists; otherwise this promise would never settle.
      await new Promise((resolve) => {
        try { server.close(() => resolve()); } catch (e) { resolve(); }
      });
    }
    this._server = null;
    this._wss = null;
  }

  // Constant-time comparison of a presented token against the configured one.
  // Both sides are hashed first so timingSafeEqual gets equal-length inputs.
  _tokenMatches(candidate) {
    if (typeof candidate !== "string" || !candidate || !this.token) return false;
    const presented = crypto.createHash("sha256").update(candidate).digest();
    const expected = crypto.createHash("sha256").update(this.token).digest();
    return crypto.timingSafeEqual(presented, expected);
  }

  /**
   * True when the request carries the bearer token, either as
   * `Authorization: Bearer <token>` or as a `token` query parameter.
   */
  _authOk(req, url) {
    const header = req.headers["authorization"] || "";
    const scheme = "Bearer ";
    if (typeof header === "string" && header.startsWith(scheme) && this._tokenMatches(header.slice(scheme.length))) {
      return true;
    }
    const qsToken = url ? url.searchParams.get("token") : null;
    return this._tokenMatches(qsToken);
  }

  // ── HTTP ────────────────────────────────────────────────────────

  // Route one HTTP request. Health and CORS preflight are unauthenticated.
  _handleHttp(req, res) {
    let url;
    try {
      url = new URL(req.url, `http://${req.headers.host || "localhost"}`);
    } catch (e) {
      // A malformed request target or Host header must not throw out of the
      // request handler (it would surface as an uncaught exception).
      return this._json(res, 400, { ok: false, error: "bad request" });
    }
    const pathname = url.pathname;

    if (req.method === "OPTIONS") return this._cors(res, 204);

    if (req.method === "GET" && (pathname === "/" || pathname === "/health")) {
      return this._json(res, 200, { ok: true, service: "open-claudia-voice", clients: this._clients.size });
    }

    // Outbound media fetch — token via query so <audio>/download works.
    if (req.method === "GET" && pathname.startsWith("/v1/media/")) {
      if (!this._authOk(req, url)) return this._json(res, 401, { ok: false, error: "unauthorized" });
      return this._serveMedia(res, pathname.slice("/v1/media/".length));
    }

    if (!this._authOk(req, url)) return this._json(res, 401, { ok: false, error: "unauthorized" });

    if (req.method === "GET" && pathname === "/v1/info") {
      return this._json(res, 200, {
        ok: true,
        channel: this.id,
        owner: this.ownerUserId,
        commands: this._commands,
        clients: this._clients.size,
      });
    }

    if (req.method === "POST" && pathname === "/v1/messages/text") {
      return this._readBody(req, res, (buf) => this._onText(buf, res));
    }
    if (req.method === "POST" && pathname === "/v1/messages/audio") {
      return this._readBody(req, res, (buf) => this._onUpload(buf, req, res, "voice"));
    }
    if (req.method === "POST" && pathname === "/v1/messages/media") {
      return this._readBody(req, res, (buf) => this._onUpload(buf, req, res, "auto"));
    }

    return this._json(res, 404, { ok: false, error: "not found" });
  }

  /**
   * Buffer the request body (capped at MAX_UPLOAD) and hand it to `cb`.
   * Exactly one outcome is acted on per request: cb, 413, or 400.
   */
  _readBody(req, res, cb) {
    const chunks = [];
    let size = 0;
    let settled = false;
    req.on("data", (c) => {
      if (settled) return; // already rejected; let remaining bytes drain
      size += c.length;
      if (size > MAX_UPLOAD) {
        settled = true;
        chunks.length = 0;
        // Write the 413 first and drop the connection only once it has been
        // flushed; destroying the request up front would discard the reply.
        res.once("finish", () => req.destroy());
        try { res.setHeader("Connection", "close"); } catch (e) {}
        this._json(res, 413, { ok: false, error: "too large" });
        return;
      }
      chunks.push(c);
    });
    req.on("end", () => {
      if (settled) return;
      settled = true;
      try { cb(Buffer.concat(chunks)); }
      catch (e) { this._json(res, 500, { ok: false, error: e.message }); }
    });
    req.on("error", () => {
      if (settled) return;
      settled = true;
      try { this._json(res, 400, { ok: false, error: "read error" }); } catch (e) {}
    });
  }

  // Build and emit the inbound text/command envelope; returns the new message id.
  _emitText(text, raw) {
    const messageId = this._mkId("t");
    this._emit("message", {
      adapter: this,
      channelId: this.channelId,
      canonicalUserId: canonicalForChannel("voice", this.channelId),
      userId: this.ownerUserId,
      type: text.trim().startsWith("/") ? "command" : "text",
      text,
      messageId,
      from: { id: this.ownerUserId, name: "Owner", username: "" },
      raw,
    });
    return messageId;
  }

  // POST /v1/messages/text — JSON body `{ text }`. Unparseable JSON is treated as empty.
  _onText(buf, res) {
    let body = {};
    try {
      const parsed = JSON.parse(buf.toString("utf-8") || "{}");
      // JSON such as `null` or `42` parses fine but has no `.text` to read.
      if (parsed && typeof parsed === "object") body = parsed;
    } catch (e) {}
    const text = (body.text || "").toString();
    if (!text.trim()) return this._json(res, 400, { ok: false, error: "empty text" });
    const messageId = this._emitText(text, body);
    return this._json(res, 202, { ok: true, messageId });
  }

  /**
   * POST /v1/messages/audio|media — multipart upload. Only the first file
   * part is used. The file is written to disk and an envelope is emitted
   * whose media `fileId` is the saved local path.
   * @param {"voice"|"auto"} kind "auto" derives the type from the part's MIME type
   */
  _onUpload(buf, req, res, kind) {
    const ct = req.headers["content-type"] || "";
    const m = /boundary=("?)([^";]+)\1/i.exec(ct);
    if (!m) return this._json(res, 400, { ok: false, error: "expected multipart/form-data" });
    const parsed = parseMultipart(buf, m[2]);
    const file = parsed.files[0];
    if (!file || !file.data || !file.data.length) return this._json(res, 400, { ok: false, error: "missing file" });

    const caption = parsed.fields.caption || parsed.fields.text || "";
    let type = kind;
    const mime = (file.contentType || "").toLowerCase();
    if (type === "auto") {
      if (mime.startsWith("image/")) type = "photo";
      else if (mime.startsWith("audio/")) type = "voice";
      else type = "document";
    }
    const isVoice = type === "voice";
    const baseDir = type === "document" ? FILES_DIR : TEMP_DIR;
    const fallbackName = `${type}-${Date.now()}`;
    let safeName = (file.filename || fallbackName).replace(/[^\w.\-]/g, "_");
    // Path separators are already replaced above; a dots-only name ("." or
    // "..") would still resolve to a directory, so swap in the fallback.
    if (/^\.*$/.test(safeName)) safeName = fallbackName;
    const ext = path.extname(safeName) || (isVoice ? ".m4a" : type === "photo" ? ".jpg" : ".bin");
    const localPath = type === "document"
      ? path.join(baseDir, safeName)
      : path.join(baseDir, `voice-in-${Date.now()}${ext}`);
    fs.writeFileSync(localPath, file.data);

    const messageId = this._mkId(type);
    const envelope = {
      adapter: this,
      channelId: this.channelId,
      canonicalUserId: canonicalForChannel("voice", this.channelId),
      userId: this.ownerUserId,
      type,
      text: caption,
      caption,
      messageId,
      from: { id: this.ownerUserId, name: "Owner", username: "" },
      // fileId is the already-saved local path; downloadMedia just returns it.
      media: [{ type, fileId: localPath, fileName: file.filename || path.basename(localPath), mimeType: file.contentType, size: file.data.length }],
      raw: {},
    };
    this._emit("message", envelope);
    return this._json(res, 202, { ok: true, messageId });
  }

  // ── WebSocket ───────────────────────────────────────────────────

  // Accept upgrades only on /v1/stream with a valid token; anything else gets a 401.
  _handleUpgrade(req, socket, head) {
    let url;
    try { url = new URL(req.url, `http://${req.headers.host || "localhost"}`); }
    catch (e) { socket.destroy(); return; }
    if (url.pathname !== "/v1/stream" || !this._authOk(req, url)) {
      socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n");
      socket.destroy();
      return;
    }
    this._wss.handleUpgrade(req, socket, head, (ws) => this._wss.emit("connection", ws));
  }

  // Track the client and greet it with the current command list.
  _handleWsConnection(ws) {
    this._clients.add(ws);
    // isAlive is maintained from pong frames; nothing in this class sends pings.
    ws.isAlive = true;
    ws.on("pong", () => { ws.isAlive = true; });
    ws.on("close", () => this._clients.delete(ws));
    ws.on("error", () => this._clients.delete(ws));
    ws.on("message", (data) => this._onWsMessage(data));
    this._wsSend(ws, { kind: "hello", channel: this.id, commands: this._commands, ts: Date.now() });
  }

  // Clients may send typed messages over the socket for lowest latency.
  // Frames that are not JSON objects, or that carry no usable text, are ignored.
  _onWsMessage(data) {
    let msg;
    try { msg = JSON.parse(data.toString()); } catch (e) { return; }
    if (!msg || typeof msg !== "object") return; // e.g. `null`, numbers, strings
    if (msg.kind === "ping") return; // keepalive
    if (msg.kind !== "text") return;
    // Coerce before trimming: a numeric `text` has no .trim() and would throw
    // inside the socket's message listener.
    const text = String(msg.text || "");
    if (!text.trim()) return;
    this._emitText(text, msg);
  }

  // Send a frame to every client whose socket is open; per-socket failures are ignored.
  _broadcast(frame) {
    const payload = JSON.stringify(frame);
    for (const ws of this._clients) {
      try { if (ws.readyState === ws.OPEN) ws.send(payload); } catch (e) {}
    }
  }

  _wsSend(ws, frame) { try { ws.send(JSON.stringify(frame)); } catch (e) {} }

  // ── outbound contract (called by core/io.js) ────────────────────

  // Accept either a portable `{ buttons }` keyboard or a Telegram-style inline keyboard.
  _normalizeKeyboard(keyboard) {
    if (!keyboard) return null;
    if (keyboard.buttons) return keyboard.buttons;
    if (keyboard.inline_keyboard) return inlineKeyboardToPortable(keyboard.inline_keyboard);
    return null;
  }

  /** Broadcast an assistant message. @returns {Promise<string>} the generated message id */
  async send(channelId, text, opts = {}) {
    const messageId = this._mkId("a");
    this._broadcast({
      kind: "message",
      role: "assistant",
      messageId,
      text: text || "",
      buttons: this._normalizeKeyboard(opts.keyboard),
      replyTo: opts.replyTo || null,
      ts: Date.now(),
    });
    return messageId;
  }

  /** Broadcast a live edit of a previously sent message. */
  async edit(channelId, messageId, text, opts = {}) {
    this._broadcast({
      kind: "edit",
      messageId,
      text: text || "",
      buttons: this._normalizeKeyboard(opts.keyboard),
      ts: Date.now(),
    });
  }

  /** Broadcast deletion of a message. */
  async delete(channelId, messageId) {
    this._broadcast({ kind: "delete", messageId, ts: Date.now() });
  }

  /** Publish an ogg voice reply; the file is removed once its media entry expires. */
  async sendVoice(channelId, oggPath) {
    try {
      const id = this._registerMedia(oggPath, "audio/ogg", path.basename(oggPath));
      this._broadcast({ kind: "voice", messageId: this._mkId("v"), url: `/v1/media/${id}`, mime: "audio/ogg", ts: Date.now() });
      return true;
    } catch (e) {
      console.error("voice sendVoice error:", e.message);
      return false;
    }
  }

  async sendPhoto(channelId, filePath, caption) { return this.sendFile(channelId, filePath, caption); }

  /** Publish a file; the source is kept on disk after the media entry expires. */
  async sendFile(channelId, filePath, caption) {
    try {
      const fileName = path.basename(filePath);
      const mime = this._guessMime(fileName);
      const id = this._registerMedia(filePath, mime, fileName, /* keep */ true);
      this._broadcast({
        kind: "file",
        messageId: this._mkId("f"),
        url: `/v1/media/${id}`,
        fileName,
        mime,
        caption: caption || "",
        ts: Date.now(),
      });
      return true;
    } catch (e) {
      console.error("voice sendFile error:", e.message);
      return false;
    }
  }

  async typing(channelId) {
    this._broadcast({ kind: "typing", ts: Date.now() });
  }

  // Inbound media was saved to disk at upload time; fileId holds the path.
  async downloadMedia(media) {
    if (!media) return null;
    return media.fileId || null;
  }

  /** Store the normalized command list and push it to connected clients. */
  async registerCommands(commands) {
    this._commands = (commands || [])
      .filter((c) => c && c.name)
      .map((c) => ({ name: String(c.name).replace(/^\//, ""), description: String(c.description || ""), args: typeof c.args === "string" ? c.args : "" }));
    this._broadcast({ kind: "commands", commands: this._commands, ts: Date.now() });
  }

  // ── media store ─────────────────────────────────────────────────

  // keep=true means don't delete the source after serving (files the user
  // may still want); voice replies are throwaway and removed after TTL.
  _registerMedia(filePath, mime, fileName, keep = false) {
    const id = crypto.randomBytes(9).toString("hex");
    this._media.set(id, { path: filePath, mime, fileName, keep, expires: Date.now() + MEDIA_TTL_MS });
    return id;
  }

  // Build a Content-Disposition value that is always a legal header: an ASCII
  // fallback name plus an RFC 5987 `filename*` carrying the real name.
  _contentDisposition(fileName) {
    const name = String(fileName || "file");
    const asciiName = name.replace(/[^\x20-\x7e]|["\\]/g, "_");
    let extended = "";
    try {
      const encoded = encodeURIComponent(name)
        .replace(/['()*]/g, (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`);
      extended = `; filename*=UTF-8''${encoded}`;
    } catch (e) {
      // encodeURIComponent throws on lone surrogates; the ASCII name still applies.
    }
    return `inline; filename="${asciiName}"${extended}`;
  }

  // Stream a registered media file, or answer 404 when unknown, expired or missing.
  _serveMedia(res, id) {
    const entry = this._media.get(id);
    // The sweep only runs once a minute, so enforce the TTL here as well.
    if (!entry || entry.expires <= Date.now()) return this._json(res, 404, { ok: false, error: "expired" });
    let stat;
    try { stat = fs.statSync(entry.path); } catch (e) { stat = null; }
    if (!stat || !stat.isFile()) return this._json(res, 404, { ok: false, error: "expired" });
    res.writeHead(200, {
      "Content-Type": entry.mime || "application/octet-stream",
      "Content-Length": stat.size,
      "Content-Disposition": this._contentDisposition(entry.fileName),
      "Access-Control-Allow-Origin": "*",
    });
    const stream = fs.createReadStream(entry.path);
    stream.on("error", (e) => {
      // Headers are already out; all we can do is abort the response.
      console.error("voice media stream error:", e.message);
      res.destroy(e);
    });
    stream.pipe(res);
  }

  // Drop expired media entries; throwaway (keep=false) files are also unlinked.
  _sweepMedia() {
    const now = Date.now();
    for (const [id, entry] of this._media) {
      if (entry.expires <= now) {
        this._media.delete(id);
        if (!entry.keep) { try { fs.unlinkSync(entry.path); } catch (e) {} }
      }
    }
  }

  // ── helpers ─────────────────────────────────────────────────────

  _mkId(prefix) { return `${prefix}-${Date.now()}-${crypto.randomBytes(4).toString("hex")}`; }

  // Map a file extension to a MIME type; unknown extensions get application/octet-stream.
  _guessMime(fileName) {
    const ext = path.extname(fileName).toLowerCase();
    const map = {
      ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".gif": "image/gif",
      ".webp": "image/webp", ".ogg": "audio/ogg", ".mp3": "audio/mpeg", ".m4a": "audio/mp4",
      ".wav": "audio/wav", ".mp4": "video/mp4", ".pdf": "application/pdf", ".txt": "text/plain",
      ".json": "application/json", ".csv": "text/csv",
    };
    return map[ext] || "application/octet-stream";
  }

  // Write a JSON response. A no-op when a response has already been started,
  // so late error paths cannot throw on a second writeHead().
  _json(res, code, payload) {
    if (res.headersSent || res.writableEnded) return;
    const body = JSON.stringify(payload);
    res.writeHead(code, {
      "Content-Type": "application/json; charset=utf-8",
      "Content-Length": Buffer.byteLength(body),
      "Access-Control-Allow-Origin": "*",
    });
    res.end(body);
  }

  // Answer a CORS preflight.
  _cors(res, code) {
    res.writeHead(code, {
      "Access-Control-Allow-Origin": "*",
      "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
      "Access-Control-Allow-Headers": "Authorization, Content-Type",
    });
    res.end();
  }
}
430
+
431
+ module.exports = { VoiceAdapter };
@@ -0,0 +1,70 @@
1
+ // Minimal multipart/form-data parser. The voice channel accepts audio and
2
+ // file uploads from the app; we parse them without pulling in a body-parser
3
+ // dependency. Returns { fields: {name: string}, files: [{name, filename,
4
+ // contentType, data}] }.
5
+
6
/**
 * Parse a multipart/form-data body.
 *
 * @param {Buffer} buffer raw request body
 * @param {string} boundary boundary token from the Content-Type header (no leading dashes)
 * @returns {{fields: Object<string, string>, files: Array<{name: string, filename: string, contentType: string, data: Buffer}>}}
 *   Parts with a `filename` become files; other named parts become UTF-8 string
 *   fields. A missing/empty buffer or boundary yields an empty result.
 */
function parseMultipart(buffer, boundary) {
  const result = { fields: {}, files: [] };
  if (!boundary || !buffer || !buffer.length) return result;
  const delimiter = Buffer.from(`--${boundary}`);
  const headerSeparator = Buffer.from("\r\n\r\n");
  const chunks = splitBuffer(buffer, delimiter);
  // chunks[0] is the preamble (whatever precedes the first delimiter) and is
  // never a part, so iteration starts at 1.
  for (let i = 1; i < chunks.length; i++) {
    const chunk = chunks[i];
    // "--" straight after a delimiter marks the closing delimiter; anything
    // after it is epilogue.
    if (chunk.length >= 2 && chunk[0] === 0x2d && chunk[1] === 0x2d) break;
    const part = trimEdges(chunk);
    if (!part.length) continue;
    const headerEnd = indexOfBuffer(part, headerSeparator);
    if (headerEnd < 0) continue;
    const headerBlock = part.subarray(0, headerEnd).toString("utf-8");
    let data = part.subarray(headerEnd + headerSeparator.length);
    // Each part's body is terminated by a trailing CRLF before the next
    // delimiter — strip it.
    if (data.length >= 2 && data[data.length - 2] === 0x0d && data[data.length - 1] === 0x0a) {
      data = data.subarray(0, data.length - 2);
    }
    const disposition = /content-disposition:[^\r\n]*/i.exec(headerBlock);
    if (!disposition) continue;
    // Anchor on a parameter boundary so `name=` cannot match the tail of
    // `filename=` when filename is listed first or is the only parameter.
    const nameMatch = /(?:^|[;\s])name="([^"]*)"/i.exec(disposition[0]);
    const filenameMatch = /(?:^|[;\s])filename="([^"]*)"/i.exec(disposition[0]);
    const ctMatch = /content-type:\s*([^\r\n]+)/i.exec(headerBlock);
    const name = nameMatch ? nameMatch[1] : "";
    if (filenameMatch) {
      result.files.push({
        name,
        filename: filenameMatch[1],
        contentType: ctMatch ? ctMatch[1].trim() : "application/octet-stream",
        data,
      });
    } else if (name) {
      result.fields[name] = data.toString("utf-8");
    }
  }
  return result;
}

// Split `buffer` on `delimiter`, honouring only occurrences at the very start
// of the buffer or immediately after a CRLF. Multipart delimiters always
// begin a line, so the same bytes appearing mid-line inside a part's payload
// must not cut the part in two.
function splitBuffer(buffer, delimiter) {
  const parts = [];
  let start = 0;
  let searchFrom = 0;
  let idx;
  while ((idx = indexOfBuffer(buffer, delimiter, searchFrom)) !== -1) {
    const atLineStart = idx === 0
      || (idx >= 2 && buffer[idx - 2] === 0x0d && buffer[idx - 1] === 0x0a);
    if (atLineStart) {
      parts.push(buffer.subarray(start, idx));
      start = idx + delimiter.length;
      searchFrom = start;
    } else {
      searchFrom = idx + 1;
    }
  }
  parts.push(buffer.subarray(start));
  return parts;
}

// Thin wrapper over Buffer#indexOf; returns -1 when `needle` is absent.
function indexOfBuffer(haystack, needle, from = 0) {
  return haystack.indexOf(needle, from);
}

function trimEdges(part) {
  // Each part begins with the CRLF that followed the previous boundary.
  // Strip that one leading CRLF and leave everything else untouched.
  if (part.length >= 2 && part[0] === 0x0d && part[1] === 0x0a) {
    return part.subarray(2);
  }
  return part;
}
69
+
70
+ module.exports = { parseMultipart };
package/core/access.js CHANGED
@@ -14,6 +14,9 @@ const { currentTransport, currentUserId } = require("./context");
14
14
  // /channel add can update the value without a process restart.
15
15
  function transportOwnerUserId(transport) {
16
16
  if (transport === "kazee") return config.KAZEE_OWNER_USER_ID || "";
17
+ // The voice channel is single-owner: the bearer token gates the connection,
18
+ // and every envelope carries this fixed owner id, so it authorizes as owner.
19
+ if (transport === "voice") return config.VOICE_OWNER_USER_ID || "voice-owner";
17
20
  return "";
18
21
  }
19
22
 
@@ -12,6 +12,7 @@ const { setAdapters } = require("./scheduler");
12
12
 
13
13
  const { TelegramAdapter } = require("../channels/telegram/adapter");
14
14
  const { KazeeAdapter } = require("../channels/kazee/adapter");
15
+ const { VoiceAdapter } = require("../channels/voice/adapter");
15
16
 
16
17
  const adapters = [];
17
18
  let messageHandler = null;
@@ -20,6 +21,7 @@ let actionHandler = null;
20
21
  function createAdapter(spec) {
21
22
  if (spec.type === "telegram") return new TelegramAdapter({ id: spec.id, ...spec.opts });
22
23
  if (spec.type === "kazee") return new KazeeAdapter({ id: spec.id, ...spec.opts });
24
+ if (spec.type === "voice") return new VoiceAdapter({ id: spec.id, ...spec.opts });
23
25
  console.error(`Unknown adapter type: ${spec.type}`);
24
26
  return null;
25
27
  }
package/core/config.js CHANGED
@@ -99,6 +99,9 @@ const TRANSCRIPTS_DIR = config.TRANSCRIPTS_DIR || process.env.TRANSCRIPTS_DIR ||
99
99
  const WHISPER_CLI = config.WHISPER_CLI || "";
100
100
  const WHISPER_MODEL = config.WHISPER_MODEL || "";
101
101
  const FFMPEG = config.FFMPEG || "";
102
+ const ELEVENLABS_API_KEY = config.ELEVENLABS_API_KEY || process.env.ELEVENLABS_API_KEY || "";
103
+ const ELEVENLABS_VOICE_ID = config.ELEVENLABS_VOICE_ID || process.env.ELEVENLABS_VOICE_ID || "EXAVITQu4vr4xnSDxMaL";
104
+ const ELEVENLABS_MODEL = config.ELEVENLABS_MODEL || process.env.ELEVENLABS_MODEL || "eleven_v3";
102
105
  const SOUL_FILE = config.SOUL_FILE || path.join(CONFIG_DIR, "soul.md");
103
106
  const CRONS_FILE = config.CRONS_FILE || path.join(CONFIG_DIR, "crons.json");
104
107
  const JOBS_FILE = config.JOBS_FILE || path.join(CONFIG_DIR, "jobs.json");
@@ -192,6 +195,22 @@ function loadChannels() {
192
195
  type: "kazee",
193
196
  opts: { url, token, ownerUserId, botUserId },
194
197
  });
198
+ } else if (type === "voice") {
199
+ const token = config.VOICE_BRIDGE_TOKEN;
200
+ if (!token) {
201
+ console.error(`CHANNELS includes ${entry} but VOICE_BRIDGE_TOKEN is unset — skipping.`);
202
+ continue;
203
+ }
204
+ channels.push({
205
+ id,
206
+ type: "voice",
207
+ opts: {
208
+ host: config.VOICE_BRIDGE_HOST || "0.0.0.0",
209
+ port: config.VOICE_BRIDGE_PORT || "8787",
210
+ token,
211
+ ownerUserId: config.VOICE_OWNER_USER_ID || "voice-owner",
212
+ },
213
+ });
195
214
  } else {
196
215
  console.error(`Unknown channel type: ${type} — skipping.`);
197
216
  }
@@ -233,6 +252,7 @@ module.exports = {
233
252
  TRANSCRIPT_MAX_ENTRY_CHARS,
234
253
  TRANSCRIPTS_DIR,
235
254
  WHISPER_CLI, WHISPER_MODEL, FFMPEG,
255
+ ELEVENLABS_API_KEY, ELEVENLABS_VOICE_ID, ELEVENLABS_MODEL,
236
256
  SOUL_FILE, CRONS_FILE, JOBS_FILE, TASKS_DIR, VAULT_FILE, AUTH_FILE, IDENTITIES_FILE,
237
257
  PEOPLE_FILE, INTROS_FILE, AUDIT_FILE,
238
258
  STATE_FILE, SESSIONS_FILE,
package/core/media.js CHANGED
@@ -4,7 +4,7 @@
4
4
  const fs = require("fs");
5
5
  const path = require("path");
6
6
  const { execSync } = require("child_process");
7
- const { WHISPER_CLI, WHISPER_MODEL, FFMPEG, TEMP_DIR } = require("./config");
7
+ const { WHISPER_CLI, WHISPER_MODEL, FFMPEG, TEMP_DIR, ELEVENLABS_API_KEY, ELEVENLABS_VOICE_ID, ELEVENLABS_MODEL } = require("./config");
8
8
 
9
9
  const TTS_CMD = process.platform === "darwin" ? "say" : null;
10
10
 
@@ -19,11 +19,14 @@ function transcribeAudio(oggPath) {
19
19
  .join(" ").trim();
20
20
  }
21
21
 
22
- function textToVoice(text) {
22
/**
 * Strip markdown punctuation and flatten newlines so text reads naturally
 * when spoken: paragraph breaks become ". " and single newlines a space.
 *
 * @param {*} text reply text; null/undefined are treated as empty and other
 *   non-strings are stringified, so a missing reply cannot throw here
 * @returns {string} cleaned, trimmed text (possibly empty)
 */
function cleanForTTS(text) {
  return String(text ?? "")
    .replace(/[*_`#>\[\]()]/g, "")
    .replace(/\n{2,}/g, ". ")
    .replace(/\n/g, " ")
    .trim();
}
25
+
26
+ // macOS `say` fallback. Synchronous. Returns ogg path or null.
27
+ function sayToVoice(clean) {
23
28
  if (!TTS_CMD || !FFMPEG) return null;
24
29
  try {
25
- const clean = text.replace(/[*_`#>\[\]()]/g, "").replace(/\n{2,}/g, ". ").replace(/\n/g, " ").trim();
26
- if (!clean) return null;
27
30
  const aiffPath = path.join(TEMP_DIR, `tts-${Date.now()}.aiff`);
28
31
  const oggPath = aiffPath.replace(".aiff", ".ogg");
29
32
  execSync(`${TTS_CMD} ${JSON.stringify(clean)} -o "${aiffPath}"`, { timeout: 30000 });
@@ -31,9 +34,49 @@ function textToVoice(text) {
31
34
  try { fs.unlinkSync(aiffPath); } catch (e) {}
32
35
  return oggPath;
33
36
  } catch (e) {
34
- console.error("TTS error:", e.message);
37
+ console.error("say TTS error:", e.message);
35
38
  return null;
36
39
  }
37
40
  }
38
41
 
42
/**
 * Natural TTS via ElevenLabs: request speech audio for `clean`, save it as
 * mp3, then transcode to ogg/opus with ffmpeg.
 *
 * @param {string} clean text already prepared for speech
 * @returns {Promise<string|null>} path of the ogg file, or null when the API
 *   key or ffmpeg is not configured or any step fails
 */
async function elevenLabsToVoice(clean) {
  if (!ELEVENLABS_API_KEY || !FFMPEG) return null;
  // Build both paths from one base rather than string-replacing the
  // extension, which would hit the first ".mp3" anywhere in the path.
  const basePath = path.join(TEMP_DIR, `tts-${Date.now()}`);
  const mp3Path = `${basePath}.mp3`;
  const oggPath = `${basePath}.ogg`;
  try {
    const res = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(ELEVENLABS_VOICE_ID)}`, {
      method: "POST",
      headers: { "xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json" },
      body: JSON.stringify({
        text: clean,
        model_id: ELEVENLABS_MODEL,
        voice_settings: { stability: 0.5, similarity_boost: 0.85, style: 0.5, use_speaker_boost: true },
      }),
      // Bound the request so a stalled connection cannot hang the reply forever.
      signal: AbortSignal.timeout(60000),
    });
    if (!res.ok) {
      const body = await res.text().catch(() => "");
      console.error(`ElevenLabs TTS failed: ${res.status} ${body}`.slice(0, 300));
      return null;
    }
    const buf = Buffer.from(await res.arrayBuffer());
    fs.writeFileSync(mp3Path, buf);
    execSync(`"${FFMPEG}" -i "${mp3Path}" -c:a libopus -y "${oggPath}" 2>/dev/null`, { timeout: 30000 });
    return oggPath;
  } catch (e) {
    console.error("ElevenLabs TTS error:", e.message);
    // ffmpeg may have left a partial ogg behind; don't leak it.
    try { fs.unlinkSync(oggPath); } catch (err) {}
    return null;
  } finally {
    // The intermediate mp3 is never needed after this call, success or not.
    try { fs.unlinkSync(mp3Path); } catch (err) {}
  }
}
72
+
73
/**
 * Turn reply text into an ogg voice note. ElevenLabs is tried first; the
 * `say` fallback runs only when ElevenLabs returns nothing.
 *
 * @param {string} text raw reply text
 * @returns {Promise<string|null>} ogg path, or null when the cleaned text is
 *   empty or neither engine produced audio
 */
async function textToVoice(text) {
  const spoken = cleanForTTS(text);
  if (!spoken) return null;
  return (await elevenLabsToVoice(spoken)) || sayToVoice(spoken);
}
81
+
39
82
  module.exports = { transcribeAudio, textToVoice, TTS_CMD };
package/core/runner.js CHANGED
@@ -16,7 +16,7 @@ const { chatContext, currentChannelId, currentAdapter } = require("./context");
16
16
  const { buildSystemPrompt, promptWithDynamicContext } = require("./system-prompt");
17
17
  const { redactSensitive } = require("./redact");
18
18
  const { send, editMessage, sendVoice, splitMessage } = require("./io");
19
- const { textToVoice, TTS_CMD } = require("./media");
19
+ const { textToVoice } = require("./media");
20
20
  const { killProcessTree } = require("./process-tree");
21
21
  const {
22
22
  appendProjectTranscript, transcriptProjectInfo,
@@ -1193,10 +1193,16 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
1193
1193
  }
1194
1194
  if (code !== 0 && code !== null) await send(`Exit code: ${code}`);
1195
1195
 
1196
- if (state.lastInputWasVoice && TTS_CMD) {
1196
+ if (state.lastInputWasVoice) {
1197
1197
  state.lastInputWasVoice = false;
1198
- const voicePath = textToVoice(finalText);
1199
- if (voicePath) await sendVoice(voicePath);
1198
+ // Spoken replies belong to the hands-free voice channel. On chat
1199
+ // transports (Telegram/Kazee) an auto voice note on every voice
1200
+ // input is unwanted noise, so gate it to the voice channel.
1201
+ const { currentTransport } = require("./context");
1202
+ if (currentTransport() === "voice") {
1203
+ const voicePath = await textToVoice(finalText);
1204
+ if (voicePath) await sendVoice(voicePath);
1205
+ }
1200
1206
  }
1201
1207
  } catch (e) {
1202
1208
  console.error("Final message delivery failed:", e.message);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inetafrica/open-claudia",
3
- "version": "2.6.44",
3
+ "version": "2.6.46",
4
4
  "description": "Your always-on AI coding assistant — Claude Code, Cursor Agent, and OpenAI Codex via Telegram or Kazee Chat",
5
5
  "main": "bot.js",
6
6
  "bin": {
@@ -61,6 +61,7 @@
61
61
  "dependencies": {
62
62
  "node-cron": "^4.2.1",
63
63
  "node-telegram-bot-api": "^0.67.0",
64
- "socket.io-client": "^4.7.5"
64
+ "socket.io-client": "^4.7.5",
65
+ "ws": "^8.18.0"
65
66
  }
66
67
  }