npm - @inetafrica/open-claudia - Versions diffs - 2.6.44 → 2.6.46 - Mend

@inetafrica/open-claudia 2.6.44 → 2.6.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/bot-agent.js +11 -22
package/channels/voice/adapter.js +431 -0
package/channels/voice/multipart.js +70 -0
package/core/access.js +3 -0
package/core/adapter-registry.js +2 -0
package/core/config.js +20 -0
package/core/media.js +48 -5
package/core/runner.js +10 -4
package/package.json +3 -2

package/bot-agent.js CHANGED Viewed

@@ -767,25 +767,9 @@ function transcribeAudio(oggPath) {
 }
 // ── Text-to-Speech ────────────────────────────────────────────────
+// Shared with direct mode: ElevenLabs natural voice, falling back to `say`.
-const TTS_CMD = process.platform === "darwin" ? "say" : null;
-function textToVoice(text) {
-  if (!TTS_CMD || !FFMPEG) return null;
-  try {
-    const clean = text.replace(/[*_`#>\[\]()]/g, "").replace(/\n{2,}/g, ". ").replace(/\n/g, " ").trim();
-    if (!clean) return null;
-    const aiffPath = path.join(TEMP_DIR, `tts-${Date.now()}.aiff`);
-    const oggPath = aiffPath.replace(".aiff", ".ogg");
-    execSync(`${TTS_CMD} ${JSON.stringify(clean)} -o "${aiffPath}"`, { timeout: 30000 });
-    execSync(`"${FFMPEG}" -i "${aiffPath}" -c:a libopus -y "${oggPath}" 2>/dev/null`, { timeout: 30000 });
-    try { fs.unlinkSync(aiffPath); } catch (e) {}
-    return oggPath;
-  } catch (e) {
-    console.error("TTS error:", e.message);
-    return null;
-  }
-}
+const { textToVoice } = require("./core/media");
 async function sendVoice(oggPath) {
   try {
@@ -1420,11 +1404,16 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
       }
       if (code !== 0 && code !== null) await send(`Exit code: ${code}`);
-      // Send voice reply if input was a voice note
-      if (lastInputWasVoice && TTS_CMD) {
+      // Spoken auto-replies on voice input are off by default on chat
+      // transports (they were unwanted noise on Telegram). Opt in with
+      // VOICE_REPLY_ON_VOICE=1. The hands-free voice channel speaks back
+      // regardless via its own path.
+      if (lastInputWasVoice) {
         lastInputWasVoice = false;
-        const voicePath = textToVoice(finalText);
-        if (voicePath) await sendVoice(voicePath);
+        if (process.env.VOICE_REPLY_ON_VOICE === "1") {
+          const voicePath = await textToVoice(finalText);
+          if (voicePath) await sendVoice(voicePath);
+        }
       }
     } catch (e) {
       console.error("Final message delivery failed:", e.message);

package/channels/voice/adapter.js ADDED Viewed

@@ -0,0 +1,431 @@
+// VoiceAdapter — an official Open Claudia channel for the companion app.
+//
+// Runs a LAN HTTP + WebSocket server the Expo client connects to (over
+// WireGuard / local network). Inbound: push-to-talk audio, typed text, and
+// file/image uploads. Outbound: assistant text (with live edits), spoken
+// replies (ElevenLabs ogg), and files — pushed to the client over the
+// WebSocket. Everything routes through the same core handlers as Telegram
+// and Kazee, so the voice channel has the full agent capability set.
+//
+// Security: single-owner channel. Every HTTP request and WS upgrade must carry the bearer token
+// (VOICE_BRIDGE_TOKEN), except the open health check (GET / and /health) and CORS preflight.
+// The owner identity is fixed (VOICE_OWNER_USER_ID), so access.js authorizes it as the bot owner.
+const http = require("http");
+const fs = require("fs");
+const path = require("path");
+const crypto = require("crypto");
+const { WebSocketServer } = require("ws");
+const { TEMP_DIR, FILES_DIR } = require("../../core/config");
+const { canonicalForChannel } = require("../../core/identity");
+const { inlineKeyboardToPortable } = require("../types");
+const { parseMultipart } = require("./multipart");
+const MEDIA_TTL_MS = 10 * 60 * 1000; // outbound media stays fetchable 10 min
+const MAX_UPLOAD = 25 * 1024 * 1024;
+class VoiceAdapter {
+  constructor({ id = "voice", host, port, token, ownerUserId } = {}) {
+    this.id = id;
+    this.type = "voice";
+    this.host = host || "0.0.0.0";
+    this.port = parseInt(port, 10) || 8787;
+    this.token = token || "";
+    this.ownerUserId = String(ownerUserId || "voice-owner");
+    // Single conversation: the channel id is the owner identity.
+    this.channelId = this.ownerUserId;
+    this._listeners = { message: new Set(), action: new Set() };
+    this._server = null;
+    this._wss = null;
+    this._clients = new Set();
+    this._media = new Map(); // id -> { path, mime, fileName, expires }
+    this._commands = [];
+    this._sweepTimer = null;
+  }
+  on(event, fn) {
+    if (!this._listeners[event]) return () => {};
+    this._listeners[event].add(fn);
+    return () => this._listeners[event].delete(fn);
+  }
+  _emit(event, envelope) {
+    for (const fn of this._listeners[event] || []) {
+      try { Promise.resolve(fn(envelope)).catch((e) => console.error(`voice ${event} handler:`, e.message)); }
+      catch (e) { console.error(`voice ${event} handler:`, e.message); }
+    }
+  }
+  // ── lifecycle ───────────────────────────────────────────────────
+  async start() {
+    if (!this.token) {
+      console.error("Voice adapter: VOICE_BRIDGE_TOKEN is unset — refusing to start an unauthenticated LAN server.");
+      throw new Error("VOICE_BRIDGE_TOKEN required");
+    }
+    this._server = http.createServer((req, res) => this._handleHttp(req, res));
+    this._wss = new WebSocketServer({ noServer: true });
+    this._server.on("upgrade", (req, socket, head) => this._handleUpgrade(req, socket, head));
+    this._wss.on("connection", (ws) => this._handleWsConnection(ws));
+    await new Promise((resolve, reject) => {
+      this._server.once("error", reject);
+      this._server.listen(this.port, this.host, () => {
+        this._server.removeListener("error", reject);
+        resolve();
+      });
+    });
+    this._sweepTimer = setInterval(() => this._sweepMedia(), 60 * 1000);
+    if (this._sweepTimer.unref) this._sweepTimer.unref();
+    console.log(`Voice channel listening on http://${this.host}:${this.port} (ws + http)`);
+  }
+  async stop() {
+    if (this._sweepTimer) clearInterval(this._sweepTimer);
+    for (const ws of this._clients) { try { ws.close(); } catch (e) {} }
+    this._clients.clear();
+    try { this._wss?.close(); } catch (e) {}
+    await new Promise((resolve) => { try { this._server?.close(() => resolve()); } catch (e) { resolve(); } });
+    this._server = null;
+    this._wss = null;
+  }
+  _authOk(req, url) {
+    const header = req.headers["authorization"] || "";
+    if (header === `Bearer ${this.token}`) return true;
+    const qsToken = url && url.searchParams.get("token");
+    return qsToken === this.token;
+  }
+  // ── HTTP ────────────────────────────────────────────────────────
+  _handleHttp(req, res) {
+    const url = new URL(req.url, `http://${req.headers.host || "localhost"}`);
+    const pathname = url.pathname;
+    if (req.method === "OPTIONS") return this._cors(res, 204);
+    if (req.method === "GET" && (pathname === "/" || pathname === "/health")) {
+      return this._json(res, 200, { ok: true, service: "open-claudia-voice", clients: this._clients.size });
+    }
+    // Outbound media fetch — token via query so <audio>/download works.
+    if (req.method === "GET" && pathname.startsWith("/v1/media/")) {
+      if (!this._authOk(req, url)) return this._json(res, 401, { ok: false, error: "unauthorized" });
+      return this._serveMedia(res, pathname.slice("/v1/media/".length));
+    }
+    if (!this._authOk(req, url)) return this._json(res, 401, { ok: false, error: "unauthorized" });
+    if (req.method === "GET" && pathname === "/v1/info") {
+      return this._json(res, 200, {
+        ok: true,
+        channel: this.id,
+        owner: this.ownerUserId,
+        commands: this._commands,
+        clients: this._clients.size,
+      });
+    }
+    if (req.method === "POST" && pathname === "/v1/messages/text") {
+      return this._readBody(req, res, (buf) => this._onText(buf, res));
+    }
+    if (req.method === "POST" && pathname === "/v1/messages/audio") {
+      return this._readBody(req, res, (buf) => this._onUpload(buf, req, res, "voice"));
+    }
+    if (req.method === "POST" && pathname === "/v1/messages/media") {
+      return this._readBody(req, res, (buf) => this._onUpload(buf, req, res, "auto"));
+    }
+    return this._json(res, 404, { ok: false, error: "not found" });
+  }
+  _readBody(req, res, cb) {
+    const chunks = [];
+    let size = 0;
+    req.on("data", (c) => {
+      size += c.length;
+      if (size > MAX_UPLOAD) { req.destroy(); this._json(res, 413, { ok: false, error: "too large" }); return; }
+      chunks.push(c);
+    });
+    req.on("end", () => { try { cb(Buffer.concat(chunks)); } catch (e) { this._json(res, 500, { ok: false, error: e.message }); } });
+    req.on("error", () => { try { this._json(res, 400, { ok: false, error: "read error" }); } catch (e) {} });
+  }
+  _onText(buf, res) {
+    let body = {};
+    try { body = JSON.parse(buf.toString("utf-8") || "{}"); } catch (e) {}
+    const text = (body.text || "").toString();
+    if (!text.trim()) return this._json(res, 400, { ok: false, error: "empty text" });
+    const messageId = this._mkId("t");
+    const isCommand = text.trim().startsWith("/");
+    this._emit("message", {
+      adapter: this,
+      channelId: this.channelId,
+      canonicalUserId: canonicalForChannel("voice", this.channelId),
+      userId: this.ownerUserId,
+      type: isCommand ? "command" : "text",
+      text,
+      messageId,
+      from: { id: this.ownerUserId, name: "Owner", username: "" },
+      raw: body,
+    });
+    return this._json(res, 202, { ok: true, messageId });
+  }
+  _onUpload(buf, req, res, kind) {
+    const ct = req.headers["content-type"] || "";
+    const m = /boundary=("?)([^";]+)\1/i.exec(ct);
+    if (!m) return this._json(res, 400, { ok: false, error: "expected multipart/form-data" });
+    const parsed = parseMultipart(buf, m[2]);
+    const file = parsed.files[0];
+    if (!file || !file.data || !file.data.length) return this._json(res, 400, { ok: false, error: "missing file" });
+    const caption = parsed.fields.caption || parsed.fields.text || "";
+    let type = kind;
+    const mime = (file.contentType || "").toLowerCase();
+    if (type === "auto") {
+      if (mime.startsWith("image/")) type = "photo";
+      else if (mime.startsWith("audio/")) type = "voice";
+      else type = "document";
+    }
+    const isVoice = type === "voice";
+    const baseDir = type === "document" ? FILES_DIR : TEMP_DIR;
+    const safeName = (file.filename || `${type}-${Date.now()}`).replace(/[^\w.\-]/g, "_");
+    const ext = path.extname(safeName) || (isVoice ? ".m4a" : type === "photo" ? ".jpg" : ".bin");
+    const localPath = type === "document"
+      ? path.join(baseDir, safeName)
+      : path.join(baseDir, `voice-in-${Date.now()}${ext}`);
+    fs.writeFileSync(localPath, file.data);
+    const messageId = this._mkId(type);
+    const envelope = {
+      adapter: this,
+      channelId: this.channelId,
+      canonicalUserId: canonicalForChannel("voice", this.channelId),
+      userId: this.ownerUserId,
+      type,
+      text: caption,
+      caption,
+      messageId,
+      from: { id: this.ownerUserId, name: "Owner", username: "" },
+      // fileId is the already-saved local path; downloadMedia just returns it.
+      media: [{ type, fileId: localPath, fileName: file.filename || path.basename(localPath), mimeType: file.contentType, size: file.data.length }],
+      raw: {},
+    };
+    this._emit("message", envelope);
+    return this._json(res, 202, { ok: true, messageId });
+  }
+  // ── WebSocket ───────────────────────────────────────────────────
+  _handleUpgrade(req, socket, head) {
+    let url;
+    try { url = new URL(req.url, `http://${req.headers.host || "localhost"}`); }
+    catch (e) { socket.destroy(); return; }
+    if (url.pathname !== "/v1/stream" || !this._authOk(req, url)) {
+      socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n");
+      socket.destroy();
+      return;
+    }
+    this._wss.handleUpgrade(req, socket, head, (ws) => this._wss.emit("connection", ws));
+  }
+  _handleWsConnection(ws) {
+    this._clients.add(ws);
+    ws.isAlive = true;
+    ws.on("pong", () => { ws.isAlive = true; });
+    ws.on("close", () => this._clients.delete(ws));
+    ws.on("error", () => this._clients.delete(ws));
+    ws.on("message", (data) => this._onWsMessage(data));
+    this._wsSend(ws, { kind: "hello", channel: this.id, commands: this._commands, ts: Date.now() });
+  }
+  // Clients may send typed messages over the socket for lowest latency.
+  _onWsMessage(data) {
+    let msg = {};
+    try { msg = JSON.parse(data.toString()); } catch (e) { return; }
+    if (msg.kind === "ping") return; // keepalive
+    if (msg.kind === "text" && (msg.text || "").trim()) {
+      const text = String(msg.text);
+      const messageId = this._mkId("t");
+      this._emit("message", {
+        adapter: this,
+        channelId: this.channelId,
+        canonicalUserId: canonicalForChannel("voice", this.channelId),
+        userId: this.ownerUserId,
+        type: text.trim().startsWith("/") ? "command" : "text",
+        text,
+        messageId,
+        from: { id: this.ownerUserId, name: "Owner", username: "" },
+        raw: msg,
+      });
+    }
+  }
+  _broadcast(frame) {
+    const payload = JSON.stringify(frame);
+    for (const ws of this._clients) {
+      try { if (ws.readyState === ws.OPEN) ws.send(payload); } catch (e) {}
+    }
+  }
+  _wsSend(ws, frame) { try { ws.send(JSON.stringify(frame)); } catch (e) {} }
+  // ── outbound contract (called by core/io.js) ────────────────────
+  _normalizeKeyboard(keyboard) {
+    if (!keyboard) return null;
+    if (keyboard.buttons) return keyboard.buttons;
+    if (keyboard.inline_keyboard) return inlineKeyboardToPortable(keyboard.inline_keyboard);
+    return null;
+  }
+  async send(channelId, text, opts = {}) {
+    const messageId = this._mkId("a");
+    this._broadcast({
+      kind: "message",
+      role: "assistant",
+      messageId,
+      text: text || "",
+      buttons: this._normalizeKeyboard(opts.keyboard),
+      replyTo: opts.replyTo || null,
+      ts: Date.now(),
+    });
+    return messageId;
+  }
+  async edit(channelId, messageId, text, opts = {}) {
+    this._broadcast({
+      kind: "edit",
+      messageId,
+      text: text || "",
+      buttons: this._normalizeKeyboard(opts.keyboard),
+      ts: Date.now(),
+    });
+  }
+  async delete(channelId, messageId) {
+    this._broadcast({ kind: "delete", messageId, ts: Date.now() });
+  }
+  async sendVoice(channelId, oggPath) {
+    try {
+      const id = this._registerMedia(oggPath, "audio/ogg", path.basename(oggPath));
+      this._broadcast({ kind: "voice", messageId: this._mkId("v"), url: `/v1/media/${id}`, mime: "audio/ogg", ts: Date.now() });
+      return true;
+    } catch (e) {
+      console.error("voice sendVoice error:", e.message);
+      return false;
+    }
+  }
+  async sendPhoto(channelId, filePath, caption) { return this.sendFile(channelId, filePath, caption); }
+  async sendFile(channelId, filePath, caption) {
+    try {
+      const fileName = path.basename(filePath);
+      const mime = this._guessMime(fileName);
+      const id = this._registerMedia(filePath, mime, fileName, /* keep */ true);
+      this._broadcast({
+        kind: "file",
+        messageId: this._mkId("f"),
+        url: `/v1/media/${id}`,
+        fileName,
+        mime,
+        caption: caption || "",
+        ts: Date.now(),
+      });
+      return true;
+    } catch (e) {
+      console.error("voice sendFile error:", e.message);
+      return false;
+    }
+  }
+  async typing(channelId) {
+    this._broadcast({ kind: "typing", ts: Date.now() });
+  }
+  // Inbound media was saved to disk at upload time; fileId holds the path.
+  async downloadMedia(media) {
+    if (!media) return null;
+    return media.fileId || null;
+  }
+  async registerCommands(commands) {
+    this._commands = (commands || [])
+      .filter((c) => c && c.name)
+      .map((c) => ({ name: String(c.name).replace(/^\//, ""), description: String(c.description || ""), args: typeof c.args === "string" ? c.args : "" }));
+    this._broadcast({ kind: "commands", commands: this._commands, ts: Date.now() });
+  }
+  // ── media store ─────────────────────────────────────────────────
+  // keep=true means the source file is left on disk when its entry expires (files
+  // the user may still want); voice replies are throwaway and deleted after the TTL.
+  _registerMedia(filePath, mime, fileName, keep = false) {
+    const id = crypto.randomBytes(9).toString("hex");
+    this._media.set(id, { path: filePath, mime, fileName, keep, expires: Date.now() + MEDIA_TTL_MS });
+    return id;
+  }
+  _serveMedia(res, id) {
+    const entry = this._media.get(id);
+    if (!entry || !fs.existsSync(entry.path)) return this._json(res, 404, { ok: false, error: "expired" });
+    const stat = fs.statSync(entry.path);
+    res.writeHead(200, {
+      "Content-Type": entry.mime || "application/octet-stream",
+      "Content-Length": stat.size,
+      "Content-Disposition": `inline; filename="${entry.fileName || "file"}"`,
+      "Access-Control-Allow-Origin": "*",
+    });
+    fs.createReadStream(entry.path).pipe(res);
+  }
+  _sweepMedia() {
+    const now = Date.now();
+    for (const [id, entry] of this._media) {
+      if (entry.expires <= now) {
+        this._media.delete(id);
+        if (!entry.keep) { try { fs.unlinkSync(entry.path); } catch (e) {} }
+      }
+    }
+  }
+  // ── helpers ─────────────────────────────────────────────────────
+  _mkId(prefix) { return `${prefix}-${Date.now()}-${crypto.randomBytes(4).toString("hex")}`; }
+  _guessMime(fileName) {
+    const ext = path.extname(fileName).toLowerCase();
+    const map = {
+      ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".gif": "image/gif",
+      ".webp": "image/webp", ".ogg": "audio/ogg", ".mp3": "audio/mpeg", ".m4a": "audio/mp4",
+      ".wav": "audio/wav", ".mp4": "video/mp4", ".pdf": "application/pdf", ".txt": "text/plain",
+      ".json": "application/json", ".csv": "text/csv",
+    };
+    return map[ext] || "application/octet-stream";
+  }
+  _json(res, code, payload) {
+    const body = JSON.stringify(payload);
+    res.writeHead(code, {
+      "Content-Type": "application/json; charset=utf-8",
+      "Content-Length": Buffer.byteLength(body),
+      "Access-Control-Allow-Origin": "*",
+    });
+    res.end(body);
+  }
+  _cors(res, code) {
+    res.writeHead(code, {
+      "Access-Control-Allow-Origin": "*",
+      "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
+      "Access-Control-Allow-Headers": "Authorization, Content-Type",
+    });
+    res.end();
+  }
+}
+module.exports = { VoiceAdapter };

package/channels/voice/multipart.js ADDED Viewed

@@ -0,0 +1,70 @@
+// Minimal multipart/form-data parser. The voice channel accepts audio and
+// file uploads from the app; we parse them without pulling in a body-parser
+// dependency. Returns { fields: {name: string}, files: [{name, filename,
+// contentType, data}] }.
+function parseMultipart(buffer, boundary) {
+  const result = { fields: {}, files: [] };
+  if (!boundary) return result;
+  const delimiter = Buffer.from(`--${boundary}`);
+  const parts = splitBuffer(buffer, delimiter);
+  for (let part of parts) {
+    // Trim leading CRLF and ignore the closing "--" / empty preamble.
+    part = trimEdges(part);
+    if (!part.length) continue;
+    const headerEnd = indexOfBuffer(part, Buffer.from("\r\n\r\n"));
+    if (headerEnd < 0) continue;
+    const headerBlock = part.slice(0, headerEnd).toString("utf-8");
+    let data = part.slice(headerEnd + 4);
+    // Each part's body is terminated by a trailing CRLF before the next
+    // delimiter — strip it.
+    if (data.length >= 2 && data[data.length - 2] === 0x0d && data[data.length - 1] === 0x0a) {
+      data = data.slice(0, data.length - 2);
+    }
+    const disposition = /content-disposition:[^\r\n]*/i.exec(headerBlock);
+    if (!disposition) continue;
+    const nameMatch = /name="([^"]*)"/i.exec(disposition[0]);
+    const filenameMatch = /filename="([^"]*)"/i.exec(disposition[0]);
+    const ctMatch = /content-type:\s*([^\r\n]+)/i.exec(headerBlock);
+    const name = nameMatch ? nameMatch[1] : "";
+    if (filenameMatch) {
+      result.files.push({
+        name,
+        filename: filenameMatch[1],
+        contentType: ctMatch ? ctMatch[1].trim() : "application/octet-stream",
+        data,
+      });
+    } else if (name) {
+      result.fields[name] = data.toString("utf-8");
+    }
+  }
+  return result;
+}
+function splitBuffer(buffer, delimiter) {
+  const parts = [];
+  let start = 0;
+  let idx;
+  while ((idx = indexOfBuffer(buffer, delimiter, start)) !== -1) {
+    parts.push(buffer.slice(start, idx));
+    start = idx + delimiter.length;
+  }
+  parts.push(buffer.slice(start));
+  return parts;
+}
+function indexOfBuffer(haystack, needle, from = 0) {
+  return haystack.indexOf(needle, from);
+}
+function trimEdges(part) {
+  // Each part begins with the CRLF that followed the previous boundary.
+  // Strip one leading CRLF; the closing "--\r\n" and the empty preamble are
+  // skipped by the caller (no header delimiter / zero length).
+  if (part.length >= 2 && part[0] === 0x0d && part[1] === 0x0a) {
+    return part.slice(2);
+  }
+  return part;
+}
+module.exports = { parseMultipart };

package/core/access.js CHANGED Viewed

@@ -14,6 +14,9 @@ const { currentTransport, currentUserId } = require("./context");
 // /channel add can update the value without a process restart.
 function transportOwnerUserId(transport) {
   if (transport === "kazee") return config.KAZEE_OWNER_USER_ID || "";
+  // The voice channel is single-owner: the bearer token gates the connection,
+  // and every envelope carries this fixed owner id, so it authorizes as owner.
+  if (transport === "voice") return config.VOICE_OWNER_USER_ID || "voice-owner";
   return "";
 }

package/core/adapter-registry.js CHANGED Viewed

@@ -12,6 +12,7 @@ const { setAdapters } = require("./scheduler");
 const { TelegramAdapter } = require("../channels/telegram/adapter");
 const { KazeeAdapter } = require("../channels/kazee/adapter");
+const { VoiceAdapter } = require("../channels/voice/adapter");
 const adapters = [];
 let messageHandler = null;
@@ -20,6 +21,7 @@ let actionHandler = null;
 function createAdapter(spec) {
   if (spec.type === "telegram") return new TelegramAdapter({ id: spec.id, ...spec.opts });
   if (spec.type === "kazee") return new KazeeAdapter({ id: spec.id, ...spec.opts });
+  if (spec.type === "voice") return new VoiceAdapter({ id: spec.id, ...spec.opts });
   console.error(`Unknown adapter type: ${spec.type}`);
   return null;
 }

package/core/config.js CHANGED Viewed

@@ -99,6 +99,9 @@ const TRANSCRIPTS_DIR = config.TRANSCRIPTS_DIR || process.env.TRANSCRIPTS_DIR ||
 const WHISPER_CLI = config.WHISPER_CLI || "";
 const WHISPER_MODEL = config.WHISPER_MODEL || "";
 const FFMPEG = config.FFMPEG || "";
+const ELEVENLABS_API_KEY = config.ELEVENLABS_API_KEY || process.env.ELEVENLABS_API_KEY || "";
+const ELEVENLABS_VOICE_ID = config.ELEVENLABS_VOICE_ID || process.env.ELEVENLABS_VOICE_ID || "EXAVITQu4vr4xnSDxMaL";
+const ELEVENLABS_MODEL = config.ELEVENLABS_MODEL || process.env.ELEVENLABS_MODEL || "eleven_v3";
 const SOUL_FILE = config.SOUL_FILE || path.join(CONFIG_DIR, "soul.md");
 const CRONS_FILE = config.CRONS_FILE || path.join(CONFIG_DIR, "crons.json");
 const JOBS_FILE = config.JOBS_FILE || path.join(CONFIG_DIR, "jobs.json");
@@ -192,6 +195,22 @@ function loadChannels() {
         type: "kazee",
         opts: { url, token, ownerUserId, botUserId },
       });
+    } else if (type === "voice") {
+      const token = config.VOICE_BRIDGE_TOKEN;
+      if (!token) {
+        console.error(`CHANNELS includes ${entry} but VOICE_BRIDGE_TOKEN is unset — skipping.`);
+        continue;
+      }
+      channels.push({
+        id,
+        type: "voice",
+        opts: {
+          host: config.VOICE_BRIDGE_HOST || "0.0.0.0",
+          port: config.VOICE_BRIDGE_PORT || "8787",
+          token,
+          ownerUserId: config.VOICE_OWNER_USER_ID || "voice-owner",
+        },
+      });
     } else {
       console.error(`Unknown channel type: ${type} — skipping.`);
     }
@@ -233,6 +252,7 @@ module.exports = {
   TRANSCRIPT_MAX_ENTRY_CHARS,
   TRANSCRIPTS_DIR,
   WHISPER_CLI, WHISPER_MODEL, FFMPEG,
+  ELEVENLABS_API_KEY, ELEVENLABS_VOICE_ID, ELEVENLABS_MODEL,
   SOUL_FILE, CRONS_FILE, JOBS_FILE, TASKS_DIR, VAULT_FILE, AUTH_FILE, IDENTITIES_FILE,
   PEOPLE_FILE, INTROS_FILE, AUDIT_FILE,
   STATE_FILE, SESSIONS_FILE,

package/core/media.js CHANGED Viewed

@@ -4,7 +4,7 @@
 const fs = require("fs");
 const path = require("path");
 const { execSync } = require("child_process");
-const { WHISPER_CLI, WHISPER_MODEL, FFMPEG, TEMP_DIR } = require("./config");
+const { WHISPER_CLI, WHISPER_MODEL, FFMPEG, TEMP_DIR, ELEVENLABS_API_KEY, ELEVENLABS_VOICE_ID, ELEVENLABS_MODEL } = require("./config");
 const TTS_CMD = process.platform === "darwin" ? "say" : null;
@@ -19,11 +19,14 @@ function transcribeAudio(oggPath) {
     .join(" ").trim();
 }
-function textToVoice(text) {
+function cleanForTTS(text) {
+  return text.replace(/[*_`#>\[\]()]/g, "").replace(/\n{2,}/g, ". ").replace(/\n/g, " ").trim();
+}
+// macOS `say` fallback. Synchronous. Returns ogg path or null.
+function sayToVoice(clean) {
   if (!TTS_CMD || !FFMPEG) return null;
   try {
-    const clean = text.replace(/[*_`#>\[\]()]/g, "").replace(/\n{2,}/g, ". ").replace(/\n/g, " ").trim();
-    if (!clean) return null;
     const aiffPath = path.join(TEMP_DIR, `tts-${Date.now()}.aiff`);
     const oggPath = aiffPath.replace(".aiff", ".ogg");
     execSync(`${TTS_CMD} ${JSON.stringify(clean)} -o "${aiffPath}"`, { timeout: 30000 });
@@ -31,9 +34,49 @@ function textToVoice(text) {
     try { fs.unlinkSync(aiffPath); } catch (e) {}
     return oggPath;
   } catch (e) {
-    console.error("TTS error:", e.message);
+    console.error("say TTS error:", e.message);
     return null;
   }
 }
+// Natural TTS via ElevenLabs. Returns ogg path or null on any failure.
+async function elevenLabsToVoice(clean) {
+  if (!ELEVENLABS_API_KEY || !FFMPEG) return null;
+  try {
+    const res = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${ELEVENLABS_VOICE_ID}`, {
+      method: "POST",
+      headers: { "xi-api-key": ELEVENLABS_API_KEY, "Content-Type": "application/json" },
+      body: JSON.stringify({
+        text: clean,
+        model_id: ELEVENLABS_MODEL,
+        voice_settings: { stability: 0.5, similarity_boost: 0.85, style: 0.5, use_speaker_boost: true },
+      }),
+    });
+    if (!res.ok) {
+      const body = await res.text().catch(() => "");
+      console.error(`ElevenLabs TTS failed: ${res.status} ${body}`.slice(0, 300));
+      return null;
+    }
+    const buf = Buffer.from(await res.arrayBuffer());
+    const mp3Path = path.join(TEMP_DIR, `tts-${Date.now()}.mp3`);
+    const oggPath = mp3Path.replace(".mp3", ".ogg");
+    fs.writeFileSync(mp3Path, buf);
+    execSync(`"${FFMPEG}" -i "${mp3Path}" -c:a libopus -y "${oggPath}" 2>/dev/null`, { timeout: 30000 });
+    try { fs.unlinkSync(mp3Path); } catch (e) {}
+    return oggPath;
+  } catch (e) {
+    console.error("ElevenLabs TTS error:", e.message);
+    return null;
+  }
+}
+// Natural voice via ElevenLabs, falling back to macOS `say` only when no API key is set or the request fails.
+async function textToVoice(text) {
+  const clean = cleanForTTS(text);
+  if (!clean) return null;
+  const eleven = await elevenLabsToVoice(clean);
+  if (eleven) return eleven;
+  return sayToVoice(clean);
+}
 module.exports = { transcribeAudio, textToVoice, TTS_CMD };

package/core/runner.js CHANGED Viewed

@@ -16,7 +16,7 @@ const { chatContext, currentChannelId, currentAdapter } = require("./context");
 const { buildSystemPrompt, promptWithDynamicContext } = require("./system-prompt");
 const { redactSensitive } = require("./redact");
 const { send, editMessage, sendVoice, splitMessage } = require("./io");
-const { textToVoice, TTS_CMD } = require("./media");
+const { textToVoice } = require("./media");
 const { killProcessTree } = require("./process-tree");
 const {
   appendProjectTranscript, transcriptProjectInfo,
@@ -1193,10 +1193,16 @@ async function runClaude(prompt, cwd, replyToMsgId, opts = {}) {
       }
       if (code !== 0 && code !== null) await send(`Exit code: ${code}`);
-      if (state.lastInputWasVoice && TTS_CMD) {
+      if (state.lastInputWasVoice) {
         state.lastInputWasVoice = false;
-        const voicePath = textToVoice(finalText);
-        if (voicePath) await sendVoice(voicePath);
+        // Spoken replies belong to the hands-free voice channel. On chat
+        // transports (Telegram/Kazee) an auto voice note on every voice
+        // input is unwanted noise, so gate it to the voice channel.
+        const { currentTransport } = require("./context");
+        if (currentTransport() === "voice") {
+          const voicePath = await textToVoice(finalText);
+          if (voicePath) await sendVoice(voicePath);
+        }
       }
     } catch (e) {
       console.error("Final message delivery failed:", e.message);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@inetafrica/open-claudia",
-  "version": "2.6.44",
+  "version": "2.6.46",
   "description": "Your always-on AI coding assistant — Claude Code, Cursor Agent, and OpenAI Codex via Telegram or Kazee Chat",
   "main": "bot.js",
   "bin": {
@@ -61,6 +61,7 @@
   "dependencies": {
     "node-cron": "^4.2.1",
     "node-telegram-bot-api": "^0.67.0",
-    "socket.io-client": "^4.7.5"
+    "socket.io-client": "^4.7.5",
+    "ws": "^8.18.0"
   }
 }