npm - agent-voice - Versions diffs - 0.2.4 → 0.3.0 - Mend

agent-voice 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/ask-5J4JCHM4.js +307 -0
package/dist/{ask-KM3JPI36.js → ask-F6CPRZ22.js} +31 -23
package/dist/{auth-KET5DNSE.js → auth-4VUEFCFK.js} +1 -1
package/dist/chunk-3YEHGYHI.js +115 -0
package/dist/chunk-NHLAAFR3.js +276 -0
package/dist/chunk-YU5FF2L7.js +12 -0
package/dist/chunk-ZNUQXGGO.js +145 -0
package/dist/cli.js +375 -32
package/dist/daemon-client-6GF277XU.js +94 -0
package/dist/daemon-lifecycle-BNXENMXI.js +25 -0
package/dist/daemon.js +473 -0
package/dist/index.js +36 -23
package/dist/say-6EJTKNJJ.js +195 -0
package/package.json +4 -3
package/dist/chunk-RGYWLATZ.js +0 -61

package/dist/daemon.js ADDED Viewed

@@ -0,0 +1,473 @@
+import {
+  BIT_DEPTH,
+  CHANNELS,
+  SAMPLE_RATE
+} from "./chunk-YU5FF2L7.js";
+// src/daemon.ts
+import { rmSync as rmSync3 } from "fs";
+import { createRequire } from "module";
+import { createServer } from "net";
+// src/config.ts
+import { chmodSync, mkdirSync, readFileSync, writeFileSync } from "fs";
+import { homedir } from "os";
+import { join } from "path";
+// Per-user directory under the home folder; the config file, daemon socket,
+// pid file and logs declared below are all placed inside it.
+var CONFIG_DIR = join(homedir(), ".agent-voice");
+var CONFIG_PATH = join(CONFIG_DIR, "config.json");
+// Socket path the daemon server listens on, and the file its pid is written to.
+var DAEMON_SOCKET_PATH = join(CONFIG_DIR, "daemon.sock");
+var DAEMON_PID_PATH = join(CONFIG_DIR, "daemon.pid");
+// Debug output locations: an NDJSON event log and a directory of WAV captures.
+var LOG_DIR = join(CONFIG_DIR, "logs");
+var AUDIO_LOG_DIR = join(LOG_DIR, "audio");
+var EVENTS_LOG_PATH = join(LOG_DIR, "events.ndjson");
+// Fallback values used by resolveDaemonConfig() when config.json has no
+// corresponding `daemon.*` setting.
+var DAEMON_DEFAULTS = {
+  idleTimeoutMinutes: 30,
+  audioRingBufferSize: 50
+};
+/**
+ * Loads the user configuration stored at CONFIG_PATH.
+ *
+ * Best-effort by design: a missing, unreadable or syntactically invalid file
+ * yields an empty object instead of an error. Valid JSON that is not a plain
+ * object (`null`, arrays, numbers, strings) is also treated as empty, because
+ * callers dereference properties on the result (`readConfig().debug`,
+ * `config.auth`) and a `null` config would otherwise crash them.
+ *
+ * @returns {object} The parsed configuration object, or `{}`.
+ */
+function readConfig() {
+  let parsed;
+  try {
+    parsed = JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));
+  } catch {
+    // No usable config file: fall back to defaults everywhere.
+    return {};
+  }
+  const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
+  return isPlainObject ? parsed : {};
+}
+/**
+ * Picks the credentials to use for API calls.
+ *
+ * The `auth` object from the config file wins when it carries a truthy
+ * `apiKey`; otherwise the OPENAI_API_KEY environment variable is used.
+ *
+ * @returns {object} The configured `auth` object, or `{ apiKey }` built from the environment.
+ * @throws {Error} When neither source provides a key.
+ */
+function resolveAuth() {
+  const fileAuth = readConfig().auth;
+  if (fileAuth?.apiKey) return fileAuth;
+  const envKey = process.env.OPENAI_API_KEY;
+  if (envKey) return { apiKey: envKey };
+  throw new Error(
+    "No API key found. Run `agent-voice auth` or set OPENAI_API_KEY."
+  );
+}
+/**
+ * Reports whether debug event logging is switched on.
+ * AGENT_VOICE_DEBUG=1 enables it without consulting the config file;
+ * otherwise the config's `debug` field must be exactly `true`.
+ *
+ * @returns {boolean}
+ */
+function isDebugEnabled() {
+  return process.env.AGENT_VOICE_DEBUG === "1" || readConfig().debug === true;
+}
+/**
+ * Reports whether audio capture to disk is switched on.
+ * AGENT_VOICE_DEBUG_AUDIO=1 enables it without consulting the config file;
+ * otherwise the config's flat "debug.audio" key must be exactly `true`.
+ *
+ * @returns {boolean}
+ */
+function isDebugAudioEnabled() {
+  return process.env.AGENT_VOICE_DEBUG_AUDIO === "1" || readConfig()["debug.audio"] === true;
+}
+/**
+ * Builds the effective daemon settings: each value comes from the config
+ * file's `daemon` section when present (not null/undefined), otherwise from
+ * DAEMON_DEFAULTS.
+ *
+ * @returns {{ idleTimeoutMinutes: *, audioRingBufferSize: * }}
+ */
+function resolveDaemonConfig() {
+  const overrides = readConfig().daemon;
+  const pick = (key) => overrides?.[key] ?? DAEMON_DEFAULTS[key];
+  return {
+    idleTimeoutMinutes: pick("idleTimeoutMinutes"),
+    audioRingBufferSize: pick("audioRingBufferSize")
+  };
+}
+// src/daemon-lifecycle.ts
+import { spawn } from "child_process";
+import {
+  existsSync,
+  mkdirSync as mkdirSync2,
+  readFileSync as readFileSync2,
+  rmSync,
+  writeFileSync as writeFileSync2
+} from "fs";
+import { connect } from "net";
+import { dirname, join as join2 } from "path";
+// src/daemon-protocol.ts
+import { z } from "zod";
+// Schema for messages a client sends to the daemon, discriminated on `type`:
+//   "say"      - id, message, voice
+//   "ask"      - id, message, voice, timeout, ack
+//   "ping"     - no further fields
+//   "shutdown" - no further fields
+var DaemonRequest = z.discriminatedUnion("type", [
+  z.object({
+    type: z.literal("say"),
+    id: z.string(),
+    message: z.string(),
+    voice: z.string()
+  }),
+  z.object({
+    type: z.literal("ask"),
+    id: z.string(),
+    message: z.string(),
+    voice: z.string(),
+    timeout: z.number(),
+    ack: z.boolean()
+  }),
+  z.object({ type: z.literal("ping") }),
+  z.object({ type: z.literal("shutdown") })
+]);
+// Schema for one trace event: a numeric `atMs`, an event name, and an
+// optional free-form `detail` record. Embedded in "log" responses.
+var TraceEntry = z.object({
+  atMs: z.number(),
+  event: z.string(),
+  detail: z.record(z.unknown()).optional()
+});
+// Schema for messages the daemon sends back, discriminated on `type`:
+//   "say:done" - id
+//   "ask:done" - id, transcript
+//   "error"    - id, message
+//   "pong"     - uptime, commandCount
+//   "log"      - id, entry (a TraceEntry)
+var DaemonResponse = z.discriminatedUnion("type", [
+  z.object({ type: z.literal("say:done"), id: z.string() }),
+  z.object({
+    type: z.literal("ask:done"),
+    id: z.string(),
+    transcript: z.string()
+  }),
+  z.object({ type: z.literal("error"), id: z.string(), message: z.string() }),
+  z.object({
+    type: z.literal("pong"),
+    uptime: z.number(),
+    commandCount: z.number()
+  }),
+  z.object({ type: z.literal("log"), id: z.string(), entry: TraceEntry })
+]);
+/**
+ * Serialises a message into one wire frame: a 4-byte big-endian length
+ * followed by the UTF-8 JSON text terminated with a newline. The length
+ * covers the JSON text and the newline, not the 4-byte prefix itself.
+ *
+ * @param {*} msg Value to serialise with JSON.stringify.
+ * @returns {Buffer} The complete frame.
+ */
+function encodeMessage(msg) {
+  const body = Buffer.from(`${JSON.stringify(msg)}
+`, "utf-8");
+  const frame = Buffer.alloc(4 + body.length);
+  frame.writeUInt32BE(body.length, 0);
+  body.copy(frame, 4);
+  return frame;
+}
+/**
+ * Creates a streaming decoder for length-prefixed JSON frames (4-byte
+ * big-endian length, then that many bytes of UTF-8 JSON).
+ *
+ * The returned function accepts arbitrary chunks: partial frames are buffered
+ * until complete, and several frames in one chunk are delivered in order.
+ *
+ * A frame whose payload is not valid JSON is dropped rather than thrown: the
+ * decoder runs inside a socket "data" handler, where an exception would
+ * surface as an uncaught error in the process. Decoding continues with the
+ * next frame. Errors thrown by `onMessage` itself are not intercepted.
+ *
+ * @param {(message: *) => void} onMessage Called once per decoded frame.
+ * @param {(error: Error) => void} [onError] Optional; called with the parse
+ *   error for each malformed frame.
+ * @returns {(chunk: Buffer) => void} Feed function for incoming data.
+ */
+function createMessageParser(onMessage, onError) {
+  let buffer = Buffer.alloc(0);
+  return (chunk) => {
+    buffer = Buffer.concat([buffer, chunk]);
+    while (buffer.length >= 4) {
+      const length = buffer.readUInt32BE(0);
+      // Wait for more data until the whole frame has arrived.
+      if (buffer.length < 4 + length) break;
+      const payload = buffer.subarray(4, 4 + length).toString("utf-8");
+      // Consume the frame before dispatching so a bad frame is never re-read.
+      buffer = buffer.subarray(4 + length);
+      let message;
+      try {
+        message = JSON.parse(payload);
+      } catch (err) {
+        onError?.(err);
+        continue;
+      }
+      onMessage(message);
+    }
+  };
+}
+// src/daemon-lifecycle.ts
+/**
+ * Records the daemon's process id in DAEMON_PID_PATH (followed by a
+ * newline), creating the containing directory first if needed.
+ *
+ * @param {number} pid Process id to record.
+ */
+function writeDaemonPid(pid) {
+  const pidDir = dirname(DAEMON_PID_PATH);
+  mkdirSync2(pidDir, { recursive: true });
+  const contents = `${pid}
+`;
+  writeFileSync2(DAEMON_PID_PATH, contents);
+}
+/**
+ * Deletes the pid file at DAEMON_PID_PATH. Any failure from rmSync is
+ * ignored, so this never throws.
+ */
+function removeDaemonPid() {
+  try {
+    rmSync(DAEMON_PID_PATH);
+  } catch {
+    // Best-effort cleanup: nothing to do if the file could not be removed.
+  }
+}
+// src/daemon-log.ts
+import {
+  appendFileSync,
+  mkdirSync as mkdirSync3,
+  readdirSync,
+  rmSync as rmSync2,
+  writeFileSync as writeFileSync3
+} from "fs";
+import { join as join3 } from "path";
+/** Creates LOG_DIR, including missing parent directories. */
+function ensureLogDir() {
+  const options = { recursive: true };
+  mkdirSync3(LOG_DIR, options);
+}
+/** Creates AUDIO_LOG_DIR, including missing parent directories. */
+function ensureAudioDir() {
+  const options = { recursive: true };
+  mkdirSync3(AUDIO_LOG_DIR, options);
+}
+/**
+ * Appends `entry` as one JSON line to EVENTS_LOG_PATH.
+ * Does nothing unless debug logging is enabled.
+ *
+ * @param {*} entry Value serialised with JSON.stringify.
+ */
+function appendLogEntry(entry) {
+  if (isDebugEnabled()) {
+    ensureLogDir();
+    const line = `${JSON.stringify(entry)}
+`;
+    appendFileSync(EVENTS_LOG_PATH, line);
+  }
+}
+/**
+ * Creates a logger bound to one command invocation. Every entry it emits
+ * goes through appendLogEntry and carries an ISO timestamp plus the given
+ * `cmd` and `id`.
+ *
+ * @param {string} cmd Command name stored on each entry.
+ * @param {string} id Request id stored on each entry.
+ * @returns {{ log: Function, trace: Function, startMs: number }}
+ *   `log(event, detail)` records a named event; `trace(event)` records a
+ *   trace object, folding its `atMs` into the detail; `startMs` is the
+ *   Date.now() value captured when the logger was created.
+ */
+function createCommandLogger(cmd, id) {
+  const startMs = Date.now();
+  const record = (event, detail) => {
+    appendLogEntry({
+      ts: (/* @__PURE__ */ new Date()).toISOString(),
+      cmd,
+      id,
+      event,
+      detail
+    });
+  };
+  return {
+    log(event, detail) {
+      record(event, detail);
+    },
+    trace(event) {
+      record(event.event, { ...event.detail, atMs: event.atMs });
+    },
+    get startMs() {
+      return startMs;
+    }
+  };
+}
+/**
+ * Wraps raw PCM bytes in a 44-byte RIFF/WAVE header (format tag 1, i.e.
+ * PCM) described by the module's SAMPLE_RATE, CHANNELS and BIT_DEPTH.
+ *
+ * @param {Buffer} pcm16 Raw sample bytes; its length becomes the data size.
+ * @returns {Buffer} Header followed by the sample bytes.
+ */
+function createWavBuffer(pcm16) {
+  const bytesPerSample = BIT_DEPTH / 8;
+  const dataSize = pcm16.length;
+  const byteRate = SAMPLE_RATE * CHANNELS * bytesPerSample;
+  const blockAlign = CHANNELS * bytesPerSample;
+  const header = Buffer.alloc(44);
+  // Fields are written back to back; the cursor tracks the next offset.
+  let cursor = 0;
+  const tag = (text) => {
+    header.write(text, cursor);
+    cursor += 4;
+  };
+  const u32 = (value) => {
+    header.writeUInt32LE(value, cursor);
+    cursor += 4;
+  };
+  const u16 = (value) => {
+    header.writeUInt16LE(value, cursor);
+    cursor += 2;
+  };
+  tag("RIFF");
+  u32(36 + dataSize); // RIFF chunk size: everything after this field
+  tag("WAVE");
+  tag("fmt ");
+  u32(16); // size of the fmt chunk body
+  u16(1); // audio format tag 1
+  u16(CHANNELS);
+  u32(SAMPLE_RATE);
+  u32(byteRate);
+  u16(blockAlign);
+  u16(BIT_DEPTH);
+  tag("data");
+  u32(dataSize);
+  return Buffer.concat([header, pcm16]);
+}
+/**
+ * Writes the captured audio streams of one command as WAV files named
+ * `<id>-<stream>.wav` in AUDIO_LOG_DIR, then trims old captures.
+ * Does nothing (and returns an empty list) unless audio debugging is enabled.
+ *
+ * @param {string} id Command id used as the file-name prefix.
+ * @param {Object<string, Buffer[]>} streams Chunk lists keyed by stream name;
+ *   streams with no chunks are skipped.
+ * @returns {string[]} Paths of the files that were written.
+ */
+function writeAudioCapture(id, streams) {
+  if (!isDebugAudioEnabled()) return [];
+  ensureAudioDir();
+  const written = Object.entries(streams).flatMap(([name, chunks]) => {
+    if (chunks.length === 0) return [];
+    const path = join3(AUDIO_LOG_DIR, `${id}-${name}.wav`);
+    writeFileSync3(path, createWavBuffer(Buffer.concat(chunks)));
+    return [path];
+  });
+  enforceRingBuffer();
+  return written;
+}
+/**
+ * Keeps at most `audioRingBufferSize` commands' worth of captures in
+ * AUDIO_LOG_DIR. Capture files are grouped by the command id that prefixes
+ * `-assistant.wav`, `-mic.wav` or `-model-input.wav`; when there are too many
+ * ids, the ones that sort first are removed together with all their files.
+ * Files not matching that naming pattern are left alone. An unreadable
+ * directory and individual delete failures are ignored.
+ */
+function enforceRingBuffer() {
+  const { audioRingBufferSize } = resolveDaemonConfig();
+  let wavFiles;
+  try {
+    wavFiles = readdirSync(AUDIO_LOG_DIR).filter((name) => name.endsWith(".wav")).sort();
+  } catch {
+    return;
+  }
+  const capturePattern = /^(.+)-(?:assistant|mic|model-input)\.wav$/;
+  // Remember which command id each capture file belongs to.
+  const idByFile = /* @__PURE__ */ new Map();
+  for (const name of wavFiles) {
+    const found = capturePattern.exec(name);
+    if (found) idByFile.set(name, found[1]);
+  }
+  const sortedIds = [...new Set(idByFile.values())].sort();
+  const surplus = sortedIds.length - audioRingBufferSize;
+  if (surplus <= 0) return;
+  const doomedIds = new Set(sortedIds.slice(0, surplus));
+  for (const [name, commandId] of idByFile) {
+    if (!doomedIds.has(commandId)) continue;
+    try {
+      rmSync2(join3(AUDIO_LOG_DIR, name));
+    } catch {
+    }
+  }
+}
+// src/daemon.ts
+// CommonJS-style loader, used further down to load "agent-voice-audio".
+var require2 = createRequire(import.meta.url);
+// Currently running audio engine and the mode it was created for
+// ({ engine, mode }), or null when none exists.
+var engineState = null;
+// Number of say/ask commands executed so far; reported in "pong" responses.
+var commandCount = 0;
+// Module load time; "pong" reports uptime relative to it.
+var startedAt = Date.now();
+// Handle of the pending idle-shutdown timer, or null before the first arm.
+var idleTimer = null;
+/**
+ * (Re)arms the idle timer: any pending timer is cancelled and a new one is
+ * scheduled that calls shutdown() after the configured number of minutes.
+ *
+ * The configured value is sanitised before use because Node's setTimeout
+ * silently turns a NaN delay, or one above 2^31-1 ms, into 1 ms. Without the
+ * guard, a non-numeric `daemon.idleTimeoutMinutes` or a very large one
+ * (above ~35791 minutes) would shut the daemon down almost immediately.
+ * Non-numeric values fall back to DAEMON_DEFAULTS; oversized values are
+ * clamped to the longest delay the timer supports.
+ */
+function resetIdleTimer() {
+  const MAX_TIMER_DELAY_MS = 2147483647;
+  if (idleTimer) clearTimeout(idleTimer);
+  const { idleTimeoutMinutes } = resolveDaemonConfig();
+  const minutes = typeof idleTimeoutMinutes === "number" && Number.isFinite(idleTimeoutMinutes) ? idleTimeoutMinutes : DAEMON_DEFAULTS.idleTimeoutMinutes;
+  idleTimer = setTimeout(
+    () => {
+      shutdown();
+    },
+    Math.min(minutes * 60 * 1e3, MAX_TIMER_DELAY_MS)
+  );
+}
+/**
+ * Returns the audio engine for `mode`, reusing the running one when it was
+ * created for the same mode. An engine created for a different mode is
+ * stopped and closed (errors ignored) before a new one is built and started.
+ * Echo cancellation and a 30 ms stream delay are requested only for "ask".
+ *
+ * @param {string} mode Mode the engine is needed for ("say" or "ask").
+ * @returns {object} The started AudioEngine instance.
+ */
+function getOrCreateEngine(mode) {
+  const current = engineState;
+  if (current) {
+    if (current.mode === mode) return current.engine;
+    try {
+      current.engine.stop();
+      current.engine.close();
+    } catch {
+    }
+    engineState = null;
+  }
+  const wantsAsk = mode === "ask";
+  const { AudioEngine } = require2("agent-voice-audio");
+  const engine = new AudioEngine({
+    sampleRate: SAMPLE_RATE,
+    channels: 1,
+    enableAec: wantsAsk,
+    streamDelayMs: wantsAsk ? 30 : void 0
+  });
+  engine.start();
+  engineState = { engine, mode };
+  return engine;
+}
+/**
+ * Wraps an engine so that lifecycle calls are neutralised: `start`, `stop`
+ * and `close` on the wrapper do nothing, while `play`,
+ * `readProcessedCapture`, `readRawCapture`, `setStreamDelayMs` and
+ * `getStats` are forwarded to the real engine (bound to it).
+ *
+ * @param {object} engine The real audio engine.
+ * @returns {object} The wrapper object.
+ */
+function createEngineProxy(engine) {
+  const proxy = {
+    start() {
+    },
+    stop() {
+    },
+    close() {
+    }
+  };
+  const forwarded = [
+    "play",
+    "readProcessedCapture",
+    "readRawCapture",
+    "setStreamDelayMs",
+    "getStats"
+  ];
+  for (const method of forwarded) {
+    proxy[method] = engine[method].bind(engine);
+  }
+  return proxy;
+}
+// Pending { request, socket } items, executed strictly one at a time.
+var commandQueue = [];
+// True while a processQueue() call is draining the queue.
+var processing = false;
+/**
+ * Drains commandQueue sequentially. Re-entrant calls return immediately
+ * while a drain is in progress; items queued meanwhile are picked up by the
+ * running loop.
+ *
+ * A command that rejects must not wedge the daemon: the `processing` flag is
+ * always cleared (otherwise every later call would return early and the
+ * queue would never run again), the failure is reported to the requesting
+ * client as an "error" response, and the remaining items still execute.
+ * The returned promise therefore does not reject because of a failed
+ * command, which matters because callers do not await it.
+ *
+ * @returns {Promise<void>}
+ */
+async function processQueue() {
+  if (processing) return;
+  processing = true;
+  try {
+    while (commandQueue.length > 0) {
+      const item = commandQueue.shift();
+      if (!item) break;
+      try {
+        await executeCommand(item.request, item.socket);
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        try {
+          // ping/shutdown requests carry no id, hence the fallback.
+          send(item.socket, { type: "error", id: item.request.id ?? "unknown", message });
+        } catch {
+          // Reporting is best-effort; the socket may already be unusable.
+        }
+      }
+    }
+  } finally {
+    processing = false;
+  }
+}
+/**
+ * Writes `msg` to `socket` as an encoded frame. Silently does nothing when
+ * the socket has already been destroyed.
+ *
+ * @param {object} socket Connection to write to.
+ * @param {*} msg Message passed to encodeMessage.
+ */
+function send(socket, msg) {
+  if (socket.destroyed) return;
+  socket.write(encodeMessage(msg));
+}
+/**
+ * Dispatches one validated request.
+ *
+ * "ping" is answered with a "pong" carrying uptime and the command counter;
+ * "shutdown" stops the daemon. Neither counts as a command nor touches the
+ * idle timer. Every other request increments the counter and re-arms the
+ * idle timer before "say" or "ask" is executed.
+ *
+ * @param {object} request Parsed request object with a `type` field.
+ * @param {object} socket Connection the response is written to.
+ * @returns {Promise<void>}
+ */
+async function executeCommand(request, socket) {
+  switch (request.type) {
+    case "ping":
+      send(socket, {
+        type: "pong",
+        uptime: Date.now() - startedAt,
+        commandCount
+      });
+      return;
+    case "shutdown":
+      shutdown();
+      return;
+    default:
+      break;
+  }
+  commandCount++;
+  resetIdleTimer();
+  switch (request.type) {
+    case "say":
+      await executeSay(request, socket);
+      break;
+    case "ask":
+      await executeAsk(request, socket);
+      break;
+    default:
+      break;
+  }
+}
+/**
+ * Executes one "say" request and reports the outcome on `socket`.
+ *
+ * On success a "say:done" response is sent; on any failure an "error"
+ * response with the error message is sent instead. Trace events raised
+ * during playback are logged and forwarded to the client as "log" responses.
+ * Assistant audio collected so far is handed to writeAudioCapture in both
+ * the success and the failure path.
+ *
+ * @param {object} request Validated "say" request (id, message, voice).
+ * @param {object} socket Connection the responses are written to.
+ * @returns {Promise<void>}
+ */
+async function executeSay(request, socket) {
+  const logger = createCommandLogger("say", request.id);
+  // Declared outside the try so the catch branch can still persist it.
+  const assistantChunks = [];
+  try {
+    const engine = getOrCreateEngine("say");
+    // say() receives the proxy, whose start/stop/close are no-ops, so the
+    // shared engine stays under this module's control.
+    const proxy = createEngineProxy(engine);
+    const auth = resolveAuth();
+    const { say } = await import("./say-6EJTKNJJ.js");
+    await say(request.message, {
+      voice: request.voice,
+      auth,
+      createAudioEngine: () => proxy,
+      onAssistantAudio(pcm16) {
+        // Copy each chunk into its own Buffer before storing it.
+        assistantChunks.push(Buffer.from(pcm16));
+      },
+      onTrace(event) {
+        logger.trace(event);
+        send(socket, { type: "log", id: request.id, entry: event });
+      }
+    });
+    writeAudioCapture(request.id, { assistant: assistantChunks });
+    logger.log("done");
+    send(socket, { type: "say:done", id: request.id });
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    logger.log("error", { message });
+    writeAudioCapture(request.id, { assistant: assistantChunks });
+    send(socket, { type: "error", id: request.id, message });
+  }
+}
+/**
+ * Executes one "ask" request and reports the outcome on `socket`.
+ *
+ * On success an "ask:done" response carrying the transcript is sent; on any
+ * failure an "error" response with the error message is sent instead. Trace
+ * events are logged and forwarded to the client as "log" responses. Three
+ * audio streams (assistant output, microphone input, frames sent to the
+ * model) are collected and handed to writeAudioCapture in both the success
+ * and the failure path.
+ *
+ * @param {object} request Validated "ask" request (id, message, voice, timeout, ack).
+ * @param {object} socket Connection the responses are written to.
+ * @returns {Promise<void>}
+ */
+async function executeAsk(request, socket) {
+  const logger = createCommandLogger("ask", request.id);
+  // Declared outside the try so the catch branch can still persist them.
+  const assistantChunks = [];
+  const micChunks = [];
+  const modelInputChunks = [];
+  try {
+    const engine = getOrCreateEngine("ask");
+    // ask() receives the proxy, whose start/stop/close are no-ops, so the
+    // shared engine stays under this module's control.
+    const proxy = createEngineProxy(engine);
+    const auth = resolveAuth();
+    const { ask } = await import("./ask-5J4JCHM4.js");
+    const transcript = await ask(request.message, {
+      voice: request.voice,
+      timeout: request.timeout,
+      ack: request.ack,
+      auth,
+      createAudioEngine: () => proxy,
+      onAssistantAudio(pcm16) {
+        assistantChunks.push(Buffer.from(pcm16));
+      },
+      onMicAudio(pcm16) {
+        micChunks.push(Buffer.from(pcm16));
+      },
+      onAudioFrameSent(pcm16) {
+        modelInputChunks.push(Buffer.from(pcm16));
+      },
+      onTrace(event) {
+        logger.trace(event);
+        send(socket, { type: "log", id: request.id, entry: event });
+      }
+    });
+    // Stream names here become the file-name suffixes of the capture files.
+    writeAudioCapture(request.id, {
+      assistant: assistantChunks,
+      mic: micChunks,
+      "model-input": modelInputChunks
+    });
+    logger.log("done", { transcript });
+    send(socket, { type: "ask:done", id: request.id, transcript });
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    logger.log("error", { message });
+    writeAudioCapture(request.id, {
+      assistant: assistantChunks,
+      mic: micChunks,
+      "model-input": modelInputChunks
+    });
+    send(socket, { type: "error", id: request.id, message });
+  }
+}
+// Connection handler: decodes frames from each client, validates them
+// against DaemonRequest and queues valid requests for sequential execution.
+// Invalid requests are answered right away with an "error" response whose id
+// is "unknown". Socket errors are deliberately ignored.
+var server = createServer((socket) => {
+  const handleMessage = (msg) => {
+    const parsed = DaemonRequest.safeParse(msg);
+    if (parsed.success) {
+      commandQueue.push({ request: parsed.data, socket });
+      processQueue();
+      return;
+    }
+    send(socket, {
+      type: "error",
+      id: "unknown",
+      message: `Invalid request: ${parsed.error.message}`
+    });
+  };
+  socket.on("data", createMessageParser(handleMessage));
+  socket.on("error", () => {
+  });
+});
+/**
+ * Stops the daemon: cancels the idle timer, stops accepting connections,
+ * stops and closes the audio engine (errors ignored), removes the pid file
+ * and the socket file (errors ignored), then exits the process with code 0.
+ */
+function shutdown() {
+  if (idleTimer) clearTimeout(idleTimer);
+  server.close();
+  const active = engineState;
+  if (active) {
+    try {
+      active.engine.stop();
+      active.engine.close();
+    } catch {
+    }
+    engineState = null;
+  }
+  removeDaemonPid();
+  try {
+    rmSync3(DAEMON_SOCKET_PATH);
+  } catch {
+  }
+  process.exit(0);
+}
+// Startup: remove any socket file left at DAEMON_SOCKET_PATH (errors
+// ignored) before listening on that path.
+// NOTE(review): the removal is unconditional; nothing here checks whether
+// another daemon still owns the socket. Confirm callers guard against that.
+try {
+  rmSync3(DAEMON_SOCKET_PATH);
+} catch {
+}
+// Once listening, record the pid and arm the idle-shutdown timer.
+server.listen(DAEMON_SOCKET_PATH, () => {
+  writeDaemonPid(process.pid);
+  resetIdleTimer();
+});
+// Run the same cleanup path on termination signals.
+process.on("SIGTERM", shutdown);
+process.on("SIGINT", shutdown);

package/dist/index.js CHANGED Viewed

@@ -197,6 +197,7 @@ async function ask(message, options = {}) {
     let lastAssistantAudioAt = 0;
     let nearEndEvidenceSeen = false;
     let nearEndEvidenceAtMs = 0;
+    let nearEndEvidenceConfirmed = false;
     let cleaned = false;
     let settled = false;
     async function cleanup() {
@@ -265,6 +266,19 @@ async function ask(message, options = {}) {
         if (rms >= minSpeechRms) {
           nearEndEvidenceSeen = true;
           nearEndEvidenceAtMs = Date.now();
+          if (!nearEndEvidenceConfirmed && speechStartedAtMs > 0) {
+            const evidencePreRollMs = readEnvInt(
+              "AGENT_VOICE_SPEECH_EVIDENCE_PREROLL_MS",
+              200
+            );
+            const evidencePostRollMs = readEnvInt(
+              "AGENT_VOICE_SPEECH_EVIDENCE_POSTROLL_MS",
+              1500
+            );
+            if (nearEndEvidenceAtMs >= speechStartedAtMs - evidencePreRollMs && nearEndEvidenceAtMs <= speechStartedAtMs + evidencePostRollMs) {
+              nearEndEvidenceConfirmed = true;
+            }
+          }
           trace("audio:near_end_evidence", { rms, minSpeechRms });
         }
         onAudioFrameSent?.(frame);
@@ -303,29 +317,14 @@ async function ask(message, options = {}) {
         }
         logEvent("realtime:transcript", `text="${text}"`);
         trace("realtime:transcript", { text });
-        if (speechDetected) {
-          const evidencePreRollMs = readEnvInt(
-            "AGENT_VOICE_SPEECH_EVIDENCE_PREROLL_MS",
-            200
-          );
-          const evidencePostRollMs = readEnvInt(
-            "AGENT_VOICE_SPEECH_EVIDENCE_POSTROLL_MS",
-            1500
-          );
-          const evidenceEarliestMs = speechStartedAtMs - evidencePreRollMs;
-          const evidenceLatestMs = speechStartedAtMs + evidencePostRollMs;
-          const hasTimelyNearEndEvidence = nearEndEvidenceSeen && nearEndEvidenceAtMs >= evidenceEarliestMs && nearEndEvidenceAtMs <= evidenceLatestMs;
-          if (!hasTimelyNearEndEvidence) {
-            trace("realtime:transcript_ignored_no_near_end_evidence", {
-              text,
-              speechStartedAtMs,
-              nearEndEvidenceSeen,
-              nearEndEvidenceAtMs,
-              evidenceEarliestMs,
-              evidenceLatestMs
-            });
-            return;
-          }
+        if (speechDetected && !nearEndEvidenceConfirmed) {
+          trace("realtime:transcript_ignored_no_near_end_evidence", {
+            text,
+            speechStartedAtMs,
+            nearEndEvidenceSeen,
+            nearEndEvidenceAtMs
+          });
+          return;
         }
         if (transcriptTimer) {
           clearTimeout(transcriptTimer);
@@ -339,6 +338,15 @@ async function ask(message, options = {}) {
         trace("realtime:speech_started");
         speechDetected = true;
         speechStartedAtMs = Date.now();
+        if (nearEndEvidenceSeen && !nearEndEvidenceConfirmed) {
+          const evidencePreRollMs = readEnvInt(
+            "AGENT_VOICE_SPEECH_EVIDENCE_PREROLL_MS",
+            200
+          );
+          if (nearEndEvidenceAtMs >= speechStartedAtMs - evidencePreRollMs) {
+            nearEndEvidenceConfirmed = true;
+          }
+        }
         if (timeoutTimer) {
           clearTimeout(timeoutTimer);
           timeoutTimer = null;
@@ -424,6 +432,11 @@ import { homedir } from "os";
 import { join } from "path";
 var CONFIG_DIR = join(homedir(), ".agent-voice");
 var CONFIG_PATH = join(CONFIG_DIR, "config.json");
+var DAEMON_SOCKET_PATH = join(CONFIG_DIR, "daemon.sock");
+var DAEMON_PID_PATH = join(CONFIG_DIR, "daemon.pid");
+var LOG_DIR = join(CONFIG_DIR, "logs");
+var AUDIO_LOG_DIR = join(LOG_DIR, "audio");
+var EVENTS_LOG_PATH = join(LOG_DIR, "events.ndjson");
 function readConfig() {
   try {
     return JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));