npm - voicecc - Versions diffs - 1.1.36 → 1.2.0 - Mend

voicecc 1.1.36 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/bin/voicecc.js +94 -1
package/dashboard/dist/assets/index-DCeOdulF.js +28 -0
package/dashboard/dist/index.html +1 -1
package/dashboard/routes/agents.ts +28 -8
package/dashboard/routes/browser-call.ts +3 -2
package/dashboard/routes/chat.ts +75 -55
package/dashboard/routes/providers.ts +5 -74
package/dashboard/routes/twilio.ts +104 -5
package/dashboard/routes/voice.ts +98 -0
package/dashboard/server.ts +48 -1
package/package.json +2 -3
package/server/index.ts +96 -8
package/server/services/twilio-manager.ts +29 -10
package/dashboard/dist/assets/index-C62C9Gp0.js +0 -28
package/dashboard/dist/audio-processor.js +0 -126
package/server/services/heartbeat.ts +0 -403
package/server/voice/assets/chime.wav +0 -0
package/server/voice/assets/startup.pcm +0 -0
package/server/voice/audio-adapter.ts +0 -60
package/server/voice/audio-inactivity.test.ts +0 -108
package/server/voice/audio-inactivity.ts +0 -91
package/server/voice/browser-audio-playback.test.ts +0 -149
package/server/voice/browser-audio.ts +0 -147
package/server/voice/browser-server.ts +0 -311
package/server/voice/chat-server.ts +0 -236
package/server/voice/chime.test.ts +0 -69
package/server/voice/chime.ts +0 -36
package/server/voice/claude-session.ts +0 -293
package/server/voice/endpointing.ts +0 -163
package/server/voice/mic-vpio +0 -0
package/server/voice/narration.ts +0 -204
package/server/voice/prompt-builder.ts +0 -108
package/server/voice/session-lock.ts +0 -123
package/server/voice/stt-elevenlabs.ts +0 -210
package/server/voice/stt-provider.ts +0 -106
package/server/voice/tts-elevenlabs-hiss.test.ts +0 -183
package/server/voice/tts-elevenlabs.ts +0 -397
package/server/voice/tts-provider.ts +0 -155
package/server/voice/twilio-audio.ts +0 -338
package/server/voice/twilio-server.ts +0 -540
package/server/voice/types.ts +0 -282
package/server/voice/vad.ts +0 -101
package/server/voice/voice-loop-bugs.test.ts +0 -348
package/server/voice/voice-server.ts +0 -129
package/server/voice/voice-session.ts +0 -539

package/server/voice/endpointing.ts DELETED Viewed

@@ -1,163 +0,0 @@
-/**
- * Endpointing module -- determines when the user is done speaking.
- *
- * Uses a two-tier approach to decide turn completion:
- * - Fast path: VAD silence duration + sufficient word count (0ms latency)
- * - Slow path: Haiku semantic check for short/ambiguous utterances (~200ms)
- * - Timeout path: Forces completion after extended silence regardless of content
- *
- * Responsibilities:
- * - Track silence duration from VAD events
- * - Apply fast-path completion for longer utterances
- * - Call Haiku API for semantic turn-completion on short utterances
- * - Force timeout after extended silence
- * - Reset state between turns
- */
-import Anthropic from "@anthropic-ai/sdk";
-import type { EndpointDecision, EndpointingConfig, VadEvent } from "./types.js";
-// ============================================================================
-// CONSTANTS
-// ============================================================================
-const HAIKU_MODEL = "claude-haiku-4-5-20251001"; // sent as `model` in the turn-completion request
-const HAIKU_MAX_TOKENS = 10; // sent as `max_tokens`; the prompt asks for a bare "yes" or "no"
-// ============================================================================
-// INTERFACES
-// ============================================================================
-/**
- * Endpointer that processes VAD events and decides when the user is done speaking.
- *
- * Instances are produced by createEndpointer().
- */
-export interface Endpointer {
-  /**
-   * Process a VAD event and determine if the user's turn is complete.
-   * The result is asynchronous because the implementation in this file may
-   * await a Haiku API call when a SPEECH_END event arrives with a short transcript.
-   * @param event - The VAD event from the voice activity detector
-   * @param currentTranscript - The accumulated transcript so far
-   * @returns Promise resolving to the decision on whether the user has finished speaking
-   */
-  onVadEvent(event: VadEvent, currentTranscript: string): Promise<EndpointDecision>;
-  /**
-   * Reset internal state for a new turn.
-   * The implementation returned by createEndpointer() keeps no per-turn state,
-   * so its reset() does nothing.
-   */
-  reset(): void;
-}
-// ============================================================================
-// MAIN ENTRYPOINT
-// ============================================================================
-/**
- * Build an Endpointer bound to the supplied configuration.
- *
- * An Anthropic client is constructed only when `config.enableHaikuFallback`
- * is truthy; otherwise `null` is handed to the event handler.
- *
- * @param config - Endpointing thresholds and feature flags
- * @returns An Endpointer whose `onVadEvent` delegates to `handleVadEvent`
- */
-export function createEndpointer(config: EndpointingConfig): Endpointer {
-  let haikuClient: Anthropic | null = null;
-  if (config.enableHaikuFallback) {
-    haikuClient = new Anthropic();
-  }
-  const onVadEvent = (event: VadEvent, currentTranscript: string): Promise<EndpointDecision> =>
-    handleVadEvent(event, currentTranscript, config, haikuClient);
-  // Nothing is carried between turns: completion is evaluated per SPEECH_END event.
-  const reset = (): void => {};
-  return { onVadEvent, reset };
-}
-// ============================================================================
-// MAIN LOGIC
-// ============================================================================
-/**
- * Turn one VAD event into an endpoint decision.
- *
- * Only SPEECH_END can complete a turn. SPEECH_START, SPEECH_CONTINUE and any
- * unrecognised event type all yield "not complete".
- *
- * @param event - The VAD event to process
- * @param transcript - Current accumulated transcript
- * @param config - Endpointing configuration
- * @param client - Anthropic client for Haiku calls (null if disabled)
- * @returns The endpoint decision
- */
-async function handleVadEvent(
-  event: VadEvent,
-  transcript: string,
-  config: EndpointingConfig,
-  client: Anthropic | null,
-): Promise<EndpointDecision> {
-  // Anything other than SPEECH_END leaves the turn open.
-  if (event.type !== "SPEECH_END") {
-    return { isComplete: false, transcript, method: "vad_fast" };
-  }
-  // avr-vad debounces internally (redemptionFrames) before firing SPEECH_END and
-  // emits no separate SILENCE events, so completion is evaluated right away.
-  const enoughWords = countWords(transcript) >= config.minWordCountForFastPath;
-  if (enoughWords) {
-    // Fast path: the utterance is long enough to accept without a semantic check.
-    return { isComplete: true, transcript, method: "vad_fast" };
-  }
-  if (!config.enableHaikuFallback || client === null) {
-    // Semantic check unavailable: a short utterance is treated as complete.
-    return { isComplete: true, transcript, method: "vad_fast" };
-  }
-  // Short utterance: let Haiku judge whether the turn is semantically finished.
-  const isComplete = await checkTurnCompletionWithHaiku(client, transcript);
-  return { isComplete, transcript, method: "haiku_semantic" };
-}
-// ============================================================================
-// HELPER FUNCTIONS
-// ============================================================================
-/**
- * Count whitespace-delimited words in a transcript.
- * @param text - The transcript text
- * @returns Number of runs of non-whitespace characters (0 for empty or blank input)
- */
-function countWords(text: string): number {
-  // Each maximal run of non-whitespace is one word; no match means no words.
-  const words = text.match(/\S+/g);
-  return words === null ? 0 : words.length;
-}
-/**
- * Call Haiku to determine if a short transcript represents a complete user turn.
- *
- * Sends a single user message asking for a "yes"/"no" verdict and interprets
- * the first content block of the response.
- *
- * @param client - The Anthropic SDK client
- * @param transcript - The short transcript to evaluate
- * @returns True if the trimmed, lower-cased answer starts with "yes"
- * @throws Error if the response has no content blocks or the first block is not text
- */
-async function checkTurnCompletionWithHaiku(client: Anthropic, transcript: string): Promise<boolean> {
-  const response = await client.messages.create({
-    model: HAIKU_MODEL,
-    max_tokens: HAIKU_MAX_TOKENS,
-    messages: [
-      {
-        role: "user",
-        content: `Is this a complete user turn? Answer only "yes" or "no".\n\nTranscript: "${transcript}"`,
-      },
-    ],
-  });
-  const firstBlock = response.content[0];
-  // An empty content array would otherwise surface as an opaque TypeError on `.type`.
-  if (firstBlock === undefined) {
-    throw new Error("Haiku response contained no content blocks");
-  }
-  if (firstBlock.type !== "text") {
-    throw new Error(`Unexpected Haiku response block type: ${firstBlock.type}`);
-  }
-  const answer = firstBlock.text.trim().toLowerCase();
-  return answer.startsWith("yes");
-}

package/server/voice/mic-vpio DELETED Viewed

Binary file

package/server/voice/narration.ts DELETED Viewed

@@ -1,204 +0,0 @@
-/**
- * Processes Claude's streaming output into TTS-friendly text.
- *
- * Two modes of operation:
- * - Response mode: passes text_delta content through immediately for streaming
- *   TTS. Text is buffered into sentences downstream in the TTS module.
- * - Long-task mode: emits periodic template-based summaries during tool use
- *   (e.g. "Running Bash...", "Still working on Bash...").
- *
- * Responsibilities:
- * - Pass through streaming text deltas immediately for low-latency TTS
- * - Track tool execution and emit periodic spoken summaries
- * - Flush remaining text on result/error events
- */
-import type { ClaudeStreamEvent, NarrationConfig } from "./types.js";
-/**
- * Remove markdown syntax so the text reads naturally when spoken.
- * The substitutions are applied in the listed order.
- */
-function stripMarkdown(text: string): string {
-  const rules: ReadonlyArray<readonly [RegExp, string]> = [
-    [/\*+/g, ""], // bold/italic asterisks
-    [/#+\s*/g, ""], // heading markers
-    [/`+/g, ""], // inline code / code fences
-    [/\[([^\]]*)\]\([^)]*\)/g, "$1"], // [text](url) → text
-    [/^-\s+/gm, ""], // unordered list markers
-    [/^\d+\.\s+/gm, ""], // ordered list markers
-  ];
-  return rules.reduce((spoken, [pattern, replacement]) => spoken.replace(pattern, replacement), text);
-}
-// ============================================================================
-// INTERFACES
-// ============================================================================
-/**
- * Narrator instance that processes Claude stream events into speakable text.
- *
- * Instances are produced by createNarrator().
- */
-export interface Narrator {
-  /**
-   * Process a single Claude stream event and return any text ready to be spoken.
-   * In the createNarrator() implementation, periodic "Still working on ..."
-   * summaries are NOT returned here; they are delivered through the optional
-   * onEmit callback passed to createNarrator().
-   * @param event - The Claude stream event to process
-   * @returns Array of strings to speak (often empty, sometimes 1-2 sentences)
-   */
-  processEvent(event: ClaudeStreamEvent): string[];
-  /**
-   * Flush any remaining buffered text that hasn't been emitted yet.
-   * The createNarrator() implementation keeps no text buffer (deltas are
-   * passed straight through), so its flush() always returns an empty array.
-   * @returns Array of remaining text strings to speak
-   */
-  flush(): string[];
-  /**
-   * Reset all internal state for a new conversation turn.
-   * In the createNarrator() implementation this also stops the periodic summary timer.
-   */
-  reset(): void;
-}
-// ============================================================================
-// MAIN HANDLERS
-// ============================================================================
-/**
- * Create a Narrator that turns Claude stream events into speakable text.
- *
- * Text deltas are stripped of markdown and returned immediately. While a tool
- * is running, an interval timer reports "Still working on ..." through onEmit.
- *
- * @param config - Narration configuration (summaryIntervalMs controls long-task summary frequency)
- * @param onEmit - Optional callback that receives the periodic long-task summaries
- * @returns A Narrator instance
- */
-export function createNarrator(config: NarrationConfig, onEmit?: (text: string) => void): Narrator {
-  // -- per-instance state --
-  let activeTool: string | null = null;
-  let ticker: NodeJS.Timeout | null = null;
-  let longTaskActive = false;
-  /** Cancel the periodic summary interval, if one is running. */
-  const stopTicker = (): void => {
-    if (ticker === null) return;
-    clearInterval(ticker);
-    ticker = null;
-  };
-  /** Return to idle: no timer, no tracked tool, not in long-task mode. */
-  const goIdle = (): void => {
-    stopTicker();
-    activeTool = null;
-    longTaskActive = false;
-  };
-  /**
-   * Nothing is buffered here (chunking happens downstream), so there is never
-   * anything left to flush.
-   * @returns Always an empty array
-   */
-  const flush = (): string[] => [];
-  /**
-   * text_delta: Claude is answering directly, so leave long-task mode and pass
-   * the markdown-stripped delta through.
-   * @param event - The text_delta event
-   * @returns The cleaned delta as a one-element array, or an empty array
-   */
-  const onTextDelta = (event: ClaudeStreamEvent): string[] => {
-    if (longTaskActive) {
-      goIdle();
-    }
-    if (!event.content) {
-      return [];
-    }
-    const spoken = stripMarkdown(event.content);
-    return spoken ? [spoken] : [];
-  };
-  /**
-   * tool_start: enter long-task mode, (re)start the summary interval and
-   * announce the tool.
-   * @param event - The tool_start event (toolName may be absent)
-   * @returns One-element array with the initial "Running ..." message
-   */
-  const onToolStart = (event: ClaudeStreamEvent): string[] => {
-    const label = event.toolName ?? "unknown tool";
-    activeTool = label;
-    longTaskActive = true;
-    // Only one interval may run at a time: drop any previous one first.
-    stopTicker();
-    ticker = setInterval(() => {
-      // activeTool is read at tick time; it is null once the tool has ended.
-      const subject = activeTool ?? "the task";
-      const summary = `Still working on ${subject}...`;
-      if (onEmit) {
-        onEmit(summary);
-      }
-    }, config.summaryIntervalMs);
-    return [`Running ${label}...`];
-  };
-  /**
-   * result / error: collect whatever flush() yields, then reset for the next turn.
-   * @returns The flushed text (empty in this implementation)
-   */
-  const onTerminal = (): string[] => {
-    const leftover = flush();
-    goIdle();
-    return leftover;
-  };
-  /**
-   * Dispatch one stream event to its handler.
-   * @param event - The streaming event from Claude
-   * @returns Array of strings to speak
-   */
-  const processEvent = (event: ClaudeStreamEvent): string[] => {
-    const kind = event.type;
-    if (kind === "text_delta") return onTextDelta(event);
-    if (kind === "tool_start") return onToolStart(event);
-    if (kind === "tool_end") {
-      // Forget the tool name but stay in long-task mode: more tools may follow.
-      activeTool = null;
-      return [];
-    }
-    if (kind === "result" || kind === "error") return onTerminal();
-    return [];
-  };
-  return { processEvent, flush, reset: goIdle };
-}

package/server/voice/prompt-builder.ts DELETED Viewed

@@ -1,108 +0,0 @@
-/**
- * Shared prompt builder for all session types (voice, text).
- *
- * Loads the base system.md template once at module level and replaces the
- * <<MODE_OVERLAY>> placeholder with the appropriate overlay file for the
- * given session mode. For agent sessions, also injects SOUL/MEMORY/HEARTBEAT
- * files and the agent working directory.
- *
- * - buildAgentPrompt: full agent prompt with mode overlay + agent files
- * - buildDefaultPrompt: base prompt with mode overlay only (no agent files)
- */
-import { readFileSync } from "fs";
-import { dirname, join } from "path";
-import { fileURLToPath } from "url";
-import { getAgent, AGENTS_DIR } from "../services/agent-store.js";
-// ============================================================================
-// CONSTANTS
-// ============================================================================
-const __dirname = dirname(fileURLToPath(import.meta.url)); // directory containing this module file
-const DEFAULTS_DIR = join(__dirname, "..", "..", "init", "defaults"); // two levels up from this module, then init/defaults
-/**
- * Base system prompt template with <<MODE_OVERLAY>> placeholder.
- * Read synchronously as a module-level constant (once, when this module is
- * evaluated) and trimmed.
- */
-const BASE_SYSTEM_PROMPT = readFileSync(join(DEFAULTS_DIR, "system.md"), "utf-8").trim();
-/**
- * Voice-specific behavioral instructions.
- * Loaded the same way from system-voice-overlay.md.
- */
-const VOICE_OVERLAY = readFileSync(join(DEFAULTS_DIR, "system-voice-overlay.md"), "utf-8").trim();
-/**
- * Text-specific behavioral instructions.
- * Loaded the same way from system-text-overlay.md.
- */
-const TEXT_OVERLAY = readFileSync(join(DEFAULTS_DIR, "system-text-overlay.md"), "utf-8").trim();
-/**
- * Map of session mode to overlay content.
- * Looked up by applyOverlay() to fill the <<MODE_OVERLAY>> placeholder.
- */
-const OVERLAY_MAP: Record<SessionMode, string> = {
-  voice: VOICE_OVERLAY,
-  text: TEXT_OVERLAY,
-};
-// ============================================================================
-// TYPES
-// ============================================================================
-/**
- * Session mode determines which overlay is injected into the base prompt.
- * Each member is a key of OVERLAY_MAP.
- */
-export type SessionMode = "voice" | "text";
-// ============================================================================
-// MAIN HANDLERS
-// ============================================================================
-/**
- * Build a complete system prompt for an agent session.
- *
- * Loads the agent's SOUL.md, MEMORY.md, and HEARTBEAT.md via getAgent(),
- * then fills the <<MODE_OVERLAY>>, <<AGENT_DIR>>, and <<AGENT_FILES>>
- * placeholders in the base system prompt. Every <<AGENT_DIR>> occurrence is
- * replaced; only the first <<AGENT_FILES>> occurrence is.
- *
- * @param agentId - The agent identifier to load files for
- * @param mode - Session mode ("voice" or "text") to select the overlay
- * @returns Complete system prompt string ready for customSystemPrompt
- */
-export async function buildAgentPrompt(agentId: string, mode: SessionMode): Promise<string> {
-  const agent = await getAgent(agentId);
-  const agentDir = join(AGENTS_DIR, agentId);
-  const agentFiles = [
-    `<SOUL.md>\n${agent.soulMd}\n</SOUL.md>`,
-    `<HEARTBEAT.md>\n${agent.heartbeatMd}\n</HEARTBEAT.md>`,
-    `<MEMORY.md>\n${agent.memoryMd}\n</MEMORY.md>`,
-  ].join("\n\n");
-  // Replacer functions are used instead of replacement strings: a string
-  // replacement interprets "$$", "$&", "$`" and "$'" as substitution patterns,
-  // which would corrupt agent markdown or paths that contain a dollar sign.
-  return applyOverlay(BASE_SYSTEM_PROMPT, mode)
-    .replaceAll("<<AGENT_DIR>>", () => agentDir)
-    .replace("<<AGENT_FILES>>", () => agentFiles);
-}
-/**
- * Produce the system prompt for a session that has no agent attached.
- *
- * Only the <<MODE_OVERLAY>> placeholder is filled in; no agent directory or
- * agent files are injected. Used for non-agent sessions (e.g. claude-session
- * fallback, default Twilio calls).
- *
- * @param mode - Session mode ("voice" or "text") to select the overlay
- * @returns System prompt string with overlay applied but no agent files
- */
-export function buildDefaultPrompt(mode: SessionMode): string {
-  const prompt = applyOverlay(BASE_SYSTEM_PROMPT, mode);
-  return prompt;
-}
-// ============================================================================
-// HELPER FUNCTIONS
-// ============================================================================
-/**
- * Replace <<MODE_OVERLAY>> placeholders in a template with the overlay for the given mode.
- *
- * An overlay that is an empty string is valid and simply removes the placeholders.
- *
- * @param template - Base prompt template containing <<MODE_OVERLAY>> placeholders
- * @param mode - Session mode to select the overlay content
- * @returns Template with all <<MODE_OVERLAY>> placeholders replaced
- * @throws Error if the mode has no entry in OVERLAY_MAP
- */
-function applyOverlay(template: string, mode: SessionMode): string {
-  const overlay = OVERLAY_MAP[mode];
-  // Compare against undefined rather than falsiness so that an empty overlay
-  // file is not misreported as an unknown mode.
-  if (overlay === undefined) {
-    throw new Error(`Unknown session mode: "${mode}"`);
-  }
-  // A replacer function inserts the overlay verbatim; a replacement string
-  // would treat "$$", "$&", "$`" and "$'" inside the overlay as patterns.
-  return template.replaceAll("<<MODE_OVERLAY>>", () => overlay);
-}

package/server/voice/session-lock.ts DELETED Viewed

@@ -1,123 +0,0 @@
-/**
- * Cross-process session limiter using PID-based lock files.
- *
- * Ensures the total number of active voice sessions (local mic + Twilio combined)
- * does not exceed MAX_CONCURRENT_SESSIONS. Stale lock files from crashed processes
- * are automatically cleaned up on every acquire.
- *
- * Responsibilities:
- * - Acquire a session slot by creating a PID lock file in ~/.claude-voice-sessions/
- * - Validate existing lock files by checking if their PIDs are still alive
- * - Clean up stale lock files from dead processes
- * - Release the lock file on session stop or process exit
- */
-import { mkdirSync, readdirSync, readFileSync, writeFileSync, unlinkSync } from "fs";
-import { join } from "path";
-import { homedir } from "os";
-import { randomUUID } from "crypto";
-// ============================================================================
-// CONSTANTS
-// ============================================================================
-/**
- * Directory where PID lock files are stored.
- * Resolves to .claude-voice-sessions inside the current user's home directory.
- */
-const LOCK_DIR = join(homedir(), ".claude-voice-sessions");
-// ============================================================================
-// INTERFACES
-// ============================================================================
-/**
- * Handle returned by acquireSessionLock. Call release() to free the session slot.
- */
-export interface SessionLock {
-  /**
-   * Release the session lock (deletes the lock file).
-   * The implementation in acquireSessionLock guards this with a flag, so
-   * calling it more than once has no further effect.
-   */
-  release: () => void;
-}
-// ============================================================================
-// MAIN HANDLERS
-// ============================================================================
-/**
- * Acquire a session lock slot. Throws if the maximum number of concurrent
- * sessions has been reached.
- *
- * Cleans up stale lock files (dead or unparseable PIDs) on every call. Creates
- * a new lock file containing the current PID. Registers a process.on('exit')
- * handler as a safety net to release on shutdown; the handler is removed again
- * when release() is called, so repeated acquire/release cycles in one process
- * do not accumulate listeners.
- *
- * NOTE(review): counting existing locks and creating the new one are separate
- * filesystem steps, so two processes acquiring at the same moment could both
- * pass the limit check -- confirm whether that matters for callers.
- *
- * @param maxSessions - Maximum number of concurrent sessions allowed
- * @returns A SessionLock handle with a release() method
- * @throws Error if maxSessions has been reached
- */
-export function acquireSessionLock(maxSessions: number): SessionLock {
-  // Ensure lock directory exists
-  mkdirSync(LOCK_DIR, { recursive: true });
-  // List existing lock files and validate their PIDs
-  const files = readdirSync(LOCK_DIR).filter((f) => f.endsWith(".lock"));
-  let activeCount = 0;
-  for (const file of files) {
-    const filePath = join(LOCK_DIR, file);
-    try {
-      const pid = parseInt(readFileSync(filePath, "utf-8").trim(), 10);
-      if (isNaN(pid) || !isProcessAlive(pid)) {
-        // Stale lock file -- process is dead, clean it up
-        unlinkSync(filePath);
-      } else {
-        activeCount++;
-      }
-    } catch {
-      // The file vanished between readdir and read, or could not be read or
-      // removed: do not count it, and make a best-effort attempt to delete it.
-      try { unlinkSync(filePath); } catch { /* already gone */ }
-    }
-  }
-  if (activeCount >= maxSessions) {
-    throw new Error(
-      `Session limit reached (${activeCount}/${maxSessions}). ` +
-      `Cannot start another voice session.`
-    );
-  }
-  // Create a new lock file with the current PID
-  const lockFile = join(LOCK_DIR, `${randomUUID()}.lock`);
-  writeFileSync(lockFile, String(process.pid), "utf-8");
-  let released = false;
-  /** Delete the lock file if it hasn't been released yet */
-  function release(): void {
-    if (released) return;
-    released = true;
-    // Drop the exit hook: without this every acquired lock would leave a
-    // listener on `process` for the lifetime of the server.
-    process.removeListener("exit", release);
-    try { unlinkSync(lockFile); } catch { /* already gone */ }
-  }
-  // Safety net: release on process exit
-  process.on("exit", release);
-  return { release };
-}
-// ============================================================================
-// HELPER FUNCTIONS
-// ============================================================================
-/**
- * Check if a process with the given PID is still alive.
- * Uses signal 0 which does not kill the process -- it only checks existence.
- *
- * Non-positive and non-integer PIDs are rejected up front: pid 0 and negative
- * values address process groups rather than a single process, so probing them
- * would report "alive" for a PID that identifies no session.
- *
- * @param pid - The process ID to check
- * @returns true if the process exists (including when it exists but may not be
- *          signalled by this user), false otherwise
- */
-export function isProcessAlive(pid: number): boolean {
-  if (!Number.isInteger(pid) || pid <= 0) {
-    return false;
-  }
-  try {
-    process.kill(pid, 0);
-    return true;
-  } catch (err: unknown) {
-    // EPERM means the target exists but belongs to someone we may not signal;
-    // any other failure (e.g. ESRCH) means there is no such process.
-    return err instanceof Error && (err as NodeJS.ErrnoException).code === "EPERM";
-  }
-}