voicecc 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/voicecc.js CHANGED
@@ -95,13 +95,88 @@ function generatePassword() {
95
95
  return randomBytes(18).toString("base64url");
96
96
  }
97
97
 
98
/**
 * Locate a Python 3.12+ interpreter on PATH.
 *
 * Tries version-pinned names first (python3.12, python3.13), then the
 * generic names, and parses `--version` output to confirm the interpreter
 * is at least 3.12.
 *
 * @returns {string|null} The first suitable command name, or null if none found.
 */
function findPython() {
  const candidates = ["python3.12", "python3.13", "python3", "python"];
  const versionRe = /Python (\d+)\.(\d+)/;
  for (const candidate of candidates) {
    if (!commandExists(candidate)) continue;
    try {
      // Merge stderr into stdout: some Pythons print the version to stderr.
      const version = execSync(`${candidate} --version 2>&1`, { encoding: "utf-8" }).trim();
      const match = versionRe.exec(version);
      if (match) {
        // Always pass an explicit radix to parseInt.
        const major = Number.parseInt(match[1], 10);
        const minor = Number.parseInt(match[2], 10);
        if (major > 3 || (major === 3 && minor >= 12)) {
          return candidate;
        }
      }
    } catch { /* interpreter exists but failed to run; try the next one */ }
  }
  return null;
}
113
+
114
/**
 * Install a package (or space-separated list of packages) via whichever
 * supported Linux package manager is present, checked in priority order.
 *
 * @param {string} pkg - Package name(s) to install
 * @throws {Error} If none of apt-get, dnf, or yum is available
 */
function linuxInstallPackage(pkg) {
  const managers = [
    ["apt-get", `apt-get update -qq && apt-get install -y -qq ${pkg} 2>&1`],
    ["dnf", `dnf install -y ${pkg} 2>&1`],
    ["yum", `yum install -y ${pkg} 2>&1`],
  ];
  for (const [manager, command] of managers) {
    if (commandExists(manager)) {
      execSync(command, { stdio: "inherit" });
      return;
    }
  }
  throw new Error("No supported package manager found (apt-get, dnf, yum).");
}
125
+
126
/**
 * Return the command name of a Python 3.12+ interpreter, installing one
 * automatically on Linux if none is found. Exits the process when Python
 * is unavailable and cannot be installed.
 *
 * @returns {string} Command name of a working Python 3.12+ interpreter
 */
function ensurePython() {
  const existing = findPython();
  if (existing) {
    return existing;
  }

  // Automatic installation is only attempted on Linux.
  if (process.platform !== "linux") {
    console.error("ERROR: Python 3.12+ is required but not found.");
    console.error("Install Python 3.12+ and run 'voicecc' again.");
    process.exit(1);
  }

  console.log("Python 3.12+ not found. Installing...");
  try {
    linuxInstallPackage("python3.12 python3.12-venv python3.12-dev");
  } catch (err) {
    console.error(`Failed to install Python: ${err.message}`);
    process.exit(1);
  }

  // Re-probe PATH after installation.
  const installed = findPython();
  if (!installed) {
    console.error("Python installation completed but Python 3.12+ still not found.");
    process.exit(1);
  }
  return installed;
}
151
+
152
/**
 * Verify that the given Python interpreter can import the `venv` module,
 * installing the matching pythonX.Y-venv distro package on Linux when it
 * cannot. Exits the process on other platforms or if installation fails.
 *
 * @param {string} systemPython - Command name of the Python interpreter
 */
function ensureVenvModule(systemPython) {
  try {
    execSync(`${systemPython} -c "import venv" 2>&1`, { encoding: "utf-8" });
    return; // venv module is importable; nothing to do
  } catch { /* venv not available */ }

  if (process.platform !== "linux") {
    console.error("ERROR: Python venv module is missing.");
    console.error("Install it and run 'voicecc' again.");
    process.exit(1);
  }

  // Derive the package name (e.g. python3.12-venv) from the interpreter's
  // reported version, falling back to the generic python3-venv package.
  const versionMatch = execSync(`${systemPython} --version 2>&1`, { encoding: "utf-8" })
    .trim()
    .match(/Python (\d+)\.(\d+)/);
  const venvPkg = versionMatch ? `python${versionMatch[1]}.${versionMatch[2]}-venv` : "python3-venv";
  console.log(`Python venv module missing. Installing ${venvPkg}...`);
  try {
    linuxInstallPackage(venvPkg);
  } catch (err) {
    console.error(`Failed to install ${venvPkg}: ${err.message}`);
    process.exit(1);
  }
}
174
+
98
175
  /**
99
176
  * Ensure the Python virtual environment exists and dependencies are installed.
100
177
  *
101
178
  * Creates voice-server/.venv if missing, installs requirements.txt, and
102
179
  * stores a checksum so subsequent runs skip installation unless deps change.
103
- *
104
- * @returns true if the venv is ready, false if Python is unavailable
105
180
  */
106
181
  function ensurePythonVenv() {
107
182
  const voiceServerDir = join(PKG_ROOT, "voice-server");
@@ -114,72 +189,13 @@ function ensurePythonVenv() {
114
189
  return true; // No voice-server requirements, nothing to do
115
190
  }
116
191
 
117
- // Find a working Python 3.12+
118
- const pythonCandidates = ["python3.12", "python3.13", "python3", "python"];
119
- let systemPython = null;
120
- for (const candidate of pythonCandidates) {
121
- if (commandExists(candidate)) {
122
- try {
123
- const version = execSync(`${candidate} --version 2>&1`, { encoding: "utf-8" }).trim();
124
- const match = version.match(/Python (\d+)\.(\d+)/);
125
- if (match && (parseInt(match[1]) > 3 || (parseInt(match[1]) === 3 && parseInt(match[2]) >= 12))) {
126
- systemPython = candidate;
127
- break;
128
- }
129
- } catch { /* skip */ }
130
- }
131
- }
192
+ // Step 1: Ensure Python 3.12+ is installed
193
+ const systemPython = ensurePython();
132
194
 
133
- if (!systemPython) {
134
- // Attempt to install Python 3.12 automatically on Linux
135
- if (process.platform === "linux") {
136
- console.log("Python 3.12+ not found. Installing automatically...");
137
- try {
138
- if (commandExists("apt-get")) {
139
- execSync("apt-get update -qq && apt-get install -y -qq python3.12 python3.12-venv python3.12-dev 2>&1", { stdio: "inherit" });
140
- } else if (commandExists("dnf")) {
141
- execSync("dnf install -y python3.12 2>&1", { stdio: "inherit" });
142
- } else if (commandExists("yum")) {
143
- execSync("yum install -y python3.12 2>&1", { stdio: "inherit" });
144
- } else {
145
- console.error("No supported package manager found (apt-get, dnf, yum).");
146
- console.error("Install Python 3.12+ manually and run 'voicecc' again.");
147
- process.exit(1);
148
- }
149
- // Re-check for Python after installation
150
- for (const candidate of pythonCandidates) {
151
- if (commandExists(candidate)) {
152
- try {
153
- const version = execSync(`${candidate} --version 2>&1`, { encoding: "utf-8" }).trim();
154
- const match = version.match(/Python (\d+)\.(\d+)/);
155
- if (match && (parseInt(match[1]) > 3 || (parseInt(match[1]) === 3 && parseInt(match[2]) >= 12))) {
156
- systemPython = candidate;
157
- console.log(`Python installed successfully: ${version}`);
158
- break;
159
- }
160
- } catch { /* skip */ }
161
- }
162
- }
163
- if (!systemPython) {
164
- console.error("Python installation completed but Python 3.12+ still not found.");
165
- console.error("Install Python 3.12+ manually and run 'voicecc' again.");
166
- process.exit(1);
167
- }
168
- } catch (err) {
169
- console.error(`Failed to install Python 3.12: ${err.message}`);
170
- console.error("Install Python 3.12+ manually and run 'voicecc' again.");
171
- process.exit(1);
172
- }
173
- } else {
174
- console.error("");
175
- console.error("ERROR: Python 3.12+ is required but not found.");
176
- console.error("Install Python 3.12+ and run 'voicecc' again.");
177
- console.error("");
178
- process.exit(1);
179
- }
180
- }
195
+ // Step 2: Ensure venv module is available
196
+ ensureVenvModule(systemPython);
181
197
 
182
- // Check if venv needs to be created
198
+ // Step 3: Create venv if needed
183
199
  if (!existsSync(venvPython)) {
184
200
  console.log("Setting up Python environment for voice server...");
185
201
  try {
@@ -190,7 +206,7 @@ function ensurePythonVenv() {
190
206
  }
191
207
  }
192
208
 
193
- // Check if requirements have changed since last install
209
+ // Step 4: Install/update dependencies if requirements changed
194
210
  const currentChecksum = (() => {
195
211
  try {
196
212
  const content = readFileSync(requirementsFile, "utf-8");
@@ -207,7 +223,6 @@ function ensurePythonVenv() {
207
223
  return true; // Dependencies up to date
208
224
  }
209
225
 
210
- // Install/update dependencies
211
226
  console.log("Installing Python dependencies for voice server...");
212
227
  try {
213
228
  execSync(`${venvPython} -m pip install -r ${requirementsFile}`, {
@@ -740,6 +755,15 @@ if (!commandExists("claude")) {
740
755
  // Runs on every start but skips pip install if requirements.txt hasn't changed.
741
756
  ensurePythonVenv();
742
757
 
758
// Hard check: verify the venv actually exists after setup
const expectedVenvPython = join(PKG_ROOT, "voice-server", ".venv", "bin", "python");
if (!existsSync(expectedVenvPython)) {
  [
    `ERROR: Python venv not found at ${expectedVenvPython}`,
    "The voice-server directory or its venv is missing from the installation.",
    "Try reinstalling: npm install -g voicecc",
  ].forEach((line) => console.error(line));
  process.exit(1);
}
766
+
743
767
  // If already running, show info and exit
744
768
  if (isRunning()) {
745
769
  showInfo();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voicecc",
3
- "version": "1.2.2",
3
+ "version": "1.2.4",
4
4
  "description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
5
5
  "repository": {
6
6
  "type": "git",
@@ -24,6 +24,7 @@
24
24
  "files": [
25
25
  "bin/",
26
26
  "server/",
27
+ "voice-server/",
27
28
  "dashboard/dist/",
28
29
  "dashboard/server.ts",
29
30
  "dashboard/routes/",
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,333 @@
1
+ """
2
+ Custom Pipecat LLMService wrapping the Python Claude Agent SDK (ClaudeSDKClient).
3
+
4
+ Uses ClaudeSDKClient for persistent multi-turn voice sessions with full tool use.
5
+ Does NOT use Pipecat's built-in context accumulation -- the Claude session maintains
6
+ its own conversation history internally.
7
+
8
+ Responsibilities:
9
+ - Override process_frame to handle LLM context frames from Pipecat aggregators
10
+ - Extract only the last user message from Pipecat context (SDK tracks history)
11
+ - Clear Pipecat context after each turn to prevent unbounded memory growth
12
+ - Support existing_client for heartbeat session handoff
13
+ - Support initial_prompt for agent-speaks-first flows
14
+ """
15
+
16
+ import asyncio
17
+ import logging
18
+ from dataclasses import dataclass
19
+
20
+ from claude_agent_sdk import (
21
+ AssistantMessage,
22
+ ClaudeAgentOptions,
23
+ ClaudeSDKClient,
24
+ ResultMessage,
25
+ TextBlock,
26
+ ToolUseBlock,
27
+ )
28
+ from pipecat.frames.frames import (
29
+ CancelFrame,
30
+ EndFrame,
31
+ Frame,
32
+ FunctionCallsStartedFrame,
33
+ InterruptionFrame,
34
+ LLMContextFrame,
35
+ LLMFullResponseEndFrame,
36
+ LLMFullResponseStartFrame,
37
+ LLMMessagesFrame,
38
+ LLMTextFrame,
39
+ StartFrame,
40
+ TextFrame,
41
+ )
42
+ from pipecat.processors.aggregators.llm_context import LLMContext
43
+ from pipecat.processors.aggregators.openai_llm_context import (
44
+ OpenAILLMContext,
45
+ OpenAILLMContextFrame,
46
+ )
47
+ from pipecat.processors.frame_processor import FrameDirection
48
+ from pipecat.services.llm_service import LLMService
49
+
50
+ logger = logging.getLogger(__name__)
51
+
52
+
53
+ # ============================================================================
54
+ # TYPES
55
+ # ============================================================================
56
+
57
@dataclass
class ClaudeLLMServiceConfig:
    """Configuration for ClaudeLLMService.

    Attributes:
        cwd: Working directory for the Claude Code session.
        system_prompt: System prompt for voice mode.
        allowed_tools: Tool allowlist (empty list = all tools allowed).
        initial_prompt: Optional first message so the agent speaks first.
        existing_client: Pre-existing ClaudeSDKClient (e.g. from heartbeat
            handoff).
    """

    cwd: str
    system_prompt: str
    allowed_tools: list[str] | None = None
    initial_prompt: str | None = None
    existing_client: ClaudeSDKClient | None = None
73
+
74
+
75
+ # ============================================================================
76
+ # MAIN HANDLERS
77
+ # ============================================================================
78
+
79
class ClaudeLLMService(LLMService):
    """Pipecat LLMService that wraps ClaudeSDKClient for voice conversations.

    Intercepts LLM context frames from the user aggregator, extracts the last
    user message, sends it to Claude via the SDK, and pushes text frames
    downstream for TTS.
    """

    def __init__(self, config: ClaudeLLMServiceConfig, **kwargs):
        super().__init__(**kwargs)
        self._config = config
        self._client: ClaudeSDKClient | None = config.existing_client
        self._connected = config.existing_client is not None
        self._initial_prompt_sent = False
        self._processing = False
        self._current_task: asyncio.Task | None = None

        # Initialize LLMSettings fields — Claude SDK manages these internally,
        # so we set them all to None (unsupported).
        self._settings.model = None
        self._settings.system_instruction = None
        self._settings.temperature = None
        self._settings.max_tokens = None
        self._settings.top_p = None
        self._settings.top_k = None
        self._settings.frequency_penalty = None
        self._settings.presence_penalty = None
        self._settings.seed = None
        self._settings.filter_incomplete_user_turns = None
        self._settings.user_turn_completion_config = None

    async def start(self, frame: StartFrame):
        """Handle pipeline start. Sends initial_prompt if configured."""
        await super().start(frame)
        if self._config.initial_prompt and not self._initial_prompt_sent:
            self._initial_prompt_sent = True
            await self._ensure_client()
            await self._send_to_claude(self._config.initial_prompt)

    async def stop(self, frame: EndFrame):
        """Handle pipeline stop. Disconnects the Claude session."""
        await self.close()
        await super().stop(frame)

    async def cancel(self, frame: CancelFrame):
        """Handle pipeline cancel. Disconnects the Claude session."""
        await self.close()
        await super().cancel(frame)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames.

        Handles context frames from Pipecat's aggregators by extracting the last
        user message and sending it to Claude. All other frames pass through.

        Args:
            frame: The incoming frame
            direction: Frame direction (upstream/downstream)
        """
        await super().process_frame(frame, direction)

        context = None
        if isinstance(frame, OpenAILLMContextFrame):
            context = frame.context
        elif isinstance(frame, LLMContextFrame):
            context = frame.context
        elif isinstance(frame, LLMMessagesFrame):
            context = OpenAILLMContext.from_messages(frame.messages)
        elif isinstance(frame, InterruptionFrame):
            await self.interrupt()
            await self.push_frame(frame, direction)
            return
        else:
            await self.push_frame(frame, direction)
            return

        if context:
            # Extract the last user message text from the Pipecat context
            user_text = _extract_last_user_message(context)
            if not user_text:
                logger.warning("[claude-llm] No user message found in context")
                return

            # Clear Pipecat context to prevent unbounded growth
            # (Claude SDK maintains its own conversation history)
            if isinstance(context, OpenAILLMContext):
                context.set_messages([])
            elif isinstance(context, LLMContext):
                context.messages.clear()

            # Cancel any in-flight query before starting a new one
            await self._cancel_current_task()

            await self._ensure_client()

            async def _run_query():
                try:
                    await self.push_frame(LLMFullResponseStartFrame())
                    await self.start_processing_metrics()
                    await self._send_to_claude(user_text)
                except asyncio.CancelledError:
                    logger.info("[claude-llm] Query cancelled by new input")
                except Exception as e:
                    logger.error(f"[claude-llm] Error during Claude query: {e}")
                    await self.push_error(error_msg=f"Claude query error: {e}", exception=e)
                finally:
                    # Always close the response envelope so downstream
                    # processors (TTS, aggregators) are not left waiting.
                    await self.stop_processing_metrics()
                    await self.push_frame(LLMFullResponseEndFrame())

            self._current_task = asyncio.create_task(_run_query())
            await self._current_task

    async def _cancel_current_task(self) -> None:
        """Cancel the in-flight query task if one is running."""
        if self._current_task and not self._current_task.done():
            self._current_task.cancel()
            try:
                await self._current_task
            except (asyncio.CancelledError, Exception):
                # Best-effort: the task is being discarded either way.
                pass
        self._current_task = None

    async def interrupt(self) -> None:
        """Interrupt the current Claude response and cancel the query task."""
        await self._cancel_current_task()
        if self._client and self._connected:
            try:
                await self._client.interrupt()
            except Exception as e:
                logger.warning(f"[claude-llm] Interrupt error: {e}")

    async def close(self) -> None:
        """Disconnect the Claude session."""
        if self._client and self._connected:
            try:
                await self._client.disconnect()
            except Exception as e:
                logger.warning(f"[claude-llm] Disconnect error: {e}")
            finally:
                self._connected = False
                self._client = None

    # ============================================================================
    # HELPER FUNCTIONS
    # ============================================================================

    async def _ensure_client(self) -> None:
        """Create and connect ClaudeSDKClient if not already connected.

        Uses existing_client if provided in config, otherwise creates a new one.
        """
        if self._client and self._connected:
            return

        if not self._client:
            options = ClaudeAgentOptions(
                system_prompt=self._config.system_prompt,
                cwd=self._config.cwd,
                allowed_tools=self._config.allowed_tools or [],
                permission_mode="bypassPermissions",
                include_partial_messages=True,
                max_thinking_tokens=10000,
            )
            self._client = ClaudeSDKClient(options=options)

        await self._client.connect()
        self._connected = True
        logger.info("[claude-llm] Claude session connected")

    async def _send_to_claude(self, text: str) -> None:
        """Send a user message to Claude and push response text frames downstream.

        Iterates over the streaming response, extracting text deltas and tool use
        events. Text is pushed as LLMTextFrame for TTS. Tool starts are pushed as
        TextFrame markers of the form "__tool_start:<name>" for the narration
        processor (NOT as FunctionCallsStartedFrame).

        Args:
            text: The user message to send

        Raises:
            RuntimeError: If the Claude client is not connected
        """
        if not self._client:
            raise RuntimeError("Claude client not connected")

        self._processing = True
        has_streamed = False

        try:
            await self._client.query(text)

            async for msg in self._client.receive_response():
                if isinstance(msg, AssistantMessage):
                    # Process content blocks from the assistant message
                    for block in msg.content:
                        if isinstance(block, TextBlock) and block.text:
                            if not has_streamed:
                                has_streamed = True
                                await self.start_ttfb_metrics()
                                await self.stop_ttfb_metrics()
                            await self.push_frame(LLMTextFrame(block.text))
                        elif isinstance(block, ToolUseBlock):
                            logger.info(f"[claude-llm] Tool use: {block.name}")
                            # Push a text frame announcing tool use for narration
                            await self.push_frame(TextFrame(f"__tool_start:{block.name}"))

                elif isinstance(msg, ResultMessage):
                    if msg.is_error:
                        logger.error(f"[claude-llm] Result error: {msg.subtype}")
                    else:
                        logger.info("[claude-llm] Turn complete")
                    break

        finally:
            self._processing = False
291
+
292
+
293
def _extract_last_user_message(context: OpenAILLMContext | LLMContext | object) -> str | None:
    """Extract the last user message text from a Pipecat LLM context.

    The context contains OpenAI-format messages. We find the last message
    with role="user" and extract its text content.

    Args:
        context: Pipecat LLM context (OpenAILLMContext, LLMContext, or other)

    Returns:
        The last user message text, or None if no user message found
    """
    if isinstance(context, OpenAILLMContext):
        messages = context.get_messages()
    elif isinstance(context, LLMContext):
        messages = context.messages
    else:
        messages = getattr(context, "messages", [])

    if not messages:
        return None

    # Walk backwards to find the last user message
    for msg in reversed(messages):
        if isinstance(msg, dict):
            msg_dict = msg
        elif hasattr(msg, "__dict__"):
            msg_dict = vars(msg)
        else:
            msg_dict = {}

        if msg_dict.get("role") != "user":
            continue

        content = msg_dict.get("content", "")
        if isinstance(content, str):
            return content.strip() or None
        if isinstance(content, list):
            # Content may be a list of content blocks; gather the text parts.
            pieces = []
            for block in content:
                if isinstance(block, dict) and block.get("type") == "text":
                    pieces.append(block.get("text", ""))
                elif isinstance(block, str):
                    pieces.append(block)
            combined = " ".join(pieces).strip()
            return combined or None
        # Unrecognized content shape: keep scanning older messages.

    return None