npm - voicecc - Versions diffs - 1.2.1 → 1.2.3 - Mend

voicecc 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/bin/voicecc.js +66 -10
package/package.json +2 -1
package/voice-server/.python-version +1 -0
package/voice-server/claude_llm_service.py +333 -0
package/voice-server/claude_session.py +312 -0
package/voice-server/config.py +340 -0
package/voice-server/dev-server-start.sh +128 -0
package/voice-server/heartbeat.py +505 -0
package/voice-server/narration_processor.py +140 -0
package/voice-server/requirements.txt +8 -0
package/voice-server/server.py +335 -0
package/voice-server/stop_phrase_processor.py +50 -0
package/voice-server/twilio_pipeline.py +237 -0
package/voice-server/voice_pipeline.py +147 -0

package/bin/voicecc.js CHANGED Viewed

@@ -131,11 +131,52 @@ function ensurePythonVenv() {
   }
   if (!systemPython) {
-    console.log("");
-    console.log("WARNING: Python 3.12+ not found. Voice server will not be available.");
-    console.log("Install Python 3.12+ and run 'voicecc' again to enable voice features.");
-    console.log("");
-    return false;
+    // Attempt to install Python 3.12 automatically on Linux
+    if (process.platform === "linux") {
+      console.log("Python 3.12+ not found. Installing automatically...");
+      try {
+        if (commandExists("apt-get")) {
+          execSync("apt-get update -qq && apt-get install -y -qq python3.12 python3.12-venv python3.12-dev 2>&1", { stdio: "inherit" });
+        } else if (commandExists("dnf")) {
+          execSync("dnf install -y python3.12 2>&1", { stdio: "inherit" });
+        } else if (commandExists("yum")) {
+          execSync("yum install -y python3.12 2>&1", { stdio: "inherit" });
+        } else {
+          console.error("No supported package manager found (apt-get, dnf, yum).");
+          console.error("Install Python 3.12+ manually and run 'voicecc' again.");
+          process.exit(1);
+        }
+        // Re-check for Python after installation
+        for (const candidate of pythonCandidates) {
+          if (commandExists(candidate)) {
+            try {
+              const version = execSync(`${candidate} --version 2>&1`, { encoding: "utf-8" }).trim();
+              const match = version.match(/Python (\d+)\.(\d+)/);
+              if (match && (parseInt(match[1]) > 3 || (parseInt(match[1]) === 3 && parseInt(match[2]) >= 12))) {
+                systemPython = candidate;
+                console.log(`Python installed successfully: ${version}`);
+                break;
+              }
+            } catch { /* skip */ }
+          }
+        }
+        if (!systemPython) {
+          console.error("Python installation completed but Python 3.12+ still not found.");
+          console.error("Install Python 3.12+ manually and run 'voicecc' again.");
+          process.exit(1);
+        }
+      } catch (err) {
+        console.error(`Failed to install Python 3.12: ${err.message}`);
+        console.error("Install Python 3.12+ manually and run 'voicecc' again.");
+        process.exit(1);
+      }
+    } else {
+      console.error("");
+      console.error("ERROR: Python 3.12+ is required but not found.");
+      console.error("Install Python 3.12+ and run 'voicecc' again.");
+      console.error("");
+      process.exit(1);
+    }
   }
   // Check if venv needs to be created
@@ -144,9 +185,8 @@ function ensurePythonVenv() {
     try {
       execSync(`${systemPython} -m venv ${venvDir}`, { stdio: "inherit" });
     } catch (err) {
-      console.log(`Failed to create Python venv: ${err.message}`);
-      console.log("Voice server will not be available.");
-      return false;
+      console.error(`Failed to create Python venv: ${err.message}`);
+      process.exit(1);
     }
   }
@@ -177,8 +217,8 @@ function ensurePythonVenv() {
     writeFileSync(checksumFile, currentChecksum);
     console.log("Python dependencies installed.");
   } catch (err) {
-    console.log(`Failed to install Python dependencies: ${err.message}`);
-    console.log("Voice server may not work correctly.");
+    console.error(`Failed to install Python dependencies: ${err.message}`);
+    process.exit(1);
   }
   return true;
@@ -689,10 +729,26 @@ if (!existsSync(ENV_PATH)) {
   await runSetupWizard();
 }
+// Verify Claude CLI is available
+if (!commandExists("claude")) {
+  console.error("ERROR: Claude Code CLI ('claude') is not installed.");
+  console.error("Install it with: npm install -g @anthropic-ai/claude-code");
+  process.exit(1);
+}
 // Ensure Python venv and dependencies are set up for the voice server.
 // Runs on every start but skips pip install if requirements.txt hasn't changed.
 ensurePythonVenv();
+// Hard check: verify the venv actually exists after setup
+const expectedVenvPython = join(PKG_ROOT, "voice-server", ".venv", "bin", "python");
+if (!existsSync(expectedVenvPython)) {
+  console.error(`ERROR: Python venv not found at ${expectedVenvPython}`);
+  console.error("The voice-server directory or its venv is missing from the installation.");
+  console.error("Try reinstalling: npm install -g voicecc");
+  process.exit(1);
+}
 // If already running, show info and exit
 if (isRunning()) {
   showInfo();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "voicecc",
-  "version": "1.2.1",
+  "version": "1.2.3",
   "description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
   "repository": {
     "type": "git",
@@ -24,6 +24,7 @@
   "files": [
     "bin/",
     "server/",
+    "voice-server/",
     "dashboard/dist/",
     "dashboard/server.ts",
     "dashboard/routes/",

package/voice-server/.python-version ADDED Viewed

@@ -0,0 +1 @@
+3.12

package/voice-server/claude_llm_service.py ADDED Viewed

@@ -0,0 +1,333 @@
+"""
+Custom Pipecat LLMService wrapping the Python Claude Agent SDK (ClaudeSDKClient).
+Uses ClaudeSDKClient for persistent multi-turn voice sessions with full tool use.
+Does NOT use Pipecat's built-in context accumulation -- the Claude session maintains
+its own conversation history internally.
+Responsibilities:
+- Override process_frame to handle LLM context frames from Pipecat aggregators
+- Extract only the last user message from Pipecat context (SDK tracks history)
+- Clear Pipecat context after each turn to prevent unbounded memory growth
+- Support existing_client for heartbeat session handoff
+- Support initial_prompt for agent-speaks-first flows
+"""
+import asyncio
+import logging
+from dataclasses import dataclass
+from claude_agent_sdk import (
+    AssistantMessage,
+    ClaudeAgentOptions,
+    ClaudeSDKClient,
+    ResultMessage,
+    TextBlock,
+    ToolUseBlock,
+)
+from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
+    Frame,
+    FunctionCallsStartedFrame,
+    InterruptionFrame,
+    LLMContextFrame,
+    LLMFullResponseEndFrame,
+    LLMFullResponseStartFrame,
+    LLMMessagesFrame,
+    LLMTextFrame,
+    StartFrame,
+    TextFrame,
+)
+from pipecat.processors.aggregators.llm_context import LLMContext
+from pipecat.processors.aggregators.openai_llm_context import (
+    OpenAILLMContext,
+    OpenAILLMContextFrame,
+)
+from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.llm_service import LLMService
+logger = logging.getLogger(__name__)
+# ============================================================================
+# TYPES
+# ============================================================================
+@dataclass
+class ClaudeLLMServiceConfig:
+    """Configuration for ClaudeLLMService.
+    Only ``cwd`` and ``system_prompt`` are required; the remaining fields
+    default to None.
+    Args:
+        cwd: Working directory for the Claude Code session
+        system_prompt: System prompt for voice mode
+        allowed_tools: Tool allowlist (empty list = all tools allowed).
+            None is passed to the SDK as an empty list by
+            ClaudeLLMService._ensure_client.
+        initial_prompt: Optional first message so the agent speaks first.
+            When set, ClaudeLLMService.start sends it once at pipeline start.
+        existing_client: Pre-existing ClaudeSDKClient (e.g. from heartbeat handoff).
+            When set, ClaudeLLMService treats it as already connected and
+            does not create or connect a new client.
+    """
+    # Required settings, forwarded to ClaudeAgentOptions when a client is created.
+    cwd: str
+    system_prompt: str
+    # Optional settings; None means "not configured".
+    allowed_tools: list[str] | None = None
+    initial_prompt: str | None = None
+    existing_client: ClaudeSDKClient | None = None
+# ============================================================================
+# MAIN HANDLERS
+# ============================================================================
+class ClaudeLLMService(LLMService):
+    """Pipecat LLMService that wraps ClaudeSDKClient for voice conversations.
+    Intercepts LLM context frames from the user aggregator, extracts the last
+    user message, sends it to Claude via the SDK, and pushes text frames
+    downstream for TTS.
+    """
+    def __init__(self, config: ClaudeLLMServiceConfig, **kwargs):
+        super().__init__(**kwargs)
+        self._config = config
+        self._client: ClaudeSDKClient | None = config.existing_client
+        self._connected = config.existing_client is not None
+        self._initial_prompt_sent = False
+        self._processing = False
+        self._current_task: asyncio.Task | None = None
+        # Initialize LLMSettings fields — Claude SDK manages these internally,
+        # so we set them all to None (unsupported).
+        self._settings.model = None
+        self._settings.system_instruction = None
+        self._settings.temperature = None
+        self._settings.max_tokens = None
+        self._settings.top_p = None
+        self._settings.top_k = None
+        self._settings.frequency_penalty = None
+        self._settings.presence_penalty = None
+        self._settings.seed = None
+        self._settings.filter_incomplete_user_turns = None
+        self._settings.user_turn_completion_config = None
+    async def start(self, frame: StartFrame):
+        """Handle pipeline start. Sends initial_prompt if configured."""
+        await super().start(frame)
+        if self._config.initial_prompt and not self._initial_prompt_sent:
+            self._initial_prompt_sent = True
+            await self._ensure_client()
+            await self._send_to_claude(self._config.initial_prompt)
+    async def stop(self, frame: EndFrame):
+        """Handle pipeline stop. Disconnects the Claude session."""
+        await self.close()
+        await super().stop(frame)
+    async def cancel(self, frame: CancelFrame):
+        """Handle pipeline cancel. Disconnects the Claude session."""
+        await self.close()
+        await super().cancel(frame)
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames.
+        Handles context frames from Pipecat's aggregators by extracting the last
+        user message and sending it to Claude. All other frames pass through.
+        Args:
+            frame: The incoming frame
+            direction: Frame direction (upstream/downstream)
+        """
+        await super().process_frame(frame, direction)
+        context = None
+        if isinstance(frame, OpenAILLMContextFrame):
+            context = frame.context
+        elif isinstance(frame, LLMContextFrame):
+            context = frame.context
+        elif isinstance(frame, LLMMessagesFrame):
+            context = OpenAILLMContext.from_messages(frame.messages)
+        elif isinstance(frame, InterruptionFrame):
+            await self.interrupt()
+            await self.push_frame(frame, direction)
+            return
+        else:
+            await self.push_frame(frame, direction)
+            return
+        if context:
+            # Extract the last user message text from the Pipecat context
+            user_text = _extract_last_user_message(context)
+            if not user_text:
+                logger.warning("[claude-llm] No user message found in context")
+                return
+            # Clear Pipecat context to prevent unbounded growth
+            # (Claude SDK maintains its own conversation history)
+            if isinstance(context, OpenAILLMContext):
+                context.set_messages([])
+            elif isinstance(context, LLMContext):
+                context.messages.clear()
+            # Cancel any in-flight query before starting a new one
+            await self._cancel_current_task()
+            await self._ensure_client()
+            async def _run_query():
+                try:
+                    await self.push_frame(LLMFullResponseStartFrame())
+                    await self.start_processing_metrics()
+                    await self._send_to_claude(user_text)
+                except asyncio.CancelledError:
+                    logger.info("[claude-llm] Query cancelled by new input")
+                except Exception as e:
+                    logger.error(f"[claude-llm] Error during Claude query: {e}")
+                    await self.push_error(error_msg=f"Claude query error: {e}", exception=e)
+                finally:
+                    await self.stop_processing_metrics()
+                    await self.push_frame(LLMFullResponseEndFrame())
+            self._current_task = asyncio.create_task(_run_query())
+            await self._current_task
+    async def _cancel_current_task(self) -> None:
+        """Cancel the in-flight query task if one is running."""
+        if self._current_task and not self._current_task.done():
+            self._current_task.cancel()
+            try:
+                await self._current_task
+            except (asyncio.CancelledError, Exception):
+                pass
+            self._current_task = None
+    async def interrupt(self) -> None:
+        """Interrupt the current Claude response and cancel the query task."""
+        await self._cancel_current_task()
+        if self._client and self._connected:
+            try:
+                await self._client.interrupt()
+            except Exception as e:
+                logger.warning(f"[claude-llm] Interrupt error: {e}")
+    async def close(self) -> None:
+        """Disconnect the Claude session."""
+        if self._client and self._connected:
+            try:
+                await self._client.disconnect()
+            except Exception as e:
+                logger.warning(f"[claude-llm] Disconnect error: {e}")
+            finally:
+                self._connected = False
+                self._client = None
+    # ============================================================================
+    # HELPER FUNCTIONS
+    # ============================================================================
+    async def _ensure_client(self) -> None:
+        """Create and connect ClaudeSDKClient if not already connected.
+        Uses existing_client if provided in config, otherwise creates a new one.
+        """
+        if self._client and self._connected:
+            return
+        if not self._client:
+            options = ClaudeAgentOptions(
+                system_prompt=self._config.system_prompt,
+                cwd=self._config.cwd,
+                allowed_tools=self._config.allowed_tools or [],
+                permission_mode="bypassPermissions",
+                include_partial_messages=True,
+                max_thinking_tokens=10000,
+            )
+            self._client = ClaudeSDKClient(options=options)
+        await self._client.connect()
+        self._connected = True
+        logger.info("[claude-llm] Claude session connected")
+    async def _send_to_claude(self, text: str) -> None:
+        """Send a user message to Claude and push response text frames downstream.
+        Iterates over the streaming response, extracting text deltas and tool use
+        events. Text is pushed as LLMTextFrame for TTS. Tool starts are pushed as
+        FunctionCallsStartedFrame for the narration processor.
+        Args:
+            text: The user message to send
+        """
+        if not self._client:
+            raise RuntimeError("Claude client not connected")
+        self._processing = True
+        has_streamed = False
+        try:
+            await self._client.query(text)
+            async for msg in self._client.receive_response():
+                if isinstance(msg, AssistantMessage):
+                    # Process content blocks from the assistant message
+                    for block in msg.content:
+                        if isinstance(block, TextBlock) and block.text:
+                            if not has_streamed:
+                                has_streamed = True
+                                await self.start_ttfb_metrics()
+                                await self.stop_ttfb_metrics()
+                            await self.push_frame(LLMTextFrame(block.text))
+                        elif isinstance(block, ToolUseBlock):
+                            logger.info(f"[claude-llm] Tool use: {block.name}")
+                            # Push a text frame announcing tool use for narration
+                            await self.push_frame(TextFrame(f"__tool_start:{block.name}"))
+                elif isinstance(msg, ResultMessage):
+                    if msg.is_error:
+                        logger.error(f"[claude-llm] Result error: {msg.subtype}")
+                    else:
+                        logger.info("[claude-llm] Turn complete")
+                    break
+        finally:
+            self._processing = False
+def _extract_last_user_message(context: OpenAILLMContext | LLMContext | object) -> str | None:
+    """Extract the last user message text from a Pipecat LLM context.
+    The context contains OpenAI-format messages. We find the last message
+    with role="user" and extract its text content.
+    Args:
+        context: Pipecat LLM context (OpenAILLMContext, LLMContext, or other)
+    Returns:
+        The last user message text, or None if no user message found
+    """
+    if isinstance(context, OpenAILLMContext):
+        messages = context.get_messages()
+    elif isinstance(context, LLMContext):
+        messages = context.messages
+    else:
+        messages = getattr(context, "messages", [])
+    if not messages:
+        return None
+    # Walk backwards to find the last user message
+    for msg in reversed(messages):
+        msg_dict = msg if isinstance(msg, dict) else vars(msg) if hasattr(msg, "__dict__") else {}
+        if msg_dict.get("role") == "user":
+            content = msg_dict.get("content", "")
+            if isinstance(content, str):
+                return content.strip() or None
+            # Content might be a list of content blocks
+            if isinstance(content, list):
+                texts = []
+                for block in content:
+                    if isinstance(block, dict) and block.get("type") == "text":
+                        texts.append(block.get("text", ""))
+                    elif isinstance(block, str):
+                        texts.append(block)
+                joined = " ".join(texts).strip()
+                return joined or None
+    return None