voicecc 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/voicecc.js +9 -0
- package/package.json +2 -1
- package/voice-server/.python-version +1 -0
- package/voice-server/claude_llm_service.py +333 -0
- package/voice-server/claude_session.py +312 -0
- package/voice-server/config.py +340 -0
- package/voice-server/dev-server-start.sh +128 -0
- package/voice-server/heartbeat.py +505 -0
- package/voice-server/narration_processor.py +140 -0
- package/voice-server/requirements.txt +8 -0
- package/voice-server/server.py +335 -0
- package/voice-server/stop_phrase_processor.py +50 -0
- package/voice-server/twilio_pipeline.py +237 -0
- package/voice-server/voice_pipeline.py +147 -0
package/bin/voicecc.js
CHANGED
|
@@ -740,6 +740,15 @@ if (!commandExists("claude")) {
|
|
|
740
740
|
// Runs on every start but skips pip install if requirements.txt hasn't changed.
|
|
741
741
|
ensurePythonVenv();
|
|
742
742
|
|
|
743
|
+
// Hard check: verify the venv actually exists after setup
|
|
744
|
+
const expectedVenvPython = join(PKG_ROOT, "voice-server", ".venv", "bin", "python");
|
|
745
|
+
if (!existsSync(expectedVenvPython)) {
|
|
746
|
+
console.error(`ERROR: Python venv not found at ${expectedVenvPython}`);
|
|
747
|
+
console.error("The voice-server directory or its venv is missing from the installation.");
|
|
748
|
+
console.error("Try reinstalling: npm install -g voicecc");
|
|
749
|
+
process.exit(1);
|
|
750
|
+
}
|
|
751
|
+
|
|
743
752
|
// If already running, show info and exit
|
|
744
753
|
if (isRunning()) {
|
|
745
754
|
showInfo();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "voicecc",
|
|
3
|
-
"version": "1.2.2",
|
|
3
|
+
"version": "1.2.3",
|
|
4
4
|
"description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
"files": [
|
|
25
25
|
"bin/",
|
|
26
26
|
"server/",
|
|
27
|
+
"voice-server/",
|
|
27
28
|
"dashboard/dist/",
|
|
28
29
|
"dashboard/server.ts",
|
|
29
30
|
"dashboard/routes/",
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom Pipecat LLMService wrapping the Python Claude Agent SDK (ClaudeSDKClient).
|
|
3
|
+
|
|
4
|
+
Uses ClaudeSDKClient for persistent multi-turn voice sessions with full tool use.
|
|
5
|
+
Does NOT use Pipecat's built-in context accumulation -- the Claude session maintains
|
|
6
|
+
its own conversation history internally.
|
|
7
|
+
|
|
8
|
+
Responsibilities:
|
|
9
|
+
- Override process_frame to handle LLM context frames from Pipecat aggregators
|
|
10
|
+
- Extract only the last user message from Pipecat context (SDK tracks history)
|
|
11
|
+
- Clear Pipecat context after each turn to prevent unbounded memory growth
|
|
12
|
+
- Support existing_client for heartbeat session handoff
|
|
13
|
+
- Support initial_prompt for agent-speaks-first flows
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
|
|
20
|
+
from claude_agent_sdk import (
|
|
21
|
+
AssistantMessage,
|
|
22
|
+
ClaudeAgentOptions,
|
|
23
|
+
ClaudeSDKClient,
|
|
24
|
+
ResultMessage,
|
|
25
|
+
TextBlock,
|
|
26
|
+
ToolUseBlock,
|
|
27
|
+
)
|
|
28
|
+
from pipecat.frames.frames import (
|
|
29
|
+
CancelFrame,
|
|
30
|
+
EndFrame,
|
|
31
|
+
Frame,
|
|
32
|
+
FunctionCallsStartedFrame,
|
|
33
|
+
InterruptionFrame,
|
|
34
|
+
LLMContextFrame,
|
|
35
|
+
LLMFullResponseEndFrame,
|
|
36
|
+
LLMFullResponseStartFrame,
|
|
37
|
+
LLMMessagesFrame,
|
|
38
|
+
LLMTextFrame,
|
|
39
|
+
StartFrame,
|
|
40
|
+
TextFrame,
|
|
41
|
+
)
|
|
42
|
+
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
43
|
+
from pipecat.processors.aggregators.openai_llm_context import (
|
|
44
|
+
OpenAILLMContext,
|
|
45
|
+
OpenAILLMContextFrame,
|
|
46
|
+
)
|
|
47
|
+
from pipecat.processors.frame_processor import FrameDirection
|
|
48
|
+
from pipecat.services.llm_service import LLMService
|
|
49
|
+
|
|
50
|
+
logger = logging.getLogger(__name__)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ============================================================================
|
|
54
|
+
# TYPES
|
|
55
|
+
# ============================================================================
|
|
56
|
+
|
|
57
|
+
@dataclass
class ClaudeLLMServiceConfig:
    """Configuration bundle consumed by ClaudeLLMService.

    Args:
        cwd: Working directory for the Claude Code session
        system_prompt: System prompt for voice mode
        allowed_tools: Tool allowlist (empty list = all tools allowed)
        initial_prompt: Optional first message so the agent speaks first
        existing_client: Pre-existing ClaudeSDKClient (e.g. from heartbeat handoff)
    """
    cwd: str
    system_prompt: str
    allowed_tools: list[str] | None = None
    initial_prompt: str | None = None
    existing_client: ClaudeSDKClient | None = None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ============================================================================
|
|
76
|
+
# MAIN HANDLERS
|
|
77
|
+
# ============================================================================
|
|
78
|
+
|
|
79
|
+
class ClaudeLLMService(LLMService):
    """Pipecat LLMService that wraps ClaudeSDKClient for voice conversations.

    Intercepts LLM context frames from the user aggregator, extracts the last
    user message, sends it to Claude via the SDK, and pushes text frames
    downstream for TTS. Pipecat's context accumulation is NOT used: the Claude
    SDK session keeps its own history, so the Pipecat context is cleared after
    each turn to prevent unbounded growth.
    """

    def __init__(self, config: ClaudeLLMServiceConfig, **kwargs):
        """Initialize the service.

        Args:
            config: Service configuration (cwd, prompts, tool allowlist, etc.)
            **kwargs: Forwarded to the base LLMService
        """
        super().__init__(**kwargs)
        self._config = config
        # Reuse a pre-connected client (heartbeat handoff) when provided.
        self._client: ClaudeSDKClient | None = config.existing_client
        self._connected = config.existing_client is not None
        self._initial_prompt_sent = False
        self._processing = False
        # In-flight query task; replaced/cancelled on each new user turn.
        self._current_task: asyncio.Task | None = None

        # Initialize LLMSettings fields — Claude SDK manages these internally,
        # so we set them all to None (unsupported).
        self._settings.model = None
        self._settings.system_instruction = None
        self._settings.temperature = None
        self._settings.max_tokens = None
        self._settings.top_p = None
        self._settings.top_k = None
        self._settings.frequency_penalty = None
        self._settings.presence_penalty = None
        self._settings.seed = None
        self._settings.filter_incomplete_user_turns = None
        self._settings.user_turn_completion_config = None

    async def start(self, frame: StartFrame):
        """Handle pipeline start. Sends initial_prompt if configured."""
        await super().start(frame)
        if self._config.initial_prompt and not self._initial_prompt_sent:
            # Flag is set before the awaits so a re-entrant start cannot
            # send the prompt twice.
            self._initial_prompt_sent = True
            await self._ensure_client()
            await self._send_to_claude(self._config.initial_prompt)

    async def stop(self, frame: EndFrame):
        """Handle pipeline stop. Disconnects the Claude session."""
        await self.close()
        await super().stop(frame)

    async def cancel(self, frame: CancelFrame):
        """Handle pipeline cancel. Disconnects the Claude session."""
        await self.close()
        await super().cancel(frame)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames.

        Handles context frames from Pipecat's aggregators by extracting the last
        user message and sending it to Claude. Interruption frames cancel the
        in-flight query. All other frames pass through unchanged.

        Args:
            frame: The incoming frame
            direction: Frame direction (upstream/downstream)
        """
        await super().process_frame(frame, direction)

        context = None
        if isinstance(frame, OpenAILLMContextFrame):
            context = frame.context
        elif isinstance(frame, LLMContextFrame):
            context = frame.context
        elif isinstance(frame, LLMMessagesFrame):
            context = OpenAILLMContext.from_messages(frame.messages)
        elif isinstance(frame, InterruptionFrame):
            await self.interrupt()
            await self.push_frame(frame, direction)
            return
        else:
            await self.push_frame(frame, direction)
            return

        # FIX: explicit None check. The previous truthiness test could silently
        # drop a context object that defines container semantics and is empty.
        if context is None:
            return

        # Extract the last user message text from the Pipecat context.
        user_text = _extract_last_user_message(context)
        if not user_text:
            logger.warning("[claude-llm] No user message found in context")
            return

        # Clear Pipecat context to prevent unbounded growth
        # (Claude SDK maintains its own conversation history).
        if isinstance(context, OpenAILLMContext):
            context.set_messages([])
        elif isinstance(context, LLMContext):
            context.messages.clear()

        # Cancel any in-flight query before starting a new one.
        await self._cancel_current_task()

        await self._ensure_client()

        async def _run_query():
            # Wraps one Claude turn with response-boundary frames and metrics.
            try:
                await self.push_frame(LLMFullResponseStartFrame())
                await self.start_processing_metrics()
                await self._send_to_claude(user_text)
            except asyncio.CancelledError:
                logger.info("[claude-llm] Query cancelled by new input")
            except Exception as e:
                logger.error(f"[claude-llm] Error during Claude query: {e}")
                await self.push_error(error_msg=f"Claude query error: {e}", exception=e)
            finally:
                await self.stop_processing_metrics()
                await self.push_frame(LLMFullResponseEndFrame())

        self._current_task = asyncio.create_task(_run_query())
        await self._current_task

    async def _cancel_current_task(self) -> None:
        """Cancel the in-flight query task if one is running."""
        if self._current_task and not self._current_task.done():
            self._current_task.cancel()
            try:
                await self._current_task
            except asyncio.CancelledError:
                pass  # expected when the task honors cancellation
            except Exception as e:
                # FIX: the old `except (CancelledError, Exception): pass`
                # silently swallowed real failures; keep best-effort semantics
                # but at least log them.
                logger.warning(f"[claude-llm] Cancelled task raised: {e}")
        self._current_task = None

    async def interrupt(self) -> None:
        """Interrupt the current Claude response and cancel the query task."""
        await self._cancel_current_task()
        if self._client and self._connected:
            try:
                await self._client.interrupt()
            except Exception as e:
                logger.warning(f"[claude-llm] Interrupt error: {e}")

    async def close(self) -> None:
        """Disconnect the Claude session. Safe to call multiple times."""
        if self._client and self._connected:
            try:
                await self._client.disconnect()
            except Exception as e:
                logger.warning(f"[claude-llm] Disconnect error: {e}")
            finally:
                self._connected = False
                self._client = None

    # ============================================================================
    # HELPER FUNCTIONS
    # ============================================================================

    async def _ensure_client(self) -> None:
        """Create and connect ClaudeSDKClient if not already connected.

        Uses existing_client if provided in config, otherwise creates a new one.
        """
        if self._client and self._connected:
            return

        if not self._client:
            options = ClaudeAgentOptions(
                system_prompt=self._config.system_prompt,
                cwd=self._config.cwd,
                allowed_tools=self._config.allowed_tools or [],
                permission_mode="bypassPermissions",
                include_partial_messages=True,
                max_thinking_tokens=10000,
            )
            self._client = ClaudeSDKClient(options=options)

        await self._client.connect()
        self._connected = True
        logger.info("[claude-llm] Claude session connected")

    async def _send_to_claude(self, text: str) -> None:
        """Send a user message to Claude and push response text frames downstream.

        Iterates over the streaming response, extracting text deltas and tool
        use events. Text is pushed as LLMTextFrame for TTS. Tool starts are
        pushed as a sentinel TextFrame ("__tool_start:<name>") for the
        narration processor.

        Args:
            text: The user message to send

        Raises:
            RuntimeError: If no client is connected
        """
        if not self._client:
            raise RuntimeError("Claude client not connected")

        self._processing = True
        has_streamed = False

        try:
            await self._client.query(text)

            async for msg in self._client.receive_response():
                if isinstance(msg, AssistantMessage):
                    # Process content blocks from the assistant message
                    for block in msg.content:
                        if isinstance(block, TextBlock) and block.text:
                            if not has_streamed:
                                # TTFB is recorded at the first text delta.
                                has_streamed = True
                                await self.start_ttfb_metrics()
                                await self.stop_ttfb_metrics()
                            await self.push_frame(LLMTextFrame(block.text))
                        elif isinstance(block, ToolUseBlock):
                            logger.info(f"[claude-llm] Tool use: {block.name}")
                            # Push a text frame announcing tool use for narration
                            await self.push_frame(TextFrame(f"__tool_start:{block.name}"))

                elif isinstance(msg, ResultMessage):
                    if msg.is_error:
                        logger.error(f"[claude-llm] Result error: {msg.subtype}")
                    else:
                        logger.info("[claude-llm] Turn complete")
                    break

        finally:
            self._processing = False
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _extract_last_user_message(context: OpenAILLMContext | LLMContext | object) -> str | None:
    """Pull the text of the most recent user message out of a Pipecat context.

    Messages are in OpenAI format; the newest message with role "user" wins.
    String content is returned stripped; list content is flattened by joining
    its text blocks.

    Args:
        context: Pipecat LLM context (OpenAILLMContext, LLMContext, or other)

    Returns:
        The last user message text, or None if no user message found
    """
    if isinstance(context, OpenAILLMContext):
        history = context.get_messages()
    elif isinstance(context, LLMContext):
        history = context.messages
    else:
        history = getattr(context, "messages", [])

    # Newest-first scan for a user-role entry.
    for entry in reversed(history or []):
        if isinstance(entry, dict):
            record = entry
        elif hasattr(entry, "__dict__"):
            record = vars(entry)
        else:
            record = {}

        if record.get("role") != "user":
            continue

        body = record.get("content", "")
        if isinstance(body, str):
            stripped = body.strip()
            return stripped if stripped else None
        if isinstance(body, list):
            # Content may be a list of text blocks and/or raw strings.
            pieces = []
            for part in body:
                if isinstance(part, dict) and part.get("type") == "text":
                    pieces.append(part.get("text", ""))
                elif isinstance(part, str):
                    pieces.append(part)
            combined = " ".join(pieces).strip()
            return combined if combined else None
        # Unrecognized content shape: keep scanning older messages.

    return None
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Text chat session manager for the Python voice server.
|
|
3
|
+
|
|
4
|
+
Port of chat-server.ts + claude-session.ts. Manages ClaudeSDKClient lifecycle
|
|
5
|
+
for text chat: lazy creation on first message, multi-turn reuse, inactivity
|
|
6
|
+
cleanup after 10 minutes.
|
|
7
|
+
|
|
8
|
+
Responsibilities:
|
|
9
|
+
- Create and reuse ClaudeSDKClient sessions keyed by device token
|
|
10
|
+
- Stream Claude responses as ChatSseEvent async generators
|
|
11
|
+
- Enforce max concurrent sessions
|
|
12
|
+
- Auto-cleanup inactive sessions on a 60-second timer
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import logging
|
|
17
|
+
import time
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
|
|
20
|
+
from claude_agent_sdk import (
|
|
21
|
+
AssistantMessage,
|
|
22
|
+
ClaudeAgentOptions,
|
|
23
|
+
ClaudeSDKClient,
|
|
24
|
+
ResultMessage,
|
|
25
|
+
TextBlock,
|
|
26
|
+
ToolUseBlock,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
from config import build_system_prompt, load_config, DEFAULT_AGENTS_DIR
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
# ============================================================================
|
|
34
|
+
# CONSTANTS
|
|
35
|
+
# ============================================================================
|
|
36
|
+
|
|
37
|
+
# Sessions idle for longer than this are closed by the cleanup loop.
INACTIVITY_TIMEOUT_SECONDS = 600  # 10 minutes
# Period of the background cleanup loop.
CLEANUP_INTERVAL_SECONDS = 60
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ============================================================================
|
|
42
|
+
# TYPES
|
|
43
|
+
# ============================================================================
|
|
44
|
+
|
|
45
|
+
@dataclass
|
|
46
|
+
class ChatSseEvent:
|
|
47
|
+
"""SSE event sent to the client during text chat streaming.
|
|
48
|
+
|
|
49
|
+
Attributes:
|
|
50
|
+
type: Event type ("text_delta", "tool_start", "tool_end", "result", "error")
|
|
51
|
+
content: Text content or error message
|
|
52
|
+
tool_name: Tool name (only for tool_start events)
|
|
53
|
+
"""
|
|
54
|
+
type: str
|
|
55
|
+
content: str
|
|
56
|
+
tool_name: str | None = None
|
|
57
|
+
|
|
58
|
+
def to_dict(self) -> dict:
|
|
59
|
+
"""Serialize to a JSON-safe dict, omitting None fields."""
|
|
60
|
+
d: dict = {"type": self.type, "content": self.content}
|
|
61
|
+
if self.tool_name is not None:
|
|
62
|
+
d["toolName"] = self.tool_name
|
|
63
|
+
return d
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class ChatSession:
    """State record for one live text chat conversation.

    Attributes:
        session_key: Device token used as the session key
        client: Persistent ClaudeSDKClient for multi-turn chat
        agent_id: Optional agent identifier for agent-specific prompts
        streaming: Whether the session is currently streaming a response
        last_activity: Unix timestamp of last activity (for inactivity timeout)
    """
    session_key: str
    client: ClaudeSDKClient
    agent_id: str | None = None
    streaming: bool = False
    # Stamped at creation; refreshed on every message and interrupt.
    last_activity: float = field(default_factory=time.time)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ============================================================================
|
|
85
|
+
# STATE
|
|
86
|
+
# ============================================================================
|
|
87
|
+
|
|
88
|
+
# Active chat sessions keyed by device token (session_key).
_active_sessions: dict[str, ChatSession] = {}
# Handle for the background inactivity-cleanup task; None until started.
_cleanup_task: asyncio.Task | None = None
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ============================================================================
|
|
93
|
+
# MAIN HANDLERS
|
|
94
|
+
# ============================================================================
|
|
95
|
+
|
|
96
|
+
async def get_or_create_session(session_key: str, agent_id: str | None = None) -> ChatSession:
    """Get an existing chat session or create a new one.

    On first call for a session_key, creates a ClaudeSDKClient with the
    appropriate system prompt. Subsequent calls return the existing session.
    Enforces max concurrent sessions from config.

    Args:
        session_key: Device token to key the session on
        agent_id: Optional agent ID for agent-specific prompts

    Returns:
        The active ChatSession

    Raises:
        RuntimeError: If max concurrent sessions exceeded
    """
    # FIX: hoisted from mid-function so the cwd logic below reads cleanly;
    # kept function-local to match the module's existing layout.
    import os

    existing = _active_sessions.get(session_key)
    if existing:
        existing.last_activity = time.time()
        return existing

    config = load_config()
    if len(_active_sessions) >= config.max_concurrent_sessions:
        raise RuntimeError(
            f"Max concurrent sessions ({config.max_concurrent_sessions}) reached"
        )

    system_prompt = build_system_prompt(agent_id, "text")

    # Prefer the agent's own directory as the working directory when it exists.
    cwd = config.default_cwd
    if agent_id:
        agent_dir = os.path.join(DEFAULT_AGENTS_DIR, agent_id)
        if os.path.isdir(agent_dir):
            cwd = agent_dir

    options = ClaudeAgentOptions(
        system_prompt=system_prompt,
        cwd=cwd,
        allowed_tools=[],
        permission_mode="bypassPermissions",
        include_partial_messages=True,
        max_thinking_tokens=10000,
    )

    client = ClaudeSDKClient(options=options)
    await client.connect()

    # NOTE(review): two concurrent calls with the same key can both pass the
    # lookup above (connect() awaits) and create duplicate clients — confirm
    # callers serialize requests per device token.
    session = ChatSession(
        session_key=session_key,
        client=client,
        agent_id=agent_id,
    )
    _active_sessions[session_key] = session
    logger.info(f"[chat] Session created, key: {session_key}")

    return session
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
async def stream_message(session_key: str, text: str):
    """Send a user message and yield SSE events from Claude's response.

    Only one response may stream per session at a time; a second concurrent
    call fails fast with ALREADY_STREAMING.

    Args:
        session_key: Device token identifying the session
        text: User message text

    Yields:
        ChatSseEvent objects for each streaming event

    Raises:
        RuntimeError: If no active session or already streaming
    """
    session = _active_sessions.get(session_key)
    if session is None:
        raise RuntimeError("No active session")

    if session.streaming:
        raise RuntimeError("ALREADY_STREAMING")

    session.last_activity = time.time()
    session.streaming = True

    try:
        await session.client.query(text)

        async for message in session.client.receive_response():
            if isinstance(message, ResultMessage):
                if message.is_error:
                    yield ChatSseEvent(
                        type="error", content=message.subtype or "Unknown error"
                    )
                break

            if not isinstance(message, AssistantMessage):
                continue

            for part in message.content:
                if isinstance(part, TextBlock) and part.text:
                    yield ChatSseEvent(type="text_delta", content=part.text)
                elif isinstance(part, ToolUseBlock):
                    yield ChatSseEvent(
                        type="tool_start", content="", tool_name=part.name
                    )

        # Terminal event after the turn finishes (also after an SDK-reported
        # error result, matching the original control flow).
        yield ChatSseEvent(type="result", content="")

    except Exception as e:
        logger.error(f"[chat] Stream error for {session_key}: {e}")
        yield ChatSseEvent(type="error", content=str(e))

    finally:
        session.streaming = False
        session.last_activity = time.time()
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
async def close_session(session_key: str) -> None:
    """Tear down a chat session and disconnect its Claude client.

    No-op when the key has no active session.

    Args:
        session_key: Device token identifying the session
    """
    session = _active_sessions.pop(session_key, None)
    if session is None:
        return

    try:
        await session.client.disconnect()
    except Exception as e:
        logger.warning(f"[chat] Error disconnecting session {session_key}: {e}")

    logger.info(f"[chat] Session closed, key: {session_key}")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
async def interrupt_session(session_key: str) -> bool:
    """Interrupt the response currently streaming for a session.

    Args:
        session_key: Device token identifying the session

    Returns:
        True if a streaming session was interrupted, False otherwise
    """
    session = _active_sessions.get(session_key)
    if session is None or not session.streaming:
        return False

    try:
        await session.client.interrupt()
    except Exception as e:
        logger.warning(f"[chat] Interrupt error for {session_key}: {e}")

    # Mark the session idle even if the SDK interrupt call failed.
    session.streaming = False
    session.last_activity = time.time()
    logger.info(f"[chat] Session interrupted, key: {session_key}")
    return True
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def has_session(session_key: str) -> bool:
    """Report whether an active chat session exists for the given key.

    Args:
        session_key: Device token to check

    Returns:
        True if a session exists
    """
    return _active_sessions.get(session_key) is not None
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
async def cleanup_inactive() -> None:
    """Close sessions idle for longer than INACTIVITY_TIMEOUT_SECONDS.

    Called on a periodic timer. Safe to call concurrently.
    """
    cutoff = time.time() - INACTIVITY_TIMEOUT_SECONDS
    stale_keys = [
        key for key, session in _active_sessions.items()
        if session.last_activity < cutoff
    ]

    for key in stale_keys:
        logger.info(f"[chat] Session timed out due to inactivity, key: {key}")
        await close_session(key)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# ============================================================================
|
|
286
|
+
# HELPER FUNCTIONS
|
|
287
|
+
# ============================================================================
|
|
288
|
+
|
|
289
|
+
async def _cleanup_loop() -> None:
    """Background loop: sweep inactive sessions every CLEANUP_INTERVAL_SECONDS."""
    while True:
        await asyncio.sleep(CLEANUP_INTERVAL_SECONDS)
        try:
            await cleanup_inactive()
        except Exception as e:
            # Keep the loop alive even if one sweep fails.
            logger.error(f"[chat] Cleanup error: {e}")
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def start_cleanup_timer() -> None:
    """Start the background cleanup timer. Call once at server startup."""
    global _cleanup_task
    if _cleanup_task is not None:
        return  # already running; idempotent
    _cleanup_task = asyncio.create_task(_cleanup_loop())
    logger.info("[chat] Inactivity cleanup timer started")
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def stop_cleanup_timer() -> None:
    """Stop the background cleanup timer if it is running."""
    global _cleanup_task
    if _cleanup_task is None:
        return
    _cleanup_task.cancel()
    _cleanup_task = None
|