voicecc 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/voicecc.js CHANGED
@@ -131,11 +131,52 @@ function ensurePythonVenv() {
131
131
  }
132
132
 
133
133
  if (!systemPython) {
134
- console.log("");
135
- console.log("WARNING: Python 3.12+ not found. Voice server will not be available.");
136
- console.log("Install Python 3.12+ and run 'voicecc' again to enable voice features.");
137
- console.log("");
138
- return false;
134
+ // Attempt to install Python 3.12 automatically on Linux
135
+ if (process.platform === "linux") {
136
+ console.log("Python 3.12+ not found. Installing automatically...");
137
+ try {
138
+ if (commandExists("apt-get")) {
139
+ execSync("apt-get update -qq && apt-get install -y -qq python3.12 python3.12-venv python3.12-dev 2>&1", { stdio: "inherit" });
140
+ } else if (commandExists("dnf")) {
141
+ execSync("dnf install -y python3.12 2>&1", { stdio: "inherit" });
142
+ } else if (commandExists("yum")) {
143
+ execSync("yum install -y python3.12 2>&1", { stdio: "inherit" });
144
+ } else {
145
+ console.error("No supported package manager found (apt-get, dnf, yum).");
146
+ console.error("Install Python 3.12+ manually and run 'voicecc' again.");
147
+ process.exit(1);
148
+ }
149
+ // Re-check for Python after installation
150
+ for (const candidate of pythonCandidates) {
151
+ if (commandExists(candidate)) {
152
+ try {
153
+ const version = execSync(`${candidate} --version 2>&1`, { encoding: "utf-8" }).trim();
154
+ const match = version.match(/Python (\d+)\.(\d+)/);
155
+ if (match && (parseInt(match[1]) > 3 || (parseInt(match[1]) === 3 && parseInt(match[2]) >= 12))) {
156
+ systemPython = candidate;
157
+ console.log(`Python installed successfully: ${version}`);
158
+ break;
159
+ }
160
+ } catch { /* skip */ }
161
+ }
162
+ }
163
+ if (!systemPython) {
164
+ console.error("Python installation completed but Python 3.12+ still not found.");
165
+ console.error("Install Python 3.12+ manually and run 'voicecc' again.");
166
+ process.exit(1);
167
+ }
168
+ } catch (err) {
169
+ console.error(`Failed to install Python 3.12: ${err.message}`);
170
+ console.error("Install Python 3.12+ manually and run 'voicecc' again.");
171
+ process.exit(1);
172
+ }
173
+ } else {
174
+ console.error("");
175
+ console.error("ERROR: Python 3.12+ is required but not found.");
176
+ console.error("Install Python 3.12+ and run 'voicecc' again.");
177
+ console.error("");
178
+ process.exit(1);
179
+ }
139
180
  }
140
181
 
141
182
  // Check if venv needs to be created
@@ -144,9 +185,8 @@ function ensurePythonVenv() {
144
185
  try {
145
186
  execSync(`${systemPython} -m venv ${venvDir}`, { stdio: "inherit" });
146
187
  } catch (err) {
147
- console.log(`Failed to create Python venv: ${err.message}`);
148
- console.log("Voice server will not be available.");
149
- return false;
188
+ console.error(`Failed to create Python venv: ${err.message}`);
189
+ process.exit(1);
150
190
  }
151
191
  }
152
192
 
@@ -177,8 +217,8 @@ function ensurePythonVenv() {
177
217
  writeFileSync(checksumFile, currentChecksum);
178
218
  console.log("Python dependencies installed.");
179
219
  } catch (err) {
180
- console.log(`Failed to install Python dependencies: ${err.message}`);
181
- console.log("Voice server may not work correctly.");
220
+ console.error(`Failed to install Python dependencies: ${err.message}`);
221
+ process.exit(1);
182
222
  }
183
223
 
184
224
  return true;
@@ -689,10 +729,26 @@ if (!existsSync(ENV_PATH)) {
689
729
  await runSetupWizard();
690
730
  }
691
731
 
732
+ // Verify Claude CLI is available
733
+ if (!commandExists("claude")) {
734
+ console.error("ERROR: Claude Code CLI ('claude') is not installed.");
735
+ console.error("Install it with: npm install -g @anthropic-ai/claude-code");
736
+ process.exit(1);
737
+ }
738
+
692
739
  // Ensure Python venv and dependencies are set up for the voice server.
693
740
  // Runs on every start but skips pip install if requirements.txt hasn't changed.
694
741
  ensurePythonVenv();
695
742
 
743
+ // Hard check: verify the venv actually exists after setup
744
+ const expectedVenvPython = join(PKG_ROOT, "voice-server", ".venv", "bin", "python");
745
+ if (!existsSync(expectedVenvPython)) {
746
+ console.error(`ERROR: Python venv not found at ${expectedVenvPython}`);
747
+ console.error("The voice-server directory or its venv is missing from the installation.");
748
+ console.error("Try reinstalling: npm install -g voicecc");
749
+ process.exit(1);
750
+ }
751
+
696
752
  // If already running, show info and exit
697
753
  if (isRunning()) {
698
754
  showInfo();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voicecc",
3
- "version": "1.2.1",
3
+ "version": "1.2.3",
4
4
  "description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
5
5
  "repository": {
6
6
  "type": "git",
@@ -24,6 +24,7 @@
24
24
  "files": [
25
25
  "bin/",
26
26
  "server/",
27
+ "voice-server/",
27
28
  "dashboard/dist/",
28
29
  "dashboard/server.ts",
29
30
  "dashboard/routes/",
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,333 @@
1
+ """
2
+ Custom Pipecat LLMService wrapping the Python Claude Agent SDK (ClaudeSDKClient).
3
+
4
+ Uses ClaudeSDKClient for persistent multi-turn voice sessions with full tool use.
5
+ Does NOT use Pipecat's built-in context accumulation -- the Claude session maintains
6
+ its own conversation history internally.
7
+
8
+ Responsibilities:
9
+ - Override process_frame to handle LLM context frames from Pipecat aggregators
10
+ - Extract only the last user message from Pipecat context (SDK tracks history)
11
+ - Clear Pipecat context after each turn to prevent unbounded memory growth
12
+ - Support existing_client for heartbeat session handoff
13
+ - Support initial_prompt for agent-speaks-first flows
14
+ """
15
+
16
+ import asyncio
17
+ import logging
18
+ from dataclasses import dataclass
19
+
20
+ from claude_agent_sdk import (
21
+ AssistantMessage,
22
+ ClaudeAgentOptions,
23
+ ClaudeSDKClient,
24
+ ResultMessage,
25
+ TextBlock,
26
+ ToolUseBlock,
27
+ )
28
+ from pipecat.frames.frames import (
29
+ CancelFrame,
30
+ EndFrame,
31
+ Frame,
32
+ FunctionCallsStartedFrame,
33
+ InterruptionFrame,
34
+ LLMContextFrame,
35
+ LLMFullResponseEndFrame,
36
+ LLMFullResponseStartFrame,
37
+ LLMMessagesFrame,
38
+ LLMTextFrame,
39
+ StartFrame,
40
+ TextFrame,
41
+ )
42
+ from pipecat.processors.aggregators.llm_context import LLMContext
43
+ from pipecat.processors.aggregators.openai_llm_context import (
44
+ OpenAILLMContext,
45
+ OpenAILLMContextFrame,
46
+ )
47
+ from pipecat.processors.frame_processor import FrameDirection
48
+ from pipecat.services.llm_service import LLMService
49
+
50
+ logger = logging.getLogger(__name__)
51
+
52
+
53
+ # ============================================================================
54
+ # TYPES
55
+ # ============================================================================
56
+
57
@dataclass
class ClaudeLLMServiceConfig:
    """Configuration for ClaudeLLMService.

    Args:
        cwd: Working directory for the Claude Code session
        system_prompt: System prompt for voice mode
        allowed_tools: Tool allowlist (empty list = all tools allowed).
            None is converted to an empty list when the client options are built.
        initial_prompt: Optional first message so the agent speaks first
        existing_client: Pre-existing ClaudeSDKClient (e.g. from heartbeat handoff).
            When provided, the service treats it as already connected and does
            not call connect() on it.
    """
    # Required settings, passed straight through to ClaudeAgentOptions.
    cwd: str
    system_prompt: str
    # Optional settings; None means "not configured".
    allowed_tools: list[str] | None = None
    initial_prompt: str | None = None
    existing_client: ClaudeSDKClient | None = None
73
+
74
+
75
+ # ============================================================================
76
+ # MAIN HANDLERS
77
+ # ============================================================================
78
+
79
class ClaudeLLMService(LLMService):
    """Pipecat LLMService that wraps ClaudeSDKClient for voice conversations.

    Intercepts LLM context frames from the user aggregator, extracts the last
    user message, sends it to Claude via the SDK, and pushes text frames
    downstream for TTS.

    Only one query runs at a time: a new context frame or an interruption
    cancels the in-flight query task before anything else happens.
    """

    def __init__(self, config: ClaudeLLMServiceConfig, **kwargs):
        """Create the service.

        Args:
            config: Session configuration (cwd, prompts, tools, optional client)
            **kwargs: Forwarded unchanged to LLMService.__init__
        """
        super().__init__(**kwargs)
        self._config = config
        # A handed-over client is assumed to be connected already.
        self._client: ClaudeSDKClient | None = config.existing_client
        self._connected = config.existing_client is not None
        self._initial_prompt_sent = False
        self._processing = False
        self._current_task: asyncio.Task | None = None

        # Initialize LLMSettings fields — Claude SDK manages these internally,
        # so we set them all to None (unsupported).
        self._settings.model = None
        self._settings.system_instruction = None
        self._settings.temperature = None
        self._settings.max_tokens = None
        self._settings.top_p = None
        self._settings.top_k = None
        self._settings.frequency_penalty = None
        self._settings.presence_penalty = None
        self._settings.seed = None
        self._settings.filter_incomplete_user_turns = None
        self._settings.user_turn_completion_config = None

    async def start(self, frame: StartFrame):
        """Handle pipeline start. Sends initial_prompt if configured.

        The initial prompt is sent at most once per service instance.
        """
        await super().start(frame)
        if self._config.initial_prompt and not self._initial_prompt_sent:
            self._initial_prompt_sent = True
            await self._ensure_client()
            await self._send_to_claude(self._config.initial_prompt)

    async def stop(self, frame: EndFrame):
        """Handle pipeline stop. Disconnects the Claude session."""
        await self.close()
        await super().stop(frame)

    async def cancel(self, frame: CancelFrame):
        """Handle pipeline cancel. Disconnects the Claude session."""
        await self.close()
        await super().cancel(frame)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames.

        Handles context frames from Pipecat's aggregators by extracting the last
        user message and sending it to Claude. All other frames pass through.

        Args:
            frame: The incoming frame
            direction: Frame direction (upstream/downstream)
        """
        await super().process_frame(frame, direction)

        context = None
        if isinstance(frame, OpenAILLMContextFrame):
            context = frame.context
        elif isinstance(frame, LLMContextFrame):
            context = frame.context
        elif isinstance(frame, LLMMessagesFrame):
            context = OpenAILLMContext.from_messages(frame.messages)
        elif isinstance(frame, InterruptionFrame):
            await self.interrupt()
            await self.push_frame(frame, direction)
            return
        else:
            await self.push_frame(frame, direction)
            return

        if context:
            # Extract the last user message text from the Pipecat context
            user_text = _extract_last_user_message(context)
            if not user_text:
                logger.warning("[claude-llm] No user message found in context")
                return

            # Clear Pipecat context to prevent unbounded growth
            # (Claude SDK maintains its own conversation history)
            if isinstance(context, OpenAILLMContext):
                context.set_messages([])
            elif isinstance(context, LLMContext):
                context.messages.clear()

            # Cancel any in-flight query before starting a new one
            await self._cancel_current_task()

            await self._ensure_client()

            async def _run_query():
                try:
                    await self.push_frame(LLMFullResponseStartFrame())
                    await self.start_processing_metrics()
                    await self._send_to_claude(user_text)
                except asyncio.CancelledError:
                    # NOTE(review): cancellation is absorbed here so that the
                    # awaiting process_frame call returns normally — confirm this
                    # is also the desired behavior when the pipeline itself is
                    # being cancelled.
                    logger.info("[claude-llm] Query cancelled by new input")
                except Exception as e:
                    logger.error(f"[claude-llm] Error during Claude query: {e}")
                    await self.push_error(error_msg=f"Claude query error: {e}", exception=e)
                finally:
                    # Always close the response envelope, even on error/cancel.
                    await self.stop_processing_metrics()
                    await self.push_frame(LLMFullResponseEndFrame())

            self._current_task = asyncio.create_task(_run_query())
            await self._current_task

    async def _cancel_current_task(self) -> None:
        """Cancel the in-flight query task if one is running."""
        if self._current_task and not self._current_task.done():
            self._current_task.cancel()
            try:
                await self._current_task
            except (asyncio.CancelledError, Exception):
                pass
        self._current_task = None

    async def interrupt(self) -> None:
        """Interrupt the current Claude response and cancel the query task."""
        await self._cancel_current_task()
        if self._client and self._connected:
            try:
                await self._client.interrupt()
            except Exception as e:
                logger.warning(f"[claude-llm] Interrupt error: {e}")

    async def close(self) -> None:
        """Cancel any in-flight query, then disconnect the Claude session.

        Disconnect errors are logged, not raised; the client reference is
        dropped either way.
        """
        # Stop the running query first so it is not left iterating over a
        # response stream whose client is being torn down underneath it.
        await self._cancel_current_task()

        if self._client and self._connected:
            try:
                await self._client.disconnect()
            except Exception as e:
                logger.warning(f"[claude-llm] Disconnect error: {e}")
            finally:
                self._connected = False
                self._client = None

    # ============================================================================
    # HELPER FUNCTIONS
    # ============================================================================

    async def _ensure_client(self) -> None:
        """Create and connect ClaudeSDKClient if not already connected.

        Uses existing_client if provided in config, otherwise creates a new one.
        """
        if self._client and self._connected:
            return

        if not self._client:
            options = ClaudeAgentOptions(
                system_prompt=self._config.system_prompt,
                cwd=self._config.cwd,
                allowed_tools=self._config.allowed_tools or [],
                permission_mode="bypassPermissions",
                include_partial_messages=True,
                max_thinking_tokens=10000,
            )
            self._client = ClaudeSDKClient(options=options)

        await self._client.connect()
        self._connected = True
        logger.info("[claude-llm] Claude session connected")

    async def _send_to_claude(self, text: str) -> None:
        """Send a user message to Claude and push response text frames downstream.

        Iterates over the response messages. Text blocks are pushed as
        LLMTextFrame for TTS. Tool use blocks are announced by pushing a
        TextFrame whose text is "__tool_start:<tool name>".

        Time-to-first-byte is measured from just before the query is sent until
        the first non-empty text block arrives.

        Args:
            text: The user message to send

        Raises:
            RuntimeError: If no client has been created yet
        """
        if not self._client:
            raise RuntimeError("Claude client not connected")

        self._processing = True
        first_text_seen = False

        try:
            # Start the clock before the request goes out; starting it at the
            # moment the first text arrives would always measure ~0.
            await self.start_ttfb_metrics()
            await self._client.query(text)

            async for msg in self._client.receive_response():
                if isinstance(msg, AssistantMessage):
                    # Process content blocks from the assistant message
                    for block in msg.content:
                        if isinstance(block, TextBlock) and block.text:
                            if not first_text_seen:
                                first_text_seen = True
                                await self.stop_ttfb_metrics()
                            await self.push_frame(LLMTextFrame(block.text))
                        elif isinstance(block, ToolUseBlock):
                            logger.info(f"[claude-llm] Tool use: {block.name}")
                            # Push a text frame announcing tool use for narration
                            await self.push_frame(TextFrame(f"__tool_start:{block.name}"))

                elif isinstance(msg, ResultMessage):
                    if msg.is_error:
                        logger.error(f"[claude-llm] Result error: {msg.subtype}")
                    else:
                        logger.info("[claude-llm] Turn complete")
                    # A ResultMessage ends the turn.
                    break

        finally:
            self._processing = False
291
+
292
+
293
def _extract_last_user_message(context: OpenAILLMContext | LLMContext | object) -> str | None:
    """Extract the last user message text from a Pipecat LLM context.

    The context contains OpenAI-format messages. We find the last message
    with role="user" and extract its text content. Only that most recent user
    message is considered: if it carries no usable text, None is returned
    rather than falling back to an older user message, so a stale utterance
    is never re-sent.

    Args:
        context: Pipecat LLM context (OpenAILLMContext, LLMContext, or other)

    Returns:
        The last user message text, or None if no user message found or the
        last user message has no text
    """
    if isinstance(context, OpenAILLMContext):
        messages = context.get_messages()
    elif isinstance(context, LLMContext):
        messages = context.messages
    else:
        messages = getattr(context, "messages", None)

    if not messages:
        return None

    # Walk backwards to find the last user message
    for msg in reversed(messages):
        if isinstance(msg, dict):
            fields = msg
        elif hasattr(msg, "__dict__"):
            fields = vars(msg)
        else:
            continue

        if fields.get("role") != "user":
            continue

        content = fields.get("content")
        if isinstance(content, str):
            return content.strip() or None

        # Content might be a list of content blocks
        if isinstance(content, list):
            parts = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif isinstance(block, dict) and block.get("type") == "text":
                    piece = block.get("text")
                    # Skip missing/None text so the join below cannot raise.
                    if isinstance(piece, str):
                        parts.append(piece)
            return " ".join(parts).strip() or None

        # Newest user message has content of an unsupported type (e.g. None):
        # stop here instead of returning an older user turn.
        return None

    return None