voicecc 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/voicecc.js CHANGED
@@ -95,13 +95,88 @@ function generatePassword() {
95
95
  return randomBytes(18).toString("base64url");
96
96
  }
97
97
 
98
/**
 * Locate a Python 3.12+ interpreter on PATH.
 *
 * Tries version-pinned names first (python3.12, python3.13), then the
 * generic names, and parses `--version` output to confirm the interpreter
 * is at least 3.12.
 *
 * @returns {string|null} The first suitable command name, or null if none found.
 */
function findPython() {
  const candidates = ["python3.12", "python3.13", "python3", "python"];
  const versionRe = /Python (\d+)\.(\d+)/;
  for (const candidate of candidates) {
    if (!commandExists(candidate)) continue;
    try {
      // Merge stderr into stdout: some Pythons print the version to stderr.
      const version = execSync(`${candidate} --version 2>&1`, { encoding: "utf-8" }).trim();
      const match = versionRe.exec(version);
      if (match) {
        // Always pass an explicit radix to parseInt.
        const major = Number.parseInt(match[1], 10);
        const minor = Number.parseInt(match[2], 10);
        if (major > 3 || (major === 3 && minor >= 12)) {
          return candidate;
        }
      }
    } catch { /* interpreter exists but failed to run; try the next one */ }
  }
  return null;
}
113
+
114
/**
 * Install a package (or space-separated list of packages) via whichever
 * supported Linux package manager is present, checked in priority order.
 *
 * @param {string} pkg - Package name(s) to install
 * @throws {Error} If none of apt-get, dnf, or yum is available
 */
function linuxInstallPackage(pkg) {
  const managers = [
    ["apt-get", `apt-get update -qq && apt-get install -y -qq ${pkg} 2>&1`],
    ["dnf", `dnf install -y ${pkg} 2>&1`],
    ["yum", `yum install -y ${pkg} 2>&1`],
  ];
  for (const [manager, command] of managers) {
    if (commandExists(manager)) {
      execSync(command, { stdio: "inherit" });
      return;
    }
  }
  throw new Error("No supported package manager found (apt-get, dnf, yum).");
}
125
+
126
/**
 * Return the command name of a Python 3.12+ interpreter, installing one
 * automatically on Linux if none is found. Exits the process when Python
 * is unavailable and cannot be installed.
 *
 * @returns {string} Command name of a working Python 3.12+ interpreter
 */
function ensurePython() {
  const existing = findPython();
  if (existing) {
    return existing;
  }

  // Automatic installation is only attempted on Linux.
  if (process.platform !== "linux") {
    console.error("ERROR: Python 3.12+ is required but not found.");
    console.error("Install Python 3.12+ and run 'voicecc' again.");
    process.exit(1);
  }

  console.log("Python 3.12+ not found. Installing...");
  try {
    linuxInstallPackage("python3.12 python3.12-venv python3.12-dev");
  } catch (err) {
    console.error(`Failed to install Python: ${err.message}`);
    process.exit(1);
  }

  // Re-probe PATH after installation.
  const installed = findPython();
  if (!installed) {
    console.error("Python installation completed but Python 3.12+ still not found.");
    process.exit(1);
  }
  return installed;
}
151
+
152
/**
 * Verify that the given Python interpreter can import the `venv` module,
 * installing the matching pythonX.Y-venv distro package on Linux when it
 * cannot. Exits the process on other platforms or if installation fails.
 *
 * @param {string} systemPython - Command name of the Python interpreter
 */
function ensureVenvModule(systemPython) {
  try {
    execSync(`${systemPython} -c "import venv" 2>&1`, { encoding: "utf-8" });
    return; // venv module is importable; nothing to do
  } catch { /* venv not available */ }

  if (process.platform !== "linux") {
    console.error("ERROR: Python venv module is missing.");
    console.error("Install it and run 'voicecc' again.");
    process.exit(1);
  }

  // Derive the package name (e.g. python3.12-venv) from the interpreter's
  // reported version, falling back to the generic python3-venv package.
  const versionMatch = execSync(`${systemPython} --version 2>&1`, { encoding: "utf-8" })
    .trim()
    .match(/Python (\d+)\.(\d+)/);
  const venvPkg = versionMatch ? `python${versionMatch[1]}.${versionMatch[2]}-venv` : "python3-venv";
  console.log(`Python venv module missing. Installing ${venvPkg}...`);
  try {
    linuxInstallPackage(venvPkg);
  } catch (err) {
    console.error(`Failed to install ${venvPkg}: ${err.message}`);
    process.exit(1);
  }
}
174
+
98
175
  /**
99
176
  * Ensure the Python virtual environment exists and dependencies are installed.
100
177
  *
101
178
  * Creates voice-server/.venv if missing, installs requirements.txt, and
102
179
  * stores a checksum so subsequent runs skip installation unless deps change.
103
- *
104
- * @returns true if the venv is ready, false if Python is unavailable
105
180
  */
106
181
  function ensurePythonVenv() {
107
182
  const voiceServerDir = join(PKG_ROOT, "voice-server");
@@ -114,72 +189,13 @@ function ensurePythonVenv() {
114
189
  return true; // No voice-server requirements, nothing to do
115
190
  }
116
191
 
117
- // Find a working Python 3.12+
118
- const pythonCandidates = ["python3.12", "python3.13", "python3", "python"];
119
- let systemPython = null;
120
- for (const candidate of pythonCandidates) {
121
- if (commandExists(candidate)) {
122
- try {
123
- const version = execSync(`${candidate} --version 2>&1`, { encoding: "utf-8" }).trim();
124
- const match = version.match(/Python (\d+)\.(\d+)/);
125
- if (match && (parseInt(match[1]) > 3 || (parseInt(match[1]) === 3 && parseInt(match[2]) >= 12))) {
126
- systemPython = candidate;
127
- break;
128
- }
129
- } catch { /* skip */ }
130
- }
131
- }
192
+ // Step 1: Ensure Python 3.12+ is installed
193
+ const systemPython = ensurePython();
132
194
 
133
- if (!systemPython) {
134
- // Attempt to install Python 3.12 automatically on Linux
135
- if (process.platform === "linux") {
136
- console.log("Python 3.12+ not found. Installing automatically...");
137
- try {
138
- if (commandExists("apt-get")) {
139
- execSync("apt-get update -qq && apt-get install -y -qq python3.12 python3.12-venv python3.12-dev 2>&1", { stdio: "inherit" });
140
- } else if (commandExists("dnf")) {
141
- execSync("dnf install -y python3.12 2>&1", { stdio: "inherit" });
142
- } else if (commandExists("yum")) {
143
- execSync("yum install -y python3.12 2>&1", { stdio: "inherit" });
144
- } else {
145
- console.error("No supported package manager found (apt-get, dnf, yum).");
146
- console.error("Install Python 3.12+ manually and run 'voicecc' again.");
147
- process.exit(1);
148
- }
149
- // Re-check for Python after installation
150
- for (const candidate of pythonCandidates) {
151
- if (commandExists(candidate)) {
152
- try {
153
- const version = execSync(`${candidate} --version 2>&1`, { encoding: "utf-8" }).trim();
154
- const match = version.match(/Python (\d+)\.(\d+)/);
155
- if (match && (parseInt(match[1]) > 3 || (parseInt(match[1]) === 3 && parseInt(match[2]) >= 12))) {
156
- systemPython = candidate;
157
- console.log(`Python installed successfully: ${version}`);
158
- break;
159
- }
160
- } catch { /* skip */ }
161
- }
162
- }
163
- if (!systemPython) {
164
- console.error("Python installation completed but Python 3.12+ still not found.");
165
- console.error("Install Python 3.12+ manually and run 'voicecc' again.");
166
- process.exit(1);
167
- }
168
- } catch (err) {
169
- console.error(`Failed to install Python 3.12: ${err.message}`);
170
- console.error("Install Python 3.12+ manually and run 'voicecc' again.");
171
- process.exit(1);
172
- }
173
- } else {
174
- console.error("");
175
- console.error("ERROR: Python 3.12+ is required but not found.");
176
- console.error("Install Python 3.12+ and run 'voicecc' again.");
177
- console.error("");
178
- process.exit(1);
179
- }
180
- }
195
+ // Step 2: Ensure venv module is available
196
+ ensureVenvModule(systemPython);
181
197
 
182
- // Check if venv needs to be created
198
+ // Step 3: Create venv if needed
183
199
  if (!existsSync(venvPython)) {
184
200
  console.log("Setting up Python environment for voice server...");
185
201
  try {
@@ -190,7 +206,7 @@ function ensurePythonVenv() {
190
206
  }
191
207
  }
192
208
 
193
- // Check if requirements have changed since last install
209
+ // Step 4: Install/update dependencies if requirements changed
194
210
  const currentChecksum = (() => {
195
211
  try {
196
212
  const content = readFileSync(requirementsFile, "utf-8");
@@ -207,7 +223,6 @@ function ensurePythonVenv() {
207
223
  return true; // Dependencies up to date
208
224
  }
209
225
 
210
- // Install/update dependencies
211
226
  console.log("Installing Python dependencies for voice server...");
212
227
  try {
213
228
  execSync(`${venvPython} -m pip install -r ${requirementsFile}`, {
@@ -740,6 +755,15 @@ if (!commandExists("claude")) {
740
755
  // Runs on every start but skips pip install if requirements.txt hasn't changed.
741
756
  ensurePythonVenv();
742
757
 
758
// Hard check: verify the venv actually exists after setup
const expectedVenvPython = join(PKG_ROOT, "voice-server", ".venv", "bin", "python");
if (!existsSync(expectedVenvPython)) {
  [
    `ERROR: Python venv not found at ${expectedVenvPython}`,
    "The voice-server directory or its venv is missing from the installation.",
    "Try reinstalling: npm install -g voicecc",
  ].forEach((line) => console.error(line));
  process.exit(1);
}
766
+
743
767
  // If already running, show info and exit
744
768
  if (isRunning()) {
745
769
  showInfo();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voicecc",
3
- "version": "1.2.2",
3
+ "version": "1.2.4",
4
4
  "description": "Voice Agent Platform running on Claude Code -- create and deploy conversational voice agents with ElevenLabs STT/TTS and VAD",
5
5
  "repository": {
6
6
  "type": "git",
@@ -24,6 +24,7 @@
24
24
  "files": [
25
25
  "bin/",
26
26
  "server/",
27
+ "voice-server/",
27
28
  "dashboard/dist/",
28
29
  "dashboard/server.ts",
29
30
  "dashboard/routes/",
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,333 @@
1
+ """
2
+ Custom Pipecat LLMService wrapping the Python Claude Agent SDK (ClaudeSDKClient).
3
+
4
+ Uses ClaudeSDKClient for persistent multi-turn voice sessions with full tool use.
5
+ Does NOT use Pipecat's built-in context accumulation -- the Claude session maintains
6
+ its own conversation history internally.
7
+
8
+ Responsibilities:
9
+ - Override process_frame to handle LLM context frames from Pipecat aggregators
10
+ - Extract only the last user message from Pipecat context (SDK tracks history)
11
+ - Clear Pipecat context after each turn to prevent unbounded memory growth
12
+ - Support existing_client for heartbeat session handoff
13
+ - Support initial_prompt for agent-speaks-first flows
14
+ """
15
+
16
+ import asyncio
17
+ import logging
18
+ from dataclasses import dataclass
19
+
20
+ from claude_agent_sdk import (
21
+ AssistantMessage,
22
+ ClaudeAgentOptions,
23
+ ClaudeSDKClient,
24
+ ResultMessage,
25
+ TextBlock,
26
+ ToolUseBlock,
27
+ )
28
+ from pipecat.frames.frames import (
29
+ CancelFrame,
30
+ EndFrame,
31
+ Frame,
32
+ FunctionCallsStartedFrame,
33
+ InterruptionFrame,
34
+ LLMContextFrame,
35
+ LLMFullResponseEndFrame,
36
+ LLMFullResponseStartFrame,
37
+ LLMMessagesFrame,
38
+ LLMTextFrame,
39
+ StartFrame,
40
+ TextFrame,
41
+ )
42
+ from pipecat.processors.aggregators.llm_context import LLMContext
43
+ from pipecat.processors.aggregators.openai_llm_context import (
44
+ OpenAILLMContext,
45
+ OpenAILLMContextFrame,
46
+ )
47
+ from pipecat.processors.frame_processor import FrameDirection
48
+ from pipecat.services.llm_service import LLMService
49
+
50
+ logger = logging.getLogger(__name__)
51
+
52
+
53
+ # ============================================================================
54
+ # TYPES
55
+ # ============================================================================
56
+
57
@dataclass
class ClaudeLLMServiceConfig:
    """Configuration for ClaudeLLMService.

    Attributes:
        cwd: Working directory for the Claude Code session.
        system_prompt: System prompt for voice mode.
        allowed_tools: Tool allowlist (empty list = all tools allowed).
        initial_prompt: Optional first message so the agent speaks first.
        existing_client: Pre-existing ClaudeSDKClient (e.g. from heartbeat
            handoff).
    """

    cwd: str
    system_prompt: str
    allowed_tools: list[str] | None = None
    initial_prompt: str | None = None
    existing_client: ClaudeSDKClient | None = None
73
+
74
+
75
+ # ============================================================================
76
+ # MAIN HANDLERS
77
+ # ============================================================================
78
+
79
class ClaudeLLMService(LLMService):
    """Pipecat LLMService that wraps ClaudeSDKClient for voice conversations.

    Intercepts LLM context frames from the user aggregator, extracts the last
    user message, sends it to Claude via the SDK, and pushes text frames
    downstream for TTS.
    """

    def __init__(self, config: ClaudeLLMServiceConfig, **kwargs):
        super().__init__(**kwargs)
        self._config = config
        self._client: ClaudeSDKClient | None = config.existing_client
        self._connected = config.existing_client is not None
        self._initial_prompt_sent = False
        self._processing = False
        self._current_task: asyncio.Task | None = None

        # Initialize LLMSettings fields — Claude SDK manages these internally,
        # so we set them all to None (unsupported).
        self._settings.model = None
        self._settings.system_instruction = None
        self._settings.temperature = None
        self._settings.max_tokens = None
        self._settings.top_p = None
        self._settings.top_k = None
        self._settings.frequency_penalty = None
        self._settings.presence_penalty = None
        self._settings.seed = None
        self._settings.filter_incomplete_user_turns = None
        self._settings.user_turn_completion_config = None

    async def start(self, frame: StartFrame):
        """Handle pipeline start. Sends initial_prompt if configured."""
        await super().start(frame)
        if self._config.initial_prompt and not self._initial_prompt_sent:
            self._initial_prompt_sent = True
            await self._ensure_client()
            await self._send_to_claude(self._config.initial_prompt)

    async def stop(self, frame: EndFrame):
        """Handle pipeline stop. Disconnects the Claude session."""
        await self.close()
        await super().stop(frame)

    async def cancel(self, frame: CancelFrame):
        """Handle pipeline cancel. Disconnects the Claude session."""
        await self.close()
        await super().cancel(frame)

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process incoming frames.

        Handles context frames from Pipecat's aggregators by extracting the last
        user message and sending it to Claude. All other frames pass through.

        Args:
            frame: The incoming frame
            direction: Frame direction (upstream/downstream)
        """
        await super().process_frame(frame, direction)

        context = None
        if isinstance(frame, OpenAILLMContextFrame):
            context = frame.context
        elif isinstance(frame, LLMContextFrame):
            context = frame.context
        elif isinstance(frame, LLMMessagesFrame):
            context = OpenAILLMContext.from_messages(frame.messages)
        elif isinstance(frame, InterruptionFrame):
            await self.interrupt()
            await self.push_frame(frame, direction)
            return
        else:
            await self.push_frame(frame, direction)
            return

        if context:
            # Extract the last user message text from the Pipecat context
            user_text = _extract_last_user_message(context)
            if not user_text:
                logger.warning("[claude-llm] No user message found in context")
                return

            # Clear Pipecat context to prevent unbounded growth
            # (Claude SDK maintains its own conversation history)
            if isinstance(context, OpenAILLMContext):
                context.set_messages([])
            elif isinstance(context, LLMContext):
                context.messages.clear()

            # Cancel any in-flight query before starting a new one
            await self._cancel_current_task()

            await self._ensure_client()

            async def _run_query():
                try:
                    await self.push_frame(LLMFullResponseStartFrame())
                    await self.start_processing_metrics()
                    await self._send_to_claude(user_text)
                except asyncio.CancelledError:
                    logger.info("[claude-llm] Query cancelled by new input")
                except Exception as e:
                    logger.error(f"[claude-llm] Error during Claude query: {e}")
                    await self.push_error(error_msg=f"Claude query error: {e}", exception=e)
                finally:
                    # Always close the response envelope so downstream
                    # processors (TTS, aggregators) are not left waiting.
                    await self.stop_processing_metrics()
                    await self.push_frame(LLMFullResponseEndFrame())

            self._current_task = asyncio.create_task(_run_query())
            await self._current_task

    async def _cancel_current_task(self) -> None:
        """Cancel the in-flight query task if one is running."""
        if self._current_task and not self._current_task.done():
            self._current_task.cancel()
            try:
                await self._current_task
            except (asyncio.CancelledError, Exception):
                # Best-effort: the task is being discarded either way.
                pass
        self._current_task = None

    async def interrupt(self) -> None:
        """Interrupt the current Claude response and cancel the query task."""
        await self._cancel_current_task()
        if self._client and self._connected:
            try:
                await self._client.interrupt()
            except Exception as e:
                logger.warning(f"[claude-llm] Interrupt error: {e}")

    async def close(self) -> None:
        """Disconnect the Claude session."""
        if self._client and self._connected:
            try:
                await self._client.disconnect()
            except Exception as e:
                logger.warning(f"[claude-llm] Disconnect error: {e}")
            finally:
                self._connected = False
                self._client = None

    # ============================================================================
    # HELPER FUNCTIONS
    # ============================================================================

    async def _ensure_client(self) -> None:
        """Create and connect ClaudeSDKClient if not already connected.

        Uses existing_client if provided in config, otherwise creates a new one.
        """
        if self._client and self._connected:
            return

        if not self._client:
            options = ClaudeAgentOptions(
                system_prompt=self._config.system_prompt,
                cwd=self._config.cwd,
                allowed_tools=self._config.allowed_tools or [],
                permission_mode="bypassPermissions",
                include_partial_messages=True,
                max_thinking_tokens=10000,
            )
            self._client = ClaudeSDKClient(options=options)

        await self._client.connect()
        self._connected = True
        logger.info("[claude-llm] Claude session connected")

    async def _send_to_claude(self, text: str) -> None:
        """Send a user message to Claude and push response text frames downstream.

        Iterates over the streaming response, extracting text deltas and tool use
        events. Text is pushed as LLMTextFrame for TTS. Tool starts are pushed as
        TextFrame markers of the form "__tool_start:<name>" for the narration
        processor (NOT as FunctionCallsStartedFrame).

        Args:
            text: The user message to send

        Raises:
            RuntimeError: If the Claude client is not connected
        """
        if not self._client:
            raise RuntimeError("Claude client not connected")

        self._processing = True
        has_streamed = False

        try:
            await self._client.query(text)

            async for msg in self._client.receive_response():
                if isinstance(msg, AssistantMessage):
                    # Process content blocks from the assistant message
                    for block in msg.content:
                        if isinstance(block, TextBlock) and block.text:
                            if not has_streamed:
                                has_streamed = True
                                await self.start_ttfb_metrics()
                                await self.stop_ttfb_metrics()
                            await self.push_frame(LLMTextFrame(block.text))
                        elif isinstance(block, ToolUseBlock):
                            logger.info(f"[claude-llm] Tool use: {block.name}")
                            # Push a text frame announcing tool use for narration
                            await self.push_frame(TextFrame(f"__tool_start:{block.name}"))

                elif isinstance(msg, ResultMessage):
                    if msg.is_error:
                        logger.error(f"[claude-llm] Result error: {msg.subtype}")
                    else:
                        logger.info("[claude-llm] Turn complete")
                    break

        finally:
            self._processing = False
291
+
292
+
293
def _extract_last_user_message(context: OpenAILLMContext | LLMContext | object) -> str | None:
    """Extract the last user message text from a Pipecat LLM context.

    The context contains OpenAI-format messages. We find the last message
    with role="user" and extract its text content.

    Args:
        context: Pipecat LLM context (OpenAILLMContext, LLMContext, or other)

    Returns:
        The last user message text, or None if no user message found
    """
    if isinstance(context, OpenAILLMContext):
        messages = context.get_messages()
    elif isinstance(context, LLMContext):
        messages = context.messages
    else:
        messages = getattr(context, "messages", [])

    if not messages:
        return None

    # Walk backwards to find the last user message
    for msg in reversed(messages):
        if isinstance(msg, dict):
            msg_dict = msg
        elif hasattr(msg, "__dict__"):
            msg_dict = vars(msg)
        else:
            msg_dict = {}

        if msg_dict.get("role") != "user":
            continue

        content = msg_dict.get("content", "")
        if isinstance(content, str):
            return content.strip() or None
        if isinstance(content, list):
            # Content may be a list of content blocks; gather the text parts.
            pieces = []
            for block in content:
                if isinstance(block, dict) and block.get("type") == "text":
                    pieces.append(block.get("text", ""))
                elif isinstance(block, str):
                    pieces.append(block)
            combined = " ".join(pieces).strip()
            return combined or None
        # Unrecognized content shape: keep scanning older messages.

    return None