voicecc 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,312 @@
1
+ """
2
+ Text chat session manager for the Python voice server.
3
+
4
+ Port of chat-server.ts + claude-session.ts. Manages ClaudeSDKClient lifecycle
5
+ for text chat: lazy creation on first message, multi-turn reuse, inactivity
6
+ cleanup after 10 minutes.
7
+
8
+ Responsibilities:
9
+ - Create and reuse ClaudeSDKClient sessions keyed by device token
10
+ - Stream Claude responses as ChatSseEvent async generators
11
+ - Enforce max concurrent sessions
12
+ - Auto-cleanup inactive sessions on a 60-second timer
13
+ """
14
+
15
+ import asyncio
16
+ import logging
17
+ import time
18
+ from dataclasses import dataclass, field
19
+
20
+ from claude_agent_sdk import (
21
+ AssistantMessage,
22
+ ClaudeAgentOptions,
23
+ ClaudeSDKClient,
24
+ ResultMessage,
25
+ TextBlock,
26
+ ToolUseBlock,
27
+ )
28
+
29
+ from config import build_system_prompt, load_config, DEFAULT_AGENTS_DIR
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+ # ============================================================================
34
+ # CONSTANTS
35
+ # ============================================================================
36
+
37
# Idle time, in seconds, after which cleanup_inactive() closes a session.
INACTIVITY_TIMEOUT_SECONDS = 600  # 10 minutes
# Sleep time, in seconds, between passes of the background cleanup loop.
CLEANUP_INTERVAL_SECONDS = 60
39
+
40
+
41
+ # ============================================================================
42
+ # TYPES
43
+ # ============================================================================
44
+
45
+ @dataclass
46
+ class ChatSseEvent:
47
+ """SSE event sent to the client during text chat streaming.
48
+
49
+ Attributes:
50
+ type: Event type ("text_delta", "tool_start", "tool_end", "result", "error")
51
+ content: Text content or error message
52
+ tool_name: Tool name (only for tool_start events)
53
+ """
54
+ type: str
55
+ content: str
56
+ tool_name: str | None = None
57
+
58
+ def to_dict(self) -> dict:
59
+ """Serialize to a JSON-safe dict, omitting None fields."""
60
+ d: dict = {"type": self.type, "content": self.content}
61
+ if self.tool_name is not None:
62
+ d["toolName"] = self.tool_name
63
+ return d
64
+
65
+
66
@dataclass
class ChatSession:
    """State for one active text chat session held in _active_sessions.

    Attributes:
        session_key: Device token used as the session key
        client: Persistent ClaudeSDKClient reused across turns of the chat
        agent_id: Optional agent identifier for agent-specific prompts
        streaming: True while stream_message() is producing a response;
            stream_message() refuses to start a second stream while it is set
        last_activity: Unix timestamp (time.time()) of the last activity,
            compared against INACTIVITY_TIMEOUT_SECONDS by cleanup_inactive()
    """
    session_key: str
    client: ClaudeSDKClient
    agent_id: str | None = None
    streaming: bool = False
    # Defaults to the creation time, so a new session does not start out stale.
    last_activity: float = field(default_factory=time.time)
82
+
83
+
84
+ # ============================================================================
85
+ # STATE
86
+ # ============================================================================
87
+
88
# Live chat sessions, keyed by session key (the device token).
_active_sessions: dict[str, ChatSession] = {}
# Handle of the background cleanup task; None while the timer is not running.
_cleanup_task: asyncio.Task | None = None
90
+
91
+
92
+ # ============================================================================
93
+ # MAIN HANDLERS
94
+ # ============================================================================
95
+
96
async def get_or_create_session(session_key: str, agent_id: str | None = None) -> ChatSession:
    """Get an existing chat session or create a new one.

    On first call for a session_key, creates and connects a ClaudeSDKClient
    with the text-mode system prompt. Subsequent calls return the existing
    session and refresh its activity timestamp. Enforces the max concurrent
    session limit from config.

    Connecting the client suspends this coroutine, so another request may
    register a session (for the same key or a different one) in the meantime.
    The registry is therefore re-checked after connecting, and the freshly
    connected client is disconnected instead of being leaked or pushing the
    registry past the limit.

    Args:
        session_key: Device token to key the session on
        agent_id: Optional agent ID for agent-specific prompts

    Returns:
        The active ChatSession

    Raises:
        RuntimeError: If max concurrent sessions exceeded
    """
    import os  # local import: this module has no file-level `import os`

    existing = _active_sessions.get(session_key)
    if existing:
        existing.last_activity = time.time()
        return existing

    config = load_config()
    limit_message = f"Max concurrent sessions ({config.max_concurrent_sessions}) reached"
    if len(_active_sessions) >= config.max_concurrent_sessions:
        raise RuntimeError(limit_message)

    system_prompt = build_system_prompt(agent_id, "text")

    # Work inside the agent's own directory when it exists, else the default cwd.
    cwd = config.default_cwd
    if agent_id:
        agent_dir = os.path.join(DEFAULT_AGENTS_DIR, agent_id)
        if os.path.isdir(agent_dir):
            cwd = agent_dir

    options = ClaudeAgentOptions(
        system_prompt=system_prompt,
        cwd=cwd,
        allowed_tools=[],
        permission_mode="bypassPermissions",
        include_partial_messages=True,
        max_thinking_tokens=10000,
    )

    client = ClaudeSDKClient(options=options)
    await client.connect()

    # The await above yielded to the event loop: re-validate before registering.
    raced = _active_sessions.get(session_key)
    over_limit = raced is None and len(_active_sessions) >= config.max_concurrent_sessions
    if raced is not None or over_limit:
        try:
            await client.disconnect()
        except Exception as e:
            logger.warning(f"[chat] Error disconnecting surplus client for {session_key}: {e}")
        if raced is not None:
            raced.last_activity = time.time()
            return raced
        raise RuntimeError(limit_message)

    session = ChatSession(
        session_key=session_key,
        client=client,
        agent_id=agent_id,
    )
    _active_sessions[session_key] = session
    logger.info(f"[chat] Session created, key: {session_key}")

    return session
155
+
156
+
157
async def stream_message(session_key: str, text: str):
    """Send one user message and yield SSE events for Claude's reply.

    Only one stream may run per session at a time; the session's `streaming`
    flag is set for the duration and always cleared on exit.

    Args:
        session_key: Device token identifying the session
        text: User message text

    Yields:
        ChatSseEvent objects: "text_delta" for non-empty text blocks,
        "tool_start" for tool-use blocks, "error" for an error result or an
        exception, and a final "result" once the response loop has ended

    Raises:
        RuntimeError: If no active session or already streaming
    """
    session = _active_sessions.get(session_key)
    if not session:
        raise RuntimeError("No active session")
    if session.streaming:
        raise RuntimeError("ALREADY_STREAMING")

    session.last_activity = time.time()
    session.streaming = True

    try:
        await session.client.query(text)

        async for message in session.client.receive_response():
            if isinstance(message, AssistantMessage):
                for part in message.content:
                    if isinstance(part, TextBlock) and part.text:
                        yield ChatSseEvent(type="text_delta", content=part.text)
                    elif isinstance(part, ToolUseBlock):
                        yield ChatSseEvent(
                            type="tool_start", content="", tool_name=part.name
                        )
                continue

            if isinstance(message, ResultMessage):
                if message.is_error:
                    yield ChatSseEvent(
                        type="error", content=message.subtype or "Unknown error"
                    )
                # A result message marks the end of this turn.
                break

        yield ChatSseEvent(type="result", content="")

    except Exception as err:
        logger.error(f"[chat] Stream error for {session_key}: {err}")
        yield ChatSseEvent(type="error", content=str(err))

    finally:
        # Runs on normal completion, on error, and when the consumer closes the generator.
        session.streaming = False
        session.last_activity = time.time()
212
+
213
+
214
async def close_session(session_key: str) -> None:
    """Remove a chat session from the registry and disconnect its client.

    Unknown keys are ignored. A failing disconnect is logged as a warning and
    does not propagate; the session is removed either way.

    Args:
        session_key: Device token identifying the session
    """
    closing = _active_sessions.pop(session_key, None)
    if closing is None:
        return

    try:
        await closing.client.disconnect()
    except Exception as err:
        logger.warning(f"[chat] Error disconnecting session {session_key}: {err}")

    logger.info(f"[chat] Session closed, key: {session_key}")
230
+
231
+
232
async def interrupt_session(session_key: str) -> bool:
    """Interrupt the response currently streaming for a session.

    An interrupt error is logged as a warning; the session is still marked as
    no longer streaming and True is returned.

    Args:
        session_key: Device token identifying the session

    Returns:
        True if a streaming session was interrupted, False otherwise
    """
    target = _active_sessions.get(session_key)
    if not (target and target.streaming):
        return False

    try:
        await target.client.interrupt()
    except Exception as err:
        logger.warning(f"[chat] Interrupt error for {session_key}: {err}")

    target.last_activity = time.time()
    target.streaming = False
    logger.info(f"[chat] Session interrupted, key: {session_key}")
    return True
254
+
255
+
256
def has_session(session_key: str) -> bool:
    """Report whether a session is registered under the given key.

    Args:
        session_key: Device token to check

    Returns:
        True if a session exists
    """
    is_registered = session_key in _active_sessions
    return is_registered
266
+
267
+
268
async def cleanup_inactive() -> None:
    """Close sessions that have been inactive for 10+ minutes.

    Called on a periodic timer. Sessions that are currently streaming are
    skipped: their last_activity is only refreshed when a stream starts and
    ends, so a long-running response would otherwise have its client
    disconnected mid-stream. Such a session becomes eligible again once the
    stream finishes and stays idle for the full timeout.

    The stale keys are collected before any await, and close_session()
    ignores keys that are already gone, so overlapping calls are harmless.
    """
    now = time.time()
    stale_keys = [
        key
        for key, session in _active_sessions.items()
        if not session.streaming
        and now - session.last_activity > INACTIVITY_TIMEOUT_SECONDS
    ]

    for key in stale_keys:
        logger.info(f"[chat] Session timed out due to inactivity, key: {key}")
        await close_session(key)
283
+
284
+
285
+ # ============================================================================
286
+ # HELPER FUNCTIONS
287
+ # ============================================================================
288
+
289
async def _cleanup_loop() -> None:
    """Run cleanup_inactive() forever, pausing CLEANUP_INTERVAL_SECONDS before each pass.

    Errors from a pass are logged and the loop keeps going.
    """
    while True:
        await asyncio.sleep(CLEANUP_INTERVAL_SECONDS)
        try:
            await cleanup_inactive()
        except Exception as err:
            logger.error(f"[chat] Cleanup error: {err}")
297
+
298
+
299
def start_cleanup_timer() -> None:
    """Launch the background cleanup task unless one is already registered.

    Call once at server startup.
    """
    global _cleanup_task
    if _cleanup_task is not None:
        return
    _cleanup_task = asyncio.create_task(_cleanup_loop())
    logger.info("[chat] Inactivity cleanup timer started")
305
+
306
+
307
def stop_cleanup_timer() -> None:
    """Cancel the background cleanup task, if any, and forget its handle."""
    global _cleanup_task
    running, _cleanup_task = _cleanup_task, None
    if running is not None:
        running.cancel()
@@ -0,0 +1,340 @@
1
+ """
2
+ Configuration, environment loading, prompt builder, and agent loader for the voice server.
3
+
4
+ Ports the TypeScript env.ts + prompt-builder.ts + agent-store.ts patterns to Python.
5
+
6
+ Responsibilities:
7
+ - Load environment variables from ~/.voicecc/.env
8
+ - Build system prompts with mode overlays and agent files
9
+ - Load agent config from ~/.claude-voice-agents/<agentId>/
10
+ - Provide typed VoiceServerConfig dataclass
11
+ """
12
+
13
+ import json
14
+ import os
15
+ from dataclasses import dataclass, field
16
+ from pathlib import Path
17
+
18
+ from dotenv import load_dotenv
19
+
20
+ # ============================================================================
21
+ # CONSTANTS
22
+ # ============================================================================
23
+
24
# Default locations under the user's home directory.
DEFAULT_VOICECC_DIR = os.path.join(os.path.expanduser("~"), ".voicecc")  # holds the .env file
DEFAULT_AGENTS_DIR = os.path.join(os.path.expanduser("~"), ".claude-voice-agents")  # one subdirectory per agent
# ElevenLabs defaults used when no environment or agent override applies.
DEFAULT_AGENT_VOICE_ID = "IKne3meq5aSn9XLyUdCD"  # Charlie
DEFAULT_NON_AGENT_VOICE_ID = "WrjxnKxK0m1uiaH0uteU"
DEFAULT_TTS_MODEL = "eleven_turbo_v2_5"
DEFAULT_STT_MODEL = "scribe_v1"
# Default ports and session limit.
DEFAULT_WEBRTC_PORT = 7860
DEFAULT_API_PORT = 7861
DEFAULT_TWILIO_PORT = 8080  # NOTE(review): not read by load_config() in this file
DEFAULT_MAX_CONCURRENT_SESSIONS = 2

# Project root is the parent of voice-server/
PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
# Directory the prompt templates are read from (see _read_template).
DEFAULTS_DIR = os.path.join(PROJECT_ROOT, "init", "defaults")
38
+
39
+
40
+ # ============================================================================
41
+ # TYPES
42
+ # ============================================================================
43
+
44
@dataclass
class VoicePreference:
    """Voice preference for a TTS provider.

    Attributes:
        id: Provider voice identifier (returned by get_agent_voice_id for ElevenLabs)
        name: Name stored alongside the id in config.json (presumably a display name)
    """
    id: str
    name: str
49
+
50
+
51
@dataclass
class AgentVoiceConfig:
    """Per-provider voice preferences.

    Attributes:
        elevenlabs: Preferred ElevenLabs voice, or None if not configured
        local: Preferred voice for the local provider, or None if not configured
    """
    elevenlabs: VoicePreference | None = None
    local: VoicePreference | None = None
56
+
57
+
58
@dataclass
class AgentConfig:
    """Configuration stored in config.json for each agent.

    Attributes:
        heartbeat_interval_minutes: Value of "heartbeatIntervalMinutes" (default 10)
        heartbeat_timeout_minutes: Value of "heartbeatTimeoutMinutes", or None if absent
        enabled: Value of "enabled" (default True); list_agents() returns only enabled agents
        voice: Per-provider voice preferences, or None if config.json has no "voice" entry
    """
    heartbeat_interval_minutes: int = 10
    heartbeat_timeout_minutes: int | None = None
    enabled: bool = True
    voice: AgentVoiceConfig | None = None
65
+
66
+
67
@dataclass
class Agent:
    """Full agent data including all file contents.

    Attributes:
        id: Agent identifier (the name of the agent's directory)
        soul_md: Contents of SOUL.md, or "" if the file is missing
        memory_md: Contents of MEMORY.md, or "" if the file is missing
        heartbeat_md: Contents of HEARTBEAT.md, or "" if the file is missing
        config: Parsed config.json
    """
    id: str
    soul_md: str
    memory_md: str
    heartbeat_md: str
    config: AgentConfig
75
+
76
+
77
@dataclass
class VoiceServerConfig:
    """Typed configuration for the voice server, as populated by load_config().

    Attributes:
        webrtc_port: WEBRTC_PORT, default DEFAULT_WEBRTC_PORT
        api_port: API_PORT, default DEFAULT_API_PORT
        tunnel_url: TUNNEL_URL, or None when unset
        elevenlabs_api_key: ELEVENLABS_API_KEY (required)
        elevenlabs_voice_id: ELEVENLABS_VOICE_ID, default DEFAULT_NON_AGENT_VOICE_ID
        elevenlabs_tts_model: ELEVENLABS_MODEL_ID, default DEFAULT_TTS_MODEL
        elevenlabs_stt_model: ELEVENLABS_STT_MODEL_ID, default DEFAULT_STT_MODEL
        agents_dir: AGENTS_DIR, default DEFAULT_AGENTS_DIR
        default_cwd: DEFAULT_CWD, default the user's home directory
        project_root: PROJECT_ROOT of this installation
        twilio_account_sid: TWILIO_ACCOUNT_SID, "" when unset
        twilio_auth_token: TWILIO_AUTH_TOKEN, "" when unset
        user_phone_number: USER_PHONE_NUMBER, "" when unset
        max_concurrent_sessions: MAX_CONCURRENT_SESSIONS, default
            DEFAULT_MAX_CONCURRENT_SESSIONS
    """
    webrtc_port: int
    api_port: int
    tunnel_url: str | None
    elevenlabs_api_key: str
    elevenlabs_voice_id: str
    elevenlabs_tts_model: str
    elevenlabs_stt_model: str
    agents_dir: str
    default_cwd: str
    project_root: str
    twilio_account_sid: str
    twilio_auth_token: str
    user_phone_number: str
    max_concurrent_sessions: int
94
+
95
+
96
+ # ============================================================================
97
+ # MAIN HANDLERS
98
+ # ============================================================================
99
+
100
def load_config() -> VoiceServerConfig:
    """Load environment variables from the voicecc .env file and return a typed config.

    Reads <VOICECC_DIR>/.env (default ~/.voicecc/.env) using python-dotenv,
    then extracts all values from the process environment. Fails fast if
    ELEVENLABS_API_KEY is missing.

    Integer settings (ports, session limit) fall back to their defaults when
    the variable is unset or set to an empty string.

    Returns:
        VoiceServerConfig with all settings populated

    Raises:
        ValueError: If ELEVENLABS_API_KEY is missing or empty, or an integer
            setting holds a non-numeric value
    """
    voicecc_dir = os.environ.get("VOICECC_DIR", DEFAULT_VOICECC_DIR)
    env_path = os.path.join(voicecc_dir, ".env")
    load_dotenv(env_path)

    env = os.environ

    def _int_setting(name: str, default: int) -> int:
        # `or` rather than a .get() default, so an empty value also falls back.
        return int(env.get(name) or default)

    api_key = env.get("ELEVENLABS_API_KEY", "")
    if not api_key:
        # Report the path actually consulted; VOICECC_DIR may have moved it.
        raise ValueError(f"ELEVENLABS_API_KEY is required in {env_path}")

    return VoiceServerConfig(
        webrtc_port=_int_setting("WEBRTC_PORT", DEFAULT_WEBRTC_PORT),
        api_port=_int_setting("API_PORT", DEFAULT_API_PORT),
        tunnel_url=env.get("TUNNEL_URL"),
        elevenlabs_api_key=api_key,
        elevenlabs_voice_id=env.get("ELEVENLABS_VOICE_ID", DEFAULT_NON_AGENT_VOICE_ID),
        elevenlabs_tts_model=env.get("ELEVENLABS_MODEL_ID", DEFAULT_TTS_MODEL),
        elevenlabs_stt_model=env.get("ELEVENLABS_STT_MODEL_ID", DEFAULT_STT_MODEL),
        agents_dir=env.get("AGENTS_DIR", DEFAULT_AGENTS_DIR),
        default_cwd=env.get("DEFAULT_CWD", os.path.expanduser("~")),
        project_root=PROJECT_ROOT,
        twilio_account_sid=env.get("TWILIO_ACCOUNT_SID", ""),
        twilio_auth_token=env.get("TWILIO_AUTH_TOKEN", ""),
        user_phone_number=env.get("USER_PHONE_NUMBER", ""),
        max_concurrent_sessions=_int_setting(
            "MAX_CONCURRENT_SESSIONS", DEFAULT_MAX_CONCURRENT_SESSIONS
        ),
    )
135
+
136
+
137
def build_system_prompt(agent_id: str | None, overlay: str) -> str:
    """Assemble the system prompt from the base template, a mode overlay and agent files.

    The <<MODE_OVERLAY>> marker in system.md is replaced by the overlay text.
    When an agent is given, <<AGENT_DIR>> is replaced by the agent's directory
    path and <<AGENT_FILES>> by its SOUL/HEARTBEAT/MEMORY contents wrapped in
    tags. Without an agent, those two markers are left untouched.

    Args:
        agent_id: Agent identifier, or None for default prompt
        overlay: "voice" or "text" -- selects the overlay file

    Returns:
        Complete system prompt string
    """
    template = _read_template("system.md")
    prompt = template.replace("<<MODE_OVERLAY>>", _read_overlay(overlay))
    if not agent_id:
        return prompt

    agent = load_agent(agent_id)
    soul_section = f"<SOUL.md>\n{agent.soul_md}\n</SOUL.md>"
    heartbeat_section = f"<HEARTBEAT.md>\n{agent.heartbeat_md}\n</HEARTBEAT.md>"
    memory_section = f"<MEMORY.md>\n{agent.memory_md}\n</MEMORY.md>"
    agent_files = "\n\n".join((soul_section, heartbeat_section, memory_section))

    agent_dir = os.path.join(DEFAULT_AGENTS_DIR, agent_id)
    # The directory marker is substituted before the files are injected.
    with_dir = prompt.replace("<<AGENT_DIR>>", agent_dir)
    return with_dir.replace("<<AGENT_FILES>>", agent_files)
170
+
171
+
172
def load_agent(agent_id: str) -> Agent:
    """Read agent data from ~/.claude-voice-agents/<agentId>/.

    Reads SOUL.md, MEMORY.md, HEARTBEAT.md, and config.json.
    Fails fast if the agent directory does not exist.

    The id must be a single directory name: empty ids, "." / ".." and ids
    containing a path separator are rejected, so a crafted id cannot make
    this function read outside the agents directory.

    Args:
        agent_id: Agent identifier

    Returns:
        Agent with all file contents loaded

    Raises:
        FileNotFoundError: If the id is not a valid directory name, the agent
            directory does not exist, or config.json is missing
    """
    # basename() differs from the input whenever the id contains a separator.
    is_single_component = (
        bool(agent_id)
        and agent_id not in (".", "..")
        and os.path.basename(agent_id) == agent_id
    )
    if not is_single_component:
        # Same exception type as a missing agent, so existing callers' handling applies.
        raise FileNotFoundError(f'Agent "{agent_id}" not found: invalid agent id')

    agent_dir = os.path.join(DEFAULT_AGENTS_DIR, agent_id)
    if not os.path.isdir(agent_dir):
        raise FileNotFoundError(f'Agent "{agent_id}" not found at {agent_dir}')

    soul_md = _read_file(os.path.join(agent_dir, "SOUL.md"))
    memory_md = _read_file(os.path.join(agent_dir, "MEMORY.md"))
    heartbeat_md = _read_file(os.path.join(agent_dir, "HEARTBEAT.md"))
    config = _read_agent_config(os.path.join(agent_dir, "config.json"))

    return Agent(
        id=agent_id,
        soul_md=soul_md,
        memory_md=memory_md,
        heartbeat_md=heartbeat_md,
        config=config,
    )
200
+
201
+
202
def list_agents(agents_dir: str | None = None) -> list[Agent]:
    """List all agents that have heartbeat enabled.

    Scans the agents directory for subdirectories with config.json and
    returns only those with enabled=True. Agents are loaded from the
    directory that is scanned, so the agents_dir override is honoured for
    reading the agent files as well as for discovering them.

    Agents that fail to load (e.g. malformed config.json) are reported on
    stdout and skipped.

    Args:
        agents_dir: Override agents directory path (defaults to DEFAULT_AGENTS_DIR)

    Returns:
        List of Agent objects with enabled=True
    """
    dir_path = agents_dir or DEFAULT_AGENTS_DIR
    if not os.path.isdir(dir_path):
        return []

    agents: list[Agent] = []
    for entry in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry)
        if not os.path.isdir(entry_path):
            continue
        config_path = os.path.join(entry_path, "config.json")
        if not os.path.isfile(config_path):
            continue

        try:
            # Read from entry_path, not DEFAULT_AGENTS_DIR, so overrides work.
            agent = Agent(
                id=entry,
                soul_md=_read_file(os.path.join(entry_path, "SOUL.md")),
                memory_md=_read_file(os.path.join(entry_path, "MEMORY.md")),
                heartbeat_md=_read_file(os.path.join(entry_path, "HEARTBEAT.md")),
                config=_read_agent_config(config_path),
            )
        except Exception as e:
            # Best effort: one broken agent must not hide the others.
            print(f"[config] Skipping agent {entry}: {e}")
            continue

        if agent.config.enabled:
            agents.append(agent)

    return agents
235
+
236
+
237
def get_agent_voice_id(agent_id: str | None) -> str:
    """Resolve the ElevenLabs voice ID to use for an agent.

    Without an agent, the non-agent default is returned. With an agent, its
    configured ElevenLabs voice is used when present; otherwise (including
    when the agent cannot be found) the agent default is returned.

    Args:
        agent_id: Agent identifier, or None

    Returns:
        ElevenLabs voice ID string
    """
    if not agent_id:
        return DEFAULT_NON_AGENT_VOICE_ID

    try:
        voice = load_agent(agent_id).config.voice
    except FileNotFoundError:
        return DEFAULT_AGENT_VOICE_ID

    if voice and voice.elevenlabs:
        return voice.elevenlabs.id
    return DEFAULT_AGENT_VOICE_ID
257
+
258
+
259
+ # ============================================================================
260
+ # HELPER FUNCTIONS
261
+ # ============================================================================
262
+
263
+ def _read_file(path: str) -> str:
264
+ """Read a file and return its contents as a string.
265
+
266
+ Args:
267
+ path: Absolute path to the file
268
+
269
+ Returns:
270
+ File contents, or empty string if file does not exist
271
+ """
272
+ try:
273
+ with open(path, "r", encoding="utf-8") as f:
274
+ return f.read().strip()
275
+ except FileNotFoundError:
276
+ return ""
277
+
278
+
279
def _read_template(filename: str) -> str:
    """Load a template from init/defaults/, failing if it is missing or empty.

    Args:
        filename: Name of the template file

    Returns:
        Template contents

    Raises:
        FileNotFoundError: If the file does not exist or has no content
    """
    template_path = os.path.join(DEFAULTS_DIR, filename)
    if text := _read_file(template_path):
        return text
    raise FileNotFoundError(f"Template not found: {template_path}")
293
+
294
+
295
+ def _read_overlay(overlay: str) -> str:
296
+ """Read a mode overlay file (voice or text).
297
+
298
+ Args:
299
+ overlay: "voice" or "text"
300
+
301
+ Returns:
302
+ Overlay file contents
303
+ """
304
+ filename_map = {
305
+ "voice": "system-voice-overlay.md",
306
+ "text": "system-text-overlay.md",
307
+ }
308
+ filename = filename_map.get(overlay)
309
+ if not filename:
310
+ raise ValueError(f'Unknown overlay mode: "{overlay}". Expected "voice" or "text".')
311
+ return _read_template(filename)
312
+
313
+
314
def _read_agent_config(config_path: str) -> AgentConfig:
    """Parse an agent's config.json into an AgentConfig dataclass.

    Both provider entries of the "voice" object ("elevenlabs" and "local")
    are parsed. A "voice" or provider entry that is absent or null is treated
    as not configured.

    Args:
        config_path: Path to config.json

    Returns:
        Parsed AgentConfig

    Raises:
        FileNotFoundError: If config.json does not exist
        json.JSONDecodeError: If config.json is not valid JSON
        KeyError: If a configured voice preference lacks "id" or "name"
    """
    with open(config_path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    def _parse_preference(entry) -> VoicePreference | None:
        # Absent and null entries both mean "no preference for this provider".
        if not entry:
            return None
        return VoicePreference(id=entry["id"], name=entry["name"])

    voice_config = None
    voice_raw = raw.get("voice")
    if "voice" in raw and voice_raw is not None:
        voice_config = AgentVoiceConfig(
            elevenlabs=_parse_preference(voice_raw.get("elevenlabs")),
            local=_parse_preference(voice_raw.get("local")),
        )

    return AgentConfig(
        heartbeat_interval_minutes=raw.get("heartbeatIntervalMinutes", 10),
        heartbeat_timeout_minutes=raw.get("heartbeatTimeoutMinutes"),
        enabled=raw.get("enabled", True),
        voice=voice_config,
    )