voicecc 1.2.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,340 @@
1
+ """
2
+ Configuration, environment loading, prompt builder, and agent loader for the voice server.
3
+
4
+ Ports the TypeScript env.ts + prompt-builder.ts + agent-store.ts patterns to Python.
5
+
6
+ Responsibilities:
7
+ - Load environment variables from ~/.voicecc/.env
8
+ - Build system prompts with mode overlays and agent files
9
+ - Load agent config from ~/.claude-voice-agents/<agentId>/
10
+ - Provide typed VoiceServerConfig dataclass
11
+ """
12
+
13
+ import json
14
+ import os
15
+ from dataclasses import dataclass, field
16
+ from pathlib import Path
17
+
18
+ from dotenv import load_dotenv
19
+
20
+ # ============================================================================
21
+ # CONSTANTS
22
+ # ============================================================================
23
+
24
# Resolve the home directory once; both per-user directories hang off it.
_HOME_DIR = os.path.expanduser("~")

# Per-user directories: .env lives in the first, agent folders in the second.
DEFAULT_VOICECC_DIR = os.path.join(_HOME_DIR, ".voicecc")
DEFAULT_AGENTS_DIR = os.path.join(_HOME_DIR, ".claude-voice-agents")

# ElevenLabs defaults (voice IDs and model names).
DEFAULT_AGENT_VOICE_ID = "IKne3meq5aSn9XLyUdCD"  # Charlie
DEFAULT_NON_AGENT_VOICE_ID = "WrjxnKxK0m1uiaH0uteU"
DEFAULT_TTS_MODEL = "eleven_turbo_v2_5"
DEFAULT_STT_MODEL = "scribe_v1"

# Network ports and session limit used when the environment does not override them.
DEFAULT_WEBRTC_PORT = 7860
DEFAULT_API_PORT = 7861
DEFAULT_TWILIO_PORT = 8080  # not referenced in this module
DEFAULT_MAX_CONCURRENT_SESSIONS = 2

# Project root is the parent of voice-server/ (two levels above this file).
PROJECT_ROOT = str(Path(__file__).resolve().parent.parent)
# Prompt templates are read from <project root>/init/defaults.
DEFAULTS_DIR = os.path.join(PROJECT_ROOT, "init", "defaults")
38
+
39
+
40
+ # ============================================================================
41
+ # TYPES
42
+ # ============================================================================
43
+
44
@dataclass
class VoicePreference:
    """A single voice selection for one TTS provider."""

    # Voice identifier; for ElevenLabs this is the value returned as the voice ID.
    id: str
    # Name stored alongside the id in config.json (not read elsewhere in this module).
    name: str
49
+
50
+
51
@dataclass
class AgentVoiceConfig:
    """Voice preferences keyed by TTS provider; each provider is optional."""

    # Preference for the ElevenLabs provider, if configured.
    elevenlabs: VoicePreference | None = None
    # Preference for the local provider, if configured.
    local: VoicePreference | None = None
56
+
57
+
58
@dataclass
class AgentConfig:
    """Settings parsed from an agent's config.json."""

    # Populated from the "heartbeatIntervalMinutes" key.
    heartbeat_interval_minutes: int = 10
    # Populated from the "heartbeatTimeoutMinutes" key; None when absent.
    heartbeat_timeout_minutes: int | None = None
    # Populated from the "enabled" key; list_agents() filters on this flag.
    enabled: bool = True
    # Per-provider voice preferences, or None when config.json has no "voice".
    voice: AgentVoiceConfig | None = None
65
+
66
+
67
@dataclass
class Agent:
    """An agent's identifier together with the contents of its files."""

    # Name of the agent's directory under the agents directory.
    id: str
    # Stripped text of SOUL.md ("" when the file is missing).
    soul_md: str
    # Stripped text of MEMORY.md ("" when the file is missing).
    memory_md: str
    # Stripped text of HEARTBEAT.md ("" when the file is missing).
    heartbeat_md: str
    # Parsed config.json.
    config: AgentConfig
75
+
76
+
77
+ @dataclass
78
+ class VoiceServerConfig:
79
+ """Typed configuration for the voice server."""
80
+ webrtc_port: int
81
+ api_port: int
82
+ tunnel_url: str | None
83
+ elevenlabs_api_key: str
84
+ elevenlabs_voice_id: str
85
+ elevenlabs_tts_model: str
86
+ elevenlabs_stt_model: str
87
+ agents_dir: str
88
+ default_cwd: str
89
+ project_root: str
90
+ twilio_account_sid: str
91
+ twilio_auth_token: str
92
+ user_phone_number: str
93
+ max_concurrent_sessions: int
94
+
95
+
96
+ # ============================================================================
97
+ # MAIN HANDLERS
98
+ # ============================================================================
99
+
100
def load_config() -> VoiceServerConfig:
    """Load environment variables from the voicecc .env file and return a typed config.

    The .env file is looked up in $VOICECC_DIR (default: ~/.voicecc) and loaded
    with python-dotenv; every setting is then read from the process environment.

    Integer settings (WEBRTC_PORT, API_PORT, MAX_CONCURRENT_SESSIONS) fall back
    to their defaults when the variable is unset *or* set to an empty string,
    so a blank ``API_PORT=`` line in .env does not crash startup.

    Returns:
        VoiceServerConfig with all settings populated

    Raises:
        ValueError: if ELEVENLABS_API_KEY is missing or empty, or if an integer
            setting holds a non-numeric value.
    """
    voicecc_dir = os.environ.get("VOICECC_DIR", DEFAULT_VOICECC_DIR)
    env_path = os.path.join(voicecc_dir, ".env")
    load_dotenv(env_path)

    env = os.environ

    # Fail fast: nothing else works without the ElevenLabs key.
    api_key = env.get("ELEVENLABS_API_KEY", "")
    if not api_key:
        raise ValueError("ELEVENLABS_API_KEY is required in ~/.voicecc/.env")

    return VoiceServerConfig(
        # `or` (rather than a .get() default) also covers empty-string values.
        webrtc_port=int(env.get("WEBRTC_PORT") or DEFAULT_WEBRTC_PORT),
        api_port=int(env.get("API_PORT") or DEFAULT_API_PORT),
        tunnel_url=env.get("TUNNEL_URL"),
        elevenlabs_api_key=api_key,
        elevenlabs_voice_id=env.get("ELEVENLABS_VOICE_ID", DEFAULT_NON_AGENT_VOICE_ID),
        elevenlabs_tts_model=env.get("ELEVENLABS_MODEL_ID", DEFAULT_TTS_MODEL),
        elevenlabs_stt_model=env.get("ELEVENLABS_STT_MODEL_ID", DEFAULT_STT_MODEL),
        agents_dir=env.get("AGENTS_DIR", DEFAULT_AGENTS_DIR),
        default_cwd=env.get("DEFAULT_CWD", os.path.expanduser("~")),
        project_root=PROJECT_ROOT,
        twilio_account_sid=env.get("TWILIO_ACCOUNT_SID", ""),
        twilio_auth_token=env.get("TWILIO_AUTH_TOKEN", ""),
        user_phone_number=env.get("USER_PHONE_NUMBER", ""),
        max_concurrent_sessions=int(
            env.get("MAX_CONCURRENT_SESSIONS") or DEFAULT_MAX_CONCURRENT_SESSIONS
        ),
    )
135
+
136
+
137
def build_system_prompt(agent_id: str | None, overlay: str) -> str:
    """Assemble the system prompt from the base template, a mode overlay and agent files.

    The base ``system.md`` template has ``<<MODE_OVERLAY>>`` replaced with the
    selected overlay. When ``agent_id`` is truthy, ``<<AGENT_DIR>>`` and
    ``<<AGENT_FILES>>`` are additionally replaced with the agent's directory
    path and its SOUL/HEARTBEAT/MEMORY contents. When ``agent_id`` is falsy
    those two placeholders are left untouched.

    Args:
        agent_id: Agent identifier, or None for the default prompt
        overlay: "voice" or "text" -- selects the overlay file

    Returns:
        Complete system prompt string
    """
    # Read the base template before the overlay so failures surface in that order.
    template_text = _read_template("system.md")
    overlay_text = _read_overlay(overlay)
    prompt = template_text.replace("<<MODE_OVERLAY>>", overlay_text)

    if not agent_id:
        return prompt

    agent = load_agent(agent_id)
    sections = (
        f"<SOUL.md>\n{agent.soul_md}\n</SOUL.md>",
        f"<HEARTBEAT.md>\n{agent.heartbeat_md}\n</HEARTBEAT.md>",
        f"<MEMORY.md>\n{agent.memory_md}\n</MEMORY.md>",
    )
    agent_dir = os.path.join(DEFAULT_AGENTS_DIR, agent_id)

    # Directory placeholder first, then the file bundle.
    with_dir = prompt.replace("<<AGENT_DIR>>", agent_dir)
    return with_dir.replace("<<AGENT_FILES>>", "\n\n".join(sections))
170
+
171
+
172
def load_agent(agent_id: str) -> Agent:
    """Load one agent from ``DEFAULT_AGENTS_DIR/<agent_id>/``.

    SOUL.md, MEMORY.md and HEARTBEAT.md are read leniently (a missing file
    yields an empty string); config.json is parsed via ``_read_agent_config``.

    Args:
        agent_id: Agent identifier (the directory name)

    Returns:
        Agent with all file contents loaded

    Raises:
        FileNotFoundError: if the agent directory does not exist.
    """
    agent_dir = os.path.join(DEFAULT_AGENTS_DIR, agent_id)
    if not os.path.isdir(agent_dir):
        raise FileNotFoundError(f'Agent "{agent_id}" not found at {agent_dir}')

    def in_agent_dir(filename: str) -> str:
        # Absolute path of a file inside this agent's directory.
        return os.path.join(agent_dir, filename)

    # Keyword arguments are evaluated top to bottom: SOUL, MEMORY, HEARTBEAT, config.
    return Agent(
        id=agent_id,
        soul_md=_read_file(in_agent_dir("SOUL.md")),
        memory_md=_read_file(in_agent_dir("MEMORY.md")),
        heartbeat_md=_read_file(in_agent_dir("HEARTBEAT.md")),
        config=_read_agent_config(in_agent_dir("config.json")),
    )
200
+
201
+
202
def list_agents(agents_dir: str | None = None) -> list[Agent]:
    """List all agents whose config.json has ``enabled`` set (default True).

    Scans the agents directory for subdirectories containing config.json and
    loads each one *from the directory being scanned*, so the ``agents_dir``
    override applies to loading as well as to discovery. Agents that fail to
    load are reported on stdout and skipped.

    Args:
        agents_dir: Override agents directory path (defaults to DEFAULT_AGENTS_DIR)

    Returns:
        List of Agent objects with enabled=True, in os.listdir() order.
        Empty list if the directory does not exist.
    """
    dir_path = agents_dir or DEFAULT_AGENTS_DIR
    if not os.path.isdir(dir_path):
        return []

    agents: list[Agent] = []
    for entry in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry)
        if not os.path.isdir(entry_path):
            continue
        config_path = os.path.join(entry_path, "config.json")
        if not os.path.isfile(config_path):
            continue

        try:
            # Build the Agent from entry_path directly; load_agent() always
            # resolves against DEFAULT_AGENTS_DIR and would ignore agents_dir.
            agent = Agent(
                id=entry,
                soul_md=_read_file(os.path.join(entry_path, "SOUL.md")),
                memory_md=_read_file(os.path.join(entry_path, "MEMORY.md")),
                heartbeat_md=_read_file(os.path.join(entry_path, "HEARTBEAT.md")),
                config=_read_agent_config(config_path),
            )
        except Exception as e:
            # Best-effort listing: one broken agent must not hide the others.
            print(f"[config] Skipping agent {entry}: {e}")
            continue

        if agent.config.enabled:
            agents.append(agent)

    return agents
235
+
236
+
237
def get_agent_voice_id(agent_id: str | None) -> str:
    """Resolve the ElevenLabs voice ID to use for an agent.

    Resolution order:
      1. No agent_id -> DEFAULT_NON_AGENT_VOICE_ID.
      2. Agent has an ElevenLabs voice preference -> that preference's id.
      3. Otherwise (including when the agent cannot be found) ->
         DEFAULT_AGENT_VOICE_ID.

    Args:
        agent_id: Agent identifier, or None

    Returns:
        ElevenLabs voice ID string
    """
    if not agent_id:
        return DEFAULT_NON_AGENT_VOICE_ID

    try:
        voice = load_agent(agent_id).config.voice
    except FileNotFoundError:
        # Unknown agent (or missing config.json): use the agent default voice.
        return DEFAULT_AGENT_VOICE_ID

    if voice and voice.elevenlabs:
        return voice.elevenlabs.id
    return DEFAULT_AGENT_VOICE_ID
257
+
258
+
259
+ # ============================================================================
260
+ # HELPER FUNCTIONS
261
+ # ============================================================================
262
+
263
def _read_file(path: str) -> str:
    """Return the UTF-8 text of a file with surrounding whitespace stripped.

    Args:
        path: Path to the file

    Returns:
        Stripped file contents, or empty string if the file does not exist
    """
    try:
        handle = open(path, "r", encoding="utf-8")
    except FileNotFoundError:
        # A missing file is not an error for callers; they treat it as empty.
        return ""

    with handle:
        text = handle.read()
    return text.strip()
277
+
278
+
279
def _read_template(filename: str) -> str:
    """Load a template from the init/defaults directory (DEFAULTS_DIR).

    Args:
        filename: Name of the template file

    Returns:
        Template contents (stripped)

    Raises:
        FileNotFoundError: if the file is missing or is empty after stripping.
    """
    path = os.path.join(DEFAULTS_DIR, filename)
    # _read_file returns "" for a missing file, so an empty result means
    # "missing or blank" -- both are treated as not found.
    if content := _read_file(path):
        return content
    raise FileNotFoundError(f"Template not found: {path}")
293
+
294
+
295
def _read_overlay(overlay: str) -> str:
    """Load the overlay template for the given mode.

    Args:
        overlay: "voice" or "text"

    Returns:
        Overlay file contents

    Raises:
        ValueError: if ``overlay`` is neither "voice" nor "text".
    """
    if overlay == "voice":
        filename = "system-voice-overlay.md"
    elif overlay == "text":
        filename = "system-text-overlay.md"
    else:
        raise ValueError(f'Unknown overlay mode: "{overlay}". Expected "voice" or "text".')

    return _read_template(filename)
312
+
313
+
314
def _read_agent_config(config_path: str) -> AgentConfig:
    """Parse an agent's config.json into an AgentConfig dataclass.

    Recognised keys: ``heartbeatIntervalMinutes`` (default 10),
    ``heartbeatTimeoutMinutes`` (default None), ``enabled`` (default True) and
    ``voice``. Under ``voice``, both the ``elevenlabs`` and ``local`` providers
    are parsed; a provider that is absent or null yields None. A ``voice``
    value that is null (or not an object) is treated as "no voice config".

    Args:
        config_path: Path to config.json

    Returns:
        Parsed AgentConfig

    Raises:
        FileNotFoundError: if config.json does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if a provider entry is present but lacks "id" or "name".
    """
    with open(config_path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    def parse_preference(entry) -> VoicePreference | None:
        # Absent/null provider -> no preference; otherwise id and name are required.
        if not entry:
            return None
        return VoicePreference(id=entry["id"], name=entry["name"])

    voice_config = None
    voice_raw = raw.get("voice")
    if isinstance(voice_raw, dict):
        voice_config = AgentVoiceConfig(
            elevenlabs=parse_preference(voice_raw.get("elevenlabs")),
            local=parse_preference(voice_raw.get("local")),
        )

    return AgentConfig(
        heartbeat_interval_minutes=raw.get("heartbeatIntervalMinutes", 10),
        heartbeat_timeout_minutes=raw.get("heartbeatTimeoutMinutes"),
        enabled=raw.get("enabled", True),
        voice=voice_config,
    )
@@ -0,0 +1,128 @@
1
#!/usr/bin/env bash
#
# Start a Cloudflare quick tunnel and configure the Twilio phone number
# webhook to point at it, then start the voice pipeline server.
#
# Required env vars (from ~/.voicecc/.env or exported):
#   TWILIO_ACCOUNT_SID   - Twilio account SID
#   TWILIO_AUTH_TOKEN    - Twilio auth token
#   TWILIO_PHONE_NUMBER  - Twilio phone number (E.164, e.g. +15551234567)
#   ELEVENLABS_API_KEY   - ElevenLabs API key
#
# Optional env vars:
#   VOICECC_DIR  - directory containing .env (default: ~/.voicecc)
#   API_PORT     - local port the tunnel forwards to (default: 7861)
#
# Usage:
#   ./dev-server-start.sh

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Create venv and install dependencies if needed
if [[ ! -d "$SCRIPT_DIR/.venv" ]]; then
    echo "Creating virtual environment..."
    python3 -m venv "$SCRIPT_DIR/.venv"
fi
source "$SCRIPT_DIR/.venv/bin/activate"
pip install -q -r "$SCRIPT_DIR/requirements.txt"

# Load ~/.voicecc/.env if present (same as config.py).
# `set -a` exports every variable the file assigns.
VOICECC_DIR="${VOICECC_DIR:-$HOME/.voicecc}"
if [[ -f "$VOICECC_DIR/.env" ]]; then
    set -a
    source "$VOICECC_DIR/.env"
    set +a
fi

API_PORT="${API_PORT:-7861}"

# Type check — catch type errors before starting
echo "Running type check..."
cd "$SCRIPT_DIR"
if ! python3 -m pyright .; then
    echo "ERROR: Type check failed. Fix the errors above before starting." >&2
    exit 1
fi
echo "Type check passed."

# Validate required credentials (${!var:-} = indirect expansion, safe under `set -u`)
for var in TWILIO_ACCOUNT_SID TWILIO_AUTH_TOKEN TWILIO_PHONE_NUMBER ELEVENLABS_API_KEY; do
    if [[ -z "${!var:-}" ]]; then
        echo "ERROR: $var is not set. Add it to ~/.voicecc/.env or export it." >&2
        exit 1
    fi
done

# Check dependencies
if ! command -v cloudflared &>/dev/null; then
    echo "ERROR: cloudflared is not installed. brew install cloudflared" >&2
    exit 1
fi

# Start cloudflared quick tunnel in background, capture the URL from its log
TUNNEL_LOG=$(mktemp)
cloudflared tunnel --url "http://localhost:$API_PORT" 2>"$TUNNEL_LOG" &
TUNNEL_PID=$!

# Stop the tunnel and remove its log whenever this script exits.
cleanup() {
    echo ""
    echo "Shutting down tunnel (PID $TUNNEL_PID)..."
    kill "$TUNNEL_PID" 2>/dev/null || true
    rm -f "$TUNNEL_LOG"
}
trap cleanup EXIT

# Wait (up to 30s) for the tunnel URL to appear in the log
echo "Starting Cloudflare quick tunnel on port $API_PORT..."
TUNNEL_URL=""
for _ in $(seq 1 30); do
    # `|| true`: grep exits non-zero until the URL shows up; that must not trip `set -e`.
    TUNNEL_URL=$(grep -oE 'https://[a-zA-Z0-9_-]+(-[a-zA-Z0-9_-]+)+\.trycloudflare\.com' "$TUNNEL_LOG" | head -1 || true)
    if [[ -n "$TUNNEL_URL" ]]; then
        break
    fi
    sleep 1
done

if [[ -z "$TUNNEL_URL" ]]; then
    echo "ERROR: Could not get tunnel URL after 30s. cloudflared log:" >&2
    cat "$TUNNEL_LOG" >&2
    exit 1
fi

echo "Tunnel URL: $TUNNEL_URL"

TWILIO_API="https://api.twilio.com/2010-04-01/Accounts/$TWILIO_ACCOUNT_SID"

# Look up the phone number SID.
# -G + --data-urlencode lets curl URL-encode the number (+ → %2B) itself, so the
# value is never interpolated into Python source. --fail turns HTTP errors
# (e.g. bad credentials) into a non-zero exit instead of a silent error body.
if ! LOOKUP_JSON=$(curl -sS --fail -G \
    "$TWILIO_API/IncomingPhoneNumbers.json" \
    -u "$TWILIO_ACCOUNT_SID:$TWILIO_AUTH_TOKEN" \
    --data-urlencode "PhoneNumber=$TWILIO_PHONE_NUMBER"); then
    echo "ERROR: Twilio phone number lookup request failed. Check your credentials and network." >&2
    exit 1
fi

PHONE_SID=$(python3 -c "import sys,json; nums=json.load(sys.stdin).get('incoming_phone_numbers',[]); print(nums[0]['sid'] if nums else '')" <<<"$LOOKUP_JSON")

if [[ -z "$PHONE_SID" ]]; then
    echo "ERROR: Could not find phone number $TWILIO_PHONE_NUMBER in your Twilio account." >&2
    exit 1
fi

# Update the voice webhook URL
WEBHOOK_URL="$TUNNEL_URL/twilio/voice"
echo "Updating Twilio phone number $TWILIO_PHONE_NUMBER webhook to: $WEBHOOK_URL"

# --fail: do not report success when Twilio rejects the update.
if ! curl -sS --fail -X POST \
    "$TWILIO_API/IncomingPhoneNumbers/$PHONE_SID.json" \
    -u "$TWILIO_ACCOUNT_SID:$TWILIO_AUTH_TOKEN" \
    --data-urlencode "VoiceUrl=$WEBHOOK_URL" \
    --data-urlencode "VoiceMethod=POST" \
    > /dev/null; then
    echo "ERROR: Failed to update the Twilio webhook for $TWILIO_PHONE_NUMBER." >&2
    exit 1
fi

echo "Twilio webhook configured."
echo ""
echo "=== Ready ==="
echo "  Tunnel:  $TUNNEL_URL"
echo "  Webhook: $WEBHOOK_URL"
echo "  API:     http://localhost:$API_PORT"
echo ""

# Start the voice server with TUNNEL_URL set.
# Run it as a child instead of `exec`: exec replaces this shell, so the EXIT
# trap would never fire and the tunnel process and temp log would be left behind.
export TUNNEL_URL
cd "$SCRIPT_DIR"
python3 server.py