voicecc 1.0.10 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/README.md +34 -17
  2. package/bin/voicecc.js +115 -8
  3. package/dashboard/dist/assets/index-Cl1_P8eo.js +11 -0
  4. package/dashboard/dist/index.html +1 -1
  5. package/dashboard/routes/agents.ts +113 -0
  6. package/dashboard/routes/auth.ts +1 -20
  7. package/dashboard/routes/browser-call.ts +5 -14
  8. package/dashboard/routes/claude-md.ts +6 -1
  9. package/dashboard/routes/conversations.ts +6 -1
  10. package/dashboard/routes/integrations.ts +9 -18
  11. package/dashboard/routes/mcp-servers.ts +2 -27
  12. package/dashboard/routes/providers.ts +94 -0
  13. package/dashboard/routes/settings.ts +2 -2
  14. package/dashboard/routes/tunnel.ts +6 -8
  15. package/dashboard/routes/twilio.ts +43 -13
  16. package/dashboard/routes/version.ts +102 -0
  17. package/dashboard/routes/webrtc.ts +1 -1
  18. package/dashboard/server.ts +14 -3
  19. package/init/defaults/heartbeat.md +0 -0
  20. package/init/defaults/soul.md +1 -0
  21. package/init/defaults/system-heartbeat.md +1 -0
  22. package/init/defaults/system.md +11 -0
  23. package/package.json +11 -17
  24. package/{run.ts → server/index.ts} +30 -17
  25. package/server/services/agent-store.ts +236 -0
  26. package/server/services/browser-call-manager.ts +69 -0
  27. package/server/services/heartbeat.ts +402 -0
  28. package/server/services/tunnel.ts +123 -0
  29. package/server/services/twilio-manager.ts +104 -0
  30. package/server/voice/assets/chime.wav +0 -0
  31. package/{sidecar → server/voice}/audio-adapter.ts +1 -1
  32. package/server/voice/audio-inactivity.test.ts +108 -0
  33. package/server/voice/audio-inactivity.ts +91 -0
  34. package/{sidecar → server/voice}/browser-audio-playback.test.ts +2 -2
  35. package/{sidecar → server/voice}/browser-server.ts +30 -125
  36. package/{sidecar → server/voice}/chime.test.ts +1 -1
  37. package/{sidecar → server/voice}/claude-session.ts +15 -8
  38. package/server/voice/stt-elevenlabs.ts +210 -0
  39. package/server/voice/stt-provider.ts +106 -0
  40. package/server/voice/tts-elevenlabs-hiss.test.ts +183 -0
  41. package/server/voice/tts-elevenlabs.ts +397 -0
  42. package/server/voice/tts-provider.ts +155 -0
  43. package/{sidecar → server/voice}/twilio-audio.ts +2 -2
  44. package/{sidecar → server/voice}/twilio-server.ts +225 -132
  45. package/{sidecar → server/voice}/types.ts +97 -25
  46. package/{sidecar → server/voice}/vad.ts +1 -1
  47. package/server/voice/voice-loop-bugs.test.ts +251 -0
  48. package/server/voice/voice-server.ts +126 -0
  49. package/{sidecar → server/voice}/voice-session.ts +49 -33
  50. package/tsconfig.json +2 -2
  51. package/.claude-plugin/plugin.json +0 -6
  52. package/dashboard/dist/assets/index-dAYfRls7.js +0 -11
  53. package/dashboard/routes/voice.ts +0 -48
  54. package/scripts/postinstall.js +0 -202
  55. package/services/browser-call-manager.ts +0 -106
  56. package/services/tunnel.ts +0 -204
  57. package/services/twilio-manager.ts +0 -126
  58. package/sidecar/audio-capture.ts +0 -220
  59. package/sidecar/index.ts +0 -83
  60. package/sidecar/local-audio.ts +0 -126
  61. package/sidecar/mic-vpio.swift +0 -484
  62. package/sidecar/mock-tts-server-tagged.mjs +0 -132
  63. package/sidecar/scripts/generate-startup-audio.py +0 -79
  64. package/sidecar/sherpa-onnx-node.d.ts +0 -4
  65. package/sidecar/stt.ts +0 -199
  66. package/sidecar/tts-server.py +0 -193
  67. package/sidecar/tts.ts +0 -481
  68. package/sidecar/voice-loop-bugs.test.ts +0 -522
  69. package/skills/voice/SKILL.md +0 -26
  70. package/{services → server/services}/device-pairing.ts +0 -0
  71. package/{services → server/services}/env.ts +0 -0
  72. package/{sidecar → server/voice}/assets/startup.pcm +0 -0
  73. package/{sidecar → server/voice}/browser-audio.ts +0 -0
  74. package/{sidecar → server/voice}/chime.ts +0 -0
  75. package/{sidecar → server/voice}/endpointing.ts +0 -0
  76. package/{sidecar → server/voice}/mic-vpio +0 -0
  77. package/{sidecar → server/voice}/narration.ts +0 -0
  78. package/{sidecar → server/voice}/session-lock.ts +0 -0
package/README.md CHANGED
@@ -1,41 +1,58 @@
1
1
  # Voice CC
2
2
 
3
- A Claude Code plugin for hands-free voice interaction with local speech-to-text, text-to-speech, and voice activity detection.
3
+ A Claude Code plugin for hands-free voice interaction with ElevenLabs speech-to-text and text-to-speech, plus Silero voice activity detection.
4
+
5
+ ## Project Structure
6
+
7
+ ```
8
+ server/ Backend: voice pipeline + orchestration services
9
+   voice/ Real-time audio: STT, TTS, VAD, session management
10
+   services/ Orchestration: tunnel, Twilio, browser calls, agents
11
+   index.ts Entry point (boots dashboard + auto-starts integrations)
12
+ dashboard/ Web UI (Vite + React) + API routes (Hono)
13
+ lander/ Static landing page
14
+ init/ Default prompt templates for new agents
15
+ bin/ CLI entry point (voicecc command)
16
+ ```
4
17
 
5
18
  ## Installation
6
19
 
7
20
  ### Prerequisites
8
21
 
9
- - macOS with Apple Silicon (M1/M2/M3/M4)
22
+ - macOS or Linux
10
23
  - Node.js 18+
11
- - Python 3.10+
12
- - Homebrew
24
+ - An ElevenLabs API key
13
25
 
14
26
  ### Install
15
27
 
16
28
  ```bash
17
- # 1. Install system dependencies
18
- xcode-select --install
19
- brew install espeak-ng cloudflared
20
-
21
- # 2. Install Voice CC
29
+ # 1. Install Voice CC
22
30
  npm install -g voicecc
23
31
 
24
- # 3. Start the dashboard
32
+ # 2. Start the dashboard
25
33
  voicecc
26
34
  ```
27
35
 
28
- The postinstall script handles sox, the Whisper model, Python venv, and TTS dependencies automatically.
36
+ ## Development
29
37
 
30
- ## How It Works
38
+ ```bash
39
+ # 1. Install dependencies
40
+ npm install
41
+
42
+ # 2. Start the backend server
43
+ npm start
31
44
 
32
- The voice loop runs locally with zero external API calls except to Claude:
45
+ # 3. In a separate terminal, start the dashboard with hot reload
46
+ npm run dev:dashboard
47
+ ```
48
+
49
+ ## How It Works
33
50
 
34
- 1. **Mic capture**: VPIO (macOS Voice Processing IO) records 16kHz mono PCM with echo cancellation
51
+ 1. **Mic capture**: Browser captures 16kHz mono PCM via WebRTC
35
52
  2. **Voice activity detection**: Silero VAD v5 detects speech segments
36
- 3. **Speech-to-text**: sherpa-onnx (Whisper ONNX model) transcribes audio locally
53
+ 3. **Speech-to-text**: ElevenLabs Scribe API transcribes audio
37
54
  4. **Endpointing**: VAD silence-based turn detection
38
55
  5. **Claude inference**: Transcript sent to Claude Code Agent SDK session with streaming response
39
56
  6. **Narration**: Claude's response stripped of markdown and split into sentences
40
- 7. **Text-to-speech**: Kokoro-82M via mlx-audio on Apple Silicon GPU (~8x realtime)
41
- 8. **Speaker playback**: Audio output through VPIO at 24kHz with echo cancellation
57
+ 7. **Text-to-speech**: ElevenLabs streaming TTS API generates audio
58
+ 8. **Speaker playback**: Audio output through browser at 24kHz
package/bin/voicecc.js CHANGED
@@ -3,29 +3,136 @@
3
3
  /**
4
4
  * CLI entry point for the voicecc command.
5
5
  *
6
- * Checks if first-run setup is needed (compile mic-vpio, Python venv, etc.)
7
- * and runs it with visible output. Then spawns `tsx run.ts` for the dashboard.
6
+ * - On first run (no .env), launches an interactive setup wizard
7
+ * - Copies the init/CLAUDE.md template into the package root on every launch, if the template exists
8
+ * - Spawns the dashboard server
8
9
  */
9
10
 
10
11
  import { spawn } from "node:child_process";
12
+ import { copyFileSync, existsSync } from "node:fs";
13
+ import { writeFile, readFile } from "node:fs/promises";
14
+ import { createInterface } from "node:readline";
15
+ import { randomBytes } from "node:crypto";
11
16
  import { dirname, join } from "node:path";
12
17
  import { fileURLToPath } from "node:url";
13
18
 
14
19
  const __dirname = dirname(fileURLToPath(import.meta.url));
15
20
  const PKG_ROOT = join(__dirname, "..");
16
21
  const TSX_BIN = join(PKG_ROOT, "node_modules", ".bin", "tsx");
22
+ const ENV_PATH = join(PKG_ROOT, ".env");
17
23
 
18
- // Run setup if needed (first run or incomplete install)
19
24
  process.chdir(PKG_ROOT);
20
- const { needsSetup, runSetup } = await import("../scripts/postinstall.js");
21
25
 
22
- if (needsSetup()) {
23
- console.log("[voicecc] Running first-time setup...\n");
24
- runSetup();
26
+ // ============================================================================
27
+ // SETUP WIZARD
28
+ // ============================================================================
29
+
30
+ /**
31
+ * Prompt the user for a single line of input.
32
+ *
33
+ * @param rl - readline interface
34
+ * @param question - the prompt text
35
+ * @returns the user's trimmed answer
36
+ */
37
+ function ask(rl, question) {
38
+ return new Promise((resolve) => {
39
+ rl.question(question, (answer) => resolve(answer.trim()));
40
+ });
41
+ }
42
+
43
+ /**
44
+ * Generate a cryptographically random password (24 URL-safe characters).
45
+ *
46
+ * @returns generated password string
47
+ */
48
+ function generatePassword() {
49
+ return randomBytes(18).toString("base64url");
50
+ }
51
+
52
+ /**
53
+ * Run the first-run setup wizard.
54
+ * Prompts for ElevenLabs API key and dashboard password configuration.
55
+ * Writes results to .env.
56
+ */
57
+ async function runSetupWizard() {
58
+ const rl = createInterface({ input: process.stdin, output: process.stdout });
59
+
60
+ console.log("");
61
+ console.log("========================================");
62
+ console.log(" Welcome to VoiceCC Setup! ");
63
+ console.log("========================================");
64
+ console.log("");
65
+
66
+ // ElevenLabs API key
67
+ console.log("VoiceCC uses ElevenLabs for speech recognition and text-to-speech.");
68
+ console.log("You can get a free API key at: https://elevenlabs.io");
69
+ console.log("");
70
+ const apiKey = await ask(rl, "Paste your ElevenLabs API key (or press Enter to skip): ");
71
+ if (!apiKey) {
72
+ console.log("Skipped. You can add it later from the dashboard.");
73
+ }
74
+
75
+ // Dashboard password
76
+ console.log("");
77
+ console.log("Would you like to protect your dashboard with a password?");
78
+ console.log("Anyone with access to the dashboard can control your voice agents.");
79
+ console.log("");
80
+ console.log(" 1) Yes, generate a password for me (recommended)");
81
+ console.log(" 2) No, leave it open (not recommended)");
82
+ console.log("");
83
+ const passwordChoice = await ask(rl, "Choose [1/2]: ");
84
+
85
+ let password = "";
86
+ if (passwordChoice === "2") {
87
+ console.log("");
88
+ console.log("WARNING: Your dashboard will be open to anyone who can reach it.");
89
+ } else {
90
+ password = generatePassword();
91
+ console.log("");
92
+ console.log("========================================");
93
+ console.log(" Your dashboard login (save this!) ");
94
+ console.log("========================================");
95
+ console.log("");
96
+ console.log(` Username: admin`);
97
+ console.log(` Password: ${password}`);
98
+ console.log("");
99
+ console.log(" \x1b[31mThis will NOT be shown again.\x1b[0m");
100
+ console.log(" Your browser will ask for these when");
101
+ console.log(" you open the dashboard.");
102
+ console.log("========================================");
103
+ console.log("");
104
+ await ask(rl, "Have you saved the password? Press Enter to continue. ");
105
+ }
106
+
107
+ rl.close();
108
+
109
+ // Build .env content
110
+ const lines = [];
111
+ if (apiKey) lines.push(`ELEVENLABS_API_KEY=${apiKey}`);
112
+ if (password) lines.push(`DASHBOARD_PASSWORD=${password}`);
113
+ await writeFile(ENV_PATH, lines.join("\n") + "\n", "utf-8");
114
+
115
+ console.log("All done! Starting VoiceCC...");
116
+ console.log("");
117
+ }
118
+
119
+ // ============================================================================
120
+ // MAIN ENTRYPOINT
121
+ // ============================================================================
122
+
123
+ // Copy CLAUDE.md template if available
124
+ const claudeMdSrc = join("init", "CLAUDE.md");
125
+ if (existsSync(claudeMdSrc)) {
126
+ copyFileSync(claudeMdSrc, "CLAUDE.md");
127
+ }
128
+
129
+ // Run setup wizard on first run (no .env file)
130
+ if (!existsSync(ENV_PATH)) {
131
+ await runSetupWizard();
25
132
  }
26
133
 
27
134
  // Start the dashboard
28
- const child = spawn(TSX_BIN, ["run.ts"], {
135
+ const child = spawn(TSX_BIN, ["server/index.ts"], {
29
136
  cwd: PKG_ROOT,
30
137
  stdio: "inherit",
31
138
  });