npm - voicecc - Versions diffs - 1.0.10 → 1.1.0 - Mend

voicecc 1.0.10 → 1.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (78)

package/README.md +34 -17
package/bin/voicecc.js +115 -8
package/dashboard/dist/assets/index-Cl1_P8eo.js +11 -0
package/dashboard/dist/index.html +1 -1
package/dashboard/routes/agents.ts +113 -0
package/dashboard/routes/auth.ts +1 -20
package/dashboard/routes/browser-call.ts +5 -14
package/dashboard/routes/claude-md.ts +6 -1
package/dashboard/routes/conversations.ts +6 -1
package/dashboard/routes/integrations.ts +9 -18
package/dashboard/routes/mcp-servers.ts +2 -27
package/dashboard/routes/providers.ts +94 -0
package/dashboard/routes/settings.ts +2 -2
package/dashboard/routes/tunnel.ts +6 -8
package/dashboard/routes/twilio.ts +43 -13
package/dashboard/routes/version.ts +102 -0
package/dashboard/routes/webrtc.ts +1 -1
package/dashboard/server.ts +14 -3
package/init/defaults/heartbeat.md +0 -0
package/init/defaults/soul.md +1 -0
package/init/defaults/system-heartbeat.md +1 -0
package/init/defaults/system.md +11 -0
package/package.json +11 -17
package/{run.ts → server/index.ts} +30 -17
package/server/services/agent-store.ts +236 -0
package/server/services/browser-call-manager.ts +69 -0
package/server/services/heartbeat.ts +402 -0
package/server/services/tunnel.ts +123 -0
package/server/services/twilio-manager.ts +104 -0
package/server/voice/assets/chime.wav +0 -0
package/{sidecar → server/voice}/audio-adapter.ts +1 -1
package/server/voice/audio-inactivity.test.ts +108 -0
package/server/voice/audio-inactivity.ts +91 -0
package/{sidecar → server/voice}/browser-audio-playback.test.ts +2 -2
package/{sidecar → server/voice}/browser-server.ts +30 -125
package/{sidecar → server/voice}/chime.test.ts +1 -1
package/{sidecar → server/voice}/claude-session.ts +15 -8
package/server/voice/stt-elevenlabs.ts +210 -0
package/server/voice/stt-provider.ts +106 -0
package/server/voice/tts-elevenlabs-hiss.test.ts +183 -0
package/server/voice/tts-elevenlabs.ts +397 -0
package/server/voice/tts-provider.ts +155 -0
package/{sidecar → server/voice}/twilio-audio.ts +2 -2
package/{sidecar → server/voice}/twilio-server.ts +225 -132
package/{sidecar → server/voice}/types.ts +97 -25
package/{sidecar → server/voice}/vad.ts +1 -1
package/server/voice/voice-loop-bugs.test.ts +251 -0
package/server/voice/voice-server.ts +126 -0
package/{sidecar → server/voice}/voice-session.ts +49 -33
package/tsconfig.json +2 -2
package/.claude-plugin/plugin.json +0 -6
package/dashboard/dist/assets/index-dAYfRls7.js +0 -11
package/dashboard/routes/voice.ts +0 -48
package/scripts/postinstall.js +0 -202
package/services/browser-call-manager.ts +0 -106
package/services/tunnel.ts +0 -204
package/services/twilio-manager.ts +0 -126
package/sidecar/audio-capture.ts +0 -220
package/sidecar/index.ts +0 -83
package/sidecar/local-audio.ts +0 -126
package/sidecar/mic-vpio.swift +0 -484
package/sidecar/mock-tts-server-tagged.mjs +0 -132
package/sidecar/scripts/generate-startup-audio.py +0 -79
package/sidecar/sherpa-onnx-node.d.ts +0 -4
package/sidecar/stt.ts +0 -199
package/sidecar/tts-server.py +0 -193
package/sidecar/tts.ts +0 -481
package/sidecar/voice-loop-bugs.test.ts +0 -522
package/skills/voice/SKILL.md +0 -26
/package/{services → server/services}/device-pairing.ts +0 -0
/package/{services → server/services}/env.ts +0 -0
/package/{sidecar → server/voice}/assets/startup.pcm +0 -0
/package/{sidecar → server/voice}/browser-audio.ts +0 -0
/package/{sidecar → server/voice}/chime.ts +0 -0
/package/{sidecar → server/voice}/endpointing.ts +0 -0
/package/{sidecar → server/voice}/mic-vpio +0 -0
/package/{sidecar → server/voice}/narration.ts +0 -0
/package/{sidecar → server/voice}/session-lock.ts +0 -0

package/README.md CHANGED Viewed

@@ -1,41 +1,58 @@
 # Voice CC
-A Claude Code plugin for hands-free voice interaction with local speech-to-text, text-to-speech, and voice activity detection.
+A Claude Code plugin for hands-free voice interaction with ElevenLabs speech-to-text and text-to-speech, and Silero voice activity detection.
+## Project Structure
+```
+server/             Backend: voice pipeline + orchestration services
+  voice/            Real-time audio: STT, TTS, VAD, session management
+  services/         Orchestration: tunnel, Twilio, browser calls, agents
+  index.ts          Entry point (boots dashboard + auto-starts integrations)
+dashboard/          Web UI (Vite + React) + API routes (Hono)
+lander/             Static landing page
+init/               Default prompt templates for new agents
+bin/                CLI entry point (voicecc command)
+```
 ## Installation
 ### Prerequisites
-- macOS with Apple Silicon (M1/M2/M3/M4)
+- macOS or Linux
 - Node.js 18+
-- Python 3.10+
-- Homebrew
+- An ElevenLabs API key
 ### Install
 ```bash
-# 1. Install system dependencies
-xcode-select --install
-brew install espeak-ng cloudflared
-# 2. Install Voice CC
+# 1. Install Voice CC
 npm install -g voicecc
-# 3. Start the dashboard
+# 2. Start the dashboard
 voicecc
 ```
-The postinstall script handles sox, the Whisper model, Python venv, and TTS dependencies automatically.
+## Development
-## How It Works
+```bash
+# 1. Install dependencies
+npm install
+# 2. Start the backend server
+npm start
-The voice loop runs locally with zero external API calls except to Claude:
+# 3. In a separate terminal, start the dashboard with hot reload
+npm run dev:dashboard
+```
+## How It Works
-1. **Mic capture**: VPIO (macOS Voice Processing IO) records 16kHz mono PCM with echo cancellation
+1. **Mic capture**: Browser captures 16kHz mono PCM via WebRTC
 2. **Voice activity detection**: Silero VAD v5 detects speech segments
-3. **Speech-to-text**: sherpa-onnx (Whisper ONNX model) transcribes audio locally
+3. **Speech-to-text**: ElevenLabs Scribe API transcribes audio
 4. **Endpointing**: VAD silence-based turn detection
 5. **Claude inference**: Transcript sent to Claude Code Agent SDK session with streaming response
 6. **Narration**: Claude's response stripped of markdown and split into sentences
-7. **Text-to-speech**: Kokoro-82M via mlx-audio on Apple Silicon GPU (~8x realtime)
-8. **Speaker playback**: Audio output through VPIO at 24kHz with echo cancellation
+7. **Text-to-speech**: ElevenLabs streaming TTS API generates audio
+8. **Speaker playback**: Audio output through browser at 24kHz

package/bin/voicecc.js CHANGED Viewed

@@ -3,29 +3,136 @@
 /**
  * CLI entry point for the voicecc command.
  *
- * Checks if first-run setup is needed (compile mic-vpio, Python venv, etc.)
- * and runs it with visible output. Then spawns `tsx run.ts` for the dashboard.
+ * - On first run (no .env), launches an interactive setup wizard
+ * - Copies the CLAUDE.md template (init/CLAUDE.md) into the package root on every run, if present
+ * - Spawns the dashboard server
  */
 import { spawn } from "node:child_process";
+import { copyFileSync, existsSync } from "node:fs";
+import { writeFile, readFile } from "node:fs/promises";
+import { createInterface } from "node:readline";
+import { randomBytes } from "node:crypto";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const PKG_ROOT = join(__dirname, "..");
 const TSX_BIN = join(PKG_ROOT, "node_modules", ".bin", "tsx");
+const ENV_PATH = join(PKG_ROOT, ".env");
-// Run setup if needed (first run or incomplete install)
 process.chdir(PKG_ROOT);
-const { needsSetup, runSetup } = await import("../scripts/postinstall.js");
-if (needsSetup()) {
-  console.log("[voicecc] Running first-time setup...\n");
-  runSetup();
+// ============================================================================
+// SETUP WIZARD
+// ============================================================================
+/**
+ * Prompt the user for a single line of input.
+ *
+ * @param rl - readline interface
+ * @param question - the prompt text
+ * @returns the user's trimmed answer
+ */
+function ask(rl, question) {
+  return new Promise((resolve) => {
+    rl.question(question, (answer) => resolve(answer.trim()));
+  });
+}
+/**
+ * Generate a cryptographically random password (24 URL-safe characters).
+ *
+ * @returns generated password string
+ */
+function generatePassword() {
+  return randomBytes(18).toString("base64url");
+}
+/**
+ * Run the first-run setup wizard.
+ * Prompts for ElevenLabs API key and dashboard password configuration.
+ * Writes results to .env.
+ */
+async function runSetupWizard() {
+  const rl = createInterface({ input: process.stdin, output: process.stdout });
+  console.log("");
+  console.log("========================================");
+  console.log("       Welcome to VoiceCC Setup!        ");
+  console.log("========================================");
+  console.log("");
+  // ElevenLabs API key
+  console.log("VoiceCC uses ElevenLabs for speech recognition and text-to-speech.");
+  console.log("You can get a free API key at: https://elevenlabs.io");
+  console.log("");
+  const apiKey = await ask(rl, "Paste your ElevenLabs API key (or press Enter to skip): ");
+  if (!apiKey) {
+    console.log("Skipped. You can add it later from the dashboard.");
+  }
+  // Dashboard password
+  console.log("");
+  console.log("Would you like to protect your dashboard with a password?");
+  console.log("Anyone with access to the dashboard can control your voice agents.");
+  console.log("");
+  console.log("  1) Yes, generate a password for me (recommended)");
+  console.log("  2) No, leave it open (not recommended)");
+  console.log("");
+  const passwordChoice = await ask(rl, "Choose [1/2]: ");
+  let password = "";
+  if (passwordChoice === "2") {
+    console.log("");
+    console.log("WARNING: Your dashboard will be open to anyone who can reach it.");
+  } else {
+    password = generatePassword();
+    console.log("");
+    console.log("========================================");
+    console.log("  Your dashboard login (save this!)     ");
+    console.log("========================================");
+    console.log("");
+    console.log(`  Username: admin`);
+    console.log(`  Password: ${password}`);
+    console.log("");
+    console.log("  \x1b[31mThis will NOT be shown again.\x1b[0m");
+    console.log("  Your browser will ask for these when");
+    console.log("  you open the dashboard.");
+    console.log("========================================");
+    console.log("");
+    await ask(rl, "Have you saved the password? Press Enter to continue. ");
+  }
+  rl.close();
+  // Build .env content
+  const lines = [];
+  if (apiKey) lines.push(`ELEVENLABS_API_KEY=${apiKey}`);
+  if (password) lines.push(`DASHBOARD_PASSWORD=${password}`);
+  await writeFile(ENV_PATH, lines.join("\n") + "\n", "utf-8");
+  console.log("All done! Starting VoiceCC...");
+  console.log("");
+}
+// ============================================================================
+// MAIN ENTRYPOINT
+// ============================================================================
+// Copy CLAUDE.md template if available
+const claudeMdSrc = join("init", "CLAUDE.md");
+if (existsSync(claudeMdSrc)) {
+  copyFileSync(claudeMdSrc, "CLAUDE.md");
+}
+// Run setup wizard on first run (no .env file)
+if (!existsSync(ENV_PATH)) {
+  await runSetupWizard();
 }
 // Start the dashboard
-const child = spawn(TSX_BIN, ["run.ts"], {
+const child = spawn(TSX_BIN, ["server/index.ts"], {
   cwd: PKG_ROOT,
   stdio: "inherit",
 });