free-speech 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,108 @@
+ # πŸŽ™οΈ free-speech
+
+ AI phone calling system with real-time voice conversations.
+
+ **Stack:**
+ - [Twilio](https://twilio.com) - Call infrastructure
+ - [Deepgram](https://deepgram.com) - Real-time STT (Nova-2) + TTS (Aura)
+
+ ## Setup
+
+ ### 1. Install dependencies
+
+ ```bash
+ npm install
+ ```
+
+ ### 2. Get API keys
+
+ **Twilio:**
+ 1. Create account at [twilio.com](https://www.twilio.com)
+ 2. Get Account SID and Auth Token from Console
+ 3. Buy a phone number (~$1/month)
+
+ **Deepgram:**
+ 1. Create account at [deepgram.com](https://deepgram.com)
+ 2. Create API key in Console
+ 3. Free tier includes $200 credit
+
+ ### 3. Configure environment
+
+ ```bash
+ cp .env.example .env
+ # Edit .env with your credentials
+ ```
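+
+ The variable names below are the ones `dist/config.js` reads and validates; the values are placeholders, so treat this as a sketch of what `.env` should end up looking like rather than a copy of `.env.example`:
+
+ ```bash
+ # Twilio credentials (Console β†’ Account Info)
+ TWILIO_ACCOUNT_SID=your_account_sid
+ TWILIO_AUTH_TOKEN=your_auth_token
+ TWILIO_PHONE_NUMBER=+15550000000
+
+ # Deepgram API key (Console β†’ API Keys)
+ DEEPGRAM_API_KEY=your_deepgram_key
+
+ # Server
+ PORT=3000
+ PUBLIC_URL=https://your-ngrok-subdomain.ngrok.app
+ ```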
+
+ ### 4. Expose webhooks (for local dev)
+
+ ```bash
+ # In a separate terminal
+ ngrok http 3000
+ # Copy the https URL to PUBLIC_URL in .env
+ ```
+
+ ### 5. Run the server
+
+ ```bash
+ npm run dev
+ ```
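+
+ To check that the server came up, hit the `/health` route that `dist/index.js` exposes (a quick smoke test, nothing more):
+
+ ```bash
+ curl http://localhost:3000/health
+ # β†’ {"status":"ok","timestamp":"..."}
+ ```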
+
+ ## Usage
+
+ ### Make a call
+
+ ```bash
+ npm run call -- +15551234567 "Hello, this is a test call"
+ ```
+
+ Or via API:
+
+ ```bash
+ curl -X POST http://localhost:3000/call \
+   -H "Content-Type: application/json" \
+   -d '{"to": "+15551234567", "message": "Hello from free-speech!"}'
+ ```
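+
+ The same endpoint can be called from Node directly. A minimal sketch, assuming Node 18+ for built-in `fetch` and an ESM module for top-level `await`; the response shape matches what the `/call` route in `dist/index.js` returns:
+
+ ```js
+ // Ask the local free-speech server to place a call.
+ const res = await fetch('http://localhost:3000/call', {
+   method: 'POST',
+   headers: { 'Content-Type': 'application/json' },
+   body: JSON.stringify({ to: '+15551234567', message: 'Hello from free-speech!' }),
+ });
+
+ console.log(await res.json());
+ // β†’ { success: true, callSid: '...' } on success,
+ //   { error: '...' } with a 400/500 status otherwise
+ ```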
+
+ ## How it works
+
+ ```
+ 1. You initiate a call via CLI or API
+ 2. Twilio places the call
+ 3. When answered, we speak the initial message (Twilio TTS or Deepgram)
+ 4. Audio streams to our server via WebSocket
+ 5. We forward audio to Deepgram for real-time transcription
+ 6. When speech ends, we respond (currently echo, add LLM later)
+ 7. Response audio sent back through Twilio
+ 8. Loop until call ends
+ ```
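+
+ Steps 4–7 ride on Twilio Media Streams: Twilio sends JSON frames over the WebSocket and `dist/webhooks.js` switches on the `event` field. A stripped-down sketch of what each frame carries (`describeFrame` is a hypothetical helper for illustration, not part of the package):
+
+ ```js
+ // Shape sketch: Twilio Media Streams frames as handled in dist/webhooks.js.
+ export function describeFrame(msg) {
+   switch (msg.event) {
+     case 'start':
+       // Stream metadata, including the callSid passed via <Parameter> in the TwiML.
+       return `stream ${msg.start.streamSid} started for call ${msg.start.customParameters?.callSid}`;
+     case 'media':
+       // Base64-encoded mulaw/8000 audio; webhooks.js forwards the decoded bytes to Deepgram.
+       return `audio frame, ${Buffer.from(msg.media.payload, 'base64').length} bytes`;
+     case 'stop':
+       // The <Stream> ended; webhooks.js closes the Deepgram connection at this point.
+       return 'stream stopped';
+     default:
+       return `unhandled event: ${msg.event}`;
+   }
+ }
+ ```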
+
+ ## Architecture
+
+ ```
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”     β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”     β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
+ β”‚  CLI /  │────▢│ Server  │────▢│   Twilio   β”‚
+ β”‚   API   β”‚     β”‚  :3000  │◀────│            β”‚
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜     β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜     β””β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜
+                      β”‚                β”‚
+                      β”‚ WebSocket      β”‚ PSTN
+                      β–Ό                β–Ό
+                 β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”     β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
+                 β”‚ Deepgram β”‚     β”‚  Phone   β”‚
+                 β”‚ STT/TTS  β”‚     β”‚          β”‚
+                 β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜     β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
+ ```
+
+ ## Roadmap
+
+ - [x] Basic call flow
+ - [x] Twilio integration
+ - [x] Deepgram STT (real-time)
+ - [x] Deepgram TTS (Aura)
+ - [ ] LLM integration (Claude/OpenAI)
+ - [ ] Inbound calls
+ - [ ] Call recording
+ - [ ] OpenClaw skill
+
+ ## License
+
+ MIT
package/dist/cli.js ADDED
@@ -0,0 +1,41 @@
+ #!/usr/bin/env node
+ import 'dotenv/config';
+ import { validateConfig } from './config.js';
+ import { createTwilioClient, makeCall } from './twilio.js';
+ const args = process.argv.slice(2);
+ if (args.length < 2) {
+     console.log(`
+ πŸŽ™οΈ free-speech CLI
+
+ Usage:
+   npx tsx src/cli.ts <phone-number> "<message>"
+
+ Examples:
+   npx tsx src/cli.ts +15551234567 "Hello, this is a test call"
+   npm run call -- +15551234567 "Your appointment is confirmed for tomorrow"
+
+ Environment variables required:
+   TWILIO_ACCOUNT_SID
+   TWILIO_AUTH_TOKEN
+   TWILIO_PHONE_NUMBER
+   DEEPGRAM_API_KEY
+   PUBLIC_URL (your ngrok URL)
+ `);
+     process.exit(1);
+ }
+ const [to, ...messageParts] = args;
+ const message = messageParts.join(' ');
+ validateConfig();
+ const client = createTwilioClient();
+ console.log(`πŸ“ž Calling ${to}...`);
+ console.log(`πŸ“ Message: "${message}"`);
+ try {
+     const call = await makeCall(client, to, message);
+     console.log(`βœ… Call initiated!`);
+     console.log(`   SID: ${call.sid}`);
+     console.log(`   Status: ${call.status}`);
+ }
+ catch (err) {
+     console.error('❌ Failed to make call:', err);
+     process.exit(1);
+ }
package/dist/config.js ADDED
@@ -0,0 +1,32 @@
+ import 'dotenv/config';
+ export const config = {
+     twilio: {
+         accountSid: process.env.TWILIO_ACCOUNT_SID,
+         authToken: process.env.TWILIO_AUTH_TOKEN,
+         phoneNumber: process.env.TWILIO_PHONE_NUMBER,
+     },
+     deepgram: {
+         apiKey: process.env.DEEPGRAM_API_KEY,
+     },
+     server: {
+         port: parseInt(process.env.PORT || '3000'),
+         publicUrl: process.env.PUBLIC_URL,
+     },
+ };
+ export function validateConfig() {
+     const missing = [];
+     if (!config.twilio.accountSid)
+         missing.push('TWILIO_ACCOUNT_SID');
+     if (!config.twilio.authToken)
+         missing.push('TWILIO_AUTH_TOKEN');
+     if (!config.twilio.phoneNumber)
+         missing.push('TWILIO_PHONE_NUMBER');
+     if (!config.deepgram.apiKey)
+         missing.push('DEEPGRAM_API_KEY');
+     if (!config.server.publicUrl)
+         missing.push('PUBLIC_URL');
+     if (missing.length > 0) {
+         console.error('Missing environment variables:', missing.join(', '));
+         process.exit(1);
+     }
+ }
package/dist/deepgram.js ADDED
@@ -0,0 +1,65 @@
+ import { createClient, LiveTranscriptionEvents } from '@deepgram/sdk';
+ import { config } from './config.js';
+ const deepgram = createClient(config.deepgram.apiKey);
+ export async function createDeepgramConnection(callbacks) {
+     const connection = deepgram.listen.live({
+         model: 'nova-2-phonecall', // Optimized for phone audio
+         language: 'en-US',
+         encoding: 'mulaw',
+         sample_rate: 8000,
+         channels: 1,
+         punctuate: true,
+         interim_results: true,
+         endpointing: 300, // 300ms of silence = end of utterance
+         utterance_end_ms: 1000,
+     });
+     connection.on(LiveTranscriptionEvents.Open, () => {
+         console.log('πŸŽ™οΈ Deepgram connection opened');
+     });
+     connection.on(LiveTranscriptionEvents.Transcript, (data) => {
+         const transcript = data.channel?.alternatives?.[0]?.transcript;
+         if (transcript && transcript.trim()) {
+             const isFinal = data.is_final || data.speech_final;
+             callbacks.onTranscript(transcript, isFinal);
+         }
+     });
+     connection.on(LiveTranscriptionEvents.Error, (err) => {
+         callbacks.onError(err);
+     });
+     connection.on(LiveTranscriptionEvents.Close, () => {
+         console.log('πŸ”‡ Deepgram connection closed');
+     });
+     // Return the live connection so callers can send audio into it
+     return connection;
+ }
+ // Deepgram Text-to-Speech (Aura)
+ export async function textToSpeech(text) {
+     const response = await deepgram.speak.request({ text }, {
+         model: 'aura-asteria-en', // Natural female voice
+         encoding: 'mulaw',
+         sample_rate: 8000,
+         container: 'none',
+     });
+     const stream = await response.getStream();
+     if (!stream) {
+         throw new Error('No audio stream returned from Deepgram TTS');
+     }
+     // Collect audio chunks
+     const reader = stream.getReader();
+     const chunks = [];
+     while (true) {
+         const { done, value } = await reader.read();
+         if (done)
+             break;
+         chunks.push(value);
+     }
+     // Combine and base64 encode
+     const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
+     const combined = new Uint8Array(totalLength);
+     let offset = 0;
+     for (const chunk of chunks) {
+         combined.set(chunk, offset);
+         offset += chunk.length;
+     }
+     return Buffer.from(combined).toString('base64');
+ }
package/dist/index.js ADDED
@@ -0,0 +1,53 @@
+ import express from 'express';
+ import { createServer } from 'http';
+ import { WebSocketServer } from 'ws';
+ import { config, validateConfig } from './config.js';
+ import { createTwilioClient, makeCall } from './twilio.js';
+ import { handleTwilioWebhook, handleMediaStream } from './webhooks.js';
+ validateConfig();
+ const app = express();
+ app.use(express.json());
+ app.use(express.urlencoded({ extended: true }));
+ const twilioClient = createTwilioClient();
+ // Health check
+ app.get('/health', (req, res) => {
+     res.json({ status: 'ok', timestamp: new Date().toISOString() });
+ });
+ // Initiate outbound call
+ app.post('/call', async (req, res) => {
+     const { to, message } = req.body;
+     if (!to || !message) {
+         return res.status(400).json({ error: 'Missing "to" or "message"' });
+     }
+     try {
+         const call = await makeCall(twilioClient, to, message);
+         res.json({ success: true, callSid: call.sid });
+     }
+     catch (err) {
+         console.error('Call failed:', err);
+         res.status(500).json({ error: 'Failed to initiate call' });
+     }
+ });
+ // Twilio webhook - call answered
+ app.post('/twilio/voice', handleTwilioWebhook);
+ // Twilio media stream TwiML endpoint
+ app.post('/twilio/media-stream', (req, res) => {
+     // Return TwiML that connects to our WebSocket
+     const twiml = `<?xml version="1.0" encoding="UTF-8"?>
+ <Response>
+   <Connect>
+     <Stream url="wss://${new URL(config.server.publicUrl).host}/media" />
+   </Connect>
+ </Response>`;
+     res.type('text/xml').send(twiml);
+ });
+ // Single HTTP server shared by Express and the /media WebSocket endpoint
+ // (calling both app.listen and server.listen would bind the same port twice)
+ const server = createServer(app);
+ const wss = new WebSocketServer({ server, path: '/media' });
+ wss.on('connection', handleMediaStream);
+ server.listen(config.server.port, () => {
+     console.log(`πŸŽ™οΈ free-speech server running on port ${config.server.port}`);
+     console.log(`πŸ“ž Webhook URL: ${config.server.publicUrl}/twilio/voice`);
+     console.log(`πŸ”Š WebSocket server ready on /media`);
+ });
package/dist/twilio.js ADDED
@@ -0,0 +1,22 @@
+ import twilio from 'twilio';
+ import { config } from './config.js';
+ export function createTwilioClient() {
+     return twilio(config.twilio.accountSid, config.twilio.authToken);
+ }
+ export async function makeCall(client, to, initialMessage) {
+     const call = await client.calls.create({
+         to,
+         from: config.twilio.phoneNumber,
+         url: `${config.server.publicUrl}/twilio/voice`,
+         statusCallback: `${config.server.publicUrl}/twilio/status`,
+         statusCallbackEvent: ['initiated', 'ringing', 'answered', 'completed'],
+         record: false,
+     });
+     // Store initial message for this call (in production, use Redis/DB)
+     callMessages.set(call.sid, initialMessage);
+     console.log(`πŸ“ž Call initiated: ${call.sid} -> ${to}`);
+     return call;
+ }
+ // In-memory store for call state (use Redis in production)
+ export const callMessages = new Map();
+ export const callStates = new Map();
package/dist/webhooks.js ADDED
@@ -0,0 +1,110 @@
+ import { WebSocket } from 'ws';
+ import { config } from './config.js';
+ import { callMessages, callStates } from './twilio.js';
+ import { createDeepgramConnection, textToSpeech } from './deepgram.js';
+ // Handle incoming Twilio voice webhook (call answered)
+ export function handleTwilioWebhook(req, res) {
+     const callSid = req.body.CallSid;
+     const initialMessage = callMessages.get(callSid) || 'Hello, this is an automated call.';
+     console.log(`πŸ“ž Call answered: ${callSid}`);
+     // Initialize call state
+     callStates.set(callSid, {
+         transcripts: [],
+         phase: 'greeting',
+     });
+     // Return TwiML: say greeting, then connect to media stream for real-time audio
+     const twiml = `<?xml version="1.0" encoding="UTF-8"?>
+ <Response>
+   <Say voice="Polly.Matthew">${escapeXml(initialMessage)}</Say>
+   <Pause length="1"/>
+   <Say voice="Polly.Matthew">I'm listening. Please speak after the tone.</Say>
+   <Play>https://api.twilio.com/cowbell.mp3</Play>
+   <Connect>
+     <Stream url="wss://${new URL(config.server.publicUrl).host}/media">
+       <Parameter name="callSid" value="${callSid}" />
+     </Stream>
+   </Connect>
+ </Response>`;
+     res.type('text/xml').send(twiml);
+ }
+ // Handle WebSocket media stream from Twilio
+ export function handleMediaStream(ws) {
+     console.log('πŸ”Š Media stream connected');
+     let callSid = null;
+     let deepgramWs = null;
+     let streamSid = null;
+     ws.on('message', async (data) => {
+         const msg = JSON.parse(data.toString());
+         switch (msg.event) {
+             case 'start':
+                 // Stream started - extract callSid and connect to Deepgram
+                 streamSid = msg.start.streamSid;
+                 callSid = msg.start.customParameters?.callSid;
+                 console.log(`πŸŽ™οΈ Stream started: ${streamSid} for call ${callSid}`);
+                 // Connect to Deepgram for real-time STT
+                 deepgramWs = await createDeepgramConnection({
+                     onTranscript: (transcript, isFinal) => {
+                         console.log(`πŸ“ ${isFinal ? 'Final' : 'Interim'}: ${transcript}`);
+                         if (isFinal && callSid) {
+                             const state = callStates.get(callSid);
+                             if (state) {
+                                 state.transcripts.push(transcript);
+                                 // For now, just echo back what they said
+                                 // Later: send to LLM for intelligent response
+                                 if (transcript.length > 5) {
+                                     respondToCall(ws, streamSid, `I heard you say: ${transcript}`);
+                                 }
+                             }
+                         }
+                     },
+                     onError: (err) => {
+                         console.error('Deepgram error:', err);
+                     },
+                 });
+                 break;
+             case 'media':
+                 // Forward audio to Deepgram
+                 if (deepgramWs?.readyState === WebSocket.OPEN) {
+                     const audio = Buffer.from(msg.media.payload, 'base64');
+                     deepgramWs.send(audio);
+                 }
+                 break;
+             case 'stop':
+                 console.log('πŸ”‡ Stream stopped');
+                 deepgramWs?.close();
+                 break;
+         }
+     });
+     ws.on('close', () => {
+         console.log('πŸ”Œ Media stream disconnected');
+         deepgramWs?.close();
+     });
+ }
+ // Send TTS response back through Twilio stream
+ async function respondToCall(ws, streamSid, text) {
+     try {
+         // Use Deepgram TTS to generate audio
+         const audioBase64 = await textToSpeech(text);
+         // Send audio to Twilio stream
+         const mediaMessage = {
+             event: 'media',
+             streamSid,
+             media: {
+                 payload: audioBase64,
+             },
+         };
+         ws.send(JSON.stringify(mediaMessage));
+         console.log(`πŸ”Š Sent response: ${text.substring(0, 50)}...`);
+     }
+     catch (err) {
+         console.error('TTS failed:', err);
+     }
+ }
+ function escapeXml(text) {
+     return text
+         .replace(/&/g, '&amp;')
+         .replace(/</g, '&lt;')
+         .replace(/>/g, '&gt;')
+         .replace(/"/g, '&quot;')
+         .replace(/'/g, '&apos;');
+ }
package/package.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "name": "free-speech",
+   "version": "0.1.0",
+   "description": "AI phone calling with Twilio + Deepgram",
+   "type": "module",
+   "main": "dist/index.js",
+   "bin": {
+     "free-speech": "dist/cli.js"
+   },
+   "files": [
+     "dist",
+     "README.md"
+   ],
+   "keywords": [
+     "twilio",
+     "deepgram",
+     "phone",
+     "ai",
+     "voice",
+     "speech-to-text",
+     "text-to-speech"
+   ],
+   "author": "Seth Webster",
+   "license": "MIT",
+   "repository": {
+     "type": "git",
+     "url": "https://github.com/sethwebster/free-speech"
+   },
+   "scripts": {
+     "build": "tsc",
+     "dev": "tsx watch src/index.ts",
+     "start": "node dist/index.js",
+     "call": "tsx src/cli.ts",
+     "test": "vitest run",
+     "test:watch": "vitest"
+   },
+   "dependencies": {
+     "@deepgram/sdk": "^3.0.0",
+     "dotenv": "^16.3.1",
+     "express": "^4.18.2",
+     "twilio": "^4.19.0",
+     "ws": "^8.14.2"
+   },
+   "devDependencies": {
+     "@types/express": "^4.17.21",
+     "@types/node": "^20.19.33",
+     "@types/ws": "^8.5.10",
+     "tsx": "^4.6.0",
+     "typescript": "^5.3.2",
+     "vitest": "^4.0.18"
+   }
+ }