free-speech 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,108 @@
+ # πŸŽ™οΈ free-speech
+
+ AI phone calling system with real-time voice conversations.
+
+ **Stack:**
+ - [Twilio](https://twilio.com) - Call infrastructure
+ - [Deepgram](https://deepgram.com) - Real-time STT (Nova-2) + TTS (Aura)
+
+ ## Setup
+
+ ### 1. Install dependencies
+
+ ```bash
+ npm install
+ ```
+
+ ### 2. Get API keys
+
+ **Twilio:**
+ 1. Create account at [twilio.com](https://www.twilio.com)
+ 2. Get Account SID and Auth Token from Console
+ 3. Buy a phone number (~$1/month)
+
+ **Deepgram:**
+ 1. Create account at [deepgram.com](https://deepgram.com)
+ 2. Create API key in Console
+ 3. Free tier includes $200 credit
+
+ ### 3. Configure environment
+
+ ```bash
+ cp .env.example .env
+ # Edit .env with your credentials
+ ```
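+
+ The variable names below are the ones `dist/config.js` reads and validates; the values are placeholders, so treat this as a sketch of what `.env` should end up looking like rather than a copy of `.env.example`:
+
+ ```bash
+ # Twilio credentials (Console β†’ Account Info)
+ TWILIO_ACCOUNT_SID=your_account_sid
+ TWILIO_AUTH_TOKEN=your_auth_token
+ TWILIO_PHONE_NUMBER=+15550000000
+
+ # Deepgram API key (Console β†’ API Keys)
+ DEEPGRAM_API_KEY=your_deepgram_key
+
+ # Server
+ PORT=3000
+ PUBLIC_URL=https://your-ngrok-subdomain.ngrok.app
+ ```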
+
+ ### 4. Expose webhooks (for local dev)
+
+ ```bash
+ # In a separate terminal
+ ngrok http 3000
+ # Copy the https URL to PUBLIC_URL in .env
+ ```
+
+ ### 5. Run the server
+
+ ```bash
+ npm run dev
+ ```
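+
+ To check that the server came up, hit the `/health` route that `dist/index.js` exposes (a quick smoke test, nothing more):
+
+ ```bash
+ curl http://localhost:3000/health
+ # β†’ {"status":"ok","timestamp":"..."}
+ ```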
+
+ ## Usage
+
+ ### Make a call
+
+ ```bash
+ npm run call -- +15551234567 "Hello, this is a test call"
+ ```
+
+ Or via API:
+
+ ```bash
+ curl -X POST http://localhost:3000/call \
+   -H "Content-Type: application/json" \
+   -d '{"to": "+15551234567", "message": "Hello from free-speech!"}'
+ ```
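+
+ The same endpoint can be called from Node directly. A minimal sketch, assuming Node 18+ for built-in `fetch` and an ESM module for top-level `await`; the response shape matches what the `/call` route in `dist/index.js` returns:
+
+ ```js
+ // Ask the local free-speech server to place a call.
+ const res = await fetch('http://localhost:3000/call', {
+   method: 'POST',
+   headers: { 'Content-Type': 'application/json' },
+   body: JSON.stringify({ to: '+15551234567', message: 'Hello from free-speech!' }),
+ });
+
+ console.log(await res.json());
+ // β†’ { success: true, callSid: '...' } on success,
+ //   { error: '...' } with a 400/500 status otherwise
+ ```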
+
+ ## How it works
+
+ ```
+ 1. You initiate a call via CLI or API
+ 2. Twilio places the call
+ 3. When answered, we speak the initial message (Twilio TTS or Deepgram)
+ 4. Audio streams to our server via WebSocket
+ 5. We forward audio to Deepgram for real-time transcription
+ 6. When speech ends, we respond (currently echo, add LLM later)
+ 7. Response audio sent back through Twilio
+ 8. Loop until call ends
+ ```
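+
+ Steps 4–7 ride on Twilio Media Streams: Twilio sends JSON frames over the WebSocket and `dist/webhooks.js` switches on the `event` field. A stripped-down sketch of what each frame carries (`describeFrame` is a hypothetical helper for illustration, not part of the package):
+
+ ```js
+ // Shape sketch: Twilio Media Streams frames as handled in dist/webhooks.js.
+ export function describeFrame(msg) {
+   switch (msg.event) {
+     case 'start':
+       // Stream metadata, including the callSid passed via <Parameter> in the TwiML.
+       return `stream ${msg.start.streamSid} started for call ${msg.start.customParameters?.callSid}`;
+     case 'media':
+       // Base64-encoded mulaw/8000 audio; webhooks.js forwards the decoded bytes to Deepgram.
+       return `audio frame, ${Buffer.from(msg.media.payload, 'base64').length} bytes`;
+     case 'stop':
+       // The <Stream> ended; webhooks.js closes the Deepgram connection at this point.
+       return 'stream stopped';
+     default:
+       return `unhandled event: ${msg.event}`;
+   }
+ }
+ ```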
+
+ ## Architecture
+
+ ```
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”     β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”     β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
+ β”‚  CLI /  │────▢│ Server  │────▢│   Twilio   β”‚
+ β”‚   API   β”‚     β”‚  :3000  │◀────│            β”‚
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜     β””β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”˜     β””β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜
+                      β”‚                β”‚
+                      β”‚ WebSocket      β”‚ PSTN
+                      β–Ό                β–Ό
+                 β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”     β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
+                 β”‚ Deepgram β”‚     β”‚  Phone   β”‚
+                 β”‚ STT/TTS  β”‚     β”‚          β”‚
+                 β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜     β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
+ ```
+
+ ## Roadmap
+
+ - [x] Basic call flow
+ - [x] Twilio integration
+ - [x] Deepgram STT (real-time)
+ - [x] Deepgram TTS (Aura)
+ - [ ] LLM integration (Claude/OpenAI)
+ - [ ] Inbound calls
+ - [ ] Call recording
+ - [ ] OpenClaw skill
+
+ ## License
+
+ MIT
package/dist/cli.js ADDED
@@ -0,0 +1,41 @@
+ #!/usr/bin/env node
+ import 'dotenv/config';
+ import { validateConfig } from './config.js';
+ import { createTwilioClient, makeCall } from './twilio.js';
+ const args = process.argv.slice(2);
+ if (args.length < 2) {
+     console.log(`
+ πŸŽ™οΈ free-speech CLI
+
+ Usage:
+   npx tsx src/cli.ts <phone-number> "<message>"
+
+ Examples:
+   npx tsx src/cli.ts +15551234567 "Hello, this is a test call"
+   npm run call -- +15551234567 "Your appointment is confirmed for tomorrow"
+
+ Environment variables required:
+   TWILIO_ACCOUNT_SID
+   TWILIO_AUTH_TOKEN
+   TWILIO_PHONE_NUMBER
+   DEEPGRAM_API_KEY
+   PUBLIC_URL (your ngrok URL)
+ `);
+     process.exit(1);
+ }
+ const [to, ...messageParts] = args;
+ const message = messageParts.join(' ');
+ validateConfig();
+ const client = createTwilioClient();
+ console.log(`πŸ“ž Calling ${to}...`);
+ console.log(`πŸ“ Message: "${message}"`);
+ try {
+     const call = await makeCall(client, to, message);
+     console.log(`βœ… Call initiated!`);
+     console.log(`   SID: ${call.sid}`);
+     console.log(`   Status: ${call.status}`);
+ }
+ catch (err) {
+     console.error('❌ Failed to make call:', err);
+     process.exit(1);
+ }
package/dist/config.js ADDED
@@ -0,0 +1,32 @@
+ import 'dotenv/config';
+ export const config = {
+     twilio: {
+         accountSid: process.env.TWILIO_ACCOUNT_SID,
+         authToken: process.env.TWILIO_AUTH_TOKEN,
+         phoneNumber: process.env.TWILIO_PHONE_NUMBER,
+     },
+     deepgram: {
+         apiKey: process.env.DEEPGRAM_API_KEY,
+     },
+     server: {
+         port: parseInt(process.env.PORT || '3000'),
+         publicUrl: process.env.PUBLIC_URL,
+     },
+ };
+ export function validateConfig() {
+     const missing = [];
+     if (!config.twilio.accountSid)
+         missing.push('TWILIO_ACCOUNT_SID');
+     if (!config.twilio.authToken)
+         missing.push('TWILIO_AUTH_TOKEN');
+     if (!config.twilio.phoneNumber)
+         missing.push('TWILIO_PHONE_NUMBER');
+     if (!config.deepgram.apiKey)
+         missing.push('DEEPGRAM_API_KEY');
+     if (!config.server.publicUrl)
+         missing.push('PUBLIC_URL');
+     if (missing.length > 0) {
+         console.error('Missing environment variables:', missing.join(', '));
+         process.exit(1);
+     }
+ }
package/dist/deepgram.js ADDED
@@ -0,0 +1,65 @@
+ import { createClient, LiveTranscriptionEvents } from '@deepgram/sdk';
+ import { config } from './config.js';
+ const deepgram = createClient(config.deepgram.apiKey);
+ export async function createDeepgramConnection(callbacks) {
+     const connection = deepgram.listen.live({
+         model: 'nova-2-phonecall', // Optimized for phone audio
+         language: 'en-US',
+         encoding: 'mulaw',
+         sample_rate: 8000,
+         channels: 1,
+         punctuate: true,
+         interim_results: true,
+         endpointing: 300, // 300ms of silence = end of utterance
+         utterance_end_ms: 1000,
+     });
+     connection.on(LiveTranscriptionEvents.Open, () => {
+         console.log('πŸŽ™οΈ Deepgram connection opened');
+     });
+     connection.on(LiveTranscriptionEvents.Transcript, (data) => {
+         const transcript = data.channel?.alternatives?.[0]?.transcript;
+         if (transcript && transcript.trim()) {
+             const isFinal = data.is_final || data.speech_final;
+             callbacks.onTranscript(transcript, isFinal);
+         }
+     });
+     connection.on(LiveTranscriptionEvents.Error, (err) => {
+         callbacks.onError(err);
+     });
+     connection.on(LiveTranscriptionEvents.Close, () => {
+         console.log('πŸ”‡ Deepgram connection closed');
+     });
+     // Return the live connection so callers can send audio into it
+     return connection;
+ }
+ // Deepgram Text-to-Speech (Aura)
+ export async function textToSpeech(text) {
+     const response = await deepgram.speak.request({ text }, {
+         model: 'aura-asteria-en', // Natural female voice
+         encoding: 'mulaw',
+         sample_rate: 8000,
+         container: 'none',
+     });
+     const stream = await response.getStream();
+     if (!stream) {
+         throw new Error('No audio stream returned from Deepgram TTS');
+     }
+     // Collect audio chunks
+     const reader = stream.getReader();
+     const chunks = [];
+     while (true) {
+         const { done, value } = await reader.read();
+         if (done)
+             break;
+         chunks.push(value);
+     }
+     // Combine and base64 encode
+     const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
+     const combined = new Uint8Array(totalLength);
+     let offset = 0;
+     for (const chunk of chunks) {
+         combined.set(chunk, offset);
+         offset += chunk.length;
+     }
+     return Buffer.from(combined).toString('base64');
+ }
package/dist/index.js ADDED
@@ -0,0 +1,53 @@
+ import express from 'express';
+ import { createServer } from 'http';
+ import { WebSocketServer } from 'ws';
+ import { config, validateConfig } from './config.js';
+ import { createTwilioClient, makeCall } from './twilio.js';
+ import { handleTwilioWebhook, handleMediaStream } from './webhooks.js';
+ validateConfig();
+ const app = express();
+ app.use(express.json());
+ app.use(express.urlencoded({ extended: true }));
+ const twilioClient = createTwilioClient();
+ // Health check
+ app.get('/health', (req, res) => {
+     res.json({ status: 'ok', timestamp: new Date().toISOString() });
+ });
+ // Initiate outbound call
+ app.post('/call', async (req, res) => {
+     const { to, message } = req.body;
+     if (!to || !message) {
+         return res.status(400).json({ error: 'Missing "to" or "message"' });
+     }
+     try {
+         const call = await makeCall(twilioClient, to, message);
+         res.json({ success: true, callSid: call.sid });
+     }
+     catch (err) {
+         console.error('Call failed:', err);
+         res.status(500).json({ error: 'Failed to initiate call' });
+     }
+ });
+ // Twilio webhook - call answered
+ app.post('/twilio/voice', handleTwilioWebhook);
+ // Twilio media stream TwiML endpoint
+ app.post('/twilio/media-stream', (req, res) => {
+     // Return TwiML that connects to our WebSocket
+     const twiml = `<?xml version="1.0" encoding="UTF-8"?>
+ <Response>
+   <Connect>
+     <Stream url="wss://${new URL(config.server.publicUrl).host}/media" />
+   </Connect>
+ </Response>`;
+     res.type('text/xml').send(twiml);
+ });
+ // Single HTTP server shared by Express and the /media WebSocket endpoint
+ // (calling both app.listen and server.listen would bind the same port twice)
+ const server = createServer(app);
+ const wss = new WebSocketServer({ server, path: '/media' });
+ wss.on('connection', handleMediaStream);
+ server.listen(config.server.port, () => {
+     console.log(`πŸŽ™οΈ free-speech server running on port ${config.server.port}`);
+     console.log(`πŸ“ž Webhook URL: ${config.server.publicUrl}/twilio/voice`);
+     console.log(`πŸ”Š WebSocket server ready on /media`);
+ });
package/dist/twilio.js ADDED
@@ -0,0 +1,22 @@
+ import twilio from 'twilio';
+ import { config } from './config.js';
+ export function createTwilioClient() {
+     return twilio(config.twilio.accountSid, config.twilio.authToken);
+ }
+ export async function makeCall(client, to, initialMessage) {
+     const call = await client.calls.create({
+         to,
+         from: config.twilio.phoneNumber,
+         url: `${config.server.publicUrl}/twilio/voice`,
+         statusCallback: `${config.server.publicUrl}/twilio/status`,
+         statusCallbackEvent: ['initiated', 'ringing', 'answered', 'completed'],
+         record: false,
+     });
+     // Store initial message for this call (in production, use Redis/DB)
+     callMessages.set(call.sid, initialMessage);
+     console.log(`πŸ“ž Call initiated: ${call.sid} -> ${to}`);
+     return call;
+ }
+ // In-memory store for call state (use Redis in production)
+ export const callMessages = new Map();
+ export const callStates = new Map();
package/dist/webhooks.js ADDED
@@ -0,0 +1,110 @@
+ import { WebSocket } from 'ws';
+ import { config } from './config.js';
+ import { callMessages, callStates } from './twilio.js';
+ import { createDeepgramConnection, textToSpeech } from './deepgram.js';
+ // Handle incoming Twilio voice webhook (call answered)
+ export function handleTwilioWebhook(req, res) {
+     const callSid = req.body.CallSid;
+     const initialMessage = callMessages.get(callSid) || 'Hello, this is an automated call.';
+     console.log(`πŸ“ž Call answered: ${callSid}`);
+     // Initialize call state
+     callStates.set(callSid, {
+         transcripts: [],
+         phase: 'greeting',
+     });
+     // Return TwiML: say greeting, then connect to media stream for real-time audio
+     const twiml = `<?xml version="1.0" encoding="UTF-8"?>
+ <Response>
+   <Say voice="Polly.Matthew">${escapeXml(initialMessage)}</Say>
+   <Pause length="1"/>
+   <Say voice="Polly.Matthew">I'm listening. Please speak after the tone.</Say>
+   <Play>https://api.twilio.com/cowbell.mp3</Play>
+   <Connect>
+     <Stream url="wss://${new URL(config.server.publicUrl).host}/media">
+       <Parameter name="callSid" value="${callSid}" />
+     </Stream>
+   </Connect>
+ </Response>`;
+     res.type('text/xml').send(twiml);
+ }
+ // Handle WebSocket media stream from Twilio
+ export function handleMediaStream(ws) {
+     console.log('πŸ”Š Media stream connected');
+     let callSid = null;
+     let deepgramWs = null;
+     let streamSid = null;
+     ws.on('message', async (data) => {
+         const msg = JSON.parse(data.toString());
+         switch (msg.event) {
+             case 'start':
+                 // Stream started - extract callSid and connect to Deepgram
+                 streamSid = msg.start.streamSid;
+                 callSid = msg.start.customParameters?.callSid;
+                 console.log(`πŸŽ™οΈ Stream started: ${streamSid} for call ${callSid}`);
+                 // Connect to Deepgram for real-time STT
+                 deepgramWs = await createDeepgramConnection({
+                     onTranscript: (transcript, isFinal) => {
+                         console.log(`πŸ“ ${isFinal ? 'Final' : 'Interim'}: ${transcript}`);
+                         if (isFinal && callSid) {
+                             const state = callStates.get(callSid);
+                             if (state) {
+                                 state.transcripts.push(transcript);
+                                 // For now, just echo back what they said
+                                 // Later: send to LLM for intelligent response
+                                 if (transcript.length > 5) {
+                                     respondToCall(ws, streamSid, `I heard you say: ${transcript}`);
+                                 }
+                             }
+                         }
+                     },
+                     onError: (err) => {
+                         console.error('Deepgram error:', err);
+                     },
+                 });
+                 break;
+             case 'media':
+                 // Forward audio to Deepgram
+                 if (deepgramWs?.readyState === WebSocket.OPEN) {
+                     const audio = Buffer.from(msg.media.payload, 'base64');
+                     deepgramWs.send(audio);
+                 }
+                 break;
+             case 'stop':
+                 console.log('πŸ”‡ Stream stopped');
+                 deepgramWs?.close();
+                 break;
+         }
+     });
+     ws.on('close', () => {
+         console.log('πŸ”Œ Media stream disconnected');
+         deepgramWs?.close();
+     });
+ }
+ // Send TTS response back through Twilio stream
+ async function respondToCall(ws, streamSid, text) {
+     try {
+         // Use Deepgram TTS to generate audio
+         const audioBase64 = await textToSpeech(text);
+         // Send audio to Twilio stream
+         const mediaMessage = {
+             event: 'media',
+             streamSid,
+             media: {
+                 payload: audioBase64,
+             },
+         };
+         ws.send(JSON.stringify(mediaMessage));
+         console.log(`πŸ”Š Sent response: ${text.substring(0, 50)}...`);
+     }
+     catch (err) {
+         console.error('TTS failed:', err);
+     }
+ }
+ function escapeXml(text) {
+     return text
+         .replace(/&/g, '&amp;')
+         .replace(/</g, '&lt;')
+         .replace(/>/g, '&gt;')
+         .replace(/"/g, '&quot;')
+         .replace(/'/g, '&apos;');
+ }
package/package.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "name": "free-speech",
+   "version": "0.1.0",
+   "description": "AI phone calling with Twilio + Deepgram",
+   "type": "module",
+   "main": "dist/index.js",
+   "bin": {
+     "free-speech": "dist/cli.js"
+   },
+   "files": [
+     "dist",
+     "README.md"
+   ],
+   "keywords": [
+     "twilio",
+     "deepgram",
+     "phone",
+     "ai",
+     "voice",
+     "speech-to-text",
+     "text-to-speech"
+   ],
+   "author": "Seth Webster",
+   "license": "MIT",
+   "repository": {
+     "type": "git",
+     "url": "https://github.com/sethwebster/free-speech"
+   },
+   "scripts": {
+     "build": "tsc",
+     "dev": "tsx watch src/index.ts",
+     "start": "node dist/index.js",
+     "call": "tsx src/cli.ts",
+     "test": "vitest run",
+     "test:watch": "vitest"
+   },
+   "dependencies": {
+     "@deepgram/sdk": "^3.0.0",
+     "dotenv": "^16.3.1",
+     "express": "^4.18.2",
+     "twilio": "^4.19.0",
+     "ws": "^8.14.2"
+   },
+   "devDependencies": {
+     "@types/express": "^4.17.21",
+     "@types/node": "^20.19.33",
+     "@types/ws": "^8.5.10",
+     "tsx": "^4.6.0",
+     "typescript": "^5.3.2",
+     "vitest": "^4.0.18"
+   }
+ }