create-local-voice-agent 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # create-local-voice-agent
2
+
3
+ A CLI that scaffolds a **production-ready, containerized infrastructure** for real-time Voice AI Agents using **LiveKit**, **Python**, and **Docker**.
4
+
5
+ Stop fighting WebRTC UDP port exhaustion, Docker permission errors, and token handshake wiring — this tool generates a perfect boilerplate in seconds.
6
+
7
+ ## Quick Start
8
+
9
+ ```bash
10
+ npx create-local-voice-agent my-voice-app
11
+ ```
12
+
13
+ You'll be prompted to choose:
14
+
15
+ 1. **LLM Provider** — Ollama (local GPU) or OpenAI (cloud)
16
+ 2. **Next.js Frontend** — optional browser UI with LiveKit Room components
17
+
18
+ Then:
19
+
20
+ ```bash
21
+ cd my-voice-app
22
+ cp .env.example .env # fill in your API keys
23
+ docker compose up --build
24
+ ```
25
+
26
+ ## What Gets Generated
27
+
28
+ ```
29
+ my-voice-app/
30
+ ├── docker-compose.yml # LiveKit + Redis + Agent (+ Frontend)
31
+ ├── .env.example # Pre-configured environment variables
32
+ ├── README.md # Project-specific documentation
33
+ ├── python-agent/
34
+ │ ├── Dockerfile # Non-root user (EACCES fix)
35
+ │ ├── requirements.txt # livekit-agents + plugins
36
+ │ └── agent.py # VoicePipelineAgent (VAD→STT→LLM→TTS)
37
+ └── frontend/ # (if selected)
38
+ ├── Dockerfile
39
+ ├── package.json # includes livekit-server-sdk
40
+ ├── app/api/token/route.ts # Secure AccessToken generation
41
+ └── app/page.tsx # LiveKit Room + voice visualizer
42
+ ```
43
+
44
+ ## Key Architectural Decisions
45
+
46
+ ### WebRTC UDP Port Limiting
47
+ LiveKit's UDP port range is strictly limited to `50000-50050` to prevent `docker-proxy` from spawning thousands of processes and hanging the host OS.
48
+
49
+ ### Token Handshake
50
+ The Next.js frontend includes a `/api/token` endpoint using `livekit-server-sdk` to generate `AccessToken` JWTs. The browser fetches this token before connecting to the LiveKit room.
51
+
52
+ ### Non-Root Docker User
53
+ The Python agent Dockerfile creates a dedicated `agentuser` to prevent `EACCES` permission errors common on Linux hosts.
54
+
55
+ ### Ollama GPU Routing
56
+ When Ollama is selected, the agent container uses `host.docker.internal` to reach the host's GPU-accelerated Ollama instance.
57
+
58
+ ## Development
59
+
60
+ ```bash
61
+ # Clone and install
62
+ git clone https://github.com/geladons/create-voice-agent.git
63
+ cd create-voice-agent
64
+ npm install
65
+
66
+ # Run locally
67
+ node bin/create-voice-agent.js my-test-project
68
+ ```
69
+
70
+ ## License
71
+
72
+ MIT
73
+
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env node
2
+
3
+ import '../src/index.js';
4
+
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "create-local-voice-agent",
3
+ "version": "1.0.0",
4
+ "description": "CLI to scaffold production-ready, containerized Voice AI Agents using LiveKit, Python, and Docker",
5
+ "type": "module",
6
+ "main": "src/index.js",
7
+ "bin": {
8
+ "create-voice-agent": "./bin/create-voice-agent.js"
9
+ },
10
+ "scripts": {
11
+ "test": "echo \"Error: no test specified\" && exit 1"
12
+ },
13
+ "keywords": [
14
+ "livekit",
15
+ "voice-agent",
16
+ "webrtc",
17
+ "docker",
18
+ "cli",
19
+ "scaffold",
20
+ "ai"
21
+ ],
22
+ "author": "",
23
+ "license": "MIT",
24
+ "files": [
25
+ "bin/",
26
+ "src/",
27
+ "templates/"
28
+ ],
29
+ "dependencies": {
30
+ "@inquirer/prompts": "^8.3.0",
31
+ "commander": "^14.0.3",
32
+ "ejs": "^4.0.1"
33
+ }
34
+ }
package/src/index.js ADDED
@@ -0,0 +1,310 @@
import { execSync } from 'node:child_process';
import { copyFileSync } from 'node:fs';
import { Command } from 'commander';
import { select, input, confirm } from '@inquirer/prompts';
import { scaffold } from './scaffold.js';
5
+
6
// ─── Ollama API Fetching ──────────────────────────────────────────────
/**
 * Query a local Ollama server for its installed models.
 *
 * @param {string} [ollamaIp='127.0.0.1'] - Host/IP where Ollama listens (port 11434).
 * @returns {Promise<string[]>} Installed model names, or [] when the server is
 *   unreachable, answers with a non-OK status, returns invalid JSON, or does
 *   not respond within 3 seconds. Never throws.
 */
async function fetchOllamaModels(ollamaIp = '127.0.0.1') {
  try {
    // AbortSignal.timeout replaces the manual setTimeout/clearTimeout pair:
    // the original skipped clearTimeout on the failure path, leaving a
    // dangling timer that kept the event loop alive for up to 3 seconds.
    const res = await fetch(`http://${ollamaIp}:11434/api/tags`, {
      signal: AbortSignal.timeout(3000),
    });
    if (!res.ok) return [];
    const data = await res.json();
    return (data.models ?? []).map((m) => m.name);
  } catch {
    // Connection refused, DNS failure, timeout, or malformed JSON —
    // all are treated as "no Ollama here".
    return [];
  }
}
21
+
22
// ─── Model Selection Prompt ───────────────────────────────────────────
/**
 * Ask the user which Ollama model to use.
 * When installed models are known, present them as a list (with a
 * custom-name escape hatch); otherwise fall back to free-text input.
 */
async function promptModelName(models, defaultModel) {
  if (models.length === 0) {
    // Nothing detected — plain text prompt with a sensible default.
    return input({ message: 'Enter Ollama model name:', default: defaultModel });
  }

  const CUSTOM = '__custom__';
  const choices = [
    ...models.map((m) => ({ name: m, value: m })),
    { name: '✏️ Enter custom model name', value: CUSTOM },
  ];

  const picked = await select({
    message: 'Select an Ollama model:',
    choices,
  });

  return picked === CUSTOM
    ? input({ message: 'Enter model name:', default: defaultModel })
    : picked;
}
38
+
39
// ─── Docker Auto-Start ────────────────────────────────────────────────
/**
 * Bring the freshly generated project up with Docker Compose.
 *
 * In "standalone" mode the Ollama container is started first, polled until
 * it answers, and the requested model is pulled before the remaining
 * services are built. Any failure degrades to printed manual instructions —
 * this function never throws.
 *
 * @param {string} projectName - Directory of the generated project (used as cwd).
 * @param {'existing'|'standalone'|null} deploymentType - How Ollama is hosted.
 * @param {string} modelName - Ollama model to pull in standalone mode.
 */
function autoStartDocker(projectName, deploymentType, modelName) {
  const cwd = projectName;
  console.log('\n🐳 Starting Docker containers...\n');

  // Portable synchronous sleep. The original shelled out to `sleep 2`,
  // which fails on Windows and spawns a shell just to wait.
  const sleepSync = (ms) =>
    Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);

  try {
    // Seed .env from the template via the fs API instead of the
    // POSIX-only `cp` shell command.
    copyFileSync(`${cwd}/.env.example`, `${cwd}/.env`);

    if (deploymentType === 'standalone') {
      // modelName can come from free-text input and is interpolated into a
      // shell command below — reject anything outside the safe charset to
      // prevent shell injection.
      if (!/^[A-Za-z0-9._:\/-]+$/.test(modelName)) {
        throw new Error(`Refusing to pull suspicious model name: ${modelName}`);
      }

      // Start ollama container first
      console.log('⏳ Starting Ollama container...');
      execSync('docker compose up -d ollama', { cwd, stdio: 'inherit' });

      // Poll `ollama list` inside the container until it answers (~60s max).
      console.log('⏳ Waiting for Ollama to be ready...');
      let ready = false;
      for (let i = 0; i < 30; i++) {
        try {
          execSync('docker compose exec ollama ollama list', {
            cwd,
            stdio: 'pipe',
          });
          ready = true;
          break;
        } catch {
          sleepSync(2000);
        }
      }

      if (!ready) {
        console.log('⚠️ Ollama container took too long to start. Pull model manually.');
      } else {
        // Pull the model
        console.log(`⏳ Pulling model "${modelName}" (this may take a while)...`);
        execSync(`docker compose exec ollama ollama pull ${modelName}`, {
          cwd,
          stdio: 'inherit',
        });
      }
    }

    // Start all services
    console.log('\n🚀 Starting all services...');
    execSync('docker compose up --build -d', { cwd, stdio: 'inherit' });
    console.log('\n✅ All services are running!');
    console.log(' LiveKit: ws://localhost:7880');
    console.log(' Frontend: http://localhost:3000 (if included)');
  } catch (err) {
    console.error('\n⚠️ Docker auto-start failed:', err.message);
    console.log(' You can start manually with: docker compose up --build');
  }
}
91
+
92
// ─── CLI Entry Point ──────────────────────────────────────────────────
// Commander wiring: a single required <project-name> argument, then an
// interactive flow (three setup modes) that gathers options, calls
// scaffold(), and optionally auto-starts Docker.
const program = new Command();

program
  .name('create-voice-agent')
  .description(
    'Scaffold a production-ready, containerized Voice AI Agent using LiveKit, Python, and Docker',
  )
  // NOTE(review): version is hard-coded here and in the banner below —
  // it can drift from package.json's "version"; confirm whether it should
  // be read from package.json instead.
  .version('1.0.0')
  .argument('<project-name>', 'Name of the project directory to create')
  .action(async (projectName) => {
    console.log('\n🎙️ create-voice-agent v1.0.0\n');
    console.log(`Scaffolding project: ${projectName}\n`);

    // ─── Fetch Ollama models in background ────────────────────────
    // Probe localhost:11434 up front so prompts can list installed
    // models; fetchOllamaModels() returns [] instead of throwing.
    let ollamaModels = await fetchOllamaModels();
    if (ollamaModels.length > 0) {
      console.log(`✅ Ollama detected — ${ollamaModels.length} model(s) available\n`);
    } else {
      console.log('ℹ️ Ollama not detected on localhost (will ask later)\n');
    }

    // ─── Step 1: Setup Mode ───────────────────────────────────────
    const setupMode = await select({
      message: 'Select setup mode:',
      choices: [
        {
          name: '⚡ Auto Mode (1-Click Magic)',
          value: 'auto',
          description: 'Ollama + defaults + auto-start Docker',
        },
        {
          name: '🔧 Advanced Mode',
          value: 'advanced',
          description: 'Full control over every option',
        },
        {
          name: '📦 Fast Mode (Scaffold Only)',
          value: 'fast',
          description: 'Generate files and exit — no auto-start',
        },
      ],
    });

    // Defaults; each mode below overrides only the subset it cares about.
    let llmProvider = 'ollama';
    let deploymentType = null;
    let modelName = 'qwen3-vl:2b';
    let language = 'en';
    let includeFrontend = true;
    let shouldAutoStart = false;
    let ollamaIp = 'localhost';

    // ─── AUTO MODE ────────────────────────────────────────────────
    // Forces Ollama + frontend + auto-start; only asks how Ollama is hosted.
    if (setupMode === 'auto') {
      llmProvider = 'ollama';
      includeFrontend = true;
      shouldAutoStart = true;

      deploymentType = await select({
        message: 'How do you want to run Ollama?',
        choices: [
          {
            name: '🖥️ Use Existing Local Ollama (host)',
            value: 'existing',
            description: 'Ollama is already running on your machine',
          },
          {
            name: '🐳 Install Standalone in Docker (CPU/GPU)',
            value: 'standalone',
            description: 'Ollama runs as a Docker service',
          },
        ],
      });

      if (deploymentType === 'existing') {
        ollamaIp = await input({
          message: 'Ollama IP address:',
          default: 'localhost',
        });
        // Re-fetch models if IP changed
        const fetchIp = ollamaIp === 'localhost' ? '127.0.0.1' : ollamaIp;
        ollamaModels = await fetchOllamaModels(fetchIp);
        modelName = await promptModelName(ollamaModels, 'qwen3-vl:2b');
      } else {
        // Standalone: the model is pulled later by autoStartDocker().
        modelName = 'qwen3-vl:2b';
        console.log(`\n📦 Standalone mode: will use model "${modelName}" (fastest CPU model)`);
      }
    }

    // ─── ADVANCED MODE ────────────────────────────────────────────
    // Full prompt chain: provider, Ollama hosting, model, language,
    // frontend, and (Ollama only) whether to auto-start Docker.
    if (setupMode === 'advanced') {
      llmProvider = await select({
        message: 'Which LLM Provider?',
        choices: [
          { name: 'Ollama (Local)', value: 'ollama' },
          { name: 'OpenAI (Cloud)', value: 'openai' },
        ],
      });

      if (llmProvider === 'ollama') {
        deploymentType = await select({
          message: 'How do you want to run Ollama?',
          choices: [
            {
              name: '🖥️ Use Existing Local Ollama (host)',
              value: 'existing',
              description: 'Ollama is already running on your machine',
            },
            {
              name: '🐳 Install Standalone in Docker (CPU/GPU)',
              value: 'standalone',
              description: 'Ollama runs as a Docker service',
            },
          ],
        });

        // NOTE(review): the IP is prompted even on the standalone path,
        // where docker-compose wires the agent to the ollama service and
        // this value appears unused — confirm whether it should be skipped.
        ollamaIp = await input({
          message: 'Ollama IP address:',
          default: 'localhost',
        });

        if (deploymentType === 'existing') {
          const fetchIp = ollamaIp === 'localhost' ? '127.0.0.1' : ollamaIp;
          ollamaModels = await fetchOllamaModels(fetchIp);
          modelName = await promptModelName(ollamaModels, 'qwen3-vl:4b');
        } else {
          modelName = await input({
            message: 'Enter Ollama model name:',
            default: 'qwen3-vl:4b',
          });
        }

        language = await input({
          message: 'Agent language (e.g., en, ru, de):',
          default: 'en',
        });
      }

      includeFrontend = await confirm({
        message: 'Include a Next.js frontend?',
        default: true,
      });

      // Auto-start is only offered for the local (Ollama) provider.
      if (llmProvider === 'ollama') {
        shouldAutoStart = await confirm({
          message: 'Auto-start Docker containers now?',
          default: true,
        });
      }
    }

    // ─── FAST MODE ────────────────────────────────────────────────
    // Minimal questions; never auto-starts Docker.
    if (setupMode === 'fast') {
      llmProvider = await select({
        message: 'Which LLM Provider?',
        choices: [
          { name: 'Ollama (Local)', value: 'ollama' },
          { name: 'OpenAI (Cloud)', value: 'openai' },
        ],
      });

      if (llmProvider === 'ollama') {
        deploymentType = await select({
          message: 'How do you want to run Ollama?',
          choices: [
            {
              name: '🖥️ Use Existing Local Ollama (host)',
              value: 'existing',
            },
            {
              name: '🐳 Install Standalone in Docker (CPU/GPU)',
              value: 'standalone',
            },
          ],
        });
      }

      includeFrontend = await confirm({
        message: 'Include a Next.js frontend?',
        default: true,
      });

      // Fast mode uses defaults for model, language, no auto-start
      modelName = deploymentType === 'standalone' ? 'qwen3-vl:2b' : 'qwen3-vl:4b';
      shouldAutoStart = false;
    }

    // ─── Scaffold ─────────────────────────────────────────────────
    // Generates the project files; scaffold() exits the process when the
    // target directory already exists (see scaffold.js).
    await scaffold({
      projectName,
      llmProvider,
      includeFrontend,
      deploymentType,
      modelName,
      language,
      ollamaIp,
    });

    console.log(`\n✅ Project "${projectName}" created successfully!\n`);

    if (shouldAutoStart) {
      autoStartDocker(projectName, deploymentType, modelName);
    } else {
      // Manual next steps tailored to the chosen provider/deployment.
      console.log('Next steps:');
      console.log(` cd ${projectName}`);
      console.log(' cp .env.example .env');
      if (llmProvider === 'openai') {
        console.log(' # Fill in your API keys in .env');
      } else {
        console.log(' # No external API keys required — 100% local!');
        if (deploymentType === 'existing') {
          console.log(' # Make sure Ollama is running on your host machine');
        }
      }
      console.log(' docker compose up --build\n');
    }
  });

program.parse();
@@ -0,0 +1,98 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+ import ejs from 'ejs';
5
+
6
+ const __filename = fileURLToPath(import.meta.url);
7
+ const __dirname = path.dirname(__filename);
8
+ const TEMPLATES_DIR = path.join(__dirname, '..', 'templates');
9
+
10
/**
 * Directories that should only be included for the local/ollama path.
 * renderDir() skips these subtrees entirely when data.llmProvider is not
 * 'ollama' (the cloud path does not ship the custom STT/TTS plugins).
 */
const LOCAL_ONLY_DIRS = ['custom_tts', 'custom_stt'];
14
+
15
/**
 * Recursively render all .ejs templates from a source directory into a target directory.
 * Non-template files are copied verbatim; directories listed in LOCAL_ONLY_DIRS
 * are only descended into when the ollama provider is selected.
 */
function renderDir(srcDir, destDir, data) {
  for (const entry of fs.readdirSync(srcDir, { withFileTypes: true })) {
    const srcPath = path.join(srcDir, entry.name);

    if (entry.isDirectory()) {
      // Local-only template dirs are omitted for cloud providers.
      const skip = LOCAL_ONLY_DIRS.includes(entry.name) && data.llmProvider !== 'ollama';
      if (!skip) {
        const childDest = path.join(destDir, entry.name);
        fs.mkdirSync(childDest, { recursive: true });
        renderDir(srcPath, childDest, data);
      }
      continue;
    }

    if (entry.name.endsWith('.ejs')) {
      // Render the template and strip the .ejs suffix from the output name.
      const source = fs.readFileSync(srcPath, 'utf-8');
      const outPath = path.join(destDir, entry.name.replace(/\.ejs$/, ''));
      fs.writeFileSync(outPath, ejs.render(source, data));
    } else {
      // Plain asset — copy byte-for-byte.
      fs.copyFileSync(srcPath, path.join(destDir, entry.name));
    }
  }
}
41
+
42
+ /**
43
+ * Main scaffolding function.
44
+ */
45
+ export async function scaffold({
46
+ projectName,
47
+ llmProvider,
48
+ includeFrontend,
49
+ deploymentType = null,
50
+ modelName = 'qwen3-vl:2b',
51
+ language = 'en',
52
+ ollamaIp = 'localhost',
53
+ }) {
54
+ const projectDir = path.resolve(process.cwd(), projectName);
55
+
56
+ if (fs.existsSync(projectDir)) {
57
+ console.error(`\n❌ Directory "${projectName}" already exists. Aborting.`);
58
+ process.exit(1);
59
+ }
60
+
61
+ fs.mkdirSync(projectDir, { recursive: true });
62
+
63
+ const data = {
64
+ projectName,
65
+ llmProvider,
66
+ includeFrontend,
67
+ deploymentType,
68
+ modelName,
69
+ language,
70
+ ollamaIp,
71
+ };
72
+
73
+ // Render root-level templates
74
+ const rootTemplates = ['docker-compose.yml.ejs', '.env.example.ejs', 'README.md.ejs'];
75
+ for (const tpl of rootTemplates) {
76
+ const template = fs.readFileSync(path.join(TEMPLATES_DIR, tpl), 'utf-8');
77
+ const rendered = ejs.render(template, data);
78
+ const outName = tpl.replace(/\.ejs$/, '');
79
+ fs.writeFileSync(path.join(projectDir, outName), rendered);
80
+ }
81
+
82
+ // Render python-agent directory
83
+ const agentSrc = path.join(TEMPLATES_DIR, 'python-agent');
84
+ const agentDest = path.join(projectDir, 'python-agent');
85
+ fs.mkdirSync(agentDest, { recursive: true });
86
+ renderDir(agentSrc, agentDest, data);
87
+
88
+ // Render frontend directory (if selected)
89
+ if (includeFrontend) {
90
+ const frontendSrc = path.join(TEMPLATES_DIR, 'frontend');
91
+ const frontendDest = path.join(projectDir, 'frontend');
92
+ fs.mkdirSync(frontendDest, { recursive: true });
93
+ renderDir(frontendSrc, frontendDest, data);
94
+ }
95
+
96
+ console.log(`\n📁 Generated files in ./${projectName}/`);
97
+ }
98
+
@@ -0,0 +1,22 @@
1
+ # ─── LiveKit Credentials ─────────────────────────────────────────────
2
+ # Generate a key/secret pair or use defaults for local development.
3
+ LIVEKIT_API_KEY=devkey
4
+ LIVEKIT_API_SECRET=devsecret
5
+
6
+ <% if (llmProvider === 'openai') { -%>
7
+ # ─── OpenAI ──────────────────────────────────────────────────────────
8
+ OPENAI_API_KEY=sk-your-openai-api-key-here
9
+ <% } else if (deploymentType === 'standalone') { -%>
10
+ # ─── Ollama (Standalone Docker) ─────────────────────────────────────
11
+ # The agent connects to the Ollama container via Docker networking.
12
+ # No need to change this — it is set automatically in docker-compose.yml.
13
+ OLLAMA_BASE_URL=http://localhost:11434
14
+ <% } else { -%>
15
+ # ─── Ollama (Existing Host) ─────────────────────────────────────────
16
+ # Points to the host machine's Ollama instance from inside Docker.
17
+ <% if (ollamaIp === 'localhost' || ollamaIp === '127.0.0.1') { -%>
18
+ OLLAMA_BASE_URL=http://host.docker.internal:11434
19
+ <% } else { -%>
20
+ OLLAMA_BASE_URL=http://<%= ollamaIp %>:11434
21
+ <% } -%>
22
+ <% } -%>
@@ -0,0 +1,60 @@
1
+ # <%= projectName %>
2
+
3
+ A real-time Voice AI Agent powered by [LiveKit](https://livekit.io/), Python, and Docker.
4
+
5
+ ## Architecture
6
+
7
+ - **LiveKit Server** — WebRTC media server (SFU)
8
+ - **Redis** — Required by LiveKit for signaling
9
+ - **Python Agent** — Voice pipeline using `livekit-agents` (VAD → STT → LLM → TTS)
10
+ <% if (includeFrontend) { -%>
11
+ - **Next.js Frontend** — Browser UI with LiveKit Room components
12
+ <% } -%>
13
+
14
+ ## Quick Start
15
+
16
+ ```bash
17
+ # 1. Copy and configure environment variables
18
+ cp .env.example .env
19
+ # Edit .env with your API keys
20
+
21
+ # 2. Start all services
22
+ docker compose up --build
23
+ ```
24
+
25
+ <% if (llmProvider === 'ollama') { -%>
26
+ ### Ollama (Local LLM)
27
+
28
+ Make sure Ollama is running on your host machine:
29
+
30
+ ```bash
31
+ ollama serve
32
+ ollama pull <%= modelName %>
33
+ ```
34
+
35
+ The agent connects to Ollama via `http://host.docker.internal:11434`.
36
+ <% } else { -%>
37
+ ### OpenAI
38
+
39
+ Set your `OPENAI_API_KEY` in the `.env` file.
40
+ <% } -%>
41
+
42
+ <% if (includeFrontend) { -%>
43
+ ## Frontend
44
+
45
+ Open [http://localhost:3000](http://localhost:3000) in your browser.
46
+ The frontend fetches a LiveKit access token from `/api/token` and connects to the voice agent.
47
+ <% } -%>
48
+
49
+ ## Services & Ports
50
+
51
+ | Service | Port | Protocol |
52
+ | ---------- | ----- | -------- |
53
+ | LiveKit | 7880 | HTTP/WS |
54
+ | LiveKit | 7881 | RTC TCP |
55
+ | LiveKit | 50000-50050 | UDP |
56
+ | Redis | 6379 | TCP |
57
+ <% if (includeFrontend) { -%>
58
+ | Frontend | 3000 | HTTP |
59
+ <% } -%>
60
+
@@ -0,0 +1,100 @@
1
+ version: "3.9"
2
+
3
+ services:
4
+ <% if (llmProvider === 'ollama' && deploymentType === 'standalone') { -%>
5
+ # ─── Ollama (Standalone) ─────────────────────────────────────────
6
+ ollama:
7
+ image: ollama/ollama
8
+ restart: unless-stopped
9
+ ports:
10
+ - "11434:11434"
11
+ volumes:
12
+ - ollama_data:/root/.ollama
13
+ deploy:
14
+ resources:
15
+ limits:
16
+ cpus: '10'
17
+
18
+ <% } -%>
19
+ # ─── Redis (required by LiveKit) ───────────────────────────────────
20
+ redis:
21
+ image: redis:7-alpine
22
+ restart: unless-stopped
23
+ ports:
24
+ - "6379:6379"
25
+ healthcheck:
26
+ test: ["CMD", "redis-cli", "ping"]
27
+ interval: 5s
28
+ timeout: 3s
29
+ retries: 5
30
+
31
+ # ─── LiveKit Server ────────────────────────────────────────────────
32
+ livekit:
33
+ image: livekit/livekit-server:latest
34
+ restart: unless-stopped
35
+ depends_on:
36
+ redis:
37
+ condition: service_healthy
38
+ ports:
39
+ - "7880:7880" # HTTP / WebSocket
40
+ - "7881:7881" # RTC TCP
41
+ - "50000-50050:50000-50050/udp" # WebRTC UDP (strictly limited range)
42
+ environment:
43
+ - LIVEKIT_KEYS=${LIVEKIT_API_KEY}:${LIVEKIT_API_SECRET}
44
+ command: >
45
+ --config /etc/livekit.yaml
46
+ --bind 0.0.0.0
47
+ --redis-host redis:6379
48
+ --rtc.port-range-start 50000
49
+ --rtc.port-range-end 50050
50
+ --rtc.tcp-port 7881
51
+
52
+ # ─── Python Voice Agent ────────────────────────────────────────────
53
+ agent:
54
+ build:
55
+ context: ./python-agent
56
+ dockerfile: Dockerfile
57
+ restart: unless-stopped
58
+ depends_on:
59
+ - livekit
60
+ <% if (llmProvider === 'ollama' && deploymentType === 'standalone') { -%>
61
+ - ollama
62
+ <% } -%>
63
+ environment:
64
+ - LIVEKIT_URL=ws://livekit:7880
65
+ - LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
66
+ - LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
67
+ <% if (llmProvider === 'ollama' && deploymentType === 'standalone') { -%>
68
+ - OLLAMA_BASE_URL=http://ollama:11434
69
+ <% } else if (llmProvider === 'ollama' && deploymentType === 'existing') { -%>
70
+ - OLLAMA_BASE_URL=${OLLAMA_BASE_URL}
71
+ <% } else { -%>
72
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
73
+ <% } -%>
74
+ <% if (llmProvider === 'ollama' && deploymentType === 'existing') { -%>
75
+ extra_hosts:
76
+ - "host.docker.internal:host-gateway"
77
+ <% } -%>
78
+
79
+ <% if (includeFrontend) { -%>
80
+ # ─── Next.js Frontend ─────────────────────────────────────────────
81
+ frontend:
82
+ build:
83
+ context: ./frontend
84
+ dockerfile: Dockerfile
85
+ restart: unless-stopped
86
+ depends_on:
87
+ - livekit
88
+ ports:
89
+ - "3000:3000"
90
+ environment:
91
+ - LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
92
+ - LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
93
+ - LIVEKIT_URL=ws://livekit:7880
94
+ - NEXT_PUBLIC_LIVEKIT_URL=ws://localhost:7880
95
+ <% } -%>
96
+
97
+ <% if (llmProvider === 'ollama' && deploymentType === 'standalone') { -%>
98
+ volumes:
99
+ ollama_data:
100
+ <% } -%>
@@ -0,0 +1,13 @@
1
+ FROM node:20-alpine
2
+
3
+ WORKDIR /app
4
+
5
+ COPY package.json package-lock.json* ./
6
+ RUN npm install
7
+
8
+ COPY . .
9
+
10
+ EXPOSE 3000
11
+
12
+ CMD ["npm", "run", "dev"]
13
+
@@ -0,0 +1,33 @@
1
+ import { AccessToken } from "livekit-server-sdk";
2
+ import { NextRequest, NextResponse } from "next/server";
3
+
4
/**
 * GET /api/token
 *
 * Server-side endpoint that mints a LiveKit AccessToken JWT for the
 * browser client. Query params: `room` (default "default-room") and
 * `username` (default: random "user-NNNN" identity). Responds 500 when
 * the LiveKit credentials are missing from the environment.
 */
export async function GET(req: NextRequest) {
  const room = req.nextUrl.searchParams.get("room") ?? "default-room";
  const username = req.nextUrl.searchParams.get("username") ?? `user-${Math.floor(Math.random() * 10000)}`;

  // Secrets stay on the server — they are never shipped to the browser.
  const apiKey = process.env.LIVEKIT_API_KEY;
  const apiSecret = process.env.LIVEKIT_API_SECRET;

  if (!apiKey || !apiSecret) {
    return NextResponse.json(
      { error: "LIVEKIT_API_KEY and LIVEKIT_API_SECRET must be set" },
      { status: 500 }
    );
  }

  const at = new AccessToken(apiKey, apiSecret, {
    identity: username,
  });

  // Grant join/publish/subscribe for the requested room only.
  at.addGrant({
    room,
    roomJoin: true,
    canPublish: true,
    canSubscribe: true,
  });

  const token = await at.toJwt();

  return NextResponse.json({ token });
}
33
+
@@ -0,0 +1,17 @@
1
// App-wide metadata consumed by the Next.js App Router.
export const metadata = {
  title: "Voice Agent",
  description: "Real-time Voice AI Agent",
};

// Root layout: minimal HTML shell — every page renders inside <body>.
export default function RootLayout({
  children,
}: {
  children: React.ReactNode;
}) {
  return (
    <html lang="en">
      <body>{children}</body>
    </html>
  );
}
17
+
@@ -0,0 +1,90 @@
1
+ "use client";
2
+
3
+ import { useEffect, useState } from "react";
4
+ import {
5
+ LiveKitRoom,
6
+ RoomAudioRenderer,
7
+ useVoiceAssistant,
8
+ BarVisualizer,
9
+ DisconnectButton,
10
+ } from "@livekit/components-react";
11
+ import "@livekit/components-styles";
12
+
13
+ function VoiceAssistantUI() {
14
+ const { state, audioTrack } = useVoiceAssistant();
15
+
16
+ return (
17
+ <div
18
+ style={{
19
+ display: "flex",
20
+ flexDirection: "column",
21
+ alignItems: "center",
22
+ gap: "1rem",
23
+ padding: "2rem",
24
+ }}
25
+ >
26
+ <h2>Agent Status: {state}</h2>
27
+ <BarVisualizer
28
+ state={state}
29
+ barCount={5}
30
+ trackRef={audioTrack}
31
+ style={{ width: "300px", height: "80px" }}
32
+ />
33
+ <DisconnectButton>Disconnect</DisconnectButton>
34
+ </div>
35
+ );
36
+ }
37
+
38
+ export default function Home() {
39
+ const [token, setToken] = useState<string | null>(null);
40
+ const [error, setError] = useState<string | null>(null);
41
+
42
+ const livekitUrl = process.env.NEXT_PUBLIC_LIVEKIT_URL ?? "ws://localhost:7880";
43
+
44
+ useEffect(() => {
45
+ async function fetchToken() {
46
+ try {
47
+ const res = await fetch("/api/token?room=voice-room&username=user");
48
+ if (!res.ok) throw new Error("Failed to fetch token");
49
+ const data = await res.json();
50
+ setToken(data.token);
51
+ } catch (err: unknown) {
52
+ setError(err instanceof Error ? err.message : "Unknown error");
53
+ }
54
+ }
55
+ fetchToken();
56
+ }, []);
57
+
58
+ if (error) {
59
+ return (
60
+ <div style={{ padding: "2rem", textAlign: "center" }}>
61
+ <h1>Error</h1>
62
+ <p>{error}</p>
63
+ </div>
64
+ );
65
+ }
66
+
67
+ if (!token) {
68
+ return (
69
+ <div style={{ padding: "2rem", textAlign: "center" }}>
70
+ <p>Connecting to voice agent...</p>
71
+ </div>
72
+ );
73
+ }
74
+
75
+ return (
76
+ <main style={{ padding: "2rem", textAlign: "center" }}>
77
+ <h1><%= projectName %></h1>
78
+ <LiveKitRoom
79
+ serverUrl={livekitUrl}
80
+ token={token}
81
+ connect={true}
82
+ audio={true}
83
+ >
84
+ <VoiceAssistantUI />
85
+ <RoomAudioRenderer />
86
+ </LiveKitRoom>
87
+ </main>
88
+ );
89
+ }
90
+
@@ -0,0 +1,5 @@
1
/** @type {import('next').NextConfig} */
// Default Next.js configuration — no custom options needed for this app.
const nextConfig = {};

module.exports = nextConfig;
5
+
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": "<%= projectName %>-frontend",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "next dev",
7
+ "build": "next build",
8
+ "start": "next start"
9
+ },
10
+ "dependencies": {
11
+ "next": "^15.0.0",
12
+ "react": "^19.0.0",
13
+ "react-dom": "^19.0.0",
14
+ "@livekit/components-react": "^2.0.0",
15
+ "livekit-client": "^2.0.0",
16
+ "livekit-server-sdk": "^2.0.0"
17
+ },
18
+ "devDependencies": {
19
+ "@types/node": "^22.0.0",
20
+ "@types/react": "^19.0.0",
21
+ "typescript": "^5.0.0"
22
+ }
23
+ }
24
+
@@ -0,0 +1,22 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2017",
4
+ "lib": ["dom", "dom.iterable", "esnext"],
5
+ "allowJs": true,
6
+ "skipLibCheck": true,
7
+ "strict": true,
8
+ "noEmit": true,
9
+ "esModuleInterop": true,
10
+ "module": "esnext",
11
+ "moduleResolution": "bundler",
12
+ "resolveJsonModule": true,
13
+ "isolatedModules": true,
14
+ "jsx": "preserve",
15
+ "incremental": true,
16
+ "plugins": [{ "name": "next" }],
17
+ "paths": { "@/*": ["./*"] }
18
+ },
19
+ "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
20
+ "exclude": ["node_modules"]
21
+ }
22
+
@@ -0,0 +1,35 @@
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system dependencies
6
+ RUN apt-get update && \
7
+ <% if (llmProvider === 'ollama') { -%>
8
+ apt-get install -y --no-install-recommends gcc ffmpeg libsndfile1-dev curl && \
9
+ <% } else { -%>
10
+ apt-get install -y --no-install-recommends gcc && \
11
+ <% } -%>
12
+ rm -rf /var/lib/apt/lists/*
13
+
14
+ <% if (llmProvider === 'ollama') { -%>
15
+ # Download Piper TTS voice model (en_US-lessac-medium)
16
+ RUN mkdir -p /app/models && \
17
+ curl -L -o /app/models/en_US-lessac-medium.onnx \
18
+ "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx" && \
19
+ curl -L -o /app/models/en_US-lessac-medium.onnx.json \
20
+ "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json"
21
+
22
+ <% } -%>
23
+ # Copy and install Python dependencies
24
+ COPY requirements.txt .
25
+ RUN pip install --no-cache-dir -r requirements.txt
26
+
27
+ # Copy agent source code
28
+ COPY . .
29
+
30
+ # ─── EACCES Fix: run as non-root user ───────────────────────────────
31
+ RUN useradd -m agentuser && \
32
+ chown -R agentuser:agentuser /app
33
+ USER agentuser
34
+
35
+ CMD ["python", "agent.py", "start"]
@@ -0,0 +1,63 @@
1
+ """
2
+ <%= projectName %> — Voice AI Agent
3
+ Uses the official livekit-agents library with VoicePipelineAgent.
4
+ """
5
+
6
+ import os
7
+ import logging
8
+
9
+ from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
10
+ from livekit.agents.pipeline import VoicePipelineAgent
11
+ from livekit.plugins import silero
12
+ <% if (llmProvider === 'openai') { -%>
13
+ from livekit.plugins import deepgram
14
+ from livekit.plugins import openai as openai_plugin
15
+ <% } else { -%>
16
+ from livekit.plugins import openai as openai_plugin
17
+ from custom_stt.faster_whisper_stt import FasterWhisperSTT
18
+ from custom_tts.piper_tts import PiperTTSPlugin
19
+ <% } -%>
20
+
21
+ logger = logging.getLogger("voice-agent")
22
+ logger.setLevel(logging.INFO)
23
+
24
+
25
+ async def entrypoint(ctx: JobContext):
26
+ """Called when a participant joins the LiveKit room."""
27
+
28
+ logger.info(f"Connecting to room: {ctx.room.name}")
29
+ await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
30
+
31
+ participant = await ctx.wait_for_participant()
32
+ logger.info(f"Participant joined: {participant.identity}")
33
+
34
+ # ─── Build the Voice Pipeline ────────────────────────────────────
35
+ <% if (llmProvider === 'openai') { -%>
36
+ llm = openai_plugin.LLM(model="gpt-4o-mini")
37
+ tts_engine = openai_plugin.TTS(voice="alloy")
38
+ stt_engine = deepgram.STT()
39
+ <% } else { -%>
40
+ # Model: <%= modelName %> | Language: <%= language %>
41
+ llm = openai_plugin.LLM.with_ollama(
42
+ model="<%= modelName %>",
43
+ base_url=os.environ.get("OLLAMA_BASE_URL", "http://host.docker.internal:11434"),
44
+ )
45
+ tts_engine = PiperTTSPlugin(model="/app/models/en_US-lessac-medium.onnx")
46
+ stt_engine = FasterWhisperSTT(model="base", language="<%= language %>")
47
+ <% } -%>
48
+
49
+ agent = VoicePipelineAgent(
50
+ vad=silero.VAD.load(),
51
+ stt=stt_engine,
52
+ llm=llm,
53
+ tts=tts_engine,
54
+ )
55
+
56
+ # Start the agent in the room
57
+ agent.start(ctx.room, participant)
58
+
59
+ await agent.say("Hello! I'm your voice assistant. How can I help you today?")
60
+
61
+
62
+ if __name__ == "__main__":
63
+ cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
@@ -0,0 +1,85 @@
1
+ """
2
+ Faster Whisper STT Plugin for LiveKit Agents.
3
+ Uses the faster-whisper Python package for fully offline speech-to-text.
4
+ """
5
+
6
+ import asyncio
7
+ import io
8
+ import logging
9
+ import wave
10
+
11
+ import numpy as np
12
+ from faster_whisper import WhisperModel
13
+
14
+ from livekit.agents import stt
15
+ from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS
16
+
17
+ logger = logging.getLogger("faster-whisper-stt")
18
+
19
+
20
+ class FasterWhisperSTT(stt.STT):
21
+ """Non-streaming LiveKit STT adapter around faster-whisper.
+ 
+ Whisper inference is CPU/GPU-bound, so `recognize` offloads the work to a
+ thread-pool executor to keep the agent's event loop responsive.
+ """
+ 
+ def __init__(self, model="base", language="en", device="cpu", compute_type="int8"):
22
+ # Batch-only engine: advertises no streaming and no interim results.
+ super().__init__(
23
+ capabilities=stt.STTCapabilities(streaming=False, interim_results=False),
24
+ )
25
+ self.model_size = model
26
+ self.language = language
27
+ self.device = device
28
+ self.compute_type = compute_type
29
+ self._model = None
30
+ # Eager load: pay the model-load cost at construction, not on first request.
+ self._load_model()
31
+
32
+ def _load_model(self):
33
+ # Instantiate faster-whisper with the configured device and precision.
+ logger.info(f"Loading Whisper model: {self.model_size} (device={self.device})")
34
+ self._model = WhisperModel(
35
+ self.model_size,
36
+ device=self.device,
37
+ compute_type=self.compute_type,
38
+ )
39
+ logger.info("Whisper model loaded successfully")
40
+
41
+ async def recognize(self, buffer, *, language=None, conn_options=DEFAULT_API_CONNECT_OPTIONS):
42
+ # Offload the blocking transcription to the default executor.
+ # NOTE(review): asyncio.get_event_loop() is deprecated inside coroutines on
+ # modern Python — asyncio.get_running_loop() is the preferred call here.
+ loop = asyncio.get_event_loop()
43
+ return await loop.run_in_executor(
44
+ None, self._transcribe, buffer, language or self.language
45
+ )
46
+
47
+ def _transcribe(self, buffer, language):
48
+ # Convert LiveKit AudioFrame to numpy array
49
+ # Accepts three input shapes: an AudioFrame-like object exposing .data, a
+ # wrapper exposing .frame, or raw int16 PCM bytes. Samples are normalized
+ # to float32 in [-1, 1].
+ # NOTE(review): multi-channel frames are not downmixed here — interleaved
+ # stereo would be read as mono. Confirm upstream frames are mono.
+ if hasattr(buffer, "data"):
50
+ audio_data = np.frombuffer(buffer.data, dtype=np.int16).astype(np.float32) / 32768.0
51
+ sample_rate = buffer.sample_rate if hasattr(buffer, "sample_rate") else 16000
52
+ elif hasattr(buffer, "frame"):
53
+ frame = buffer.frame
54
+ audio_data = np.frombuffer(frame.data, dtype=np.int16).astype(np.float32) / 32768.0
55
+ sample_rate = frame.sample_rate if hasattr(frame, "sample_rate") else 16000
56
+ else:
57
+ audio_data = np.frombuffer(buffer, dtype=np.int16).astype(np.float32) / 32768.0
58
+ sample_rate = 16000
59
+
60
+ # Resample to 16kHz if needed (Whisper expects 16kHz)
61
+ if sample_rate != 16000:
62
+ # Imported lazily so scipy is only required when resampling is needed.
+ from scipy.signal import resample
63
+ num_samples = int(len(audio_data) * 16000 / sample_rate)
64
+ audio_data = resample(audio_data, num_samples).astype(np.float32)
65
+
66
+ segments, info = self._model.transcribe(
67
+ audio_data,
68
+ language=language,
69
+ beam_size=5,
70
+ # faster-whisper's built-in VAD filter trims silence before decoding.
+ vad_filter=True,
71
+ )
72
+
73
+ # `segments` is a lazy generator; joining consumes it and runs the decode.
+ text = " ".join(segment.text for segment in segments).strip()
74
+ 
75
+ # No per-utterance confidence is computed here; 1.0 is a placeholder value.
+ return stt.SpeechEvent(
76
+ type=stt.SpeechEventType.FINAL_TRANSCRIPT,
77
+ alternatives=[
78
+ stt.SpeechData(
79
+ text=text,
80
+ # Fall back to the language Whisper detected when none was given.
+ language=language or info.language,
81
+ confidence=1.0,
82
+ )
83
+ ],
84
+ )
85
+
@@ -0,0 +1,104 @@
1
+ """
2
+ Piper TTS Plugin for LiveKit Agents.
3
+ Based on the community plugin by nay-cat/LiveKit-PiperTTS-Plugin.
4
+ Uses the piper-tts Python package for fully offline text-to-speech.
5
+ """
6
+
7
+ import numpy as np
8
+ import asyncio
9
+ import logging
10
+
11
+ from livekit.agents import tts
12
+ from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS
13
+ from livekit import rtc
14
+ from piper import PiperVoice, SynthesisConfig
15
+
16
+ logger = logging.getLogger("piper-tts")
17
+
18
+
19
+ class PiperTTSPlugin(tts.TTS):
20
+ """Non-streaming LiveKit TTS adapter around a local Piper voice model.
+ 
+ Output is advertised as 22050 Hz mono int16 PCM.
+ NOTE(review): the loaded .onnx voice is assumed to produce 22050 Hz audio —
+ this is not validated against the model's config; confirm per voice.
+ """
+ 
+ def __init__(self, model, speed=1.0, volume=1.0, noise_scale=0.667, noise_w=0.8, use_cuda=False):
21
+ super().__init__(
22
+ # Whole-utterance synthesis only; audio is chunked after generation.
+ capabilities=tts.TTSCapabilities(streaming=False),
23
+ sample_rate=22050,
24
+ num_channels=1,
25
+ )
26
+ self.model = model
27
+ self.speed = speed
28
+ self.volume = volume
29
+ self.noise_scale = noise_scale
30
+ self.noise_w = noise_w
31
+ self.use_cuda = use_cuda
32
+ self._voice = None
33
+ # Eager load so the first synthesis request is not delayed by model loading.
+ self._load_voice()
34
+
35
+ def _load_voice(self):
36
+ logger.info(f"Loading Piper voice model: {self.model}")
37
+ self._voice = PiperVoice.load(self.model, use_cuda=self.use_cuda)
38
+ logger.info("Piper voice model loaded successfully")
39
+
40
+ def synthesize(self, text, *, conn_options=DEFAULT_API_CONNECT_OPTIONS):
41
+ # Returns a ChunkedStream; actual synthesis happens in PiperStream._run().
+ return PiperStream(self, text, conn_options)
42
+
43
+
44
+ class PiperStream(tts.ChunkedStream):
45
+ """ChunkedStream that synthesizes one utterance with Piper.
+ 
+ Synthesis runs in a thread-pool executor; the resulting int16 PCM chunks
+ are emitted as 22050 Hz mono AudioFrames. On any failure, one second of
+ silence is emitted so the pipeline receives audio instead of stalling.
+ """
+ 
+ def __init__(self, plugin, text, conn_options):
46
+ super().__init__(tts=plugin, input_text=text, conn_options=conn_options)
47
+ # Keep a handle to the plugin for its voice model and synthesis knobs.
+ self.plugin = plugin
48
+
49
+ async def _run(self):
50
+ try:
51
+ # Map plugin knobs onto Piper's synthesis parameters
+ # (speed -> length_scale, noise_w -> noise_w_scale).
+ config = SynthesisConfig(
52
+ volume=self.plugin.volume,
53
+ length_scale=self.plugin.speed,
54
+ noise_scale=self.plugin.noise_scale,
55
+ noise_w_scale=self.plugin.noise_w,
56
+ normalize_audio=True,
57
+ )
58
+
59
+ # NOTE(review): asyncio.get_event_loop() is deprecated inside coroutines
+ # on modern Python — asyncio.get_running_loop() is the preferred call.
+ loop = asyncio.get_event_loop()
60
+ chunks = await loop.run_in_executor(None, self._synthesize_chunks, config)
61
+
62
+ for chunk in chunks:
63
+ frame = rtc.AudioFrame(
64
+ data=chunk,
65
+ sample_rate=22050,
66
+ num_channels=1,
67
+ # int16 mono PCM: 2 bytes per sample.
+ samples_per_channel=len(chunk) // 2,
68
+ )
69
+ self._event_ch.send_nowait(
70
+ tts.SynthesizedAudio(
71
+ # Placeholder IDs: this stream serves a single request/segment.
+ request_id="1",
72
+ segment_id="1",
73
+ frame=frame,
74
+ )
75
+ )
76
+
77
+ except Exception as e:
78
+ # Fallback: log the error and emit 1 s of silence (22050 samples at
+ # 22050 Hz) so downstream consumers still receive a frame.
+ logger.error(f"Piper TTS synthesis failed: {e}")
79
+ silence = np.zeros(22050, dtype=np.int16).tobytes()
80
+ frame = rtc.AudioFrame(
81
+ data=silence,
82
+ sample_rate=22050,
83
+ num_channels=1,
84
+ samples_per_channel=22050,
85
+ )
86
+ self._event_ch.send_nowait(
87
+ tts.SynthesizedAudio(
88
+ request_id="1",
89
+ segment_id="1",
90
+ frame=frame,
91
+ )
92
+ )
93
+
94
+ def _synthesize_chunks(self, config):
95
+ # Runs in a worker thread: collect every PCM chunk for the utterance.
+ chunks = []
96
+ for chunk in self.plugin._voice.synthesize(self.input_text, syn_config=config):
97
+ audio_data = chunk.audio_int16_bytes
98
+ # Downmix interleaved stereo to mono by averaging the two channels.
+ if chunk.sample_channels == 2:
99
+ audio = np.frombuffer(audio_data, dtype=np.int16)
100
+ audio = audio.reshape(-1, 2).mean(axis=1).astype(np.int16)
101
+ audio_data = audio.tobytes()
102
+ chunks.append(audio_data)
103
+ return chunks
104
+
@@ -0,0 +1,11 @@
1
+ livekit-agents>=0.12.0
2
+ livekit-plugins-silero>=0.7.0
3
+ livekit-plugins-openai>=0.10.0
4
+ <% if (llmProvider === 'openai') { -%>
5
+ livekit-plugins-deepgram>=0.7.0
6
+ <% } else { -%>
7
+ piper-tts>=1.3.0
8
+ faster-whisper>=1.0.0
9
+ numpy
10
+ onnxruntime>=1.22.0
11
+ <% } -%>