create-local-voice-agent 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -0
- package/bin/create-voice-agent.js +4 -0
- package/package.json +34 -0
- package/src/index.js +310 -0
- package/src/scaffold.js +98 -0
- package/templates/.env.example.ejs +22 -0
- package/templates/README.md.ejs +60 -0
- package/templates/docker-compose.yml.ejs +100 -0
- package/templates/frontend/Dockerfile.ejs +13 -0
- package/templates/frontend/app/api/token/route.ts.ejs +33 -0
- package/templates/frontend/app/layout.tsx +17 -0
- package/templates/frontend/app/page.tsx.ejs +90 -0
- package/templates/frontend/next.config.js +5 -0
- package/templates/frontend/package.json.ejs +24 -0
- package/templates/frontend/tsconfig.json +22 -0
- package/templates/python-agent/Dockerfile.ejs +35 -0
- package/templates/python-agent/agent.py.ejs +63 -0
- package/templates/python-agent/custom_stt/__init__.py +1 -0
- package/templates/python-agent/custom_stt/faster_whisper_stt.py +85 -0
- package/templates/python-agent/custom_tts/__init__.py +1 -0
- package/templates/python-agent/custom_tts/piper_tts.py +104 -0
- package/templates/python-agent/requirements.txt.ejs +11 -0
package/README.md
ADDED
@@ -0,0 +1,73 @@
# create-voice-agent

A CLI that scaffolds a **production-ready, containerized infrastructure** for real-time Voice AI Agents using **LiveKit**, **Python**, and **Docker**.

Stop fighting WebRTC UDP port exhaustion, Docker permission errors, and token handshake wiring — this tool generates working boilerplate in seconds.

## Quick Start

```bash
npx create-local-voice-agent my-voice-app
```

You'll be prompted to choose:

1. **LLM Provider** — Ollama (local GPU) or OpenAI (cloud)
2. **Next.js Frontend** — optional browser UI with LiveKit Room components

Then:

```bash
cd my-voice-app
cp .env.example .env   # fill in your API keys
docker compose up --build
```

## What Gets Generated

```
my-voice-app/
├── docker-compose.yml          # LiveKit + Redis + Agent (+ Frontend)
├── .env.example                # Pre-configured environment variables
├── README.md                   # Project-specific documentation
├── python-agent/
│   ├── Dockerfile              # Non-root user (EACCES fix)
│   ├── requirements.txt        # livekit-agents + plugins
│   └── agent.py                # VoicePipelineAgent (VAD→STT→LLM→TTS)
└── frontend/                   # (if selected)
    ├── Dockerfile
    ├── package.json            # includes livekit-server-sdk
    ├── app/api/token/route.ts  # Secure AccessToken generation
    └── app/page.tsx            # LiveKit Room + voice visualizer
```

## Key Architectural Decisions

### WebRTC UDP Port Limiting
LiveKit's UDP port range is strictly limited to `50000-50050` to prevent `docker-proxy` from spawning thousands of processes and hanging the host OS.

### Token Handshake
The Next.js frontend includes a `/api/token` endpoint that uses `livekit-server-sdk` to generate `AccessToken` JWTs. The browser fetches this token before connecting to the LiveKit room.

### Non-Root Docker User
The Python agent's Dockerfile creates a dedicated `agentuser` to prevent the `EACCES` permission errors common on Linux hosts.

### Ollama GPU Routing
When Ollama is selected, the agent container uses `host.docker.internal` to reach the host's GPU-accelerated Ollama instance.

## Development

```bash
# Clone and install
git clone https://github.com/geladons/create-voice-agent.git
cd create-voice-agent
npm install

# Run locally
node bin/create-voice-agent.js my-test-project
```

## License

MIT
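The token handshake the README describes is easiest to see from the browser's side. A minimal sketch, mirroring the generated `route.ts` and `page.tsx` shown later in this diff:

```js
// Sketch of the token handshake: the browser asks the Next.js API route for a
// LiveKit JWT, then hands it to the LiveKit client. This mirrors the generated
// frontend code later in this diff; it is not additional shipped code.
const res = await fetch('/api/token?room=voice-room&username=user');
const { token } = await res.json();
// `token` carries roomJoin/canPublish/canSubscribe grants and is passed,
// together with ws://localhost:7880, to the LiveKit room component.
```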
package/package.json
ADDED
@@ -0,0 +1,34 @@
{
  "name": "create-local-voice-agent",
  "version": "1.0.0",
  "description": "CLI to scaffold production-ready, containerized Voice AI Agents using LiveKit, Python, and Docker",
  "type": "module",
  "main": "src/index.js",
  "bin": {
    "create-voice-agent": "./bin/create-voice-agent.js"
  },
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [
    "livekit",
    "voice-agent",
    "webrtc",
    "docker",
    "cli",
    "scaffold",
    "ai"
  ],
  "author": "",
  "license": "MIT",
  "files": [
    "bin/",
    "src/",
    "templates/"
  ],
  "dependencies": {
    "@inquirer/prompts": "^8.3.0",
    "commander": "^14.0.3",
    "ejs": "^4.0.1"
  }
}
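The `bin` field maps the `create-voice-agent` command to `bin/create-voice-agent.js`, a 4-line file listed above but whose body is not shown in this diff. A hypothetical reconstruction of such a wrapper:

```js
#!/usr/bin/env node
// Hypothetical sketch: the published 4-line bin wrapper is not shown in this
// diff. A wrapper like this simply delegates to the CLI entry point, which
// registers and parses the commander program on import.
import '../src/index.js';
```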
package/src/index.js
ADDED
@@ -0,0 +1,310 @@
import { Command } from 'commander';
import { select, input, confirm } from '@inquirer/prompts';
import { execSync } from 'node:child_process';
import { scaffold } from './scaffold.js';

// ─── Ollama API Fetching ──────────────────────────────────────────────
async function fetchOllamaModels(ollamaIp = '127.0.0.1') {
  try {
    const url = `http://${ollamaIp}:11434/api/tags`;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 3000);
    const res = await fetch(url, { signal: controller.signal });
    clearTimeout(timeout);
    if (!res.ok) return [];
    const data = await res.json();
    return (data.models || []).map((m) => m.name);
  } catch {
    return [];
  }
}

// ─── Model Selection Prompt ───────────────────────────────────────────
async function promptModelName(models, defaultModel) {
  if (models.length > 0) {
    const choices = models.map((m) => ({ name: m, value: m }));
    choices.push({ name: '✏️ Enter custom model name', value: '__custom__' });
    const picked = await select({
      message: 'Select an Ollama model:',
      choices,
    });
    if (picked === '__custom__') {
      return input({ message: 'Enter model name:', default: defaultModel });
    }
    return picked;
  }
  return input({ message: 'Enter Ollama model name:', default: defaultModel });
}

// ─── Docker Auto-Start ────────────────────────────────────────────────
function autoStartDocker(projectName, deploymentType, modelName) {
  const cwd = projectName;
  console.log('\n🐳 Starting Docker containers...\n');

  try {
    // Copy .env.example to .env
    execSync('cp .env.example .env', { cwd, stdio: 'inherit' });

    if (deploymentType === 'standalone') {
      // Start the Ollama container first
      console.log('⏳ Starting Ollama container...');
      execSync('docker compose up -d ollama', { cwd, stdio: 'inherit' });

      // Wait for Ollama to be ready
      console.log('⏳ Waiting for Ollama to be ready...');
      let ready = false;
      for (let i = 0; i < 30; i++) {
        try {
          execSync('docker compose exec ollama ollama list', {
            cwd,
            stdio: 'pipe',
          });
          ready = true;
          break;
        } catch {
          execSync('sleep 2', { stdio: 'pipe' });
        }
      }
      if (!ready) {
        console.log('⚠️ Ollama container took too long to start. Pull the model manually.');
      } else {
        // Pull the model
        console.log(`⏳ Pulling model "${modelName}" (this may take a while)...`);
        execSync(`docker compose exec ollama ollama pull ${modelName}`, {
          cwd,
          stdio: 'inherit',
        });
      }
    }

    // Start all services
    console.log('\n🚀 Starting all services...');
    execSync('docker compose up --build -d', { cwd, stdio: 'inherit' });
    console.log('\n✅ All services are running!');
    console.log('   LiveKit:  ws://localhost:7880');
    console.log('   Frontend: http://localhost:3000 (if included)');
  } catch (err) {
    console.error('\n⚠️ Docker auto-start failed:', err.message);
    console.log('   You can start manually with: docker compose up --build');
  }
}

// ─── CLI Entry Point ──────────────────────────────────────────────────
const program = new Command();

program
  .name('create-voice-agent')
  .description(
    'Scaffold a production-ready, containerized Voice AI Agent using LiveKit, Python, and Docker',
  )
  .version('1.0.0')
  .argument('<project-name>', 'Name of the project directory to create')
  .action(async (projectName) => {
    console.log('\n🎙️ create-voice-agent v1.0.0\n');
    console.log(`Scaffolding project: ${projectName}\n`);

    // ─── Detect Ollama up front ───────────────────────────────────
    let ollamaModels = await fetchOllamaModels();
    if (ollamaModels.length > 0) {
      console.log(`✅ Ollama detected — ${ollamaModels.length} model(s) available\n`);
    } else {
      console.log('ℹ️ Ollama not detected on localhost (will ask later)\n');
    }

    // ─── Step 1: Setup Mode ───────────────────────────────────────
    const setupMode = await select({
      message: 'Select setup mode:',
      choices: [
        {
          name: '⚡ Auto Mode (1-Click Magic)',
          value: 'auto',
          description: 'Ollama + defaults + auto-start Docker',
        },
        {
          name: '🔧 Advanced Mode',
          value: 'advanced',
          description: 'Full control over every option',
        },
        {
          name: '📦 Fast Mode (Scaffold Only)',
          value: 'fast',
          description: 'Generate files and exit — no auto-start',
        },
      ],
    });

    let llmProvider = 'ollama';
    let deploymentType = null;
    let modelName = 'qwen3-vl:2b';
    let language = 'en';
    let includeFrontend = true;
    let shouldAutoStart = false;
    let ollamaIp = 'localhost';

    // ─── AUTO MODE ────────────────────────────────────────────────
    if (setupMode === 'auto') {
      llmProvider = 'ollama';
      includeFrontend = true;
      shouldAutoStart = true;

      deploymentType = await select({
        message: 'How do you want to run Ollama?',
        choices: [
          {
            name: '🖥️ Use Existing Local Ollama (host)',
            value: 'existing',
            description: 'Ollama is already running on your machine',
          },
          {
            name: '🐳 Install Standalone in Docker (CPU/GPU)',
            value: 'standalone',
            description: 'Ollama runs as a Docker service',
          },
        ],
      });

      if (deploymentType === 'existing') {
        ollamaIp = await input({
          message: 'Ollama IP address:',
          default: 'localhost',
        });
        // Re-fetch models if the IP changed
        const fetchIp = ollamaIp === 'localhost' ? '127.0.0.1' : ollamaIp;
        ollamaModels = await fetchOllamaModels(fetchIp);
        modelName = await promptModelName(ollamaModels, 'qwen3-vl:2b');
      } else {
        modelName = 'qwen3-vl:2b';
        console.log(`\n📦 Standalone mode: will use model "${modelName}" (fastest CPU model)`);
      }
    }

    // ─── ADVANCED MODE ────────────────────────────────────────────
    if (setupMode === 'advanced') {
      llmProvider = await select({
        message: 'Which LLM Provider?',
        choices: [
          { name: 'Ollama (Local)', value: 'ollama' },
          { name: 'OpenAI (Cloud)', value: 'openai' },
        ],
      });

      if (llmProvider === 'ollama') {
        deploymentType = await select({
          message: 'How do you want to run Ollama?',
          choices: [
            {
              name: '🖥️ Use Existing Local Ollama (host)',
              value: 'existing',
              description: 'Ollama is already running on your machine',
            },
            {
              name: '🐳 Install Standalone in Docker (CPU/GPU)',
              value: 'standalone',
              description: 'Ollama runs as a Docker service',
            },
          ],
        });

        ollamaIp = await input({
          message: 'Ollama IP address:',
          default: 'localhost',
        });

        if (deploymentType === 'existing') {
          const fetchIp = ollamaIp === 'localhost' ? '127.0.0.1' : ollamaIp;
          ollamaModels = await fetchOllamaModels(fetchIp);
          modelName = await promptModelName(ollamaModels, 'qwen3-vl:4b');
        } else {
          modelName = await input({
            message: 'Enter Ollama model name:',
            default: 'qwen3-vl:4b',
          });
        }

        language = await input({
          message: 'Agent language (e.g., en, ru, de):',
          default: 'en',
        });
      }

      includeFrontend = await confirm({
        message: 'Include a Next.js frontend?',
        default: true,
      });

      if (llmProvider === 'ollama') {
        shouldAutoStart = await confirm({
          message: 'Auto-start Docker containers now?',
          default: true,
        });
      }
    }

    // ─── FAST MODE ────────────────────────────────────────────────
    if (setupMode === 'fast') {
      llmProvider = await select({
        message: 'Which LLM Provider?',
        choices: [
          { name: 'Ollama (Local)', value: 'ollama' },
          { name: 'OpenAI (Cloud)', value: 'openai' },
        ],
      });

      if (llmProvider === 'ollama') {
        deploymentType = await select({
          message: 'How do you want to run Ollama?',
          choices: [
            {
              name: '🖥️ Use Existing Local Ollama (host)',
              value: 'existing',
            },
            {
              name: '🐳 Install Standalone in Docker (CPU/GPU)',
              value: 'standalone',
            },
          ],
        });
      }

      includeFrontend = await confirm({
        message: 'Include a Next.js frontend?',
        default: true,
      });

      // Fast mode uses defaults for model and language, and never auto-starts
      modelName = deploymentType === 'standalone' ? 'qwen3-vl:2b' : 'qwen3-vl:4b';
      shouldAutoStart = false;
    }

    // ─── Scaffold ─────────────────────────────────────────────────
    await scaffold({
      projectName,
      llmProvider,
      includeFrontend,
      deploymentType,
      modelName,
      language,
      ollamaIp,
    });

    console.log(`\n✅ Project "${projectName}" created successfully!\n`);

    if (shouldAutoStart) {
      autoStartDocker(projectName, deploymentType, modelName);
    } else {
      console.log('Next steps:');
      console.log(`  cd ${projectName}`);
      console.log('  cp .env.example .env');
      if (llmProvider === 'openai') {
        console.log('  # Fill in your API keys in .env');
      } else {
        console.log('  # No external API keys required — 100% local!');
        if (deploymentType === 'existing') {
          console.log('  # Make sure Ollama is running on your host machine');
        }
      }
      console.log('  docker compose up --build\n');
    }
  });

program.parse();
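One portability note on the readiness loop above: `execSync('sleep 2', ...)` shells out to the Unix `sleep` binary, which does not exist on Windows hosts. A sketch of a portable alternative, assuming `autoStartDocker` were declared `async` and awaited by its caller:

```js
// Portable delay (sketch): would replace execSync('sleep 2') in the loop.
// Assumption: autoStartDocker is made async so the loop body can await.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
await sleep(2000);
```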
package/src/scaffold.js
ADDED
@@ -0,0 +1,98 @@
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import ejs from 'ejs';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const TEMPLATES_DIR = path.join(__dirname, '..', 'templates');

/**
 * Directories that should only be included for the local/ollama path.
 */
const LOCAL_ONLY_DIRS = ['custom_tts', 'custom_stt'];

/**
 * Recursively render all .ejs templates from a source directory into a target directory.
 */
function renderDir(srcDir, destDir, data) {
  const entries = fs.readdirSync(srcDir, { withFileTypes: true });
  for (const entry of entries) {
    const srcPath = path.join(srcDir, entry.name);
    if (entry.isDirectory()) {
      // Skip local-only directories when using a cloud provider
      if (LOCAL_ONLY_DIRS.includes(entry.name) && data.llmProvider !== 'ollama') {
        continue;
      }
      const childDest = path.join(destDir, entry.name);
      fs.mkdirSync(childDest, { recursive: true });
      renderDir(srcPath, childDest, data);
    } else if (entry.name.endsWith('.ejs')) {
      const outName = entry.name.replace(/\.ejs$/, '');
      const template = fs.readFileSync(srcPath, 'utf-8');
      const rendered = ejs.render(template, data);
      fs.writeFileSync(path.join(destDir, outName), rendered);
    } else {
      // Copy non-template files as-is
      fs.copyFileSync(srcPath, path.join(destDir, entry.name));
    }
  }
}

/**
 * Main scaffolding function.
 */
export async function scaffold({
  projectName,
  llmProvider,
  includeFrontend,
  deploymentType = null,
  modelName = 'qwen3-vl:2b',
  language = 'en',
  ollamaIp = 'localhost',
}) {
  const projectDir = path.resolve(process.cwd(), projectName);

  if (fs.existsSync(projectDir)) {
    console.error(`\n❌ Directory "${projectName}" already exists. Aborting.`);
    process.exit(1);
  }

  fs.mkdirSync(projectDir, { recursive: true });

  const data = {
    projectName,
    llmProvider,
    includeFrontend,
    deploymentType,
    modelName,
    language,
    ollamaIp,
  };

  // Render root-level templates
  const rootTemplates = ['docker-compose.yml.ejs', '.env.example.ejs', 'README.md.ejs'];
  for (const tpl of rootTemplates) {
    const template = fs.readFileSync(path.join(TEMPLATES_DIR, tpl), 'utf-8');
    const rendered = ejs.render(template, data);
    const outName = tpl.replace(/\.ejs$/, '');
    fs.writeFileSync(path.join(projectDir, outName), rendered);
  }

  // Render the python-agent directory
  const agentSrc = path.join(TEMPLATES_DIR, 'python-agent');
  const agentDest = path.join(projectDir, 'python-agent');
  fs.mkdirSync(agentDest, { recursive: true });
  renderDir(agentSrc, agentDest, data);

  // Render the frontend directory (if selected)
  if (includeFrontend) {
    const frontendSrc = path.join(TEMPLATES_DIR, 'frontend');
    const frontendDest = path.join(projectDir, 'frontend');
    fs.mkdirSync(frontendDest, { recursive: true });
    renderDir(frontendSrc, frontendDest, data);
  }

  console.log(`\n📁 Generated files in ./${projectName}/`);
}
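Since `scaffold()` is exported and `files` in package.json ships `src/`, it can also be driven programmatically. A sketch using the same options the CLI collects (the deep import path is an assumption based on the package having no `exports` map):

```js
// Sketch: invoking the scaffolder directly with the options the CLI would collect.
import { scaffold } from 'create-local-voice-agent/src/scaffold.js';

await scaffold({
  projectName: 'my-voice-app',
  llmProvider: 'ollama',       // or 'openai'
  includeFrontend: true,
  deploymentType: 'existing',  // 'existing' | 'standalone' | null
  modelName: 'qwen3-vl:2b',
  language: 'en',
  ollamaIp: 'localhost',
});
```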
package/templates/.env.example.ejs
ADDED
@@ -0,0 +1,22 @@
# ─── LiveKit Credentials ─────────────────────────────────────────────
# Generate a key/secret pair or use the defaults for local development.
LIVEKIT_API_KEY=devkey
LIVEKIT_API_SECRET=devsecret

<% if (llmProvider === 'openai') { -%>
# ─── OpenAI ──────────────────────────────────────────────────────────
OPENAI_API_KEY=sk-your-openai-api-key-here
<% } else if (deploymentType === 'standalone') { -%>
# ─── Ollama (Standalone Docker) ──────────────────────────────────────
# The agent connects to the Ollama container via Docker networking.
# No need to change this — it is set automatically in docker-compose.yml.
OLLAMA_BASE_URL=http://localhost:11434
<% } else { -%>
# ─── Ollama (Existing Host) ──────────────────────────────────────────
# Points to the host machine's Ollama instance from inside Docker.
<% if (ollamaIp === 'localhost' || ollamaIp === '127.0.0.1') { -%>
OLLAMA_BASE_URL=http://host.docker.internal:11434
<% } else { -%>
OLLAMA_BASE_URL=http://<%= ollamaIp %>:11434
<% } -%>
<% } -%>
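To see which branch this template takes, it can be rendered exactly the way `scaffold.js` does. A sketch for the existing-host default, run from the package root:

```js
// Sketch: rendering the .env template with the same data scaffold.js passes.
import fs from 'node:fs';
import ejs from 'ejs';

const tpl = fs.readFileSync('templates/.env.example.ejs', 'utf-8');
console.log(ejs.render(tpl, {
  llmProvider: 'ollama',
  deploymentType: 'existing',
  ollamaIp: 'localhost',
}));
// Output: the LiveKit dev keys plus
// OLLAMA_BASE_URL=http://host.docker.internal:11434
```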
package/templates/README.md.ejs
ADDED
@@ -0,0 +1,60 @@
# <%= projectName %>

A real-time Voice AI Agent powered by [LiveKit](https://livekit.io/), Python, and Docker.

## Architecture

- **LiveKit Server** — WebRTC media server (SFU)
- **Redis** — Required by LiveKit for signaling
- **Python Agent** — Voice pipeline using `livekit-agents` (VAD → STT → LLM → TTS)
<% if (includeFrontend) { -%>
- **Next.js Frontend** — Browser UI with LiveKit Room components
<% } -%>

## Quick Start

```bash
# 1. Copy and configure environment variables
cp .env.example .env
# Edit .env with your API keys

# 2. Start all services
docker compose up --build
```

<% if (llmProvider === 'ollama') { -%>
### Ollama (Local LLM)

Make sure Ollama is running on your host machine:

```bash
ollama serve
ollama pull <%= modelName %>
```

The agent connects to Ollama via `http://host.docker.internal:11434`.
<% } else { -%>
### OpenAI

Set your `OPENAI_API_KEY` in the `.env` file.
<% } -%>

<% if (includeFrontend) { -%>
## Frontend

Open [http://localhost:3000](http://localhost:3000) in your browser.
The frontend fetches a LiveKit access token from `/api/token` and connects to the voice agent.
<% } -%>

## Services & Ports

| Service  | Port        | Protocol |
| -------- | ----------- | -------- |
| LiveKit  | 7880        | HTTP/WS  |
| LiveKit  | 7881        | RTC TCP  |
| LiveKit  | 50000-50050 | UDP      |
| Redis    | 6379        | TCP      |
<% if (includeFrontend) { -%>
| Frontend | 3000        | HTTP     |
<% } -%>
package/templates/docker-compose.yml.ejs
ADDED
@@ -0,0 +1,100 @@
version: "3.9"

services:
<% if (llmProvider === 'ollama' && deploymentType === 'standalone') { -%>
  # ─── Ollama (Standalone) ─────────────────────────────────────────
  ollama:
    image: ollama/ollama
    restart: unless-stopped
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama
    deploy:
      resources:
        limits:
          cpus: '10'

<% } -%>
  # ─── Redis (required by LiveKit) ───────────────────────────────────
  redis:
    image: redis:7-alpine
    restart: unless-stopped
    ports:
      - "6379:6379"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 3s
      retries: 5

  # ─── LiveKit Server ────────────────────────────────────────────────
  livekit:
    image: livekit/livekit-server:latest
    restart: unless-stopped
    depends_on:
      redis:
        condition: service_healthy
    ports:
      - "7880:7880"                    # HTTP / WebSocket
      - "7881:7881"                    # RTC TCP
      - "50000-50050:50000-50050/udp"  # WebRTC UDP (strictly limited range)
    environment:
      - LIVEKIT_KEYS=${LIVEKIT_API_KEY}:${LIVEKIT_API_SECRET}
    command: >
      --config /etc/livekit.yaml
      --bind 0.0.0.0
      --redis-host redis:6379
      --rtc.port-range-start 50000
      --rtc.port-range-end 50050
      --rtc.tcp-port 7881

  # ─── Python Voice Agent ────────────────────────────────────────────
  agent:
    build:
      context: ./python-agent
      dockerfile: Dockerfile
    restart: unless-stopped
    depends_on:
      - livekit
<% if (llmProvider === 'ollama' && deploymentType === 'standalone') { -%>
      - ollama
<% } -%>
    environment:
      - LIVEKIT_URL=ws://livekit:7880
      - LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
      - LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
<% if (llmProvider === 'ollama' && deploymentType === 'standalone') { -%>
      - OLLAMA_BASE_URL=http://ollama:11434
<% } else if (llmProvider === 'ollama' && deploymentType === 'existing') { -%>
      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL}
<% } else { -%>
      - OPENAI_API_KEY=${OPENAI_API_KEY}
<% } -%>
<% if (llmProvider === 'ollama' && deploymentType === 'existing') { -%>
    extra_hosts:
      - "host.docker.internal:host-gateway"
<% } -%>

<% if (includeFrontend) { -%>
  # ─── Next.js Frontend ─────────────────────────────────────────────
  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
    restart: unless-stopped
    depends_on:
      - livekit
    ports:
      - "3000:3000"
    environment:
      - LIVEKIT_API_KEY=${LIVEKIT_API_KEY}
      - LIVEKIT_API_SECRET=${LIVEKIT_API_SECRET}
      - LIVEKIT_URL=ws://livekit:7880
      - NEXT_PUBLIC_LIVEKIT_URL=ws://localhost:7880
<% } -%>

<% if (llmProvider === 'ollama' && deploymentType === 'standalone') { -%>
volumes:
  ollama_data:
<% } -%>
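A quick way to confirm the host-published ports once `docker compose up` finishes (a sketch; the 11434 check only applies when Ollama runs in standalone mode):

```js
// Smoke-test sketch: poke the host-published ports from the compose file above.
const endpoints = [
  'http://localhost:7880',           // LiveKit HTTP/WS
  'http://localhost:11434/api/tags', // Ollama (standalone mode only)
];
for (const url of endpoints) {
  const res = await fetch(url).catch(() => null);
  console.log(url, '->', res ? res.status : 'unreachable');
}
```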
package/templates/frontend/app/api/token/route.ts.ejs
ADDED
@@ -0,0 +1,33 @@
import { AccessToken } from "livekit-server-sdk";
import { NextRequest, NextResponse } from "next/server";

export async function GET(req: NextRequest) {
  const room = req.nextUrl.searchParams.get("room") ?? "default-room";
  const username = req.nextUrl.searchParams.get("username") ?? `user-${Math.floor(Math.random() * 10000)}`;

  const apiKey = process.env.LIVEKIT_API_KEY;
  const apiSecret = process.env.LIVEKIT_API_SECRET;

  if (!apiKey || !apiSecret) {
    return NextResponse.json(
      { error: "LIVEKIT_API_KEY and LIVEKIT_API_SECRET must be set" },
      { status: 500 }
    );
  }

  const at = new AccessToken(apiKey, apiSecret, {
    identity: username,
  });

  at.addGrant({
    room,
    roomJoin: true,
    canPublish: true,
    canSubscribe: true,
  });

  const token = await at.toJwt();

  return NextResponse.json({ token });
}
package/templates/frontend/app/layout.tsx
ADDED
@@ -0,0 +1,17 @@
export const metadata = {
  title: "Voice Agent",
  description: "Real-time Voice AI Agent",
};

export default function RootLayout({
  children,
}: {
  children: React.ReactNode;
}) {
  return (
    <html lang="en">
      <body>{children}</body>
    </html>
  );
}
package/templates/frontend/app/page.tsx.ejs
ADDED
@@ -0,0 +1,90 @@
"use client";

import { useEffect, useState } from "react";
import {
  LiveKitRoom,
  RoomAudioRenderer,
  useVoiceAssistant,
  BarVisualizer,
  DisconnectButton,
} from "@livekit/components-react";
import "@livekit/components-styles";

function VoiceAssistantUI() {
  const { state, audioTrack } = useVoiceAssistant();

  return (
    <div
      style={{
        display: "flex",
        flexDirection: "column",
        alignItems: "center",
        gap: "1rem",
        padding: "2rem",
      }}
    >
      <h2>Agent Status: {state}</h2>
      <BarVisualizer
        state={state}
        barCount={5}
        trackRef={audioTrack}
        style={{ width: "300px", height: "80px" }}
      />
      <DisconnectButton>Disconnect</DisconnectButton>
    </div>
  );
}

export default function Home() {
  const [token, setToken] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);

  const livekitUrl = process.env.NEXT_PUBLIC_LIVEKIT_URL ?? "ws://localhost:7880";

  useEffect(() => {
    async function fetchToken() {
      try {
        const res = await fetch("/api/token?room=voice-room&username=user");
        if (!res.ok) throw new Error("Failed to fetch token");
        const data = await res.json();
        setToken(data.token);
      } catch (err: unknown) {
        setError(err instanceof Error ? err.message : "Unknown error");
      }
    }
    fetchToken();
  }, []);

  if (error) {
    return (
      <div style={{ padding: "2rem", textAlign: "center" }}>
        <h1>Error</h1>
        <p>{error}</p>
      </div>
    );
  }

  if (!token) {
    return (
      <div style={{ padding: "2rem", textAlign: "center" }}>
        <p>Connecting to voice agent...</p>
      </div>
    );
  }

  return (
    <main style={{ padding: "2rem", textAlign: "center" }}>
      <h1><%= projectName %></h1>
      <LiveKitRoom
        serverUrl={livekitUrl}
        token={token}
        connect={true}
        audio={true}
      >
        <VoiceAssistantUI />
        <RoomAudioRenderer />
      </LiveKitRoom>
    </main>
  );
}
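One caveat with `NEXT_PUBLIC_LIVEKIT_URL` above: Next.js inlines `NEXT_PUBLIC_*` variables at build time, so a value set only under `environment:` in docker-compose.yml (runtime) never reaches the client bundle, and the `?? "ws://localhost:7880"` fallback is what takes effect. A sketch of baking a different URL in at build time instead (the generated 5-line `next.config.js` is not shown in this diff, so this is an assumption, not the shipped config):

```js
// next.config.js sketch (assumption, not the shipped config).
// NEXT_PUBLIC_* values are inlined during `next build`, so the override
// must be present when the frontend image is built.
module.exports = {
  env: {
    NEXT_PUBLIC_LIVEKIT_URL: process.env.NEXT_PUBLIC_LIVEKIT_URL ?? 'ws://localhost:7880',
  },
};
```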
package/templates/frontend/package.json.ejs
ADDED
@@ -0,0 +1,24 @@
{
  "name": "<%= projectName %>-frontend",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev",
    "build": "next build",
    "start": "next start"
  },
  "dependencies": {
    "next": "^15.0.0",
    "react": "^19.0.0",
    "react-dom": "^19.0.0",
    "@livekit/components-react": "^2.0.0",
    "livekit-client": "^2.0.0",
    "livekit-server-sdk": "^2.0.0"
  },
  "devDependencies": {
    "@types/node": "^22.0.0",
    "@types/react": "^19.0.0",
    "typescript": "^5.0.0"
  }
}
package/templates/frontend/tsconfig.json
ADDED
@@ -0,0 +1,22 @@
{
  "compilerOptions": {
    "target": "ES2017",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [{ "name": "next" }],
    "paths": { "@/*": ["./*"] }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
  "exclude": ["node_modules"]
}
package/templates/python-agent/Dockerfile.ejs
ADDED
@@ -0,0 +1,35 @@
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && \
<% if (llmProvider === 'ollama') { -%>
    apt-get install -y --no-install-recommends gcc ffmpeg libsndfile1-dev curl && \
<% } else { -%>
    apt-get install -y --no-install-recommends gcc && \
<% } -%>
    rm -rf /var/lib/apt/lists/*

<% if (llmProvider === 'ollama') { -%>
# Download the Piper TTS voice model (en_US-lessac-medium)
RUN mkdir -p /app/models && \
    curl -L -o /app/models/en_US-lessac-medium.onnx \
      "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx" && \
    curl -L -o /app/models/en_US-lessac-medium.onnx.json \
      "https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json"

<% } -%>
# Copy and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy agent source code
COPY . .

# ─── EACCES fix: run as a non-root user ──────────────────────────────
RUN useradd -m agentuser && \
    chown -R agentuser:agentuser /app
USER agentuser

CMD ["python", "agent.py", "start"]
package/templates/python-agent/agent.py.ejs
ADDED
@@ -0,0 +1,63 @@
"""
<%= projectName %> — Voice AI Agent
Uses the official livekit-agents library with VoicePipelineAgent.
"""

import os
import logging

from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import silero
<% if (llmProvider === 'openai') { -%>
from livekit.plugins import deepgram
from livekit.plugins import openai as openai_plugin
<% } else { -%>
from livekit.plugins import openai as openai_plugin
from custom_stt.faster_whisper_stt import FasterWhisperSTT
from custom_tts.piper_tts import PiperTTSPlugin
<% } -%>

logger = logging.getLogger("voice-agent")
logger.setLevel(logging.INFO)


async def entrypoint(ctx: JobContext):
    """Called when a participant joins the LiveKit room."""

    logger.info(f"Connecting to room: {ctx.room.name}")
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

    participant = await ctx.wait_for_participant()
    logger.info(f"Participant joined: {participant.identity}")

    # ─── Build the Voice Pipeline ────────────────────────────────────
<% if (llmProvider === 'openai') { -%>
    llm = openai_plugin.LLM(model="gpt-4o-mini")
    tts_engine = openai_plugin.TTS(voice="alloy")
    stt_engine = deepgram.STT()
<% } else { -%>
    # Model: <%= modelName %> | Language: <%= language %>
    llm = openai_plugin.LLM.with_ollama(
        model="<%= modelName %>",
        base_url=os.environ.get("OLLAMA_BASE_URL", "http://host.docker.internal:11434"),
    )
    tts_engine = PiperTTSPlugin(model="/app/models/en_US-lessac-medium.onnx")
    stt_engine = FasterWhisperSTT(model="base", language="<%= language %>")
<% } -%>

    agent = VoicePipelineAgent(
        vad=silero.VAD.load(),
        stt=stt_engine,
        llm=llm,
        tts=tts_engine,
    )

    # Start the agent in the room
    agent.start(ctx.room, participant)

    await agent.say("Hello! I'm your voice assistant. How can I help you today?")


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
package/templates/python-agent/custom_stt/__init__.py
ADDED
@@ -0,0 +1 @@

package/templates/python-agent/custom_stt/faster_whisper_stt.py
ADDED
@@ -0,0 +1,85 @@
"""
Faster Whisper STT plugin for LiveKit Agents.
Uses the faster-whisper Python package for fully offline speech-to-text.
"""

import asyncio
import logging

import numpy as np
from faster_whisper import WhisperModel

from livekit.agents import stt
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS

logger = logging.getLogger("faster-whisper-stt")


class FasterWhisperSTT(stt.STT):
    def __init__(self, model="base", language="en", device="cpu", compute_type="int8"):
        super().__init__(
            capabilities=stt.STTCapabilities(streaming=False, interim_results=False),
        )
        self.model_size = model
        self.language = language
        self.device = device
        self.compute_type = compute_type
        self._model = None
        self._load_model()

    def _load_model(self):
        logger.info(f"Loading Whisper model: {self.model_size} (device={self.device})")
        self._model = WhisperModel(
            self.model_size,
            device=self.device,
            compute_type=self.compute_type,
        )
        logger.info("Whisper model loaded successfully")

    async def recognize(self, buffer, *, language=None, conn_options=DEFAULT_API_CONNECT_OPTIONS):
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(
            None, self._transcribe, buffer, language or self.language
        )

    def _transcribe(self, buffer, language):
        # Convert a LiveKit AudioFrame (or raw int16 bytes) to float32 numpy audio
        if hasattr(buffer, "data"):
            audio_data = np.frombuffer(buffer.data, dtype=np.int16).astype(np.float32) / 32768.0
            sample_rate = buffer.sample_rate if hasattr(buffer, "sample_rate") else 16000
        elif hasattr(buffer, "frame"):
            frame = buffer.frame
            audio_data = np.frombuffer(frame.data, dtype=np.int16).astype(np.float32) / 32768.0
            sample_rate = frame.sample_rate if hasattr(frame, "sample_rate") else 16000
        else:
            audio_data = np.frombuffer(buffer, dtype=np.int16).astype(np.float32) / 32768.0
            sample_rate = 16000

        # Resample to 16 kHz if needed (Whisper expects 16 kHz). NOTE: scipy is
        # imported lazily here but is not listed in requirements.txt.ejs.
        if sample_rate != 16000:
            from scipy.signal import resample
            num_samples = int(len(audio_data) * 16000 / sample_rate)
            audio_data = resample(audio_data, num_samples).astype(np.float32)

        segments, info = self._model.transcribe(
            audio_data,
            language=language,
            beam_size=5,
            vad_filter=True,
        )

        text = " ".join(segment.text for segment in segments).strip()

        return stt.SpeechEvent(
            type=stt.SpeechEventType.FINAL_TRANSCRIPT,
            alternatives=[
                stt.SpeechData(
                    text=text,
                    language=language or info.language,
                    confidence=1.0,
                )
            ],
        )
package/templates/python-agent/custom_tts/__init__.py
ADDED
@@ -0,0 +1 @@

package/templates/python-agent/custom_tts/piper_tts.py
ADDED
@@ -0,0 +1,104 @@
"""
Piper TTS plugin for LiveKit Agents.
Based on the community plugin by nay-cat/LiveKit-PiperTTS-Plugin.
Uses the piper-tts Python package for fully offline text-to-speech.
"""

import asyncio
import logging

import numpy as np

from livekit.agents import tts
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS
from livekit import rtc
from piper import PiperVoice, SynthesisConfig

logger = logging.getLogger("piper-tts")


class PiperTTSPlugin(tts.TTS):
    def __init__(self, model, speed=1.0, volume=1.0, noise_scale=0.667, noise_w=0.8, use_cuda=False):
        super().__init__(
            capabilities=tts.TTSCapabilities(streaming=False),
            sample_rate=22050,
            num_channels=1,
        )
        self.model = model
        self.speed = speed
        self.volume = volume
        self.noise_scale = noise_scale
        self.noise_w = noise_w
        self.use_cuda = use_cuda
        self._voice = None
        self._load_voice()

    def _load_voice(self):
        logger.info(f"Loading Piper voice model: {self.model}")
        self._voice = PiperVoice.load(self.model, use_cuda=self.use_cuda)
        logger.info("Piper voice model loaded successfully")

    def synthesize(self, text, *, conn_options=DEFAULT_API_CONNECT_OPTIONS):
        return PiperStream(self, text, conn_options)


class PiperStream(tts.ChunkedStream):
    def __init__(self, plugin, text, conn_options):
        super().__init__(tts=plugin, input_text=text, conn_options=conn_options)
        self.plugin = plugin

    async def _run(self):
        try:
            config = SynthesisConfig(
                volume=self.plugin.volume,
                length_scale=self.plugin.speed,
                noise_scale=self.plugin.noise_scale,
                noise_w_scale=self.plugin.noise_w,
                normalize_audio=True,
            )

            loop = asyncio.get_event_loop()
            chunks = await loop.run_in_executor(None, self._synthesize_chunks, config)

            for chunk in chunks:
                frame = rtc.AudioFrame(
                    data=chunk,
                    sample_rate=22050,
                    num_channels=1,
                    samples_per_channel=len(chunk) // 2,
                )
                self._event_ch.send_nowait(
                    tts.SynthesizedAudio(
                        request_id="1",
                        segment_id="1",
                        frame=frame,
                    )
                )

        except Exception as e:
            logger.error(f"Piper TTS synthesis failed: {e}")
            # Fall back to one second of silence on failure
            silence = np.zeros(22050, dtype=np.int16).tobytes()
            frame = rtc.AudioFrame(
                data=silence,
                sample_rate=22050,
                num_channels=1,
                samples_per_channel=22050,
            )
            self._event_ch.send_nowait(
                tts.SynthesizedAudio(
                    request_id="1",
                    segment_id="1",
                    frame=frame,
                )
            )

    def _synthesize_chunks(self, config):
        chunks = []
        for chunk in self.plugin._voice.synthesize(self.input_text, syn_config=config):
            audio_data = chunk.audio_int16_bytes
            # Downmix stereo to mono if the voice outputs two channels
            if chunk.sample_channels == 2:
                audio = np.frombuffer(audio_data, dtype=np.int16)
                audio = audio.reshape(-1, 2).mean(axis=1).astype(np.int16)
                audio_data = audio.tobytes()
            chunks.append(audio_data)
        return chunks
package/templates/python-agent/requirements.txt.ejs
ADDED
@@ -0,0 +1,11 @@
livekit-agents>=0.12.0
livekit-plugins-silero>=0.7.0
livekit-plugins-openai>=0.10.0
<% if (llmProvider === 'openai') { -%>
livekit-plugins-deepgram>=0.7.0
<% } else { -%>
piper-tts>=1.3.0
faster-whisper>=1.0.0
numpy
onnxruntime>=1.22.0
<% } -%>