@mooncompany/uplink-chat 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @mooncompany/uplink-chat might be problematic. Click here for more details.
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/bin/uplink.js +279 -0
- package/middleware/error-handler.js +69 -0
- package/package.json +93 -0
- package/public/css/agents.36b98c0f.css +1469 -0
- package/public/css/agents.css +1469 -0
- package/public/css/app.a6a7f8f5.css +2731 -0
- package/public/css/app.css +2731 -0
- package/public/css/artifacts.css +444 -0
- package/public/css/commands.css +55 -0
- package/public/css/connection.css +131 -0
- package/public/css/dashboard.css +233 -0
- package/public/css/developer.css +328 -0
- package/public/css/files.css +123 -0
- package/public/css/markdown.css +156 -0
- package/public/css/message-actions.css +278 -0
- package/public/css/mobile.css +614 -0
- package/public/css/panels-unified.css +483 -0
- package/public/css/premium.css +415 -0
- package/public/css/realtime.css +189 -0
- package/public/css/satellites.css +401 -0
- package/public/css/shortcuts.css +185 -0
- package/public/css/split-view.4def0262.css +673 -0
- package/public/css/split-view.css +673 -0
- package/public/css/theme-generator.css +391 -0
- package/public/css/themes.css +387 -0
- package/public/css/timestamps.css +54 -0
- package/public/css/variables.css +78 -0
- package/public/dist/bundle.b55050c4.js +15757 -0
- package/public/favicon.svg +24 -0
- package/public/img/agents/ada.png +0 -0
- package/public/img/agents/clarice.png +0 -0
- package/public/img/agents/dennis-nedry.png +0 -0
- package/public/img/agents/elliot-alderson.png +0 -0
- package/public/img/agents/main.png +0 -0
- package/public/img/agents/scotty.png +0 -0
- package/public/img/agents/top-flight-security.png +0 -0
- package/public/index.html +1083 -0
- package/public/js/agents-data.js +234 -0
- package/public/js/agents-ui.js +72 -0
- package/public/js/agents.js +1525 -0
- package/public/js/app.js +79 -0
- package/public/js/appearance-settings.js +111 -0
- package/public/js/artifacts.js +432 -0
- package/public/js/audio-queue.js +168 -0
- package/public/js/bootstrap.js +54 -0
- package/public/js/chat.js +1211 -0
- package/public/js/commands.js +581 -0
- package/public/js/connection-api.js +121 -0
- package/public/js/connection.js +1231 -0
- package/public/js/context-tracker.js +271 -0
- package/public/js/core.js +172 -0
- package/public/js/dashboard.js +452 -0
- package/public/js/developer.js +432 -0
- package/public/js/encryption.js +124 -0
- package/public/js/errors.js +122 -0
- package/public/js/event-bus.js +77 -0
- package/public/js/fetch-utils.js +171 -0
- package/public/js/file-handler.js +229 -0
- package/public/js/files.js +352 -0
- package/public/js/gateway-chat.js +538 -0
- package/public/js/logger.js +112 -0
- package/public/js/markdown.js +190 -0
- package/public/js/message-actions.js +431 -0
- package/public/js/message-renderer.js +288 -0
- package/public/js/missed-messages.js +235 -0
- package/public/js/mobile-debug.js +95 -0
- package/public/js/notifications.js +367 -0
- package/public/js/offline-queue.js +178 -0
- package/public/js/onboarding.js +543 -0
- package/public/js/panels.js +156 -0
- package/public/js/premium.js +412 -0
- package/public/js/realtime-voice.js +844 -0
- package/public/js/satellite-sync.js +256 -0
- package/public/js/satellite-ui.js +175 -0
- package/public/js/satellites.js +1516 -0
- package/public/js/settings.js +1087 -0
- package/public/js/shortcuts.js +381 -0
- package/public/js/split-chat.js +1234 -0
- package/public/js/split-resize.js +211 -0
- package/public/js/splitview.js +340 -0
- package/public/js/storage.js +408 -0
- package/public/js/streaming-handler.js +324 -0
- package/public/js/stt-settings.js +316 -0
- package/public/js/theme-generator.js +661 -0
- package/public/js/themes.js +164 -0
- package/public/js/timestamps.js +198 -0
- package/public/js/tts-settings.js +575 -0
- package/public/js/ui.js +267 -0
- package/public/js/update-notifier.js +143 -0
- package/public/js/utils/constants.js +165 -0
- package/public/js/utils/sanitize.js +93 -0
- package/public/js/utils/sse-parser.js +195 -0
- package/public/js/voice.js +883 -0
- package/public/manifest.json +58 -0
- package/public/moon_texture.jpg +0 -0
- package/public/sw.js +221 -0
- package/public/three.min.js +6 -0
- package/server/channel.js +529 -0
- package/server/chat.js +270 -0
- package/server/config-store.js +362 -0
- package/server/config.js +159 -0
- package/server/context.js +131 -0
- package/server/gateway-commands.js +211 -0
- package/server/gateway-proxy.js +318 -0
- package/server/index.js +22 -0
- package/server/logger.js +89 -0
- package/server/middleware/auth.js +188 -0
- package/server/middleware.js +218 -0
- package/server/openclaw-discover.js +308 -0
- package/server/premium/index.js +156 -0
- package/server/premium/license.js +140 -0
- package/server/realtime/bridge.js +837 -0
- package/server/realtime/index.js +349 -0
- package/server/realtime/tts-stream.js +446 -0
- package/server/routes/agents.js +564 -0
- package/server/routes/artifacts.js +174 -0
- package/server/routes/chat.js +311 -0
- package/server/routes/config-settings.js +345 -0
- package/server/routes/config.js +603 -0
- package/server/routes/files.js +307 -0
- package/server/routes/index.js +18 -0
- package/server/routes/media.js +451 -0
- package/server/routes/missed-messages.js +107 -0
- package/server/routes/premium.js +75 -0
- package/server/routes/push.js +156 -0
- package/server/routes/satellite.js +406 -0
- package/server/routes/status.js +251 -0
- package/server/routes/stt.js +35 -0
- package/server/routes/voice.js +260 -0
- package/server/routes/webhooks.js +203 -0
- package/server/routes.js +206 -0
- package/server/runtime-config.js +336 -0
- package/server/share.js +305 -0
- package/server/stt/faster-whisper.js +72 -0
- package/server/stt/groq.js +51 -0
- package/server/stt/index.js +196 -0
- package/server/stt/openai.js +49 -0
- package/server/sync.js +244 -0
- package/server/tailscale-https.js +175 -0
- package/server/tts.js +646 -0
- package/server/update-checker.js +172 -0
- package/server/utils/filename.js +129 -0
- package/server/utils.js +147 -0
- package/server/watchdog.js +318 -0
- package/server/websocket/broadcast.js +359 -0
- package/server/websocket/connections.js +339 -0
- package/server/websocket/index.js +215 -0
- package/server/websocket/routing.js +277 -0
- package/server/websocket/sync.js +102 -0
- package/server.js +404 -0
- package/utils/detect-tool-usage.js +93 -0
- package/utils/errors.js +158 -0
- package/utils/html-escape.js +84 -0
- package/utils/id-sanitize.js +94 -0
- package/utils/response.js +130 -0
- package/utils/with-retry.js +105 -0
package/server/tts.js
ADDED
|
@@ -0,0 +1,646 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TTS Module - ElevenLabs, Edge TTS, and Local XTTS voice synthesis
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { randomUUID } from 'crypto';
|
|
6
|
+
import fs from 'fs/promises';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
import { spawn } from 'child_process';
|
|
9
|
+
import { createLogger } from './logger.js';
|
|
10
|
+
|
|
11
|
+
const log = createLogger('TTS');
|
|
12
|
+
|
|
13
|
+
// Edge TTS is an optional dependency; users opt in with: npm install node-edge-tts
// `EdgeTTS` stays null whenever the class could not be obtained, because the
// rest of this module treats `EdgeTTS !== null` as "Edge TTS is available".
let EdgeTTS = null;
try {
  const edgeModule = await import('node-edge-tts');
  // Normalise a missing export to null so an undefined constructor is never
  // mistaken for an installed provider.
  EdgeTTS = edgeModule.EdgeTTS ?? null;
} catch (error) {
  // Not installed - that's fine, it's optional. Any other failure (for example
  // the package is present but throws while loading) is logged so that Edge
  // TTS does not disappear without a trace.
  if (error?.code !== 'ERR_MODULE_NOT_FOUND') {
    log.error('Failed to load optional node-edge-tts package:', error);
  }
}
|
|
20
|
+
|
|
21
|
+
// Environment-provided TTS settings. These are fallbacks: functions that read
// the runtime config prefer its values over the ones captured here.
const {
  // ElevenLabs credentials
  ELEVENLABS_API_KEY,
  ELEVENLABS_VOICE_ID,
  // Local TTS - user must configure these in .env
  LOCAL_TTS_SCRIPT,
  LOCAL_TTS_PYTHON,
  LOCAL_TTS_URL,
  // OpenAI credentials
  OPENAI_API_KEY,
  // Piper: path to the .onnx model file, and an optional .json config path
  PIPER_MODEL,
  PIPER_CONFIG,
} = process.env;

// Settings with a built-in default. `||` is used on purpose so that an empty
// string in the environment also falls back to the default.

// Edge TTS voice (only relevant when the user installed node-edge-tts)
const EDGE_TTS_VOICE = process.env.EDGE_TTS_VOICE || 'en-US-AriaNeural';

// OpenAI TTS voice: alloy, echo, fable, onyx, nova, shimmer
const OPENAI_TTS_VOICE = process.env.OPENAI_TTS_VOICE || 'nova';
// OpenAI TTS model: tts-1, tts-1-hd
const OPENAI_TTS_MODEL = process.env.OPENAI_TTS_MODEL || 'tts-1';

// Piper executable name or path (local, requires piper to be installed)
const PIPER_EXECUTABLE = process.env.PIPER_EXECUTABLE || 'piper';
|
|
42
|
+
|
|
43
|
+
// runtime-config.js is imported on first use instead of at module load time,
// which avoids a circular dependency between the two modules.
let _loadConfig = null;

/**
 * Load the current runtime configuration.
 *
 * The first call imports ./runtime-config.js and remembers its `loadConfig`
 * export; later calls reuse it. `loadConfig` itself is invoked on every call.
 *
 * @returns {Promise<*>} Whatever `loadConfig()` returns.
 */
async function getRuntimeConfig() {
  if (_loadConfig) {
    return _loadConfig();
  }

  const { loadConfig } = await import('./runtime-config.js');
  _loadConfig = loadConfig;
  return _loadConfig();
}
|
|
52
|
+
|
|
53
|
+
/**
 * List the voices offered by Edge TTS in a simplified shape.
 *
 * Resolves to an empty array when node-edge-tts is not loaded, and also when
 * the voice lookup throws (the failure is logged, not propagated).
 *
 * @returns {Promise<Array<{shortName: *, name: *, gender: *, locale: *}>>}
 *   One entry per voice; `name` falls back to the short name when the voice
 *   has no friendly name.
 */
export async function listEdgeTTSVoices() {
  if (!EdgeTTS) {
    return [];
  }

  // Map the library's PascalCase voice record onto the camelCase shape we expose.
  const toVoiceSummary = (voice) => {
    const shortName = voice.ShortName;
    return {
      shortName,
      name: voice.FriendlyName || shortName,
      gender: voice.Gender,
      locale: voice.Locale,
    };
  };

  try {
    const client = new EdgeTTS();
    const available = await client.getVoices();
    return available.map(toVoiceSummary);
  } catch (e) {
    log.error('Failed to list Edge TTS voices:', e);
    return [];
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Clean text for speech synthesis.
 *
 * Removes markup that should not be read aloud: asterisks and backticks,
 * anything in square brackets on a single line, markdown heading markers
 * (one to six `#` followed by whitespace), and characters in four emoji /
 * symbol ranges. The result is trimmed and capped at 500 UTF-16 code units
 * to keep synthesis fast.
 *
 * @param {string} text - Raw message text.
 * @returns {string} Speakable text; '' when nothing speakable remains or when
 *   `text` is not a string.
 */
export function cleanTextForTTS(text) {
  // A missing or non-string value has nothing to speak. Returning '' lets
  // callers take their "No text to speak" path instead of hitting a TypeError.
  if (typeof text !== 'string') {
    return '';
  }

  const MAX_LENGTH = 500;

  const cleaned = text
    .replace(/[*`]/g, '') // bold/italic markers and inline-code ticks
    .replace(/\[.*?\]/g, '') // bracketed spans
    .replace(/#{1,6}\s/g, '') // heading markers
    .replace(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{2600}-\u{26FF}]/gu, '') // emojis
    .trim();

  if (cleaned.length <= MAX_LENGTH) {
    return cleaned;
  }

  const truncated = cleaned.substring(0, MAX_LENGTH);

  // substring() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair. Drop a dangling high surrogate rather than
  // hand malformed text to the TTS provider.
  const lastUnit = truncated.charCodeAt(MAX_LENGTH - 1);
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsWithHighSurrogate ? truncated.slice(0, -1) : truncated;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Split off the first sentence so it can be synthesized in parallel with the
 * rest of the text.
 *
 * A sentence is the shortest leading run that ends in `.`, `!` or `?`
 * (optionally followed by one closing quote, `»`, `)` or `…`) and is then
 * followed by whitespace or the end of the string.
 *
 * @param {string} text - Text to split.
 * @returns {{sentence: string, remaining: string} | null} Both parts trimmed,
 *   or null when no sentence boundary is found.
 */
export function extractFirstSentence(text) {
  const sentencePattern = /^(.+?[.!?]["'»)…]?)(?:\s|$)/s;
  const found = text.match(sentencePattern);

  if (!found) {
    return null;
  }

  // found[0] includes the trailing whitespace that terminated the sentence,
  // so slicing by its length skips past that separator.
  const [consumed, firstSentence] = found;
  const rest = text.slice(consumed.length);

  return {
    sentence: firstSentence.trim(),
    remaining: rest.trim()
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Generate TTS audio using ElevenLabs and save it as an MP3 in `outputDir`.
 *
 * @param {string} text - Text to convert to speech (cleaned with cleanTextForTTS first)
 * @param {string} outputDir - Directory to save audio file (created if missing)
 * @param {Object} [options] - Options
 * @param {number} [options.timeoutMs] - Timeout in milliseconds for the whole
 *   HTTP exchange, including the audio download (default: 30000)
 * @returns {Promise<string>} - URL path to the generated audio file (`/audio/<uuid>.mp3`)
 * @throws {Error} With a user-facing message when there is nothing to speak,
 *   ElevenLabs is not configured, the request times out, the network is
 *   unreachable, or the API answers with an error status.
 */
export async function textToSpeech(text, outputDir, options = {}) {
  const audioId = randomUUID();
  const outputPath = path.join(outputDir, `${audioId}.mp3`);
  const timeoutMs = options.timeoutMs || 30000;

  await fs.mkdir(outputDir, { recursive: true });

  const cleanText = cleanTextForTTS(text);

  if (!cleanText) {
    throw new Error('No text to speak');
  }

  // isTTSConfigured() with no argument checks the ElevenLabs key AND voice id,
  // so the message names both.
  if (!isTTSConfigured()) {
    throw new Error('TTS is not configured. Please set the ELEVENLABS_API_KEY and ELEVENLABS_VOICE_ID environment variables.');
  }

  // Errors created through friendly() already carry user-facing wording. They
  // are tracked by identity so the catch block can rethrow them untouched
  // without guessing from the message text.
  const friendlyErrors = new WeakSet();
  const friendly = (message) => {
    const err = new Error(message);
    friendlyErrors.add(err);
    return err;
  };

  // Abort the request (headers and body) once the timeout elapses
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${ELEVENLABS_VOICE_ID}`, {
      method: 'POST',
      headers: {
        'xi-api-key': ELEVENLABS_API_KEY,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        text: cleanText,
        model_id: 'eleven_multilingual_v2',
        voice_settings: {
          stability: 0.48,
          similarity_boost: 0.92,
          style: 0.07,
          use_speaker_boost: true
        }
      }),
      signal: controller.signal
    });

    if (!response.ok) {
      // A response body can only be consumed once, so read it as text and
      // parse that, instead of calling json() and then text() on failure.
      const rawBody = await response.text();
      let errorMessage = rawBody;
      try {
        const errorData = JSON.parse(rawBody);
        const detail = errorData?.detail;
        errorMessage = detail?.message
          || (typeof detail === 'string' && detail)
          || JSON.stringify(errorData);
      } catch {
        // Body was not JSON; the raw text is the best description available.
      }

      // Log server-side for debugging
      log.error(`ElevenLabs API error: ${response.status} - ${errorMessage}`);

      // Return user-friendly error based on status code
      if (response.status === 401) {
        throw friendly('Text-to-speech service authentication failed. Please check your API key configuration.');
      } else if (response.status === 429) {
        throw friendly('Text-to-speech service is temporarily busy. Please try again in a moment.');
      } else if (response.status >= 500) {
        throw friendly('Text-to-speech service is experiencing issues. Please try again later.');
      } else {
        throw friendly('Failed to generate speech audio. Please try again.');
      }
    }

    const audioBuffer = await response.arrayBuffer();
    await fs.writeFile(outputPath, Buffer.from(audioBuffer));

    return `/audio/${audioId}.mp3`;
  } catch (error) {
    // Re-throw user-friendly errors we've already created
    if (friendlyErrors.has(error)) {
      throw error;
    }

    // Handle timeout specifically
    if (error.name === 'AbortError') {
      log.error(`Request timed out after ${timeoutMs}ms`);
      throw new Error('Text-to-speech generation timed out. Please try again with shorter text.');
    }

    // Handle network errors. The built-in fetch reports connection failures
    // as a generic error whose `cause` carries the errno code, so look there
    // as well as on the error itself.
    const networkCode = error.code ?? error.cause?.code;
    if (networkCode === 'ENOTFOUND' || networkCode === 'ECONNREFUSED' || error.type === 'system') {
      log.error('Network error:', error.message);
      throw new Error('Unable to connect to text-to-speech service. Please check your network connection.');
    }

    // Log unexpected errors and return generic message
    log.error('Unexpected error:', error);
    throw new Error('An unexpected error occurred while generating speech. Please try again.');
  } finally {
    // Cleared here rather than right after fetch() resolves so that the
    // timeout also bounds the body download.
    clearTimeout(timeoutId);
  }
}
|
|
213
|
+
|
|
214
|
+
/**
 * Report whether Edge TTS can be used, i.e. whether the optional
 * node-edge-tts import at module load produced a value for `EdgeTTS`.
 *
 * @returns {boolean} false only while `EdgeTTS` is still null.
 */
export function isEdgeTTSAvailable() {
  if (EdgeTTS === null) {
    return false;
  }
  return true;
}
|
|
220
|
+
|
|
221
|
+
/**
 * Generate TTS audio using Microsoft Edge TTS
 * Requires user to install: npm install node-edge-tts
 *
 * @param {string} text - Text to convert to speech (cleaned with cleanTextForTTS first)
 * @param {string} outputDir - Directory to save audio file (created if missing)
 * @param {Object} [options] - Options
 * @param {string} [options.voice] - Voice to use; falls back to the runtime
 *   config's `edgeTtsVoice`, then to the EDGE_TTS_VOICE env var
 * @param {number} [options.timeoutMs] - Timeout in milliseconds passed to the
 *   EdgeTTS client (default: 30000)
 * @returns {Promise<string>} - URL path to the generated audio file (`/audio/<uuid>.mp3`)
 * @throws {Error} When node-edge-tts is not installed, there is nothing to
 *   speak, or synthesis fails or times out.
 */
export async function textToSpeechEdge(text, outputDir, options = {}) {
  if (!EdgeTTS) {
    throw new Error('Edge TTS not installed. Run: npm install node-edge-tts');
  }

  const audioId = randomUUID();
  const outputPath = path.join(outputDir, `${audioId}.mp3`);
  const timeoutMs = options.timeoutMs || 30000;

  // Read voice from runtime config (falls back to env var)
  const config = await getRuntimeConfig();
  const voice = options.voice || config.edgeTtsVoice || EDGE_TTS_VOICE;

  await fs.mkdir(outputDir, { recursive: true });

  const cleanText = cleanTextForTTS(text);
  if (!cleanText) {
    throw new Error('No text to speak');
  }

  try {
    const tts = new EdgeTTS({
      voice,
      timeout: timeoutMs,
    });

    await tts.ttsPromise(cleanText, outputPath);

    return `/audio/${audioId}.mp3`;
  } catch (error) {
    log.error('Edge TTS error:', error);

    // A rejected promise is not guaranteed to carry an Error instance (it may
    // be a bare string), so derive the reason defensively before inspecting it.
    const reason = typeof error === 'string' ? error : (error?.message || '');

    // Match "timeout", "time out", "time-out" and "timed out" in any casing.
    if (/tim(?:e|ed)[\s-]?out/i.test(reason)) {
      throw new Error('Edge TTS generation timed out. Please try again with shorter text.');
    }

    throw new Error(`Edge TTS failed: ${reason || 'Unknown error'}`);
  }
}
|
|
270
|
+
|
|
271
|
+
/**
 * Generate TTS audio using OpenAI TTS API and save it as an MP3 in `outputDir`.
 *
 * Voice, model and API key are resolved in this order: explicit option (voice
 * and model only), runtime config, environment variable.
 *
 * @param {string} text - Text to convert to speech (cleaned with cleanTextForTTS first)
 * @param {string} outputDir - Directory to save audio file (created if missing)
 * @param {Object} [options] - Options
 * @param {string} [options.voice] - Voice to use (alloy, echo, fable, onyx, nova, shimmer)
 * @param {string} [options.model] - Model to use (tts-1, tts-1-hd)
 * @param {number} [options.timeoutMs] - Timeout in milliseconds for the whole
 *   HTTP exchange, including the audio download (default: 30000)
 * @returns {Promise<string>} - URL path to the generated audio file (`/audio/<uuid>.mp3`)
 * @throws {Error} When there is nothing to speak, no API key is configured,
 *   the request times out, or the API call fails.
 */
export async function textToSpeechOpenAI(text, outputDir, options = {}) {
  const audioId = randomUUID();
  const outputPath = path.join(outputDir, `${audioId}.mp3`);
  const timeoutMs = options.timeoutMs || 30000;

  // Read voice/model from runtime config (falls back to env vars)
  const config = await getRuntimeConfig();
  const voice = options.voice || config.openaiTtsVoice || OPENAI_TTS_VOICE;
  const model = options.model || config.openaiTtsModel || OPENAI_TTS_MODEL;

  await fs.mkdir(outputDir, { recursive: true });

  const cleanText = cleanTextForTTS(text);
  if (!cleanText) {
    throw new Error('No text to speak');
  }

  const openaiKey = config.openaiApiKey || OPENAI_API_KEY;
  if (!openaiKey) {
    throw new Error('OpenAI API key not configured. Add it in Settings → Voice & TTS.');
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch('https://api.openai.com/v1/audio/speech', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${openaiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model,
        input: cleanText,
        voice,
        response_format: 'mp3',
      }),
      signal: controller.signal,
    });

    if (!response.ok) {
      const errorText = await response.text();
      log.error(`OpenAI API error: ${response.status} - ${errorText}`);

      if (response.status === 401) {
        throw new Error('OpenAI authentication failed. Please check your API key.');
      } else if (response.status === 429) {
        throw new Error('OpenAI rate limit reached. Please try again in a moment.');
      }
      throw new Error('OpenAI TTS failed. Please try again.');
    }

    const audioBuffer = await response.arrayBuffer();
    await fs.writeFile(outputPath, Buffer.from(audioBuffer));

    return `/audio/${audioId}.mp3`;
  } catch (error) {
    if (error.name === 'AbortError') {
      throw new Error('OpenAI TTS timed out. Please try again with shorter text.');
    }

    // Errors raised above already mention OpenAI and are user-facing; pass
    // them through unchanged.
    if (error.message?.includes('OpenAI')) {
      throw error;
    }

    log.error('OpenAI TTS error:', error);

    // The built-in fetch reports network failures with a generic message and
    // puts the errno code on `cause`; include it so the message is actionable.
    const causeCode = error.cause?.code;
    const reason = causeCode ? `${error.message} (${causeCode})` : error.message;
    throw new Error(`OpenAI TTS failed: ${reason}`, { cause: error });
  } finally {
    // Cleared here rather than right after fetch() resolves so that the
    // timeout also bounds the body download.
    clearTimeout(timeoutId);
  }
}
|
|
355
|
+
|
|
356
|
+
/**
 * Generate TTS audio using Piper (local, fast, many voices)
 * Requires piper executable installed: https://github.com/rhasspy/piper
 *
 * The cleaned text is written to the piper process's stdin and piper is asked
 * to write a WAV file into `outputDir` via `--output_file`.
 *
 * @param {string} text - Text to convert to speech (cleaned with cleanTextForTTS first)
 * @param {string} outputDir - Directory to save audio file (created if missing)
 * @param {Object} [options] - Options
 * @param {number} [options.timeoutMs] - How long to wait for piper before
 *   killing it, in milliseconds (default: 30000)
 * @returns {Promise<string>} - URL path to the generated audio file (`/audio/<uuid>.wav`)
 * @throws {Error} When there is nothing to speak, PIPER_MODEL is unset, piper
 *   cannot be started, exits non-zero, times out, or produces no file.
 */
export async function textToSpeechPiper(text, outputDir, options = {}) {
  const audioId = randomUUID();
  const outputPath = path.join(outputDir, `${audioId}.wav`);
  const timeoutMs = options.timeoutMs || 30000;

  await fs.mkdir(outputDir, { recursive: true });

  const cleanText = cleanTextForTTS(text);
  if (!cleanText) {
    throw new Error('No text to speak');
  }

  if (!PIPER_MODEL) {
    throw new Error('Piper TTS requires PIPER_MODEL environment variable (path to .onnx model)');
  }

  return new Promise((resolve, reject) => {
    const args = ['--model', PIPER_MODEL, '--output_file', outputPath];
    if (PIPER_CONFIG) {
      args.push('--config', PIPER_CONFIG);
    }

    const piperProcess = spawn(PIPER_EXECUTABLE, args, {
      stdio: ['pipe', 'pipe', 'pipe'],
    });

    let stderr = '';

    // Timeout, 'error' and 'close' can each fire for the same process. Only
    // the first outcome is reported; later ones are ignored so they neither
    // log misleading exit codes nor settle the promise twice.
    let settled = false;
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      finish(value);
    };

    const timeout = setTimeout(() => {
      piperProcess.kill();
      settle(reject, new Error('Piper TTS timed out. Please try again with shorter text.'));
    }, timeoutMs);

    piperProcess.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    piperProcess.on('close', async (code) => {
      if (settled) return;
      clearTimeout(timeout);

      if (code !== 0) {
        log.error(`Piper exited with code ${code}: ${stderr}`);
        settle(reject, new Error('Piper TTS failed. Check that piper is installed and model path is correct.'));
        return;
      }

      // Verify output file exists
      try {
        await fs.stat(outputPath);
        settle(resolve, `/audio/${audioId}.wav`);
      } catch {
        settle(reject, new Error('Piper TTS did not produce output file.'));
      }
    });

    piperProcess.on('error', (error) => {
      if (settled) return;
      log.error('Piper spawn error:', error);
      if (error.code === 'ENOENT') {
        settle(reject, new Error('Piper executable not found. Install from: https://github.com/rhasspy/piper'));
      } else {
        settle(reject, new Error('Failed to start Piper: ' + error.message));
      }
    });

    // Writing to the stdin of a process that failed to start or exited early
    // emits 'error' on the stream; without a listener that would surface as an
    // uncaught exception. The process-level 'error'/'close' handlers above
    // still decide the outcome, so logging is enough here.
    piperProcess.stdin.on('error', (error) => {
      log.error('Piper stdin error:', error);
    });

    // Send text to stdin and close it
    piperProcess.stdin.end(cleanText);
  });
}
|
|
435
|
+
|
|
436
|
+
/**
 * Generate TTS audio using local XTTS script (process-based)
 *
 * Runs `LOCAL_TTS_PYTHON LOCAL_TTS_SCRIPT`, writes the cleaned text to its
 * stdin, and treats the trimmed stdout as the path of the WAV file the script
 * produced. That file is copied into `outputDir` and the original is removed
 * on a best-effort basis.
 *
 * @param {string} text - Text to convert to speech (cleaned with cleanTextForTTS first)
 * @param {string} outputDir - Directory to save audio file (created if missing)
 * @param {Object} [options] - Options
 * @param {number} [options.timeoutMs] - How long to wait for the script before
 *   killing it, in milliseconds (default: 60000)
 * @returns {Promise<string>} - URL path to the generated audio file (`/audio/<uuid>.wav`)
 * @throws {Error} When there is nothing to speak, the script or interpreter is
 *   not configured, the process cannot start, exits non-zero, times out, or
 *   its output file cannot be found or copied.
 */
export async function localTextToSpeech(text, outputDir, options = {}) {
  const audioId = randomUUID();
  const timeoutMs = options.timeoutMs || 60000;

  await fs.mkdir(outputDir, { recursive: true });

  const cleanText = cleanTextForTTS(text);

  if (!cleanText) {
    throw new Error('No text to speak');
  }

  // This path spawns the script, so it needs both the interpreter and the
  // script path. A configured LOCAL_TTS_URL alone is not enough here and
  // would otherwise lead to spawn() being called with undefined arguments.
  if (!LOCAL_TTS_SCRIPT || !LOCAL_TTS_PYTHON) {
    throw new Error('Local TTS is not configured. Please check LOCAL_TTS_SCRIPT and LOCAL_TTS_PYTHON environment variables.');
  }

  return new Promise((resolve, reject) => {
    const pythonProcess = spawn(LOCAL_TTS_PYTHON, [LOCAL_TTS_SCRIPT], {
      stdio: ['pipe', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    // Timeout, 'error' and 'close' can each fire for the same process. Only
    // the first outcome is reported; later ones are ignored so they neither
    // log misleading exit codes nor settle the promise twice.
    let settled = false;
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      finish(value);
    };

    const timeout = setTimeout(() => {
      pythonProcess.kill();
      settle(reject, new Error('Local TTS generation timed out. Please try again with shorter text.'));
    }, timeoutMs);

    pythonProcess.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    pythonProcess.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    pythonProcess.on('close', async (code) => {
      if (settled) return;
      // The process has finished, so the timeout must not fire while the
      // output file is being checked and copied below.
      clearTimeout(timeout);

      if (code !== 0) {
        log.error(`Local TTS process exited with code ${code}: ${stderr}`);
        settle(reject, new Error('Local TTS generation failed. Please check the TTS script configuration.'));
        return;
      }

      // The script outputs the WAV file path
      const wavPath = stdout.trim();

      if (!wavPath || !(await fs.stat(wavPath).catch(() => null))) {
        log.error('Local TTS did not produce a valid output file:', wavPath);
        settle(reject, new Error('Local TTS did not produce audio output.'));
        return;
      }

      try {
        // Copy the WAV file to our output directory
        const outputPath = path.join(outputDir, `${audioId}.wav`);
        await fs.copyFile(wavPath, outputPath);

        // Best-effort clean-up of the original file; a failure here does not
        // affect the result because the copy already succeeded.
        await fs.unlink(wavPath).catch(() => {});

        settle(resolve, `/audio/${audioId}.wav`);
      } catch (error) {
        log.error('Failed to process local TTS output:', error);
        settle(reject, new Error('Failed to process local TTS audio output.'));
      }
    });

    pythonProcess.on('error', (error) => {
      if (settled) return;
      log.error('Failed to spawn local TTS process:', error);
      settle(reject, new Error('Failed to start local TTS process. Please check Python path.'));
    });

    // Writing to the stdin of a process that failed to start or exited early
    // emits 'error' on the stream; without a listener that would surface as an
    // uncaught exception. The process-level 'error'/'close' handlers above
    // still decide the outcome, so logging is enough here.
    pythonProcess.stdin.on('error', (error) => {
      log.error('Local TTS stdin error:', error);
    });

    // Send text to stdin and close it
    pythonProcess.stdin.end(cleanText);
  });
}
|
|
526
|
+
|
|
527
|
+
/**
 * Synchronous check for a local TTS setup, based on environment variables only.
 *
 * Either form counts: a server URL (LOCAL_TTS_URL), or a script together with
 * the interpreter that runs it (LOCAL_TTS_SCRIPT and LOCAL_TTS_PYTHON).
 * Use isLocalTTSConfiguredAsync() when the runtime config should be consulted too.
 *
 * @returns {boolean}
 */
export function isLocalTTSConfigured() {
  const hasServerUrl = Boolean(LOCAL_TTS_URL);
  const hasScriptSetup = Boolean(LOCAL_TTS_SCRIPT && LOCAL_TTS_PYTHON);
  return hasServerUrl || hasScriptSetup;
}
|
|
535
|
+
|
|
536
|
+
/**
 * Check for a local TTS setup, consulting the runtime config as well as the
 * environment.
 *
 * @returns {Promise<boolean>} true when the runtime config has `localTtsUrl`,
 *   or LOCAL_TTS_URL is set, or both LOCAL_TTS_SCRIPT and LOCAL_TTS_PYTHON are set.
 */
export async function isLocalTTSConfiguredAsync() {
  const { localTtsUrl } = await getRuntimeConfig();

  if (localTtsUrl || LOCAL_TTS_URL) {
    return true;
  }
  return Boolean(LOCAL_TTS_SCRIPT && LOCAL_TTS_PYTHON);
}
|
|
540
|
+
|
|
541
|
+
/**
 * Report whether Piper TTS has a model configured.
 *
 * @returns {boolean} true when the PIPER_MODEL environment variable is non-empty.
 */
export function isPiperConfigured() {
  return Boolean(PIPER_MODEL);
}
|
|
547
|
+
|
|
548
|
+
/**
 * Check whether the given TTS provider is configured.
 *
 * This is a synchronous check. For 'openai' it only looks at the environment
 * variable, so callers that need the runtime config must check it themselves.
 *
 * @param {string} [provider='elevenlabs'] - 'local', 'edge', 'openai', 'piper'
 *   or 'elevenlabs'. Any other value is treated as 'elevenlabs'.
 * @returns {boolean}
 */
export function isTTSConfigured(provider = 'elevenlabs') {
  if (provider === 'local') {
    return isLocalTTSConfigured();
  }
  if (provider === 'edge') {
    return isEdgeTTSAvailable();
  }
  if (provider === 'openai') {
    return Boolean(OPENAI_API_KEY);
  }
  if (provider === 'piper') {
    return isPiperConfigured();
  }

  // 'elevenlabs' and every unrecognised provider: both the key and a voice id
  // are required.
  return Boolean(ELEVENLABS_API_KEY && ELEVENLABS_VOICE_ID);
}
|
|
567
|
+
|
|
568
|
+
/**
 * Generate TTS using local XTTS server
 *
 * POSTs the cleaned text as JSON to `<server URL>/synthesize` and stores the
 * response body as a WAV file in `outputDir`. The server URL comes from the
 * runtime config's `localTtsUrl`, falling back to the LOCAL_TTS_URL env var.
 *
 * @param {string} text - Text to convert to speech (cleaned with cleanTextForTTS first)
 * @param {string} outputDir - Directory to save audio file (created if missing)
 * @param {Object} [options] - Options
 * @param {number} [options.timeoutMs] - Timeout in milliseconds for the whole
 *   HTTP exchange, including the audio download (default: 60000)
 * @returns {Promise<string>} - URL path to the generated audio file (`/audio/<uuid>.wav`)
 * @throws {Error} When there is nothing to speak, no server URL is configured,
 *   the server is unreachable, the request times out, or the server answers
 *   with an error status.
 */
export async function textToSpeechLocal(text, outputDir, options = {}) {
  const audioId = randomUUID();
  const outputPath = path.join(outputDir, `${audioId}.wav`);
  const timeoutMs = options.timeoutMs || 60000; // Local TTS can be slower

  await fs.mkdir(outputDir, { recursive: true });

  const cleanText = cleanTextForTTS(text);
  if (!cleanText) throw new Error('No text to speak');

  // Read URL from runtime config (falls back to env var)
  const config = await getRuntimeConfig();
  const ttsUrl = config.localTtsUrl || LOCAL_TTS_URL;

  if (!ttsUrl) {
    throw new Error('XTTS server URL not configured. Set it in Settings → Voice & TTS.');
  }

  // Tolerate a base URL configured with a trailing slash, which would
  // otherwise produce a "//synthesize" path.
  const endpoint = `${String(ttsUrl).replace(/\/+$/, '')}/synthesize`;

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text: cleanText, use_short_ref: true }),
      signal: controller.signal
    });

    if (!response.ok) {
      const errorText = await response.text();
      log.error('Local TTS error:', response.status, errorText);
      throw new Error(`Local TTS error: ${response.status}`);
    }

    // Save the audio data
    const audioBuffer = await response.arrayBuffer();
    await fs.writeFile(outputPath, Buffer.from(audioBuffer));

    return `/audio/${audioId}.wav`;
  } catch (error) {
    if (error.name === 'AbortError') {
      throw new Error('Local TTS request timed out. The GPU might be warming up - try again.');
    }

    // If server not running, provide helpful message. The built-in fetch puts
    // the errno code on `error.cause`, so check there as well as on the error.
    const networkCode = error.code ?? error.cause?.code;
    if (networkCode === 'ECONNREFUSED') {
      throw new Error('Local TTS server not running. Start it with: python uplink_tts_adapter.py');
    }

    log.error('Local TTS error:', error);
    throw new Error(`Local TTS failed: ${error.message}`, { cause: error });
  } finally {
    // Cleared here rather than right after fetch() resolves so that the
    // timeout also bounds the body download.
    clearTimeout(timeoutId);
  }
}
|
|
630
|
+
|
|
631
|
+
// Default export: the same functions that are available as named exports,
// gathered on one object for callers that import the module as a whole.
const ttsModule = {
  // Text preparation
  cleanTextForTTS,
  extractFirstSentence,
  // Synthesis providers
  textToSpeech,
  textToSpeechEdge,
  textToSpeechOpenAI,
  textToSpeechPiper,
  localTextToSpeech,
  textToSpeechLocal,
  // Availability checks
  isTTSConfigured,
  isLocalTTSConfigured,
  isLocalTTSConfiguredAsync,
  isEdgeTTSAvailable,
  isPiperConfigured,
  listEdgeTTSVoices,
};

export default ttsModule;
|