@madeinoz67/voice-server 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/speckit.analyze.md +184 -0
- package/.claude/commands/speckit.checklist.md +294 -0
- package/.claude/commands/speckit.clarify.md +181 -0
- package/.claude/commands/speckit.constitution.md +82 -0
- package/.claude/commands/speckit.implement.md +135 -0
- package/.claude/commands/speckit.plan.md +89 -0
- package/.claude/commands/speckit.specify.md +258 -0
- package/.claude/commands/speckit.tasks.md +137 -0
- package/.claude/commands/speckit.taskstoissues.md +30 -0
- package/.claude/settings.local.json +23 -0
- package/.codanna/settings.toml +384 -0
- package/.env.development +18 -0
- package/.env.example +30 -0
- package/.github/codeql/config.yml +13 -0
- package/.github/codeql.yml +30 -0
- package/.github/dependabot.yml +11 -0
- package/.github/workflows/ci.yml +308 -0
- package/.specify/memory/constitution.md +223 -0
- package/.specify/scripts/bash/check-prerequisites.sh +166 -0
- package/.specify/scripts/bash/common.sh +156 -0
- package/.specify/scripts/bash/create-new-feature.sh +297 -0
- package/.specify/scripts/bash/setup-plan.sh +61 -0
- package/.specify/scripts/bash/update-agent-context.sh +799 -0
- package/.specify/templates/agent-file-template.md +28 -0
- package/.specify/templates/checklist-template.md +40 -0
- package/.specify/templates/plan-template.md +106 -0
- package/.specify/templates/spec-template.md +115 -0
- package/.specify/templates/tasks-template.md +261 -0
- package/AGENTPERSONALITIES.md +233 -0
- package/ATTRIBUTION.md +70 -0
- package/CHANGELOG.md +90 -0
- package/CLAUDE.md +50 -0
- package/Formula/madeinoz-voice-server.rb +106 -0
- package/README.md +451 -0
- package/bun.lock +212 -0
- package/cliff.toml +67 -0
- package/docs/KOKORO_VOICES.md +152 -0
- package/docs/MIGRATION.md +267 -0
- package/docs/VOICE_EXAMPLES.md +283 -0
- package/docs/VOICE_GUIDE.md +227 -0
- package/docs/VOICE_QUICK_REF.md +157 -0
- package/docs/agent-voices.md +114 -0
- package/docs/api.md +336 -0
- package/docs/assets/voice-server-architecture.png +0 -0
- package/docs/assets/voice-server-header.png +0 -0
- package/docs/assets/voice-server-pack-logo.png +0 -0
- package/docs/index.md +60 -0
- package/eslint.config.js +42 -0
- package/mkdocs.yml +55 -0
- package/package.json +28 -0
- package/reports/MLX_AUDIO_EVALUATION.md +302 -0
- package/reports/agent/2026-02-06-20-51-mlx-audio-qwen-tts-investigation.md +613 -0
- package/reports/agent/2026-02-06-Qwen3-TTS-API-Specification.md +446 -0
- package/reports/agent/2026-02-07-python-backend-removal-plan.md +790 -0
- package/scripts/generate-reference.ts +139 -0
- package/specs/001-qwen-tts/checklists/requirements.md +50 -0
- package/specs/001-qwen-tts/contracts/api.yaml +305 -0
- package/specs/001-qwen-tts/data-model.md +197 -0
- package/specs/001-qwen-tts/plan.md +236 -0
- package/specs/001-qwen-tts/quickstart.md +306 -0
- package/specs/001-qwen-tts/research.md +194 -0
- package/specs/001-qwen-tts/spec.md +135 -0
- package/specs/001-qwen-tts/tasks.md +305 -0
- package/src/ts/constants/KOKORO_VOICES.ts +141 -0
- package/src/ts/middleware/cors.ts +153 -0
- package/src/ts/middleware/rate-limiter.ts +200 -0
- package/src/ts/models/health.ts +45 -0
- package/src/ts/models/notification.ts +69 -0
- package/src/ts/models/pronunciation.ts +39 -0
- package/src/ts/models/tts.ts +54 -0
- package/src/ts/models/voice-config.ts +82 -0
- package/src/ts/server.ts +460 -0
- package/src/ts/services/mlx-tts-client.ts +337 -0
- package/src/ts/services/pronunciation.ts +209 -0
- package/src/ts/services/prosody-translator.ts +130 -0
- package/src/ts/services/voice-loader.ts +214 -0
- package/src/ts/utils/logger.ts +144 -0
- package/src/ts/utils/text-sanitizer.ts +118 -0
- package/tests/integration/api.test.ts +210 -0
- package/tests/mocks/index.ts +152 -0
- package/tests/ts/server.test.ts +11 -0
- package/tests/unit/middleware/cors.test.ts +146 -0
- package/tests/unit/models/validation.test.ts +332 -0
- package/tests/unit/services/pronunciation.test.ts +171 -0
- package/tests/unit/services/prosody-translator.test.ts +142 -0
- package/tsconfig.json +25 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice configuration types
|
|
3
|
+
* Defines voice personality with prosody settings
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Numeric and boolean knobs that shape how a voice is rendered.
 *
 * The ranges quoted on each field are the ones enforced by the validators
 * in this module.
 */
export interface ProsodySettings {
  /** Consistency of delivery, 0.0-1.0; larger values are more stable */
  stability: number;
  /** Fidelity to the cloned voice, 0.0-1.0 (only meaningful for custom voices) */
  similarity_boost: number;
  /** How expressive the delivery is, 0.0-1.0 */
  style: number;
  /** Multiplier applied to the speaking rate, 0.1-2.0 */
  speed: number;
  /** Turns on voice clarity enhancement */
  use_speaker_boost: boolean;
  /** Optional playback volume, 0.0-1.0 */
  volume?: number;
}
|
|
24
|
+
|
|
25
|
+
/**
 * A single voice: its identity plus the prosody values used to render it.
 *
 * Prosody values appear directly on the config and may additionally be
 * supplied as a nested `prosody` object.
 */
export interface VoiceConfig {
  /** Identifier that uniquely names this voice */
  voice_id: string;
  /** Display name intended for humans */
  voice_name: string;
  /** Free-text description of how the voice sounds */
  description: string;
  /** Origin of the voice */
  type: "built-in" | "custom" | "cloned";
  /** Consistency of delivery, 0.0-1.0 */
  stability: number;
  /** Fidelity to the cloned voice, 0.0-1.0 (only meaningful for custom voices) */
  similarity_boost: number;
  /** How expressive the delivery is, 0.0-1.0 */
  style: number;
  /** Multiplier applied to the speaking rate, 0.1-2.0 */
  speed: number;
  /** Turns on voice clarity enhancement */
  use_speaker_boost: boolean;
  /** Optional nested prosody block */
  prosody?: ProsodySettings;
  /** Optional playback volume, 0.0-1.0 */
  volume?: number;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Validate a (possibly partial) voice configuration.
 *
 * Checks performed:
 * - `voice_id` is a non-empty string made only of letters, digits, `_` and `-`
 * - `voice_name` is present and non-empty
 * - every numeric prosody field that is present is a real number inside its
 *   documented range (stability / similarity_boost / style / volume: 0-1,
 *   speed: 0.1-2)
 * - the same range checks hold for the optional nested `prosody` object
 *
 * Values that are not of type `number` (for example strings coming from
 * unvalidated JSON) and `NaN` are rejected rather than silently accepted.
 *
 * @param config - Candidate configuration; absent fields are not range-checked
 * @returns `true` when every check passes, otherwise `false`
 */
export function isValidVoiceConfig(config: Partial<VoiceConfig>): boolean {
  const { voice_id: voiceId, voice_name: voiceName } = config;

  if (typeof voiceId !== "string" || !voiceId || !voiceName) return false;

  // Validate ID format
  if (!/^[a-zA-Z0-9_-]+$/.test(voiceId)) return false;

  // Accept-form comparison: NaN fails both `>=` and `<=`, so it is rejected.
  const withinRange = (value: unknown, min: number, max: number): boolean =>
    value === undefined || (typeof value === "number" && value >= min && value <= max);

  const rangesOk = (fields: Partial<ProsodySettings>): boolean =>
    withinRange(fields.stability, 0, 1) &&
    withinRange(fields.similarity_boost, 0, 1) &&
    withinRange(fields.style, 0, 1) &&
    withinRange(fields.speed, 0.1, 2) &&
    withinRange(fields.volume, 0, 1);

  // Validate numeric ranges on the config itself
  if (!rangesOk(config)) return false;

  // Validate the nested prosody block when one is supplied
  const nested = config.prosody;
  if (nested !== undefined && (nested === null || typeof nested !== "object" || !rangesOk(nested))) {
    return false;
  }

  return true;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Validate (possibly partial) prosody settings.
 *
 * Each numeric field that is present must be a real number inside its range:
 * stability, similarity_boost, style and volume in 0-1; speed in 0.1-2.
 * Absent fields are accepted. `NaN` and values that are not of type `number`
 * are rejected.
 *
 * @param settings - Candidate prosody values
 * @returns `true` when every supplied field is in range, otherwise `false`
 */
export function isValidProsody(settings: Partial<ProsodySettings>): boolean {
  // Accept-form comparison: NaN fails both `>=` and `<=`, so it is rejected.
  const withinRange = (value: unknown, min: number, max: number): boolean =>
    value === undefined || (typeof value === "number" && value >= min && value <= max);

  return (
    withinRange(settings.stability, 0, 1) &&
    withinRange(settings.similarity_boost, 0, 1) &&
    withinRange(settings.style, 0, 1) &&
    withinRange(settings.speed, 0.1, 2) &&
    withinRange(settings.volume, 0, 1)
  );
}
|
package/src/ts/server.ts
ADDED
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Qwen TTS Voice Server
|
|
3
|
+
* Main Bun HTTP server with /notify, /pai, /health endpoints
|
|
4
|
+
* MLX-audio only - Python backend removed
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { NotificationRequest, PaiNotificationRequest } from "@/models/notification.js";
|
|
8
|
+
import type { HealthStatus } from "@/models/health.js";
|
|
9
|
+
import type { SuccessResponse, ErrorResponse } from "@/models/notification.js";
|
|
10
|
+
import type { ProsodySettings } from "@/models/voice-config.js";
|
|
11
|
+
import { createDefaultHealthStatus } from "@/models/health.js";
|
|
12
|
+
import { sanitizeTitle, sanitizeMessage } from "@/utils/text-sanitizer.js";
|
|
13
|
+
import { logger } from "@/utils/logger.js";
|
|
14
|
+
import { translateProsody, DEFAULT_PROSODY } from "@/services/prosody-translator.js";
|
|
15
|
+
import { applyPronunciations, loadPAIPronunciations } from "@/services/pronunciation.js";
|
|
16
|
+
import { getMLXTTSClient, type MLXTTSClientConfig } from "@/services/mlx-tts-client.js";
|
|
17
|
+
import { getVoiceLoader } from "@/services/voice-loader.js";
|
|
18
|
+
import { getRateLimiter, extractClientId } from "@/middleware/rate-limiter.js";
|
|
19
|
+
import { getCORSMiddleware } from "@/middleware/cors.js";
|
|
20
|
+
import { $ } from "bun";
|
|
21
|
+
|
|
22
|
+
/**
 * Settings that control how the voice server runs.
 */
interface ServerConfig {
  /** TCP port the HTTP server listens on */
  port: number;
  /** Hostname / interface the HTTP server binds to */
  host: string;
  /** Voice used when a request does not name one */
  defaultVoiceId: string;
  /** When true, each notification is also shown as a macOS notification */
  enableMacOSNotifications: boolean;
  /** Configuration handed to the MLX-audio TTS client */
  mlxConfig: MLXTTSClientConfig;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Default configuration, resolved once at module load.
 *
 * Environment variables read: PORT, DEFAULT_VOICE_ID,
 * ENABLE_MACOS_NOTIFICATIONS, MLX_MODEL, MLX_INSTRUCT and
 * MLX_STREAMING_INTERVAL.
 *
 * PORT and MLX_STREAMING_INTERVAL fall back to their defaults (8888 and 0.3)
 * when unset, empty, or not parseable, so a malformed environment value can
 * never produce a NaN setting.
 */
const DEFAULT_CONFIG: ServerConfig = {
  port: (() => {
    const fallbackPort = 8888;
    const parsed = parseInt(process.env.PORT || String(fallbackPort), 10);
    // Reject NaN and anything outside the valid TCP port range.
    return Number.isInteger(parsed) && parsed >= 0 && parsed <= 65535 ? parsed : fallbackPort;
  })(),
  host: "127.0.0.1",
  defaultVoiceId: process.env.DEFAULT_VOICE_ID || "marrvin",
  // Notifications are on unless explicitly disabled with the exact string "false".
  enableMacOSNotifications: process.env.ENABLE_MACOS_NOTIFICATIONS !== "false",
  mlxConfig: {
    // Use Kokoro-82M for smooth streaming (RTF ~1.0x)
    // Voices resolved via numeric ID (1-54) in voice loader
    model: process.env.MLX_MODEL || "mlx-community/Kokoro-82M-bf16",
    instruct: process.env.MLX_INSTRUCT,
    langCode: "en",
    speed: 1.0,
    streamingInterval: (() => {
      const fallbackInterval = 0.3;
      const parsed = parseFloat(process.env.MLX_STREAMING_INTERVAL || String(fallbackInterval));
      return Number.isFinite(parsed) ? parsed : fallbackInterval;
    })(),
    timeout: 10000,
  },
};
|
|
53
|
+
|
|
54
|
+
/**
 * Mutable, module-wide state shared by the request handlers.
 */
interface ServerState {
  /** Most recent health snapshot (refreshed by the /health handler) */
  healthStatus: HealthStatus;
  /** Effective configuration the server was started with */
  config: ServerConfig;
  /** CORS middleware instance */
  cors: ReturnType<typeof getCORSMiddleware>;
  /** Rate limiter instance */
  rateLimiter: ReturnType<typeof getRateLimiter>;
}

// Assigned by startServer(); handlers must not run before that.
let serverState: ServerState;
|
|
65
|
+
|
|
66
|
+
/**
 * Build the JSON payload for a successful request.
 *
 * @param message - Text to report; defaults to "Notification sent"
 * @returns Payload with `status` set to "success"
 */
function successResponse(message: string = "Notification sent"): SuccessResponse {
  const payload: SuccessResponse = { status: "success", message };
  return payload;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Build the JSON payload for a failed request.
 *
 * @param message - Description of what went wrong
 * @returns Payload with `status` set to "error"
 */
function errorResponse(message: string): ErrorResponse {
  const payload: ErrorResponse = { status: "error", message };
  return payload;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Escape a string for safe use inside a double-quoted AppleScript string literal.
 *
 * Inside an AppleScript text literal the backslash and the double quote are
 * the two special characters; each is written as a two-character sequence
 * (`\\` and `\"`). Both must be escaped: an unescaped quote ends the literal,
 * and an unescaped trailing backslash would swallow the closing quote.
 *
 * @param input - Raw text to embed
 * @returns `input` with every backslash and double quote prefixed by a backslash
 */
function escapeForAppleScriptString(input: string): string {
  // Single pass so backslashes added for quotes are never escaped a second time.
  return input.replace(/[\\"]/g, (special) => `\\${special}`);
}
|
|
94
|
+
|
|
95
|
+
/**
 * Show a macOS notification by running an AppleScript through `osascript`.
 *
 * Best-effort: any failure is logged as a warning and swallowed, so this
 * function never rejects.
 *
 * @param title - Notification title
 * @param message - Notification body
 */
async function displayMacOSNotification(title: string, message: string): Promise<void> {
  try {
    // Both values are embedded in AppleScript string literals, so escape them first.
    const safeTitle = escapeForAppleScriptString(title);
    const safeBody = escapeForAppleScriptString(message);

    const appleScript = `display notification "${safeBody}" with title "${safeTitle}"`;
    await $`osascript -e ${appleScript}`;

    logger.debug("Displayed macOS notification", { title });
  } catch (error) {
    const reason = (error as Error).message;
    logger.warn("Failed to display macOS notification", { error: reason });
  }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Speak text with the macOS `say` command; used when MLX-audio synthesis fails.
 *
 * Known voice IDs are mapped to macOS voice names; an unknown or missing ID
 * uses "Alex".
 *
 * @param text - Text to speak
 * @param voiceId - Optional voice ID to translate into a macOS voice
 * @throws Re-throws whatever the `say` invocation throws, after logging it
 */
async function fallbackToMacOSSay(text: string, voiceId?: string): Promise<void> {
  try {
    logger.info("Using macOS say command as fallback");

    // Map voice IDs to macOS voices. A Map (rather than a plain object) keeps
    // IDs such as "constructor" or "toString" from resolving to inherited
    // Object.prototype members.
    const voiceMap = new Map<string, string>([
      ["marrvin", "Alex"],
      ["marlin", "Fred"],
      ["daniel", "Daniel"],
    ]);

    const voice = (voiceId ? voiceMap.get(voiceId) : undefined) ?? "Alex";

    // `--` ends option parsing so text that begins with "-" is spoken rather
    // than interpreted as a `say` option.
    await $`say -v ${voice} -- ${text}`;
  } catch (error) {
    logger.error("macOS say command failed", error as Error);
    throw error;
  }
}
|
|
134
|
+
|
|
135
|
+
/**
 * Speak text through the MLX-audio backend, falling back to macOS `say`.
 *
 * Pronunciation rules are applied to the text sent to MLX-audio. If synthesis
 * throws, the failure is logged and the original (unprocessed) text is spoken
 * with the macOS fallback instead; an error from the fallback propagates.
 *
 * @param text - Text to speak
 * @param voiceId - Voice to use
 * @param prosody - Prosody settings; translated into an instruction and also
 *   used for the speaking speed
 * @param _volume - Accepted for interface compatibility; not used here
 */
async function processTTS(
  text: string,
  voiceId: string,
  prosody: ProsodySettings,
  _volume: number
): Promise<void> {
  const { mlxConfig } = serverState.config;
  const client = getMLXTTSClient(mlxConfig);
  const instruction = translateProsody(prosody);

  // Apply pronunciation rules
  const spokenText = applyPronunciations(text);

  logger.info("Processing TTS with MLX-audio streaming", {
    text: spokenText.substring(0, 50),
    model: mlxConfig.model,
  });

  const synthesisRequest = {
    text: spokenText,
    voice: voiceId,
    prosody_instruction: instruction,
    speed: prosody.speed,
    output_format: "wav",
  };

  try {
    // Second argument enables streaming mode.
    await client.synthesize(synthesisRequest, true);
    logger.info("MLX-audio streaming playback complete");
  } catch (error) {
    const reason = (error as Error).message;
    logger.warn("MLX-audio TTS synthesis failed, falling back to macOS say", {
      error: reason,
    });
    await fallbackToMacOSSay(text, voiceId);
  }
}
|
|
177
|
+
|
|
178
|
+
/**
 * Handle POST /notify.
 *
 * Validates and sanitizes the request, optionally shows a macOS notification,
 * and speaks the message unless `voice_enabled` is explicitly `false`.
 * Any thrown error is logged and reported as "Internal server error".
 *
 * @param request - Parsed request body; `title` is optional
 * @returns A success payload, or an error payload describing the problem
 */
async function handleNotify(request: NotificationRequest): Promise<SuccessResponse | ErrorResponse> {
  try {
    // Title is optional for backward compatibility, so substitute a default.
    const rawTitle = request.title || "Notification";
    logger.info("Received /notify request", { title: rawTitle });

    if (!request.message) {
      return errorResponse("Missing required field: message");
    }

    const title = sanitizeTitle(rawTitle);
    const message = sanitizeMessage(request.message);
    if (!message) {
      return errorResponse("Invalid input after sanitization");
    }

    // Voice selection: explicit ID, then name, then the configured default.
    const { config } = serverState;
    const voiceId = request.voice_id || request.voice_name || config.defaultVoiceId;
    const voiceSettings = request.voice_settings || DEFAULT_PROSODY;
    const volume = request.volume ?? voiceSettings.volume ?? 1.0;

    if (config.enableMacOSNotifications) {
      await displayMacOSNotification(title, message);
    }

    // Speech is opt-out: only an explicit `false` disables it.
    const speechEnabled = request.voice_enabled !== false;
    if (speechEnabled) {
      await processTTS(message, voiceId, voiceSettings, volume);
    }

    return successResponse();
  } catch (error) {
    logger.error("Error handling /notify", error as Error);
    return errorResponse("Internal server error");
  }
}
|
|
219
|
+
|
|
220
|
+
/**
 * Handle POST /pai.
 *
 * Requires both `title` and `message`. Always uses the configured default
 * voice, the default prosody and full volume. Any thrown error is logged and
 * reported as "Internal server error".
 *
 * @param request - Parsed request body
 * @returns A success payload, or an error payload describing the problem
 */
async function handlePai(request: PaiNotificationRequest): Promise<SuccessResponse | ErrorResponse> {
  try {
    logger.info("Received /pai request", { title: request.title });

    // Validate request
    if (!request.title || !request.message) {
      return errorResponse("Missing required fields: title and message");
    }

    // Sanitize input
    const title = sanitizeTitle(request.title);
    const message = sanitizeMessage(request.message);

    // Same guard as /notify: do not notify or speak a message that
    // sanitization reduced to nothing.
    if (!message) {
      return errorResponse("Invalid input after sanitization");
    }

    // Use default DA voice settings
    const voiceId = serverState.config.defaultVoiceId;
    const voiceSettings = DEFAULT_PROSODY;
    const volume = 1.0;

    // Display macOS notification
    if (serverState.config.enableMacOSNotifications) {
      await displayMacOSNotification(title, message);
    }

    // Process TTS
    await processTTS(message, voiceId, voiceSettings, volume);

    return successResponse("PAI notification sent");
  } catch (error) {
    logger.error("Error handling /pai", error as Error);
    return errorResponse("Internal server error");
  }
}
|
|
255
|
+
|
|
256
|
+
/**
 * Handle GET /health.
 *
 * Probes the MLX-audio client, collects the available voices, stores the
 * resulting snapshot in `serverState.healthStatus` and returns it. Neither
 * probe failure causes a rejection: a failed MLX check reports the macOS
 * fallback with status "degraded", and a failed voice lookup is logged and
 * leaves `available_voices` undefined.
 *
 * @returns The freshly computed health status
 */
async function handleHealth(): Promise<HealthStatus> {
  const { config } = serverState;

  // MLX-audio counts as loaded only when its health check succeeds.
  let modelLoaded = false;
  try {
    const client = getMLXTTSClient(config.mlxConfig);
    if (await client.healthCheck()) {
      modelLoaded = true;
    }
  } catch {
    modelLoaded = false;
  }

  // The macOS fallback is reported whenever MLX-audio is not usable.
  const voiceSystem: HealthStatus["voice_system"] = modelLoaded ? "MLX-audio" : "macOS Say";

  let voices: string[] = [];
  try {
    voices = await getVoiceLoader().getAvailableVoices();
  } catch (error) {
    logger.warn("Failed to get available voices", { error: (error as Error).message });
  }

  const snapshot: HealthStatus = {
    status: modelLoaded ? "healthy" : "degraded",
    port: config.port,
    voice_system: voiceSystem,
    default_voice_id: config.defaultVoiceId,
    model_loaded: modelLoaded,
    // An empty list is reported as "no information" rather than [].
    available_voices: voices.length > 0 ? voices : undefined,
  };

  serverState.healthStatus = snapshot;
  return serverState.healthStatus;
}
|
|
301
|
+
|
|
302
|
+
/**
 * Read a request body and parse it as JSON.
 *
 * The result is cast to `T` without any runtime validation.
 *
 * @param req - Incoming request whose body is consumed
 * @returns The parsed JSON value, typed as `T`
 * @throws Error with message "Empty request body" when the body is empty
 * @throws SyntaxError (from JSON.parse) when the body is not valid JSON
 */
async function parseJsonBody<T>(req: Request): Promise<T> {
  const rawBody = await req.text();
  if (rawBody) {
    return JSON.parse(rawBody) as T;
  }
  throw new Error("Empty request body");
}
|
|
312
|
+
|
|
313
|
+
/**
 * Start the Bun HTTP server.
 *
 * Steps, in order:
 * 1. Merge `config` over DEFAULT_CONFIG (shallow merge) and initialize the
 *    shared server state.
 * 2. Health-check the MLX-audio client; a failed check aborts startup.
 * 3. Load voice configurations and pronunciation rules; failures here are
 *    logged as warnings and do not abort startup.
 * 4. Serve POST /notify, POST /pai and GET /health, with CORS preflight
 *    handling and rate limiting applied to POST requests.
 * 5. Register SIGTERM / SIGINT handlers that stop the rate limiter and the
 *    server, then exit the process.
 *
 * Every non-preflight response carries CORS headers, including 404, 429 and
 * 400 responses. A POST whose body is empty or not valid JSON is answered
 * with 400 rather than being reported as a server error.
 *
 * @param config - Overrides applied on top of the defaults
 * @throws When the MLX-audio health check fails or throws
 */
export async function startServer(config: Partial<ServerConfig> = {}): Promise<void> {
  // Initialize server state
  const finalConfig = { ...DEFAULT_CONFIG, ...config };
  serverState = {
    healthStatus: createDefaultHealthStatus(),
    config: finalConfig,
    cors: getCORSMiddleware(),
    rateLimiter: getRateLimiter(),
  };

  const { port, host } = finalConfig;
  const serverUrl = `http://${host}:${port}`;

  logger.info(`Starting Voice Server on ${serverUrl}`);
  logger.info(`TTS Backend: MLX-audio`, {
    model: finalConfig.mlxConfig.model,
  });

  // Initialize MLX-audio client
  try {
    const mlxClient = getMLXTTSClient(finalConfig.mlxConfig);
    const healthy = await mlxClient.healthCheck();
    if (!healthy) {
      throw new Error("MLX-audio CLI not available");
    }
    logger.info("MLX-audio TTS backend initialized successfully");
  } catch (error) {
    logger.error("Failed to initialize MLX-audio", error as Error);
    throw error;
  }

  // Load voice configurations
  try {
    const voiceLoader = getVoiceLoader();
    const voices = await voiceLoader.loadVoices();
    logger.info(`Loaded ${voices.size} voice configurations`);
  } catch (error) {
    logger.warn("Failed to load voice configurations", {
      error: (error as Error).message,
    });
  }

  // Load pronunciation rules
  try {
    const pronunciations = loadPAIPronunciations();
    if (pronunciations.length > 0) {
      logger.info(`Loaded ${pronunciations.length} pronunciation rules`);
    }
  } catch (error) {
    logger.warn("Failed to load pronunciation rules", {
      error: (error as Error).message,
    });
  }

  const server = Bun.serve({
    hostname: host,
    port,
    async fetch(req): Promise<Response> {
      const url = new URL(req.url);
      const path = url.pathname;
      const origin = req.headers.get("Origin");

      // Single exit point for JSON responses so that error responses get the
      // same CORS headers as successful ones.
      const respond = (data: unknown, status: number = 200): Response =>
        serverState.cors.addCorsHeaders(Response.json(data, { status }), origin);

      // Parse the JSON body and run a handler; an unreadable body is the
      // client's fault, so it is answered with 400 instead of 500.
      const handleJsonPost = async <T>(handler: (body: T) => Promise<unknown>): Promise<Response> => {
        let body: T;
        try {
          body = await parseJsonBody<T>(req);
        } catch (parseError) {
          logger.warn("Rejected request with invalid JSON body", {
            path,
            error: (parseError as Error).message,
          });
          return respond(errorResponse("Invalid JSON body"), 400);
        }
        return respond(await handler(body));
      };

      logger.debug("Incoming request", { method: req.method, path });

      try {
        // CORS preflight
        if (req.method === "OPTIONS") {
          return serverState.cors.handlePreflight(origin);
        }

        // Rate limiting (apply to POST requests)
        if (req.method === "POST") {
          const clientId = extractClientId(req);
          if (serverState.rateLimiter.isRateLimited(clientId)) {
            return respond(errorResponse("Rate limit exceeded"), 429);
          }
        }

        // POST /notify
        if (path === "/notify" && req.method === "POST") {
          // `return await` keeps handler rejections inside this try block.
          return await handleJsonPost<NotificationRequest>(handleNotify);
        }

        // POST /pai
        if (path === "/pai" && req.method === "POST") {
          return await handleJsonPost<PaiNotificationRequest>(handlePai);
        }

        // GET /health
        if (path === "/health" && req.method === "GET") {
          return respond(await handleHealth());
        }

        // 404 Not Found
        return respond(errorResponse("Not found"), 404);
      } catch (error) {
        logger.error("Request handler error", error as Error, { path });

        const errorResp = Response.json(
          errorResponse("Internal server error"),
          { status: 500 }
        );
        return serverState.cors.addCorsHeaders(errorResp, origin);
      }
    },
  });

  logger.info(`Server listening on ${serverUrl}`);

  // Graceful shutdown
  const shutdown = async () => {
    logger.info("Shutting down server...");

    // Stop rate limiter
    serverState.rateLimiter.stop();

    server.stop();
    logger.info("Server stopped");
    process.exit(0);
  };

  process.on("SIGTERM", shutdown);
  process.on("SIGINT", shutdown);
}
|
|
453
|
+
|
|
454
|
+
// Entry point: boot the server only when this module is the program being run
// (not when it is imported). A startup failure is logged and exits with code 1.
if (import.meta.main) {
  startServer().then(undefined, (cause) => {
    logger.error("Failed to start server", cause);
    process.exit(1);
  });
}
|