@openpalm/channel-voice 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +31 -0
- package/src/config.ts +59 -0
- package/src/index.test.ts +95 -0
- package/src/index.ts +193 -0
- package/src/providers.ts +156 -0
- package/web/app.js +520 -0
- package/web/index.html +85 -0
- package/web/manifest.webmanifest +9 -0
- package/web/styles.css +552 -0
- package/web/sw.js +26 -0
package/package.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@openpalm/channel-voice",
|
|
3
|
+
"version": "0.9.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"license": "MPL-2.0",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/itlackey/openpalm",
|
|
9
|
+
"directory": "packages/channel-voice"
|
|
10
|
+
},
|
|
11
|
+
"main": "src/index.ts",
|
|
12
|
+
"files": [
|
|
13
|
+
"src",
|
|
14
|
+
"web"
|
|
15
|
+
],
|
|
16
|
+
"scripts": {
|
|
17
|
+
"start": "bun run src/index.ts",
|
|
18
|
+
"dev": "export CHANNEL_VOICE_SECRET=105a158d326fa54e569b234d4458ada2 && export PORT=8090 && bun --watch run src/index.ts",
|
|
19
|
+
"dev:unset": "unset STT_API_KEY && unset OPENAI_API_KEY && export CHANNEL_VOICE_SECRET=105a158d326fa54e569b234d4458ada2 && export PORT=8090 && bun --watch run src/index.ts",
|
|
20
|
+
"typecheck": "tsc --noEmit",
|
|
21
|
+
"test": "bun test src/",
|
|
22
|
+
"test:e2e": "npx playwright test --config=playwright.config.ts"
|
|
23
|
+
},
|
|
24
|
+
"peerDependencies": {
|
|
25
|
+
"@openpalm/channels-sdk": ">=0.8.0 <1.0.0"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@openpalm/channels-sdk": ">=0.8.0 <1.0.0",
|
|
29
|
+
"@playwright/test": "^1.58.2"
|
|
30
|
+
}
|
|
31
|
+
}
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed environment configuration for the voice channel.
|
|
3
|
+
* Bun loads .env automatically. Uses Bun.env, not process.env.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { resolve } from 'node:path'
|
|
7
|
+
|
|
8
|
+
/**
 * Resolved runtime configuration, built once at module load from Bun.env.
 * All base URLs are stored without a trailing slash so callers can append
 * "/v1/..." paths directly.
 */
interface Config {
  // Absolute path of the static web assets directory served by the channel.
  server: { webRoot: string }
  // Speech-to-text (OpenAI-compatible transcription API).
  stt: { baseUrl: string; apiKey: string; model: string; timeoutMs: number }
  // Text-to-speech (OpenAI-compatible speech API).
  tts: { baseUrl: string; apiKey: string; model: string; voice: string; timeoutMs: number }
  // Direct LLM fallback used when the guardian pipeline is unreachable.
  llm: { baseUrl: string; apiKey: string; model: string; timeoutMs: number; systemPrompt: string }
}
|
|
14
|
+
|
|
15
|
+
function env(key: string, fallback = ''): string {
|
|
16
|
+
return Bun.env[key] || fallback
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function envInt(key: string, fallback: number): number {
|
|
20
|
+
const v = Bun.env[key]
|
|
21
|
+
if (!v) return fallback
|
|
22
|
+
const n = parseInt(v, 10)
|
|
23
|
+
return Number.isNaN(n) ? fallback : n
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
// Resolve API key: check dedicated key first, then shared OPENAI_API_KEY.
|
|
27
|
+
// Only use OPENAI_API_KEY if the dedicated key is truly unset (not present in env at all),
|
|
28
|
+
// to avoid shell-inherited vars overriding .env values unexpectedly.
|
|
29
|
+
function resolveApiKey(dedicatedKey: string): string {
|
|
30
|
+
const dedicated = Bun.env[dedicatedKey]
|
|
31
|
+
if (dedicated !== undefined && dedicated !== '') return dedicated
|
|
32
|
+
return Bun.env.OPENAI_API_KEY || ''
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export const config: Config = {
|
|
36
|
+
server: {
|
|
37
|
+
webRoot: resolve(env('WEB_ROOT', new URL('../web', import.meta.url).pathname)),
|
|
38
|
+
},
|
|
39
|
+
stt: {
|
|
40
|
+
baseUrl: env('STT_BASE_URL', 'https://api.openai.com').replace(/\/$/, ''),
|
|
41
|
+
apiKey: resolveApiKey('STT_API_KEY'),
|
|
42
|
+
model: env('STT_MODEL', 'whisper-1'),
|
|
43
|
+
timeoutMs: envInt('STT_TIMEOUT_MS', 30_000),
|
|
44
|
+
},
|
|
45
|
+
tts: {
|
|
46
|
+
baseUrl: env('TTS_BASE_URL', 'https://api.openai.com').replace(/\/$/, ''),
|
|
47
|
+
apiKey: resolveApiKey('TTS_API_KEY'),
|
|
48
|
+
model: env('TTS_MODEL', 'tts-1'),
|
|
49
|
+
voice: env('TTS_VOICE', 'alloy'),
|
|
50
|
+
timeoutMs: envInt('TTS_TIMEOUT_MS', 30_000),
|
|
51
|
+
},
|
|
52
|
+
llm: {
|
|
53
|
+
baseUrl: env('LLM_BASE_URL', 'http://localhost:11434').replace(/\/$/, ''),
|
|
54
|
+
apiKey: env('LLM_API_KEY', 'ollama'),
|
|
55
|
+
model: env('LLM_MODEL', 'qwen2.5:3b'),
|
|
56
|
+
timeoutMs: envInt('LLM_TIMEOUT_MS', 60_000),
|
|
57
|
+
systemPrompt: env('LLM_SYSTEM_PROMPT', 'You are a helpful voice assistant. Respond conversationally and concisely. Do not use markdown formatting.'),
|
|
58
|
+
},
|
|
59
|
+
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test";
|
|
2
|
+
import VoiceChannel from "./index";
|
|
3
|
+
|
|
4
|
+
function mockGuardianFetch() {
|
|
5
|
+
const mockFetch = async () => {
|
|
6
|
+
return new Response(JSON.stringify({ answer: "hello back", sessionId: "s1" }), { status: 200 });
|
|
7
|
+
};
|
|
8
|
+
return mockFetch as unknown as typeof fetch;
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/**
 * Build a fetch handler backed by a fresh VoiceChannel with a stubbed
 * shared secret and a mocked guardian, so endpoint tests need no real env.
 */
function createHandler() {
  const channel = new VoiceChannel();
  // Replace the secret getter with a fixed test value.
  Object.defineProperty(channel, "secret", { get: () => "test-secret" });
  return channel.createFetch(mockGuardianFetch());
}
|
|
16
|
+
|
|
17
|
+
describe("voice channel health", () => {
  it("GET /api/health returns 200 with STT/TTS config", async () => {
    const handler = createHandler();
    const resp = await handler(new Request("http://voice/api/health"));
    expect(resp.status).toBe(200);
    const body = (await resp.json()) as Record<string, unknown>;
    expect(body.ok).toBe(true);
    expect(body.service).toBe("channel-voice");
    expect(body.stt).toBeDefined();
    expect(body.tts).toBeDefined();
    const stt = body.stt as Record<string, unknown>;
    const tts = body.tts as Record<string, unknown>;
    // Expected values are the config.ts defaults (STT_MODEL/TTS_MODEL/TTS_VOICE unset).
    expect(stt.model).toBe("whisper-1");
    expect(tts.model).toBe("tts-1");
    expect(tts.voice).toBe("alloy");
  });
});
|
|
34
|
+
|
|
35
|
+
describe("voice channel pipeline validation", () => {
  it("POST /api/pipeline with no audio returns 400", async () => {
    const handler = createHandler();
    // Empty multipart body: neither "audio"/"file" nor "text" present.
    const form = new FormData();
    const resp = await handler(
      new Request("http://voice/api/pipeline", {
        method: "POST",
        body: form,
      })
    );
    expect(resp.status).toBe(400);
    const body = (await resp.json()) as Record<string, unknown>;
    expect(body.error).toBe("Missing audio file or text");
  });

  it("POST /api/pipeline rejects oversized audio (>25MB)", async () => {
    const handler = createHandler();
    const form = new FormData();
    // Create a file slightly over 25MB (handler's hard limit is 25 * 1024 * 1024).
    const bigBuffer = new Uint8Array(26 * 1024 * 1024);
    form.append("audio", new File([bigBuffer], "big.wav", { type: "audio/wav" }));
    const resp = await handler(
      new Request("http://voice/api/pipeline", {
        method: "POST",
        body: form,
      })
    );
    expect(resp.status).toBe(413);
    const body = (await resp.json()) as Record<string, unknown>;
    expect(body.error).toContain("max 25MB");
  });
});
|
|
67
|
+
|
|
68
|
+
describe("voice channel static files", () => {
  it("GET / returns index.html", async () => {
    const handler = createHandler();
    const resp = await handler(new Request("http://voice/"));
    expect(resp.status).toBe(200);
    expect(resp.headers.get("Content-Type")).toContain("text/html");
  });

  it("GET /nonexistent returns 404", async () => {
    const handler = createHandler();
    const resp = await handler(new Request("http://voice/nonexistent.xyz"));
    expect(resp.status).toBe(404);
  });

  it("GET with path traversal returns 403", async () => {
    // URL parser normalizes ".." out of paths, so we call route() directly
    // with a crafted URL to test the defense-in-depth traversal guard.
    const channel = new VoiceChannel();
    Object.defineProperty(channel, "secret", { get: () => "test-secret" });
    const req = new Request("http://voice/etc/passwd", { method: "GET" });
    const url = new URL("http://voice/../../etc/passwd");
    // Override pathname to contain traversal (URL normalizes it away).
    Object.defineProperty(url, "pathname", { value: "/../../etc/passwd" });
    const resp = await channel.route(req, url);
    expect(resp).not.toBeNull();
    expect(resp!.status).toBe(403);
  });
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenPalm Channel Voice — Voice-driven conversational channel.
|
|
3
|
+
*
|
|
4
|
+
* Receives audio, transcribes it (STT), forwards the transcript to the
|
|
5
|
+
* guardian via the channels SDK, gets the LLM response, synthesizes it
|
|
6
|
+
* to audio (TTS), and returns everything.
|
|
7
|
+
*
|
|
8
|
+
* Endpoints:
|
|
9
|
+
* POST /api/pipeline — Full voice pipeline (audio in -> text + audio out)
|
|
10
|
+
* GET /api/health — Health check with STT/TTS config info
|
|
11
|
+
* GET /* — Static file serving from web/ directory
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { extname, join, resolve, sep } from 'node:path'

import { BaseChannel, type HandleResult, createLogger } from '@openpalm/channels-sdk'
import type { GuardianSuccessResponse } from '@openpalm/channels-sdk'

import { config } from './config'
import { transcribe, synthesize, chatCompletion } from './providers'
|
|
19
|
+
|
|
20
|
+
// ── MIME types for static file serving ──────────────────────────────────
|
|
21
|
+
|
|
22
|
+
// Extension → Content-Type map used by serveStatic; any extension not listed
// here is served as application/octet-stream.
const MIME_TYPES: Record<string, string> = {
  '.html': 'text/html; charset=utf-8',
  '.css': 'text/css; charset=utf-8',
  '.js': 'text/javascript; charset=utf-8',
  '.json': 'application/json; charset=utf-8',
  '.webmanifest': 'application/manifest+json; charset=utf-8',
  '.png': 'image/png',
  '.svg': 'image/svg+xml; charset=utf-8',
  '.ico': 'image/x-icon',
}
|
|
32
|
+
|
|
33
|
+
// ── Channel ─────────────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
export default class VoiceChannel extends BaseChannel {
|
|
36
|
+
name = 'voice'
|
|
37
|
+
|
|
38
|
+
async route(req: Request, url: URL): Promise<Response | null> {
|
|
39
|
+
// POST /api/pipeline — full voice pipeline
|
|
40
|
+
if (url.pathname === '/api/pipeline' && req.method === 'POST') {
|
|
41
|
+
return this.handlePipeline(req)
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// GET /api/health — health check with provider info
|
|
45
|
+
if (url.pathname === '/api/health' && req.method === 'GET') {
|
|
46
|
+
return this.json(200, {
|
|
47
|
+
ok: true,
|
|
48
|
+
service: 'channel-voice',
|
|
49
|
+
stt: { model: config.stt.model, configured: !!config.stt.apiKey },
|
|
50
|
+
tts: { model: config.tts.model, voice: config.tts.voice, configured: !!config.tts.apiKey },
|
|
51
|
+
llm: { model: config.llm.model, configured: !!config.llm.apiKey },
|
|
52
|
+
})
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// GET /* — serve static files from web/ directory
|
|
56
|
+
if (req.method === 'GET' || req.method === 'HEAD') {
|
|
57
|
+
return this.serveStatic(req, url)
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
return null
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// ── Pipeline ────────────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
private async handlePipeline(req: Request): Promise<Response> {
|
|
66
|
+
// Parse FormData
|
|
67
|
+
let form: FormData
|
|
68
|
+
try {
|
|
69
|
+
form = await req.formData()
|
|
70
|
+
} catch {
|
|
71
|
+
return this.json(400, { error: 'Invalid form data' })
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
const audioFile = form.get('audio') ?? form.get('file')
|
|
75
|
+
const textField = form.get('text')
|
|
76
|
+
|
|
77
|
+
// Must provide either audio or text
|
|
78
|
+
if (!(audioFile instanceof File) && !textField) {
|
|
79
|
+
return this.json(400, { error: 'Missing audio file or text' })
|
|
80
|
+
}
|
|
81
|
+
if (audioFile instanceof File && audioFile.size > 25 * 1024 * 1024) {
|
|
82
|
+
return this.json(413, { error: 'Audio too large (max 25MB)' })
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
const userId = req.headers.get('x-forwarded-for')
|
|
86
|
+
|| req.headers.get('x-real-ip')
|
|
87
|
+
|| 'voice-user'
|
|
88
|
+
|
|
89
|
+
// Step 1: STT — transcribe audio, or use provided text (browser STT fallback)
|
|
90
|
+
let transcript: string
|
|
91
|
+
if (typeof textField === 'string' && textField.trim()) {
|
|
92
|
+
transcript = textField.trim()
|
|
93
|
+
} else if (audioFile instanceof File) {
|
|
94
|
+
if (!config.stt.apiKey) {
|
|
95
|
+
return this.json(400, { error: 'STT not configured', code: 'stt_not_configured' })
|
|
96
|
+
}
|
|
97
|
+
try {
|
|
98
|
+
transcript = await transcribe(audioFile)
|
|
99
|
+
} catch (err) {
|
|
100
|
+
this.log('error', 'STT failed', { error: (err as Error).message })
|
|
101
|
+
return this.json(502, { error: `Transcription failed: ${(err as Error).message}`, code: 'stt_error' })
|
|
102
|
+
}
|
|
103
|
+
} else {
|
|
104
|
+
transcript = ''
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
if (!transcript.trim()) {
|
|
108
|
+
return this.json(200, { transcript: '', response: '', audio: null })
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// Step 2: Forward transcript to guardian, fall back to direct LLM
|
|
112
|
+
let answer: string
|
|
113
|
+
try {
|
|
114
|
+
const guardianResp = await this.forward({ userId, text: transcript })
|
|
115
|
+
|
|
116
|
+
if (!guardianResp.ok) {
|
|
117
|
+
this.log('error', 'Guardian error', { status: guardianResp.status })
|
|
118
|
+
throw new Error(`Guardian error (${guardianResp.status})`)
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
const data = (await guardianResp.json()) as GuardianSuccessResponse
|
|
122
|
+
answer = data.answer ?? ''
|
|
123
|
+
} catch (err) {
|
|
124
|
+
this.log('warn', 'Guardian unavailable, trying direct LLM', { error: (err as Error).message })
|
|
125
|
+
try {
|
|
126
|
+
answer = await chatCompletion(transcript)
|
|
127
|
+
} catch (llmErr) {
|
|
128
|
+
this.log('error', 'LLM fallback also failed', { error: (llmErr as Error).message })
|
|
129
|
+
return this.json(502, { error: 'No LLM available (guardian down, no direct LLM key configured)' })
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// Step 3: TTS — synthesize response to audio (non-fatal)
|
|
134
|
+
const audio = await synthesize(answer).catch((err) => {
|
|
135
|
+
this.log('warn', 'TTS failed', { error: (err as Error).message })
|
|
136
|
+
return null
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
return this.json(200, { transcript, response: answer, audio })
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// ── Static file serving ─────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
private async serveStatic(_req: Request, url: URL): Promise<Response> {
|
|
145
|
+
const pathname = url.pathname === '/' ? '/index.html' : url.pathname
|
|
146
|
+
const filePath = resolve(join(config.server.webRoot, pathname.replace(/^\/+/, '')))
|
|
147
|
+
|
|
148
|
+
// Prevent path traversal
|
|
149
|
+
if (!filePath.startsWith(config.server.webRoot)) {
|
|
150
|
+
return new Response('Forbidden', { status: 403 })
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
const file = Bun.file(filePath)
|
|
154
|
+
if (!(await file.exists())) {
|
|
155
|
+
// SPA fallback: serve index.html for HTML navigation requests
|
|
156
|
+
if (_req.headers.get('accept')?.includes('text/html')) {
|
|
157
|
+
const indexPath = join(config.server.webRoot, 'index.html')
|
|
158
|
+
const indexFile = Bun.file(indexPath)
|
|
159
|
+
if (await indexFile.exists()) {
|
|
160
|
+
return new Response(indexFile, {
|
|
161
|
+
headers: { 'Content-Type': 'text/html; charset=utf-8', 'Cache-Control': 'no-cache' },
|
|
162
|
+
})
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
return new Response('Not found', { status: 404 })
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
const ext = extname(filePath).toLowerCase()
|
|
169
|
+
const contentType = MIME_TYPES[ext] || 'application/octet-stream'
|
|
170
|
+
const isVolatile = ext === '.html' || ext === '.webmanifest' || pathname === '/sw.js'
|
|
171
|
+
const cacheControl = isVolatile ? 'no-cache' : 'public, max-age=31536000, immutable'
|
|
172
|
+
|
|
173
|
+
return new Response(file, {
|
|
174
|
+
headers: { 'Content-Type': contentType, 'Cache-Control': cacheControl },
|
|
175
|
+
})
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// handleRequest is not used — all logic is in route()
|
|
179
|
+
async handleRequest(_req: Request): Promise<HandleResult | null> {
|
|
180
|
+
return null
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
// Self-start when run directly (not via channel entrypoint)
|
|
185
|
+
if (import.meta.main) {
  const log = createLogger('channel-voice')
  // Log the effective provider configuration at startup; only base URLs and
  // model names are printed, never the API keys themselves.
  log.info('config', {
    stt: config.stt.apiKey ? `${config.stt.baseUrl} (${config.stt.model})` : 'not configured — browser fallback',
    tts: config.tts.apiKey ? `${config.tts.baseUrl} (${config.tts.model}, ${config.tts.voice})` : 'not configured — browser fallback',
  })
  const channel = new VoiceChannel()
  // NOTE(review): start() comes from BaseChannel in the SDK — presumably it
  // binds the HTTP server using the PORT env var; confirm in @openpalm/channels-sdk.
  channel.start()
}
|
package/src/providers.ts
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* STT and TTS API calls. Both use OpenAI-compatible APIs.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { createLogger } from '@openpalm/channels-sdk'
|
|
6
|
+
import { config } from './config'
|
|
7
|
+
|
|
8
|
+
// Module-scoped logger shared by the provider helpers below.
const log = createLogger('channel-voice')
|
|
9
|
+
|
|
10
|
+
// ── Timeout helper ──────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
async function fetchWithTimeout(url: string, init: RequestInit, timeoutMs: number): Promise<Response> {
|
|
13
|
+
const controller = new AbortController()
|
|
14
|
+
const timer = setTimeout(() => controller.abort(), timeoutMs)
|
|
15
|
+
try {
|
|
16
|
+
return await fetch(url, { ...init, signal: controller.signal })
|
|
17
|
+
} catch (err) {
|
|
18
|
+
if ((err as Error).name === 'AbortError') {
|
|
19
|
+
throw new Error(`Request timed out after ${timeoutMs}ms`)
|
|
20
|
+
}
|
|
21
|
+
throw err
|
|
22
|
+
} finally {
|
|
23
|
+
clearTimeout(timer)
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// ── STT ─────────────────────────────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Transcribe audio via OpenAI-compatible STT API.
|
|
31
|
+
* Accepts the raw File from the client's FormData.
|
|
32
|
+
*/
|
|
33
|
+
export async function transcribe(audioFile: File): Promise<string> {
|
|
34
|
+
const form = new FormData()
|
|
35
|
+
form.set('model', config.stt.model)
|
|
36
|
+
form.set('file', audioFile, audioFile.name || 'audio.webm')
|
|
37
|
+
|
|
38
|
+
const res = await fetchWithTimeout(
|
|
39
|
+
`${config.stt.baseUrl}/v1/audio/transcriptions`,
|
|
40
|
+
{
|
|
41
|
+
method: 'POST',
|
|
42
|
+
headers: { Authorization: `Bearer ${config.stt.apiKey}` },
|
|
43
|
+
body: form,
|
|
44
|
+
},
|
|
45
|
+
config.stt.timeoutMs,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
if (!res.ok) {
|
|
49
|
+
const body = await res.text().catch(() => '')
|
|
50
|
+
throw new Error(`STT failed (${res.status}): ${body || res.statusText}`)
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const data = (await res.json()) as { text?: string }
|
|
54
|
+
return data.text || ''
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// ── TTS ─────────────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Synthesize text to audio via OpenAI-compatible TTS API.
|
|
61
|
+
* Returns base64-encoded mp3 string, or null if TTS is not configured or fails.
|
|
62
|
+
* TTS failure is non-fatal — the client still gets the text response.
|
|
63
|
+
*/
|
|
64
|
+
/** Strip markdown syntax so TTS reads clean prose. */
|
|
65
|
+
function stripMarkdown(text: string): string {
|
|
66
|
+
return text
|
|
67
|
+
.replace(/```[\s\S]*?```/g, '') // remove code blocks
|
|
68
|
+
.replace(/`([^`]+)`/g, '$1') // inline code → plain text
|
|
69
|
+
.replace(/\*\*([^*]+)\*\*/g, '$1') // bold → plain
|
|
70
|
+
.replace(/\*([^*]+)\*/g, '$1') // italic → plain
|
|
71
|
+
.replace(/^#{1,6}\s+/gm, '') // headings → plain
|
|
72
|
+
.replace(/^\s*[-*+]\s+/gm, '') // list markers → plain
|
|
73
|
+
.replace(/^\s*\d+\.\s+/gm, '') // numbered lists → plain
|
|
74
|
+
.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links → text only
|
|
75
|
+
.replace(/\n{3,}/g, '\n\n') // collapse excess newlines
|
|
76
|
+
.trim()
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export async function synthesize(text: string): Promise<string | null> {
|
|
80
|
+
if (!text.trim() || !config.tts.apiKey) return null
|
|
81
|
+
|
|
82
|
+
const cleanText = stripMarkdown(text)
|
|
83
|
+
if (!cleanText) return null
|
|
84
|
+
|
|
85
|
+
let res: Response
|
|
86
|
+
try {
|
|
87
|
+
res = await fetchWithTimeout(
|
|
88
|
+
`${config.tts.baseUrl}/v1/audio/speech`,
|
|
89
|
+
{
|
|
90
|
+
method: 'POST',
|
|
91
|
+
headers: {
|
|
92
|
+
Authorization: `Bearer ${config.tts.apiKey}`,
|
|
93
|
+
'Content-Type': 'application/json',
|
|
94
|
+
},
|
|
95
|
+
body: JSON.stringify({
|
|
96
|
+
model: config.tts.model,
|
|
97
|
+
input: cleanText,
|
|
98
|
+
voice: config.tts.voice,
|
|
99
|
+
response_format: 'mp3',
|
|
100
|
+
}),
|
|
101
|
+
},
|
|
102
|
+
config.tts.timeoutMs,
|
|
103
|
+
)
|
|
104
|
+
} catch (err) {
|
|
105
|
+
log.error('TTS request error', { error: (err as Error).message })
|
|
106
|
+
return null
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
if (!res.ok) {
|
|
110
|
+
const body = await res.text().catch(() => '')
|
|
111
|
+
log.error('TTS API error', { status: res.status, body: body || res.statusText })
|
|
112
|
+
return null
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const buffer = await res.arrayBuffer()
|
|
116
|
+
return Buffer.from(buffer).toString('base64')
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// ── LLM (direct fallback when guardian is unavailable) ─────────────────
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Direct LLM call via OpenAI-compatible chat completions API.
|
|
123
|
+
* Used as fallback when the guardian/assistant pipeline is unreachable.
|
|
124
|
+
*/
|
|
125
|
+
export async function chatCompletion(prompt: string): Promise<string> {
|
|
126
|
+
if (!config.llm.apiKey) throw new Error('No LLM API key configured and guardian unavailable')
|
|
127
|
+
|
|
128
|
+
const res = await fetchWithTimeout(
|
|
129
|
+
`${config.llm.baseUrl}/v1/chat/completions`,
|
|
130
|
+
{
|
|
131
|
+
method: 'POST',
|
|
132
|
+
headers: {
|
|
133
|
+
Authorization: `Bearer ${config.llm.apiKey}`,
|
|
134
|
+
'Content-Type': 'application/json',
|
|
135
|
+
},
|
|
136
|
+
body: JSON.stringify({
|
|
137
|
+
model: config.llm.model,
|
|
138
|
+
messages: [
|
|
139
|
+
{ role: 'system', content: config.llm.systemPrompt },
|
|
140
|
+
{ role: 'user', content: prompt },
|
|
141
|
+
],
|
|
142
|
+
}),
|
|
143
|
+
},
|
|
144
|
+
config.llm.timeoutMs,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
if (!res.ok) {
|
|
148
|
+
const body = await res.text().catch(() => '')
|
|
149
|
+
throw new Error(`LLM failed (${res.status}): ${body || res.statusText}`)
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
const data = (await res.json()) as { choices?: Array<{ message?: { content?: string } }> }
|
|
153
|
+
const text = data.choices?.[0]?.message?.content
|
|
154
|
+
if (!text) throw new Error('Empty response from LLM')
|
|
155
|
+
return text
|
|
156
|
+
}
|