@openpalm/channel-voice 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "@openpalm/channel-voice",
3
+ "version": "0.9.0",
4
+ "type": "module",
5
+ "license": "MPL-2.0",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/itlackey/openpalm",
9
+ "directory": "packages/channel-voice"
10
+ },
11
+ "main": "src/index.ts",
12
+ "files": [
13
+ "src",
14
+ "web"
15
+ ],
16
+ "scripts": {
17
+ "start": "bun run src/index.ts",
18
+ "dev": "export CHANNEL_VOICE_SECRET=105a158d326fa54e569b234d4458ada2 && export PORT=8090 && bun --watch run src/index.ts",
19
+ "dev:unset": "unset STT_API_KEY && unset OPENAI_API_KEY && export CHANNEL_VOICE_SECRET=105a158d326fa54e569b234d4458ada2 && export PORT=8090 && bun --watch run src/index.ts",
20
+ "typecheck": "tsc --noEmit",
21
+ "test": "bun test src/",
22
+ "test:e2e": "npx playwright test --config=playwright.config.ts"
23
+ },
24
+ "peerDependencies": {
25
+ "@openpalm/channels-sdk": ">=0.8.0 <1.0.0"
26
+ },
27
+ "devDependencies": {
28
+ "@openpalm/channels-sdk": ">=0.8.0 <1.0.0",
29
+ "@playwright/test": "^1.58.2"
30
+ }
31
+ }
package/src/config.ts ADDED
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Typed environment configuration for the voice channel.
3
+ * Bun loads .env automatically. Uses Bun.env, not process.env.
4
+ */
5
+
6
+ import { resolve } from 'node:path'
7
+
8
+ interface Config {
9
+ server: { webRoot: string }
10
+ stt: { baseUrl: string; apiKey: string; model: string; timeoutMs: number }
11
+ tts: { baseUrl: string; apiKey: string; model: string; voice: string; timeoutMs: number }
12
+ llm: { baseUrl: string; apiKey: string; model: string; timeoutMs: number; systemPrompt: string }
13
+ }
14
+
15
+ function env(key: string, fallback = ''): string {
16
+ return Bun.env[key] || fallback
17
+ }
18
+
19
+ function envInt(key: string, fallback: number): number {
20
+ const v = Bun.env[key]
21
+ if (!v) return fallback
22
+ const n = parseInt(v, 10)
23
+ return Number.isNaN(n) ? fallback : n
24
+ }
25
+
26
+ // Resolve API key: check dedicated key first, then shared OPENAI_API_KEY.
27
+ // Only use OPENAI_API_KEY if the dedicated key is truly unset (not present in env at all),
28
+ // to avoid shell-inherited vars overriding .env values unexpectedly.
29
+ function resolveApiKey(dedicatedKey: string): string {
30
+ const dedicated = Bun.env[dedicatedKey]
31
+ if (dedicated !== undefined && dedicated !== '') return dedicated
32
+ return Bun.env.OPENAI_API_KEY || ''
33
+ }
34
+
35
+ export const config: Config = {
36
+ server: {
37
+ webRoot: resolve(env('WEB_ROOT', new URL('../web', import.meta.url).pathname)),
38
+ },
39
+ stt: {
40
+ baseUrl: env('STT_BASE_URL', 'https://api.openai.com').replace(/\/$/, ''),
41
+ apiKey: resolveApiKey('STT_API_KEY'),
42
+ model: env('STT_MODEL', 'whisper-1'),
43
+ timeoutMs: envInt('STT_TIMEOUT_MS', 30_000),
44
+ },
45
+ tts: {
46
+ baseUrl: env('TTS_BASE_URL', 'https://api.openai.com').replace(/\/$/, ''),
47
+ apiKey: resolveApiKey('TTS_API_KEY'),
48
+ model: env('TTS_MODEL', 'tts-1'),
49
+ voice: env('TTS_VOICE', 'alloy'),
50
+ timeoutMs: envInt('TTS_TIMEOUT_MS', 30_000),
51
+ },
52
+ llm: {
53
+ baseUrl: env('LLM_BASE_URL', 'http://localhost:11434').replace(/\/$/, ''),
54
+ apiKey: env('LLM_API_KEY', 'ollama'),
55
+ model: env('LLM_MODEL', 'qwen2.5:3b'),
56
+ timeoutMs: envInt('LLM_TIMEOUT_MS', 60_000),
57
+ systemPrompt: env('LLM_SYSTEM_PROMPT', 'You are a helpful voice assistant. Respond conversationally and concisely. Do not use markdown formatting.'),
58
+ },
59
+ }
package/src/index.test.ts ADDED
@@ -0,0 +1,95 @@
1
+ import { describe, expect, it } from "bun:test";
2
+ import VoiceChannel from "./index";
3
+
4
+ function mockGuardianFetch() {
5
+ const mockFetch = async () => {
6
+ return new Response(JSON.stringify({ answer: "hello back", sessionId: "s1" }), { status: 200 });
7
+ };
8
+ return mockFetch as unknown as typeof fetch;
9
+ }
10
+
11
+ function createHandler() {
12
+ const channel = new VoiceChannel();
13
+ Object.defineProperty(channel, "secret", { get: () => "test-secret" });
14
+ return channel.createFetch(mockGuardianFetch());
15
+ }
16
+
17
+ describe("voice channel health", () => {
18
+ it("GET /api/health returns 200 with STT/TTS config", async () => {
19
+ const handler = createHandler();
20
+ const resp = await handler(new Request("http://voice/api/health"));
21
+ expect(resp.status).toBe(200);
22
+ const body = (await resp.json()) as Record<string, unknown>;
23
+ expect(body.ok).toBe(true);
24
+ expect(body.service).toBe("channel-voice");
25
+ expect(body.stt).toBeDefined();
26
+ expect(body.tts).toBeDefined();
27
+ const stt = body.stt as Record<string, unknown>;
28
+ const tts = body.tts as Record<string, unknown>;
29
+ expect(stt.model).toBe("whisper-1");
30
+ expect(tts.model).toBe("tts-1");
31
+ expect(tts.voice).toBe("alloy");
32
+ });
33
+ });
34
+
35
+ describe("voice channel pipeline validation", () => {
36
+ it("POST /api/pipeline with no audio returns 400", async () => {
37
+ const handler = createHandler();
38
+ const form = new FormData();
39
+ const resp = await handler(
40
+ new Request("http://voice/api/pipeline", {
41
+ method: "POST",
42
+ body: form,
43
+ })
44
+ );
45
+ expect(resp.status).toBe(400);
46
+ const body = (await resp.json()) as Record<string, unknown>;
47
+ expect(body.error).toBe("Missing audio file or text");
48
+ });
49
+
50
+ it("POST /api/pipeline rejects oversized audio (>25MB)", async () => {
51
+ const handler = createHandler();
52
+ const form = new FormData();
53
+ // Create a file slightly over 25MB
54
+ const bigBuffer = new Uint8Array(26 * 1024 * 1024);
55
+ form.append("audio", new File([bigBuffer], "big.wav", { type: "audio/wav" }));
56
+ const resp = await handler(
57
+ new Request("http://voice/api/pipeline", {
58
+ method: "POST",
59
+ body: form,
60
+ })
61
+ );
62
+ expect(resp.status).toBe(413);
63
+ const body = (await resp.json()) as Record<string, unknown>;
64
+ expect(body.error).toContain("max 25MB");
65
+ });
66
+ });
67
+
68
+ describe("voice channel static files", () => {
69
+ it("GET / returns index.html", async () => {
70
+ const handler = createHandler();
71
+ const resp = await handler(new Request("http://voice/"));
72
+ expect(resp.status).toBe(200);
73
+ expect(resp.headers.get("Content-Type")).toContain("text/html");
74
+ });
75
+
76
+ it("GET /nonexistent returns 404", async () => {
77
+ const handler = createHandler();
78
+ const resp = await handler(new Request("http://voice/nonexistent.xyz"));
79
+ expect(resp.status).toBe(404);
80
+ });
81
+
82
+ it("GET with path traversal returns 403", async () => {
83
+ // URL parser normalizes ".." out of paths, so we call route() directly
84
+ // with a crafted URL to test the defense-in-depth traversal guard.
85
+ const channel = new VoiceChannel();
86
+ Object.defineProperty(channel, "secret", { get: () => "test-secret" });
87
+ const req = new Request("http://voice/etc/passwd", { method: "GET" });
88
+ const url = new URL("http://voice/../../etc/passwd");
89
+ // Override pathname to contain traversal (URL normalizes it away)
90
+ Object.defineProperty(url, "pathname", { value: "/../../etc/passwd" });
91
+ const resp = await channel.route(req, url);
92
+ expect(resp).not.toBeNull();
93
+ expect(resp!.status).toBe(403);
94
+ });
95
+ });
package/src/index.ts ADDED
@@ -0,0 +1,193 @@
1
+ /**
2
+ * OpenPalm Channel Voice — Voice-driven conversational channel.
3
+ *
4
+ * Receives audio, transcribes it (STT), forwards the transcript to the
5
+ * guardian via the channels SDK, gets the LLM response, synthesizes it
6
+ * to audio (TTS), and returns everything.
7
+ *
8
+ * Endpoints:
9
+ * POST /api/pipeline — Full voice pipeline (audio in -> text + audio out)
10
+ * GET /api/health — Health check with STT/TTS config info
11
+ * GET /* — Static file serving from web/ directory
12
+ */
13
+
14
+ import { extname, join, resolve } from 'node:path'
15
+ import { BaseChannel, type HandleResult, createLogger } from '@openpalm/channels-sdk'
16
+ import type { GuardianSuccessResponse } from '@openpalm/channels-sdk'
17
+ import { config } from './config'
18
+ import { transcribe, synthesize, chatCompletion } from './providers'
19
+
20
+ // ── MIME types for static file serving ──────────────────────────────────
21
+
22
+ const MIME_TYPES: Record<string, string> = {
23
+ '.html': 'text/html; charset=utf-8',
24
+ '.css': 'text/css; charset=utf-8',
25
+ '.js': 'text/javascript; charset=utf-8',
26
+ '.json': 'application/json; charset=utf-8',
27
+ '.webmanifest': 'application/manifest+json; charset=utf-8',
28
+ '.png': 'image/png',
29
+ '.svg': 'image/svg+xml; charset=utf-8',
30
+ '.ico': 'image/x-icon',
31
+ }
32
+
33
+ // ── Channel ─────────────────────────────────────────────────────────────
34
+
35
+ export default class VoiceChannel extends BaseChannel {
36
+ name = 'voice'
37
+
38
+ async route(req: Request, url: URL): Promise<Response | null> {
39
+ // POST /api/pipeline — full voice pipeline
40
+ if (url.pathname === '/api/pipeline' && req.method === 'POST') {
41
+ return this.handlePipeline(req)
42
+ }
43
+
44
+ // GET /api/health — health check with provider info
45
+ if (url.pathname === '/api/health' && req.method === 'GET') {
46
+ return this.json(200, {
47
+ ok: true,
48
+ service: 'channel-voice',
49
+ stt: { model: config.stt.model, configured: !!config.stt.apiKey },
50
+ tts: { model: config.tts.model, voice: config.tts.voice, configured: !!config.tts.apiKey },
51
+ llm: { model: config.llm.model, configured: !!config.llm.apiKey },
52
+ })
53
+ }
54
+
55
+ // GET /* — serve static files from web/ directory
56
+ if (req.method === 'GET' || req.method === 'HEAD') {
57
+ return this.serveStatic(req, url)
58
+ }
59
+
60
+ return null
61
+ }
62
+
63
+ // ── Pipeline ────────────────────────────────────────────────────────
64
+
65
+ private async handlePipeline(req: Request): Promise<Response> {
66
+ // Parse FormData
67
+ let form: FormData
68
+ try {
69
+ form = await req.formData()
70
+ } catch {
71
+ return this.json(400, { error: 'Invalid form data' })
72
+ }
73
+
74
+ const audioFile = form.get('audio') ?? form.get('file')
75
+ const textField = form.get('text')
76
+
77
+ // Must provide either audio or text
78
+ if (!(audioFile instanceof File) && !textField) {
79
+ return this.json(400, { error: 'Missing audio file or text' })
80
+ }
81
+ if (audioFile instanceof File && audioFile.size > 25 * 1024 * 1024) {
82
+ return this.json(413, { error: 'Audio too large (max 25MB)' })
83
+ }
84
+
85
+ const userId = req.headers.get('x-forwarded-for')
86
+ || req.headers.get('x-real-ip')
87
+ || 'voice-user'
88
+
89
+ // Step 1: STT — transcribe audio, or use provided text (browser STT fallback)
90
+ let transcript: string
91
+ if (typeof textField === 'string' && textField.trim()) {
92
+ transcript = textField.trim()
93
+ } else if (audioFile instanceof File) {
94
+ if (!config.stt.apiKey) {
95
+ return this.json(400, { error: 'STT not configured', code: 'stt_not_configured' })
96
+ }
97
+ try {
98
+ transcript = await transcribe(audioFile)
99
+ } catch (err) {
100
+ this.log('error', 'STT failed', { error: (err as Error).message })
101
+ return this.json(502, { error: `Transcription failed: ${(err as Error).message}`, code: 'stt_error' })
102
+ }
103
+ } else {
104
+ transcript = ''
105
+ }
106
+
107
+ if (!transcript.trim()) {
108
+ return this.json(200, { transcript: '', response: '', audio: null })
109
+ }
110
+
111
+ // Step 2: Forward transcript to guardian, fall back to direct LLM
112
+ let answer: string
113
+ try {
114
+ const guardianResp = await this.forward({ userId, text: transcript })
115
+
116
+ if (!guardianResp.ok) {
117
+ this.log('error', 'Guardian error', { status: guardianResp.status })
118
+ throw new Error(`Guardian error (${guardianResp.status})`)
119
+ }
120
+
121
+ const data = (await guardianResp.json()) as GuardianSuccessResponse
122
+ answer = data.answer ?? ''
123
+ } catch (err) {
124
+ this.log('warn', 'Guardian unavailable, trying direct LLM', { error: (err as Error).message })
125
+ try {
126
+ answer = await chatCompletion(transcript)
127
+ } catch (llmErr) {
128
+ this.log('error', 'LLM fallback also failed', { error: (llmErr as Error).message })
129
+ return this.json(502, { error: 'No LLM available (guardian down, no direct LLM key configured)' })
130
+ }
131
+ }
132
+
133
+ // Step 3: TTS — synthesize response to audio (non-fatal)
134
+ const audio = await synthesize(answer).catch((err) => {
135
+ this.log('warn', 'TTS failed', { error: (err as Error).message })
136
+ return null
137
+ })
138
+
139
+ return this.json(200, { transcript, response: answer, audio })
140
+ }
141
+
142
+ // ── Static file serving ─────────────────────────────────────────────
143
+
144
+ private async serveStatic(_req: Request, url: URL): Promise<Response> {
145
+ const pathname = url.pathname === '/' ? '/index.html' : url.pathname
146
+ const filePath = resolve(join(config.server.webRoot, pathname.replace(/^\/+/, '')))
147
+
148
+ // Prevent path traversal
149
+ if (!filePath.startsWith(config.server.webRoot)) {
150
+ return new Response('Forbidden', { status: 403 })
151
+ }
152
+
153
+ const file = Bun.file(filePath)
154
+ if (!(await file.exists())) {
155
+ // SPA fallback: serve index.html for HTML navigation requests
156
+ if (_req.headers.get('accept')?.includes('text/html')) {
157
+ const indexPath = join(config.server.webRoot, 'index.html')
158
+ const indexFile = Bun.file(indexPath)
159
+ if (await indexFile.exists()) {
160
+ return new Response(indexFile, {
161
+ headers: { 'Content-Type': 'text/html; charset=utf-8', 'Cache-Control': 'no-cache' },
162
+ })
163
+ }
164
+ }
165
+ return new Response('Not found', { status: 404 })
166
+ }
167
+
168
+ const ext = extname(filePath).toLowerCase()
169
+ const contentType = MIME_TYPES[ext] || 'application/octet-stream'
170
+ const isVolatile = ext === '.html' || ext === '.webmanifest' || pathname === '/sw.js'
171
+ const cacheControl = isVolatile ? 'no-cache' : 'public, max-age=31536000, immutable'
172
+
173
+ return new Response(file, {
174
+ headers: { 'Content-Type': contentType, 'Cache-Control': cacheControl },
175
+ })
176
+ }
177
+
178
+ // handleRequest is not used — all logic is in route()
179
+ async handleRequest(_req: Request): Promise<HandleResult | null> {
180
+ return null
181
+ }
182
+ }
183
+
184
+ // Self-start when run directly (not via channel entrypoint)
185
+ if (import.meta.main) {
186
+ const log = createLogger('channel-voice')
187
+ log.info('config', {
188
+ stt: config.stt.apiKey ? `${config.stt.baseUrl} (${config.stt.model})` : 'not configured — browser fallback',
189
+ tts: config.tts.apiKey ? `${config.tts.baseUrl} (${config.tts.model}, ${config.tts.voice})` : 'not configured — browser fallback',
190
+ })
191
+ const channel = new VoiceChannel()
192
+ channel.start()
193
+ }
package/src/providers.ts ADDED
@@ -0,0 +1,156 @@
1
+ /**
2
+ * STT and TTS API calls. Both use OpenAI-compatible APIs.
3
+ */
4
+
5
+ import { createLogger } from '@openpalm/channels-sdk'
6
+ import { config } from './config'
7
+
8
+ const log = createLogger('channel-voice')
9
+
10
+ // ── Timeout helper ──────────────────────────────────────────────────────
11
+
12
+ async function fetchWithTimeout(url: string, init: RequestInit, timeoutMs: number): Promise<Response> {
13
+ const controller = new AbortController()
14
+ const timer = setTimeout(() => controller.abort(), timeoutMs)
15
+ try {
16
+ return await fetch(url, { ...init, signal: controller.signal })
17
+ } catch (err) {
18
+ if ((err as Error).name === 'AbortError') {
19
+ throw new Error(`Request timed out after ${timeoutMs}ms`)
20
+ }
21
+ throw err
22
+ } finally {
23
+ clearTimeout(timer)
24
+ }
25
+ }
26
+
27
+ // ── STT ─────────────────────────────────────────────────────────────────
28
+
29
+ /**
30
+ * Transcribe audio via OpenAI-compatible STT API.
31
+ * Accepts the raw File from the client's FormData.
32
+ */
33
+ export async function transcribe(audioFile: File): Promise<string> {
34
+ const form = new FormData()
35
+ form.set('model', config.stt.model)
36
+ form.set('file', audioFile, audioFile.name || 'audio.webm')
37
+
38
+ const res = await fetchWithTimeout(
39
+ `${config.stt.baseUrl}/v1/audio/transcriptions`,
40
+ {
41
+ method: 'POST',
42
+ headers: { Authorization: `Bearer ${config.stt.apiKey}` },
43
+ body: form,
44
+ },
45
+ config.stt.timeoutMs,
46
+ )
47
+
48
+ if (!res.ok) {
49
+ const body = await res.text().catch(() => '')
50
+ throw new Error(`STT failed (${res.status}): ${body || res.statusText}`)
51
+ }
52
+
53
+ const data = (await res.json()) as { text?: string }
54
+ return data.text || ''
55
+ }
56
+
57
+ // ── TTS ─────────────────────────────────────────────────────────────────
58
+
59
+ /**
60
+ * Synthesize text to audio via OpenAI-compatible TTS API.
61
+ * Returns base64-encoded mp3 string, or null if TTS is not configured or fails.
62
+ * TTS failure is non-fatal — the client still gets the text response.
63
+ */
64
+ /** Strip markdown syntax so TTS reads clean prose. */
65
+ function stripMarkdown(text: string): string {
66
+ return text
67
+ .replace(/```[\s\S]*?```/g, '') // remove code blocks
68
+ .replace(/`([^`]+)`/g, '$1') // inline code → plain text
69
+ .replace(/\*\*([^*]+)\*\*/g, '$1') // bold → plain
70
+ .replace(/\*([^*]+)\*/g, '$1') // italic → plain
71
+ .replace(/^#{1,6}\s+/gm, '') // headings → plain
72
+ .replace(/^\s*[-*+]\s+/gm, '') // list markers → plain
73
+ .replace(/^\s*\d+\.\s+/gm, '') // numbered lists → plain
74
+ .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links → text only
75
+ .replace(/\n{3,}/g, '\n\n') // collapse excess newlines
76
+ .trim()
77
+ }
78
+
79
+ export async function synthesize(text: string): Promise<string | null> {
80
+ if (!text.trim() || !config.tts.apiKey) return null
81
+
82
+ const cleanText = stripMarkdown(text)
83
+ if (!cleanText) return null
84
+
85
+ let res: Response
86
+ try {
87
+ res = await fetchWithTimeout(
88
+ `${config.tts.baseUrl}/v1/audio/speech`,
89
+ {
90
+ method: 'POST',
91
+ headers: {
92
+ Authorization: `Bearer ${config.tts.apiKey}`,
93
+ 'Content-Type': 'application/json',
94
+ },
95
+ body: JSON.stringify({
96
+ model: config.tts.model,
97
+ input: cleanText,
98
+ voice: config.tts.voice,
99
+ response_format: 'mp3',
100
+ }),
101
+ },
102
+ config.tts.timeoutMs,
103
+ )
104
+ } catch (err) {
105
+ log.error('TTS request error', { error: (err as Error).message })
106
+ return null
107
+ }
108
+
109
+ if (!res.ok) {
110
+ const body = await res.text().catch(() => '')
111
+ log.error('TTS API error', { status: res.status, body: body || res.statusText })
112
+ return null
113
+ }
114
+
115
+ const buffer = await res.arrayBuffer()
116
+ return Buffer.from(buffer).toString('base64')
117
+ }
118
+
119
+ // ── LLM (direct fallback when guardian is unavailable) ─────────────────
120
+
121
+ /**
122
+ * Direct LLM call via OpenAI-compatible chat completions API.
123
+ * Used as fallback when the guardian/assistant pipeline is unreachable.
124
+ */
125
+ export async function chatCompletion(prompt: string): Promise<string> {
126
+ if (!config.llm.apiKey) throw new Error('No LLM API key configured and guardian unavailable')
127
+
128
+ const res = await fetchWithTimeout(
129
+ `${config.llm.baseUrl}/v1/chat/completions`,
130
+ {
131
+ method: 'POST',
132
+ headers: {
133
+ Authorization: `Bearer ${config.llm.apiKey}`,
134
+ 'Content-Type': 'application/json',
135
+ },
136
+ body: JSON.stringify({
137
+ model: config.llm.model,
138
+ messages: [
139
+ { role: 'system', content: config.llm.systemPrompt },
140
+ { role: 'user', content: prompt },
141
+ ],
142
+ }),
143
+ },
144
+ config.llm.timeoutMs,
145
+ )
146
+
147
+ if (!res.ok) {
148
+ const body = await res.text().catch(() => '')
149
+ throw new Error(`LLM failed (${res.status}): ${body || res.statusText}`)
150
+ }
151
+
152
+ const data = (await res.json()) as { choices?: Array<{ message?: { content?: string } }> }
153
+ const text = data.choices?.[0]?.message?.content
154
+ if (!text) throw new Error('Empty response from LLM')
155
+ return text
156
+ }