@openpalm/channel-voice 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +147 -0
- package/package.json +4 -4
- package/src/config.ts +31 -14
- package/src/index.ts +14 -11
- package/src/providers.ts +30 -23
- package/web/sw.js +9 -4
package/README.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# @openpalm/channel-voice
|
|
2
|
+
|
|
3
|
+
Voice-driven conversational channel for [OpenPalm](https://github.com/itlackey/openpalm). Provides a web-based recording interface with a server-side pipeline that chains STT, LLM, and TTS using OpenAI-compatible APIs.
|
|
4
|
+
|
|
5
|
+
## How it works
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
mic → STT → LLM → TTS → speaker
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
1. User speaks into the microphone (browser captures audio)
|
|
12
|
+
2. Audio is transcribed to text (server STT or browser Speech Recognition)
|
|
13
|
+
3. Text is forwarded to the assistant via the guardian (or direct LLM fallback)
|
|
14
|
+
4. Response is synthesized to audio (server TTS or browser speechSynthesis)
|
|
15
|
+
5. Audio plays back to the user
|
|
16
|
+
|
|
17
|
+
Every step has a browser fallback — the channel works with zero API keys using only the Web Speech API.
|
|
18
|
+
|
|
19
|
+
## Quick start
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Install dependencies
|
|
23
|
+
bun install
|
|
24
|
+
|
|
25
|
+
# Run locally (defaults to Ollama at localhost:11434)
|
|
26
|
+
bun run dev
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Open `http://localhost:8090` in your browser. Tap the microphone or press Space to start talking.
|
|
30
|
+
|
|
31
|
+
## Configuration
|
|
32
|
+
|
|
33
|
+
Copy `.env.example` to `.env` and adjust as needed. All settings use OpenAI-compatible API formats.
|
|
34
|
+
|
|
35
|
+
### LLM (direct fallback)
|
|
36
|
+
|
|
37
|
+
When the guardian is unavailable (e.g. running outside Docker), the channel calls the LLM directly.
|
|
38
|
+
|
|
39
|
+
| Variable | Default | Description |
|
|
40
|
+
|----------|---------|-------------|
|
|
41
|
+
| `LLM_BASE_URL` | `http://localhost:11434` | LLM API base URL (Ollama default) |
|
|
42
|
+
| `LLM_API_KEY` | `ollama` | API key |
|
|
43
|
+
| `LLM_MODEL` | `qwen2.5:3b` | Model name |
|
|
44
|
+
| `LLM_SYSTEM_PROMPT` | *(conversational)* | System prompt for voice responses |
|
|
45
|
+
| `LLM_TIMEOUT_MS` | `60000` | Request timeout |
|
|
46
|
+
|
|
47
|
+
### STT (Speech-to-Text)
|
|
48
|
+
|
|
49
|
+
Server-side transcription. If not configured, the browser's `SpeechRecognition` API is used.
|
|
50
|
+
|
|
51
|
+
| Variable | Default | Description |
|
|
52
|
+
|----------|---------|-------------|
|
|
53
|
+
| `STT_BASE_URL` | *(empty)* | STT API base URL |
|
|
54
|
+
| `STT_API_KEY` | *(empty)* | API key |
|
|
55
|
+
| `STT_MODEL` | `whisper-1` | Model name |
|
|
56
|
+
| `STT_TIMEOUT_MS` | `30000` | Request timeout |
|
|
57
|
+
|
|
58
|
+
### TTS (Text-to-Speech)
|
|
59
|
+
|
|
60
|
+
Server-side speech synthesis. If not configured, the browser's `speechSynthesis` API is used.
|
|
61
|
+
|
|
62
|
+
| Variable | Default | Description |
|
|
63
|
+
|----------|---------|-------------|
|
|
64
|
+
| `TTS_BASE_URL` | *(empty)* | TTS API base URL |
|
|
65
|
+
| `TTS_API_KEY` | *(empty)* | API key |
|
|
66
|
+
| `TTS_MODEL` | `tts-1` | Model name |
|
|
67
|
+
| `TTS_VOICE` | `alloy` | Voice name |
|
|
68
|
+
| `TTS_TIMEOUT_MS` | `30000` | Request timeout |
|
|
69
|
+
|
|
70
|
+
### Server
|
|
71
|
+
|
|
72
|
+
| Variable | Default | Description |
|
|
73
|
+
|----------|---------|-------------|
|
|
74
|
+
| `PORT` | `8186` | HTTP server port |
|
|
75
|
+
| `GUARDIAN_URL` | `http://guardian:8080` | Guardian service URL (Docker) |
|
|
76
|
+
| `CHANNEL_VOICE_SECRET` | *(required)* | HMAC secret for guardian signing |
|
|
77
|
+
| `OPENAI_API_KEY` | *(empty)* | Shared fallback key for STT/TTS (used when `STT_API_KEY`/`TTS_API_KEY` is unset or empty) |
|
|
78
|
+
|
|
79
|
+
## Docker Compose
|
|
80
|
+
|
|
81
|
+
The voice channel runs in the unified `openpalm/channel` image. Add the registry overlay to your stack:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# Copy the overlay
|
|
85
|
+
cp registry/channels/voice.yml ~/.config/openpalm/channels/
|
|
86
|
+
|
|
87
|
+
# Restart the stack
|
|
88
|
+
docker compose -f docker-compose.yml -f channels/voice.yml up -d
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The web UI is served at the channel's port (default 8186).
|
|
92
|
+
|
|
93
|
+
## API
|
|
94
|
+
|
|
95
|
+
### `GET /api/health`
|
|
96
|
+
|
|
97
|
+
Returns service status and provider configuration.
|
|
98
|
+
|
|
99
|
+
```json
|
|
100
|
+
{
|
|
101
|
+
"ok": true,
|
|
102
|
+
"service": "channel-voice",
|
|
103
|
+
"stt": { "model": "whisper-1", "configured": false },
|
|
104
|
+
"tts": { "model": "tts-1", "voice": "alloy", "configured": false },
|
|
105
|
+
"llm": { "model": "qwen2.5:3b", "configured": true }
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### `POST /api/pipeline`
|
|
110
|
+
|
|
111
|
+
Full voice pipeline. Accepts `multipart/form-data` with either:
|
|
112
|
+
|
|
113
|
+
- `audio` — audio file (server STT transcribes it)
|
|
114
|
+
- `text` — pre-transcribed text (browser STT path)
|
|
115
|
+
|
|
116
|
+
Response:
|
|
117
|
+
|
|
118
|
+
```json
|
|
119
|
+
{
|
|
120
|
+
"transcript": "What is the capital of France?",
|
|
121
|
+
"response": "The capital of France is Paris.",
|
|
122
|
+
"audio": "<base64 mp3 or null>"
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Features
|
|
127
|
+
|
|
128
|
+
- **Browser fallback** — Works without any API keys using Web Speech APIs
|
|
129
|
+
- **Continuous listening** — Toggle auto-restart to keep the mic open between responses
|
|
130
|
+
- **Markdown rendering** — AI responses render bold, italic, code blocks in the UI
|
|
131
|
+
- **Markdown stripping** — TTS reads clean prose, not syntax characters
|
|
132
|
+
- **LLM fallback** — Direct LLM call when the guardian/assistant is unreachable
|
|
133
|
+
- **PWA** — Installable with offline shell caching
|
|
134
|
+
- **Accessible** — Keyboard nav (Space to toggle), screen reader announcements, focus outlines
|
|
135
|
+
|
|
136
|
+
## Development
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
bun run dev # Start with hot reload (port 8090)
|
|
140
|
+
bun run test # Unit tests (bun:test)
|
|
141
|
+
bun run test:e2e # Playwright e2e tests (22 tests)
|
|
142
|
+
bun run typecheck # TypeScript check
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
[MPL-2.0](https://www.mozilla.org/en-US/MPL/2.0/)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@openpalm/channel-voice",
|
|
3
|
-
"version": "0.9.
|
|
3
|
+
"version": "0.9.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"license": "MPL-2.0",
|
|
6
6
|
"repository": {
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"url": "https://github.com/itlackey/openpalm",
|
|
9
9
|
"directory": "packages/channel-voice"
|
|
10
10
|
},
|
|
11
|
+
"access": "public",
|
|
11
12
|
"main": "src/index.ts",
|
|
12
13
|
"files": [
|
|
13
14
|
"src",
|
|
@@ -15,8 +16,7 @@
|
|
|
15
16
|
],
|
|
16
17
|
"scripts": {
|
|
17
18
|
"start": "bun run src/index.ts",
|
|
18
|
-
"dev": "
|
|
19
|
-
"dev:unset": "unset STT_API_KEY && unset OPENAI_API_KEY && export CHANNEL_VOICE_SECRET=105a158d326fa54e569b234d4458ada2 && export PORT=8090 && bun --watch run src/index.ts",
|
|
19
|
+
"dev": "CHANNEL_VOICE_SECRET=test-secret bun --watch run src/index.ts",
|
|
20
20
|
"typecheck": "tsc --noEmit",
|
|
21
21
|
"test": "bun test src/",
|
|
22
22
|
"test:e2e": "npx playwright test --config=playwright.config.ts"
|
|
@@ -28,4 +28,4 @@
|
|
|
28
28
|
"@openpalm/channels-sdk": ">=0.8.0 <1.0.0",
|
|
29
29
|
"@playwright/test": "^1.58.2"
|
|
30
30
|
}
|
|
31
|
-
}
|
|
31
|
+
}
|
package/src/config.ts
CHANGED
|
@@ -7,52 +7,69 @@ import { resolve } from 'node:path'
|
|
|
7
7
|
|
|
8
8
|
interface Config {
|
|
9
9
|
server: { webRoot: string }
|
|
10
|
-
stt: { baseUrl: string; apiKey: string; model: string; timeoutMs: number }
|
|
11
|
-
tts: { baseUrl: string; apiKey: string; model: string; voice: string; timeoutMs: number }
|
|
10
|
+
stt: { baseUrl: string; apiKey: string; model: string; timeoutMs: number; configured: boolean }
|
|
11
|
+
tts: { baseUrl: string; apiKey: string; model: string; voice: string; timeoutMs: number; configured: boolean }
|
|
12
12
|
llm: { baseUrl: string; apiKey: string; model: string; timeoutMs: number; systemPrompt: string }
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
// env uses ?? so an explicit empty value (KEY=) clears the default.
|
|
16
|
+
// envOrDefault uses || so empty strings still get the fallback (for models, voices, etc).
|
|
15
17
|
function env(key: string, fallback = ''): string {
|
|
18
|
+
return Bun.env[key] ?? fallback
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
function envOrDefault(key: string, fallback: string): string {
|
|
16
22
|
return Bun.env[key] || fallback
|
|
17
23
|
}
|
|
18
24
|
|
|
19
25
|
function envInt(key: string, fallback: number): number {
|
|
20
26
|
const v = Bun.env[key]
|
|
21
|
-
if (
|
|
27
|
+
if (v === undefined || v === '') return fallback
|
|
22
28
|
const n = parseInt(v, 10)
|
|
23
29
|
return Number.isNaN(n) ? fallback : n
|
|
24
30
|
}
|
|
25
31
|
|
|
26
32
|
// Resolve API key: check dedicated key first, then shared OPENAI_API_KEY.
|
|
27
|
-
//
|
|
28
|
-
//
|
|
33
|
+
// Falls back to OPENAI_API_KEY only when the dedicated key is absent or
|
|
34
|
+
// explicitly empty; the result is '' (keyless provider) only if OPENAI_API_KEY is also unset or empty.
|
|
29
35
|
function resolveApiKey(dedicatedKey: string): string {
|
|
30
36
|
const dedicated = Bun.env[dedicatedKey]
|
|
31
37
|
if (dedicated !== undefined && dedicated !== '') return dedicated
|
|
32
|
-
return Bun.env.OPENAI_API_KEY
|
|
38
|
+
return Bun.env.OPENAI_API_KEY ?? ''
|
|
33
39
|
}
|
|
34
40
|
|
|
41
|
+
// STT/TTS are considered "configured" when a base URL is set (even without
|
|
42
|
+
// a key — local providers like whisper-local, kokoro, piper are keyless).
|
|
43
|
+
function isProviderConfigured(baseUrl: string): boolean {
|
|
44
|
+
return baseUrl !== ''
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const sttBaseUrl = env('STT_BASE_URL').replace(/\/$/, '')
|
|
48
|
+
const ttsBaseUrl = env('TTS_BASE_URL').replace(/\/$/, '')
|
|
49
|
+
|
|
35
50
|
export const config: Config = {
|
|
36
51
|
server: {
|
|
37
52
|
webRoot: resolve(env('WEB_ROOT', new URL('../web', import.meta.url).pathname)),
|
|
38
53
|
},
|
|
39
54
|
stt: {
|
|
40
|
-
baseUrl:
|
|
55
|
+
baseUrl: sttBaseUrl,
|
|
41
56
|
apiKey: resolveApiKey('STT_API_KEY'),
|
|
42
|
-
model:
|
|
57
|
+
model: envOrDefault('STT_MODEL', 'whisper-1'),
|
|
43
58
|
timeoutMs: envInt('STT_TIMEOUT_MS', 30_000),
|
|
59
|
+
configured: isProviderConfigured(sttBaseUrl),
|
|
44
60
|
},
|
|
45
61
|
tts: {
|
|
46
|
-
baseUrl:
|
|
62
|
+
baseUrl: ttsBaseUrl,
|
|
47
63
|
apiKey: resolveApiKey('TTS_API_KEY'),
|
|
48
|
-
model:
|
|
49
|
-
voice:
|
|
64
|
+
model: envOrDefault('TTS_MODEL', 'tts-1'),
|
|
65
|
+
voice: envOrDefault('TTS_VOICE', 'alloy'),
|
|
50
66
|
timeoutMs: envInt('TTS_TIMEOUT_MS', 30_000),
|
|
67
|
+
configured: isProviderConfigured(ttsBaseUrl),
|
|
51
68
|
},
|
|
52
69
|
llm: {
|
|
53
|
-
baseUrl:
|
|
54
|
-
apiKey:
|
|
55
|
-
model:
|
|
70
|
+
baseUrl: envOrDefault('LLM_BASE_URL', 'http://localhost:11434').replace(/\/$/, ''),
|
|
71
|
+
apiKey: envOrDefault('LLM_API_KEY', 'ollama'),
|
|
72
|
+
model: envOrDefault('LLM_MODEL', 'qwen2.5:3b'),
|
|
56
73
|
timeoutMs: envInt('LLM_TIMEOUT_MS', 60_000),
|
|
57
74
|
systemPrompt: env('LLM_SYSTEM_PROMPT', 'You are a helpful voice assistant. Respond conversationally and concisely. Do not use markdown formatting.'),
|
|
58
75
|
},
|
package/src/index.ts
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* GET /* — Static file serving from web/ directory
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
-
import { extname, join, resolve } from 'node:path'
|
|
14
|
+
import { extname, join, resolve, sep } from 'node:path'
|
|
15
15
|
import { BaseChannel, type HandleResult, createLogger } from '@openpalm/channels-sdk'
|
|
16
16
|
import type { GuardianSuccessResponse } from '@openpalm/channels-sdk'
|
|
17
17
|
import { config } from './config'
|
|
@@ -46,8 +46,8 @@ export default class VoiceChannel extends BaseChannel {
|
|
|
46
46
|
return this.json(200, {
|
|
47
47
|
ok: true,
|
|
48
48
|
service: 'channel-voice',
|
|
49
|
-
stt: { model: config.stt.model, configured:
|
|
50
|
-
tts: { model: config.tts.model, voice: config.tts.voice, configured:
|
|
49
|
+
stt: { model: config.stt.model, configured: config.stt.configured },
|
|
50
|
+
tts: { model: config.tts.model, voice: config.tts.voice, configured: config.tts.configured },
|
|
51
51
|
llm: { model: config.llm.model, configured: !!config.llm.apiKey },
|
|
52
52
|
})
|
|
53
53
|
}
|
|
@@ -82,8 +82,11 @@ export default class VoiceChannel extends BaseChannel {
|
|
|
82
82
|
return this.json(413, { error: 'Audio too large (max 25MB)' })
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
|
|
86
|
-
|
|
85
|
+
// Use client-provided ID (from x-client-id header or form field),
|
|
86
|
+
// falling back to x-forwarded-for (first IP only) or a default.
|
|
87
|
+
const clientId = (form.get('clientId') as string | null)
|
|
88
|
+
|| req.headers.get('x-client-id')
|
|
89
|
+
|| (req.headers.get('x-forwarded-for') || '').split(',')[0].trim()
|
|
87
90
|
|| 'voice-user'
|
|
88
91
|
|
|
89
92
|
// Step 1: STT — transcribe audio, or use provided text (browser STT fallback)
|
|
@@ -91,7 +94,7 @@ export default class VoiceChannel extends BaseChannel {
|
|
|
91
94
|
if (typeof textField === 'string' && textField.trim()) {
|
|
92
95
|
transcript = textField.trim()
|
|
93
96
|
} else if (audioFile instanceof File) {
|
|
94
|
-
if (!config.stt.
|
|
97
|
+
if (!config.stt.configured) {
|
|
95
98
|
return this.json(400, { error: 'STT not configured', code: 'stt_not_configured' })
|
|
96
99
|
}
|
|
97
100
|
try {
|
|
@@ -111,7 +114,7 @@ export default class VoiceChannel extends BaseChannel {
|
|
|
111
114
|
// Step 2: Forward transcript to guardian, fall back to direct LLM
|
|
112
115
|
let answer: string
|
|
113
116
|
try {
|
|
114
|
-
const guardianResp = await this.forward({ userId, text: transcript })
|
|
117
|
+
const guardianResp = await this.forward({ userId: clientId, text: transcript })
|
|
115
118
|
|
|
116
119
|
if (!guardianResp.ok) {
|
|
117
120
|
this.log('error', 'Guardian error', { status: guardianResp.status })
|
|
@@ -145,8 +148,8 @@ export default class VoiceChannel extends BaseChannel {
|
|
|
145
148
|
const pathname = url.pathname === '/' ? '/index.html' : url.pathname
|
|
146
149
|
const filePath = resolve(join(config.server.webRoot, pathname.replace(/^\/+/, '')))
|
|
147
150
|
|
|
148
|
-
// Prevent path traversal
|
|
149
|
-
if (!filePath.startsWith(config.server.webRoot)) {
|
|
151
|
+
// Prevent path traversal — ensure resolved path is strictly inside webRoot
|
|
152
|
+
if (!filePath.startsWith(config.server.webRoot + sep) && filePath !== config.server.webRoot) {
|
|
150
153
|
return new Response('Forbidden', { status: 403 })
|
|
151
154
|
}
|
|
152
155
|
|
|
@@ -185,8 +188,8 @@ export default class VoiceChannel extends BaseChannel {
|
|
|
185
188
|
if (import.meta.main) {
|
|
186
189
|
const log = createLogger('channel-voice')
|
|
187
190
|
log.info('config', {
|
|
188
|
-
stt: config.stt.
|
|
189
|
-
tts: config.tts.
|
|
191
|
+
stt: config.stt.configured ? `${config.stt.baseUrl} (${config.stt.model})` : 'not configured — browser fallback',
|
|
192
|
+
tts: config.tts.configured ? `${config.tts.baseUrl} (${config.tts.model}, ${config.tts.voice})` : 'not configured — browser fallback',
|
|
190
193
|
})
|
|
191
194
|
const channel = new VoiceChannel()
|
|
192
195
|
channel.start()
|
package/src/providers.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* STT and TTS API calls. Both use OpenAI-compatible APIs.
|
|
3
|
+
* Auth headers are only sent when an API key is configured,
|
|
4
|
+
* allowing keyless local providers (whisper-local, kokoro, piper).
|
|
3
5
|
*/
|
|
4
6
|
|
|
5
7
|
import { createLogger } from '@openpalm/channels-sdk'
|
|
@@ -7,7 +9,7 @@ import { config } from './config'
|
|
|
7
9
|
|
|
8
10
|
const log = createLogger('channel-voice')
|
|
9
11
|
|
|
10
|
-
// ──
|
|
12
|
+
// ── Helpers ────────────────────────────────────────────────────────────
|
|
11
13
|
|
|
12
14
|
async function fetchWithTimeout(url: string, init: RequestInit, timeoutMs: number): Promise<Response> {
|
|
13
15
|
const controller = new AbortController()
|
|
@@ -24,11 +26,31 @@ async function fetchWithTimeout(url: string, init: RequestInit, timeoutMs: numbe
|
|
|
24
26
|
}
|
|
25
27
|
}
|
|
26
28
|
|
|
29
|
+
/** Build auth headers only when a key is present (keyless providers get none). */
|
|
30
|
+
function authHeaders(apiKey: string): Record<string, string> {
|
|
31
|
+
return apiKey ? { Authorization: `Bearer ${apiKey}` } : {}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/** Strip markdown syntax so TTS reads clean prose. */
|
|
35
|
+
function stripMarkdown(text: string): string {
|
|
36
|
+
return text
|
|
37
|
+
.replace(/```[\s\S]*?```/g, '')
|
|
38
|
+
.replace(/`([^`]+)`/g, '$1')
|
|
39
|
+
.replace(/\*\*([^*]+)\*\*/g, '$1')
|
|
40
|
+
.replace(/\*([^*]+)\*/g, '$1')
|
|
41
|
+
.replace(/^#{1,6}\s+/gm, '')
|
|
42
|
+
.replace(/^\s*[-*+]\s+/gm, '')
|
|
43
|
+
.replace(/^\s*\d+\.\s+/gm, '')
|
|
44
|
+
.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
|
|
45
|
+
.replace(/\n{3,}/g, '\n\n')
|
|
46
|
+
.trim()
|
|
47
|
+
}
|
|
48
|
+
|
|
27
49
|
// ── STT ─────────────────────────────────────────────────────────────────
|
|
28
50
|
|
|
29
51
|
/**
|
|
30
52
|
* Transcribe audio via OpenAI-compatible STT API.
|
|
31
|
-
*
|
|
53
|
+
* Auth header is omitted for keyless providers (e.g. local whisper).
|
|
32
54
|
*/
|
|
33
55
|
export async function transcribe(audioFile: File): Promise<string> {
|
|
34
56
|
const form = new FormData()
|
|
@@ -39,7 +61,7 @@ export async function transcribe(audioFile: File): Promise<string> {
|
|
|
39
61
|
`${config.stt.baseUrl}/v1/audio/transcriptions`,
|
|
40
62
|
{
|
|
41
63
|
method: 'POST',
|
|
42
|
-
headers:
|
|
64
|
+
headers: authHeaders(config.stt.apiKey),
|
|
43
65
|
body: form,
|
|
44
66
|
},
|
|
45
67
|
config.stt.timeoutMs,
|
|
@@ -58,26 +80,11 @@ export async function transcribe(audioFile: File): Promise<string> {
|
|
|
58
80
|
|
|
59
81
|
/**
|
|
60
82
|
* Synthesize text to audio via OpenAI-compatible TTS API.
|
|
61
|
-
* Returns base64-encoded mp3
|
|
62
|
-
*
|
|
83
|
+
* Returns base64-encoded mp3, or null if TTS is not configured or fails.
|
|
84
|
+
* Auth header is omitted for keyless providers (e.g. kokoro, piper).
|
|
63
85
|
*/
|
|
64
|
-
/** Strip markdown syntax so TTS reads clean prose. */
|
|
65
|
-
function stripMarkdown(text: string): string {
|
|
66
|
-
return text
|
|
67
|
-
.replace(/```[\s\S]*?```/g, '') // remove code blocks
|
|
68
|
-
.replace(/`([^`]+)`/g, '$1') // inline code → plain text
|
|
69
|
-
.replace(/\*\*([^*]+)\*\*/g, '$1') // bold → plain
|
|
70
|
-
.replace(/\*([^*]+)\*/g, '$1') // italic → plain
|
|
71
|
-
.replace(/^#{1,6}\s+/gm, '') // headings → plain
|
|
72
|
-
.replace(/^\s*[-*+]\s+/gm, '') // list markers → plain
|
|
73
|
-
.replace(/^\s*\d+\.\s+/gm, '') // numbered lists → plain
|
|
74
|
-
.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links → text only
|
|
75
|
-
.replace(/\n{3,}/g, '\n\n') // collapse excess newlines
|
|
76
|
-
.trim()
|
|
77
|
-
}
|
|
78
|
-
|
|
79
86
|
export async function synthesize(text: string): Promise<string | null> {
|
|
80
|
-
if (!text.trim() || !config.tts.
|
|
87
|
+
if (!text.trim() || !config.tts.configured) return null
|
|
81
88
|
|
|
82
89
|
const cleanText = stripMarkdown(text)
|
|
83
90
|
if (!cleanText) return null
|
|
@@ -89,7 +96,7 @@ export async function synthesize(text: string): Promise<string | null> {
|
|
|
89
96
|
{
|
|
90
97
|
method: 'POST',
|
|
91
98
|
headers: {
|
|
92
|
-
|
|
99
|
+
...authHeaders(config.tts.apiKey),
|
|
93
100
|
'Content-Type': 'application/json',
|
|
94
101
|
},
|
|
95
102
|
body: JSON.stringify({
|
|
@@ -130,7 +137,7 @@ export async function chatCompletion(prompt: string): Promise<string> {
|
|
|
130
137
|
{
|
|
131
138
|
method: 'POST',
|
|
132
139
|
headers: {
|
|
133
|
-
|
|
140
|
+
...authHeaders(config.llm.apiKey),
|
|
134
141
|
'Content-Type': 'application/json',
|
|
135
142
|
},
|
|
136
143
|
body: JSON.stringify({
|
package/web/sw.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const CACHE = 'voice-
|
|
1
|
+
const CACHE = 'voice-v3'
|
|
2
2
|
const SHELL = ['/', '/index.html', '/styles.css', '/app.js', '/manifest.webmanifest']
|
|
3
3
|
|
|
4
4
|
self.addEventListener('install', (e) => {
|
|
@@ -14,13 +14,18 @@ self.addEventListener('activate', (e) => {
|
|
|
14
14
|
|
|
15
15
|
self.addEventListener('fetch', (e) => {
|
|
16
16
|
const url = new URL(e.request.url)
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
// Skip non-GET requests and API calls; every other GET request is handled below (no origin check is done here)
|
|
18
|
+
if (e.request.method !== 'GET' || url.pathname.startsWith('/api/')) return
|
|
19
|
+
// Network-first: update cache on success, serve from cache when offline
|
|
19
20
|
e.respondWith(
|
|
20
21
|
fetch(e.request).then((res) => {
|
|
21
22
|
const clone = res.clone()
|
|
22
23
|
caches.open(CACHE).then((c) => c.put(e.request, clone))
|
|
23
24
|
return res
|
|
24
|
-
}).catch(() =>
|
|
25
|
+
}).catch(() =>
|
|
26
|
+
caches.match(e.request).then((cached) =>
|
|
27
|
+
cached || new Response('Offline', { status: 503, headers: { 'Content-Type': 'text/plain' } })
|
|
28
|
+
)
|
|
29
|
+
)
|
|
25
30
|
)
|
|
26
31
|
})
|