@onmars/lunar-voice 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +13 -0
- package/package.json +31 -0
- package/src/__tests__/elevenlabs.test.ts +281 -0
- package/src/__tests__/whisper.test.ts +308 -0
- package/src/elevenlabs.ts +74 -0
- package/src/index.ts +2 -0
- package/src/whisper.ts +83 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 onMars Tech
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# @onmars/lunar-voice
|
|
2
|
+
|
|
3
|
+
Voice synthesis adapter (ElevenLabs TTS) for [Lunar](https://github.com/onmars-tech/lunar).
|
|
4
|
+
|
|
5
|
+
This package is used internally by `@onmars/lunar-cli`. Install the CLI instead:
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
bun install -g @onmars/lunar-cli
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## License
|
|
12
|
+
|
|
13
|
+
MIT — [onMars Tech](https://github.com/onmars-tech)
|
package/package.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@onmars/lunar-voice",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"main": "src/index.ts",
|
|
6
|
+
"types": "src/index.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/index.ts"
|
|
9
|
+
},
|
|
10
|
+
"files": ["src/", "LICENSE"],
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@onmars/lunar-core": "0.1.0"
|
|
13
|
+
},
|
|
14
|
+
"description": "Voice synthesis adapter for Lunar (ElevenLabs TTS)",
|
|
15
|
+
"author": "onMars Tech",
|
|
16
|
+
"license": "MIT",
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "https://github.com/onmars-tech/lunar",
|
|
20
|
+
"directory": "packages/voice"
|
|
21
|
+
},
|
|
22
|
+
"homepage": "https://github.com/onmars-tech/lunar",
|
|
23
|
+
"bugs": "https://github.com/onmars-tech/lunar/issues",
|
|
24
|
+
"keywords": ["lunar", "ai", "voice", "tts", "elevenlabs", "bun"],
|
|
25
|
+
"publishConfig": {
|
|
26
|
+
"access": "public"
|
|
27
|
+
},
|
|
28
|
+
"engines": {
|
|
29
|
+
"bun": ">=1.2"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
 * # ElevenLabs TTS Provider — Functional Specification
 *
 * ## Constructor
 * Stores config: apiKey, voiceId, modelId, language, stability, similarityBoost, speed.
 * Base URL: https://api.elevenlabs.io/v1
 *
 * ## init() / destroy()
 * init() logs initialization (no-op beyond that).
 * destroy() is a no-op.
 *
 * ## synthesize(text, options?)
 * POST to /text-to-speech/{voiceId} with:
 * - Headers: xi-api-key, Content-Type: application/json
 * - Body: text, model_id (default 'eleven_multilingual_v2'),
 *   language_code, voice_settings (stability, similarity_boost, speed)
 * - Options override config: voice (voiceId), speed, language
 * Returns: { audio: Buffer, format: 'mp3' }
 * Throws on non-OK response.
 *
 * ## health()
 * GET /user with xi-api-key header. Returns { ok: boolean, error? }.
 * Catches fetch errors gracefully.
 */
// NOTE(review): `beforeEach` is imported but never used in this file.
import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
import { type ElevenLabsConfig, ElevenLabsTTS } from '../elevenlabs'

// ─── Mock fetch ──────────────────────────────────────────────────

const originalFetch = globalThis.fetch

// Replaces the global fetch for the duration of one test; the afterEach
// hook below restores the real implementation after every test.
function mockFetch(handler: (url: string, init?: RequestInit) => Response | Promise<Response>) {
  globalThis.fetch = handler as any
}

afterEach(() => {
  globalThis.fetch = originalFetch
})

// ─── Shared fixtures ─────────────────────────────────────────────

// Minimal valid config; individual tests extend it via createProvider().
const baseConfig: ElevenLabsConfig = {
  apiKey: 'test-xi-api-key',
  voiceId: 'voice_abc123',
}

// Builds a provider from baseConfig plus optional per-test overrides.
function createProvider(overrides: Partial<ElevenLabsConfig> = {}): ElevenLabsTTS {
  return new ElevenLabsTTS({ ...baseConfig, ...overrides })
}

// ═══════════════════════════════════════════════════════════════════
// Constructor and metadata
// ═══════════════════════════════════════════════════════════════════

describe('ElevenLabsTTS — metadata', () => {
  it('has correct id and name', () => {
    const tts = createProvider()
    expect(tts.id).toBe('elevenlabs')
    expect(tts.name).toBe('ElevenLabs')
  })
})

// ═══════════════════════════════════════════════════════════════════
// init / destroy — lifecycle
// ═══════════════════════════════════════════════════════════════════

describe('ElevenLabsTTS — lifecycle', () => {
  it('init() resolves without error', async () => {
    const tts = createProvider()
    await tts.init() // should not throw
  })

  it('destroy() resolves without error', async () => {
    const tts = createProvider()
    await tts.destroy() // should not throw
  })
})

// ═══════════════════════════════════════════════════════════════════
// synthesize — TTS API call
// ═══════════════════════════════════════════════════════════════════

describe('ElevenLabsTTS — synthesize', () => {
  it('POSTs to correct URL with correct headers and body', async () => {
    let capturedUrl = ''
    let capturedInit: RequestInit | undefined

    mockFetch(async (url, init) => {
      capturedUrl = url
      capturedInit = init
      return new Response(new ArrayBuffer(100), { status: 200 })
    })

    const tts = createProvider()
    await tts.synthesize('Hello world')

    expect(capturedUrl).toBe('https://api.elevenlabs.io/v1/text-to-speech/voice_abc123')
    expect(capturedInit?.method).toBe('POST')

    const headers = capturedInit?.headers as Record<string, string>
    expect(headers['xi-api-key']).toBe('test-xi-api-key')
    expect(headers['Content-Type']).toBe('application/json')

    const body = JSON.parse(capturedInit?.body as string)
    expect(body.text).toBe('Hello world')
    expect(body.model_id).toBe('eleven_multilingual_v2')
  })

  it('uses custom modelId from config', async () => {
    let capturedBody: any

    mockFetch(async (_url, init) => {
      capturedBody = JSON.parse(init?.body as string)
      return new Response(new ArrayBuffer(10), { status: 200 })
    })

    const tts = createProvider({ modelId: 'eleven_turbo_v2_5' })
    await tts.synthesize('test')

    expect(capturedBody.model_id).toBe('eleven_turbo_v2_5')
  })

  it('uses config voice_settings defaults', async () => {
    let capturedBody: any

    mockFetch(async (_url, init) => {
      capturedBody = JSON.parse(init?.body as string)
      return new Response(new ArrayBuffer(10), { status: 200 })
    })

    const tts = createProvider({
      stability: 0.8,
      similarityBoost: 0.9,
      speed: 1.2,
      language: 'es',
    })
    await tts.synthesize('test')

    expect(capturedBody.voice_settings.stability).toBe(0.8)
    expect(capturedBody.voice_settings.similarity_boost).toBe(0.9)
    expect(capturedBody.voice_settings.speed).toBe(1.2)
    expect(capturedBody.language_code).toBe('es')
  })

  it('uses default voice_settings when config omits them', async () => {
    let capturedBody: any

    mockFetch(async (_url, init) => {
      capturedBody = JSON.parse(init?.body as string)
      return new Response(new ArrayBuffer(10), { status: 200 })
    })

    const tts = createProvider()
    await tts.synthesize('test')

    // Defaults hard-coded in elevenlabs.ts: 0.5 / 0.75 / 1.0.
    expect(capturedBody.voice_settings.stability).toBe(0.5)
    expect(capturedBody.voice_settings.similarity_boost).toBe(0.75)
    expect(capturedBody.voice_settings.speed).toBe(1.0)
  })

  it('options.voice overrides config voiceId', async () => {
    let capturedUrl = ''

    mockFetch(async (url) => {
      capturedUrl = url
      return new Response(new ArrayBuffer(10), { status: 200 })
    })

    const tts = createProvider()
    await tts.synthesize('test', { voice: 'voice_override' })

    expect(capturedUrl).toBe('https://api.elevenlabs.io/v1/text-to-speech/voice_override')
  })

  it('options.speed overrides config speed', async () => {
    let capturedBody: any

    mockFetch(async (_url, init) => {
      capturedBody = JSON.parse(init?.body as string)
      return new Response(new ArrayBuffer(10), { status: 200 })
    })

    const tts = createProvider({ speed: 1.0 })
    await tts.synthesize('test', { speed: 2.0 })

    expect(capturedBody.voice_settings.speed).toBe(2.0)
  })

  it('options.language overrides config language', async () => {
    let capturedBody: any

    mockFetch(async (_url, init) => {
      capturedBody = JSON.parse(init?.body as string)
      return new Response(new ArrayBuffer(10), { status: 200 })
    })

    const tts = createProvider({ language: 'en' })
    await tts.synthesize('test', { language: 'fr' })

    expect(capturedBody.language_code).toBe('fr')
  })

  it('returns { audio: Buffer, format: "mp3" }', async () => {
    const audioData = new Uint8Array([0x49, 0x44, 0x33]) // ID3 header
    mockFetch(async () => new Response(audioData.buffer, { status: 200 }))

    const tts = createProvider()
    const result = await tts.synthesize('test')

    expect(result.format).toBe('mp3')
    expect(result.audio).toBeInstanceOf(Buffer)
    expect(result.audio.length).toBe(3)
  })

  it('throws on non-OK response', async () => {
    mockFetch(async () => new Response('{"error":"quota_exceeded"}', { status: 429 }))

    const tts = createProvider()
    await expect(tts.synthesize('test')).rejects.toThrow('ElevenLabs API error 429')
  })

  it('includes response body in error message', async () => {
    mockFetch(async () => new Response('{"detail":"Invalid API key"}', { status: 401 }))

    const tts = createProvider()
    await expect(tts.synthesize('test')).rejects.toThrow('Invalid API key')
  })
})

// ═══════════════════════════════════════════════════════════════════
// health — Provider health check
// ═══════════════════════════════════════════════════════════════════

describe('ElevenLabsTTS — health', () => {
  it('returns { ok: true } on successful API call', async () => {
    mockFetch(async (url) => {
      expect(url).toBe('https://api.elevenlabs.io/v1/user')
      return new Response('{}', { status: 200 })
    })

    const tts = createProvider()
    const health = await tts.health()

    expect(health.ok).toBe(true)
  })

  it('sends xi-api-key header', async () => {
    let capturedHeaders: any

    mockFetch(async (_url, init) => {
      capturedHeaders = init?.headers
      return new Response('{}', { status: 200 })
    })

    const tts = createProvider()
    await tts.health()

    expect(capturedHeaders['xi-api-key']).toBe('test-xi-api-key')
  })

  it('returns { ok: false } on non-OK response', async () => {
    mockFetch(async () => new Response('Unauthorized', { status: 401 }))

    const tts = createProvider()
    const health = await tts.health()

    expect(health.ok).toBe(false)
  })

  it('returns { ok: false, error } on network error', async () => {
    mockFetch(async () => {
      throw new Error('DNS resolution failed')
    })

    const tts = createProvider()
    const health = await tts.health()

    expect(health.ok).toBe(false)
    expect(health.error).toBe('DNS resolution failed')
  })
})
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
/**
 * # Whisper STT Provider — Functional Specification
 *
 * ## Constructor
 * Stores config: apiKey, model (default 'whisper-1'), language (optional).
 * Base URL: https://api.openai.com/v1
 *
 * ## init() / destroy()
 * Both are no-ops (stateless HTTP API).
 *
 * ## transcribe(audio, options?)
 * POST multipart/form-data to /audio/transcriptions with:
 * - Authorization: Bearer <apiKey>
 * - FormData: file (Blob), model, language (optional), response_format: verbose_json
 * - Options override config: model, language
 * Returns: { text, language?, durationMs? }
 * Throws on non-OK response (truncated to 200 chars).
 *
 * ## health()
 * GET /models/whisper-1 with Authorization header.
 * Returns { ok: boolean, error?: string }.
 * Catches fetch errors gracefully.
 */
import { afterEach, describe, expect, it } from 'bun:test'
import { type WhisperConfig, WhisperSTT } from '../whisper'

// ─── Mock fetch ──────────────────────────────────────────────────

const originalFetch = globalThis.fetch

// Replaces the global fetch for the duration of one test; the afterEach
// hook below restores the real implementation after every test.
function mockFetch(handler: (url: string, init?: RequestInit) => Response | Promise<Response>) {
  globalThis.fetch = handler as any
}

afterEach(() => {
  globalThis.fetch = originalFetch
})

// ─── Shared fixtures ─────────────────────────────────────────────

// Minimal valid config; individual tests extend it via createProvider().
const baseConfig: WhisperConfig = {
  apiKey: 'sk-test-openai-key',
}

// Builds a provider from baseConfig plus optional per-test overrides.
function createProvider(overrides: Partial<WhisperConfig> = {}): WhisperSTT {
  return new WhisperSTT({ ...baseConfig, ...overrides })
}

// ═══════════════════════════════════════════════════════════════════
// Constructor and metadata
// ═══════════════════════════════════════════════════════════════════

describe('WhisperSTT — metadata', () => {
  it('has correct id and name', () => {
    const stt = createProvider()
    expect(stt.id).toBe('whisper')
    expect(stt.name).toBe('OpenAI Whisper')
  })
})

// ═══════════════════════════════════════════════════════════════════
// init / destroy — lifecycle
// ═══════════════════════════════════════════════════════════════════

describe('WhisperSTT — lifecycle', () => {
  it('init() resolves without error', async () => {
    const stt = createProvider()
    await stt.init()
  })

  it('destroy() resolves without error', async () => {
    const stt = createProvider()
    await stt.destroy()
  })
})

// ═══════════════════════════════════════════════════════════════════
// transcribe — STT API call
// ═══════════════════════════════════════════════════════════════════

describe('WhisperSTT — transcribe', () => {
  // Arbitrary bytes; the provider does not inspect audio content.
  const fakeAudio = Buffer.from([0x00, 0x01, 0x02, 0x03])

  it('POSTs to correct URL with Authorization header', async () => {
    let capturedUrl = ''
    let capturedHeaders: Record<string, string> = {}

    mockFetch(async (url, init) => {
      capturedUrl = url
      // Extract Authorization header from init
      const h = init?.headers as Record<string, string> | undefined
      if (h) capturedHeaders = h
      return new Response(JSON.stringify({ text: 'Hello', language: 'en', duration: 1.5 }), {
        status: 200,
      })
    })

    const stt = createProvider()
    await stt.transcribe(fakeAudio)

    expect(capturedUrl).toBe('https://api.openai.com/v1/audio/transcriptions')
    expect(capturedHeaders['Authorization']).toBe('Bearer sk-test-openai-key')
  })

  it('sends FormData with file, model, and response_format', async () => {
    let capturedBody: FormData | undefined

    mockFetch(async (_url, init) => {
      capturedBody = init?.body as any
      return new Response(JSON.stringify({ text: 'Hello' }), { status: 200 })
    })

    const stt = createProvider()
    await stt.transcribe(fakeAudio)

    expect(capturedBody).toBeInstanceOf(FormData)
    expect(capturedBody!.get('model')).toBe('whisper-1')
    expect(capturedBody!.get('response_format')).toBe('verbose_json')
    expect(capturedBody!.get('file')).toBeInstanceOf(Blob)
  })

  it('uses custom model from config', async () => {
    let capturedBody: FormData | undefined

    mockFetch(async (_url, init) => {
      capturedBody = init?.body as any
      return new Response(JSON.stringify({ text: 'test' }), { status: 200 })
    })

    const stt = createProvider({ model: 'whisper-large-v3' })
    await stt.transcribe(fakeAudio)

    expect(capturedBody!.get('model')).toBe('whisper-large-v3')
  })

  it('options.model overrides config model', async () => {
    let capturedBody: FormData | undefined

    mockFetch(async (_url, init) => {
      capturedBody = init?.body as any
      return new Response(JSON.stringify({ text: 'test' }), { status: 200 })
    })

    const stt = createProvider({ model: 'whisper-1' })
    await stt.transcribe(fakeAudio, { model: 'whisper-large-v3' })

    expect(capturedBody!.get('model')).toBe('whisper-large-v3')
  })

  it('includes language from config when set', async () => {
    let capturedBody: FormData | undefined

    mockFetch(async (_url, init) => {
      capturedBody = init?.body as any
      return new Response(JSON.stringify({ text: 'test' }), { status: 200 })
    })

    const stt = createProvider({ language: 'es' })
    await stt.transcribe(fakeAudio)

    expect(capturedBody!.get('language')).toBe('es')
  })

  it('options.language overrides config language', async () => {
    let capturedBody: FormData | undefined

    mockFetch(async (_url, init) => {
      capturedBody = init?.body as any
      return new Response(JSON.stringify({ text: 'test' }), { status: 200 })
    })

    const stt = createProvider({ language: 'en' })
    await stt.transcribe(fakeAudio, { language: 'ja' })

    expect(capturedBody!.get('language')).toBe('ja')
  })

  it('omits language when not configured', async () => {
    let capturedBody: FormData | undefined

    mockFetch(async (_url, init) => {
      capturedBody = init?.body as any
      return new Response(JSON.stringify({ text: 'test' }), { status: 200 })
    })

    const stt = createProvider()
    await stt.transcribe(fakeAudio)

    // FormData.get returns null for an absent field.
    expect(capturedBody!.get('language')).toBeNull()
  })

  it('returns text, language, and durationMs from response', async () => {
    mockFetch(
      async () =>
        new Response(
          JSON.stringify({ text: 'Transcribed text', language: 'en', duration: 3.456 }),
          { status: 200 },
        ),
    )

    const stt = createProvider()
    const result = await stt.transcribe(fakeAudio)

    expect(result.text).toBe('Transcribed text')
    expect(result.language).toBe('en')
    expect(result.durationMs).toBe(3456)
  })

  it('durationMs is undefined when duration is not in response', async () => {
    mockFetch(async () => new Response(JSON.stringify({ text: 'No duration' }), { status: 200 }))

    const stt = createProvider()
    const result = await stt.transcribe(fakeAudio)

    expect(result.text).toBe('No duration')
    expect(result.durationMs).toBeUndefined()
  })

  it('rounds durationMs to nearest millisecond', async () => {
    mockFetch(
      async () => new Response(JSON.stringify({ text: 'test', duration: 1.2345 }), { status: 200 }),
    )

    const stt = createProvider()
    const result = await stt.transcribe(fakeAudio)

    expect(result.durationMs).toBe(1235) // Math.round(1234.5)
  })

  it('throws on non-OK response with status and truncated body', async () => {
    const longError = 'x'.repeat(500)
    mockFetch(async () => new Response(longError, { status: 400 }))

    const stt = createProvider()
    await expect(stt.transcribe(fakeAudio)).rejects.toThrow('Whisper API error 400')
  })

  it('truncates error body to 200 characters', async () => {
    const longError = 'e'.repeat(500)
    mockFetch(async () => new Response(longError, { status: 500 }))

    const stt = createProvider()

    try {
      await stt.transcribe(fakeAudio)
      expect(true).toBe(false) // should not reach
    } catch (err: any) {
      // The error message includes "Whisper API error 500: " + 200 chars
      const bodyPart = err.message.split(': ').slice(1).join(': ')
      expect(bodyPart.length).toBeLessThanOrEqual(200)
    }
  })
})

// ═══════════════════════════════════════════════════════════════════
// health — Provider health check
// ═══════════════════════════════════════════════════════════════════

describe('WhisperSTT — health', () => {
  it('returns { ok: true } on successful API call', async () => {
    mockFetch(async (url) => {
      expect(url).toBe('https://api.openai.com/v1/models/whisper-1')
      return new Response('{}', { status: 200 })
    })

    const stt = createProvider()
    const health = await stt.health()

    expect(health.ok).toBe(true)
    expect(health.error).toBeUndefined()
  })

  it('sends correct Authorization header', async () => {
    let capturedHeaders: any

    mockFetch(async (_url, init) => {
      capturedHeaders = init?.headers
      return new Response('{}', { status: 200 })
    })

    const stt = createProvider()
    await stt.health()

    expect(capturedHeaders['Authorization']).toBe('Bearer sk-test-openai-key')
  })

  it('returns { ok: false, error } on non-OK response', async () => {
    mockFetch(async () => new Response('Unauthorized', { status: 401 }))

    const stt = createProvider()
    const health = await stt.health()

    expect(health.ok).toBe(false)
    expect(health.error).toBe('HTTP 401')
  })

  it('returns { ok: false, error } on network error', async () => {
    mockFetch(async () => {
      throw new Error('ECONNREFUSED')
    })

    const stt = createProvider()
    const health = await stt.health()

    expect(health.ok).toBe(false)
    expect(health.error).toBe('ECONNREFUSED')
  })
})
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import type { TTSOptions, TTSProvider, TTSResult } from '@onmars/lunar-core'
|
|
2
|
+
import { log } from '@onmars/lunar-core'
|
|
3
|
+
|
|
4
|
+
/** Configuration for the ElevenLabs text-to-speech provider. */
export interface ElevenLabsConfig {
  /** ElevenLabs API key; sent as the `xi-api-key` header. */
  apiKey: string
  /** Default voice ID for synthesis; overridable per call via options.voice. */
  voiceId: string
  /** Model ID (default: 'eleven_multilingual_v2'). */
  modelId?: string
  /** Default language code; overridable per call via options.language. */
  language?: string
  /** Voice stability setting (default: 0.5). */
  stability?: number
  /** Voice similarity boost setting (default: 0.75). */
  similarityBoost?: number
  /** Speech speed multiplier (default: 1.0); overridable per call. */
  speed?: number
}
|
|
13
|
+
|
|
14
|
+
export class ElevenLabsTTS implements TTSProvider {
|
|
15
|
+
readonly id = 'elevenlabs'
|
|
16
|
+
readonly name = 'ElevenLabs'
|
|
17
|
+
|
|
18
|
+
private baseUrl = 'https://api.elevenlabs.io/v1'
|
|
19
|
+
|
|
20
|
+
constructor(private config: ElevenLabsConfig) {}
|
|
21
|
+
|
|
22
|
+
async init(): Promise<void> {
|
|
23
|
+
log.info({ voiceId: this.config.voiceId }, 'ElevenLabs TTS initialized')
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
async destroy(): Promise<void> {}
|
|
27
|
+
|
|
28
|
+
async synthesize(text: string, options?: TTSOptions): Promise<TTSResult> {
|
|
29
|
+
const voiceId = options?.voice ?? this.config.voiceId
|
|
30
|
+
const url = `${this.baseUrl}/text-to-speech/${voiceId}`
|
|
31
|
+
|
|
32
|
+
const response = await fetch(url, {
|
|
33
|
+
method: 'POST',
|
|
34
|
+
headers: {
|
|
35
|
+
'xi-api-key': this.config.apiKey,
|
|
36
|
+
'Content-Type': 'application/json',
|
|
37
|
+
},
|
|
38
|
+
body: JSON.stringify({
|
|
39
|
+
text,
|
|
40
|
+
model_id: this.config.modelId ?? 'eleven_multilingual_v2',
|
|
41
|
+
language_code: options?.language ?? this.config.language,
|
|
42
|
+
voice_settings: {
|
|
43
|
+
stability: this.config.stability ?? 0.5,
|
|
44
|
+
similarity_boost: this.config.similarityBoost ?? 0.75,
|
|
45
|
+
speed: options?.speed ?? this.config.speed ?? 1.0,
|
|
46
|
+
},
|
|
47
|
+
}),
|
|
48
|
+
})
|
|
49
|
+
|
|
50
|
+
if (!response.ok) {
|
|
51
|
+
const err = await response.text()
|
|
52
|
+
throw new Error(`ElevenLabs API error ${response.status}: ${err}`)
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const arrayBuffer = await response.arrayBuffer()
|
|
56
|
+
const audio = Buffer.from(arrayBuffer)
|
|
57
|
+
|
|
58
|
+
return {
|
|
59
|
+
audio,
|
|
60
|
+
format: 'mp3',
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
async health(): Promise<{ ok: boolean; error?: string }> {
|
|
65
|
+
try {
|
|
66
|
+
const res = await fetch(`${this.baseUrl}/user`, {
|
|
67
|
+
headers: { 'xi-api-key': this.config.apiKey },
|
|
68
|
+
})
|
|
69
|
+
return { ok: res.ok }
|
|
70
|
+
} catch (err) {
|
|
71
|
+
return { ok: false, error: err instanceof Error ? err.message : String(err) }
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
package/src/index.ts
ADDED
package/src/whisper.ts
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import type { STTOptions, STTProvider, STTResult } from '@onmars/lunar-core'
|
|
2
|
+
|
|
3
|
+
/** Configuration for the OpenAI Whisper speech-to-text provider. */
export interface WhisperConfig {
  /** OpenAI API key; sent as a Bearer token in the Authorization header. */
  apiKey: string
  /** Model ID (default: whisper-1); overridable per call via options.model. */
  model?: string
  /** Default language hint; overridable per call via options.language. */
  language?: string
}
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* OpenAI Whisper STT — Speech to text via OpenAI Audio Transcriptions API.
|
|
14
|
+
*
|
|
15
|
+
* Uses the multipart/form-data endpoint. Supports all audio formats
|
|
16
|
+
* that Whisper accepts: mp3, mp4, mpeg, mpga, m4a, wav, webm, ogg.
|
|
17
|
+
*/
|
|
18
|
+
export class WhisperSTT implements STTProvider {
|
|
19
|
+
readonly id = 'whisper'
|
|
20
|
+
readonly name = 'OpenAI Whisper'
|
|
21
|
+
|
|
22
|
+
private baseUrl = 'https://api.openai.com/v1'
|
|
23
|
+
|
|
24
|
+
constructor(private config: WhisperConfig) {}
|
|
25
|
+
|
|
26
|
+
async init(): Promise<void> {
|
|
27
|
+
// No-op — stateless API
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
async destroy(): Promise<void> {
|
|
31
|
+
// No-op
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
async transcribe(audio: Buffer, options?: STTOptions): Promise<STTResult> {
|
|
35
|
+
const model = options?.model ?? this.config.model ?? 'whisper-1'
|
|
36
|
+
const language = options?.language ?? this.config.language
|
|
37
|
+
|
|
38
|
+
const form = new FormData()
|
|
39
|
+
form.append('file', new Blob([audio], { type: 'audio/ogg' }), 'audio.ogg')
|
|
40
|
+
form.append('model', model)
|
|
41
|
+
if (language) {
|
|
42
|
+
form.append('language', language)
|
|
43
|
+
}
|
|
44
|
+
form.append('response_format', 'verbose_json')
|
|
45
|
+
|
|
46
|
+
const response = await fetch(`${this.baseUrl}/audio/transcriptions`, {
|
|
47
|
+
method: 'POST',
|
|
48
|
+
headers: {
|
|
49
|
+
Authorization: `Bearer ${this.config.apiKey}`,
|
|
50
|
+
},
|
|
51
|
+
body: form,
|
|
52
|
+
})
|
|
53
|
+
|
|
54
|
+
if (!response.ok) {
|
|
55
|
+
const err = await response.text()
|
|
56
|
+
throw new Error(`Whisper API error ${response.status}: ${err.slice(0, 200)}`)
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
const data = (await response.json()) as {
|
|
60
|
+
text: string
|
|
61
|
+
language?: string
|
|
62
|
+
duration?: number
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
return {
|
|
66
|
+
text: data.text,
|
|
67
|
+
language: data.language,
|
|
68
|
+
durationMs: data.duration ? Math.round(data.duration * 1000) : undefined,
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
async health(): Promise<{ ok: boolean; error?: string }> {
|
|
73
|
+
try {
|
|
74
|
+
// Verify API key by hitting the models endpoint
|
|
75
|
+
const res = await fetch(`${this.baseUrl}/models/whisper-1`, {
|
|
76
|
+
headers: { Authorization: `Bearer ${this.config.apiKey}` },
|
|
77
|
+
})
|
|
78
|
+
return { ok: res.ok, error: res.ok ? undefined : `HTTP ${res.status}` }
|
|
79
|
+
} catch (err) {
|
|
80
|
+
return { ok: false, error: err instanceof Error ? err.message : String(err) }
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|