@onmars/lunar-voice 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 onMars Tech
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,13 @@
1
+ # @onmars/lunar-voice
2
+
3
+ Voice adapters for [Lunar](https://github.com/onmars-tech/lunar): text-to-speech via ElevenLabs and speech-to-text via OpenAI Whisper.
4
+
5
+ This package is used internally by `@onmars/lunar-cli`. Install the CLI instead:
6
+
7
+ ```bash
8
+ bun install -g @onmars/lunar-cli
9
+ ```
10
+
11
+ ## License
12
+
13
+ MIT — [onMars Tech](https://github.com/onmars-tech)
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "@onmars/lunar-voice",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "main": "src/index.ts",
6
+ "types": "src/index.ts",
7
+ "exports": {
8
+ ".": "./src/index.ts"
9
+ },
10
+ "files": ["src/", "LICENSE"],
11
+ "dependencies": {
12
+ "@onmars/lunar-core": "0.1.0"
13
+ },
14
+ "description": "Voice adapters for Lunar (ElevenLabs TTS, OpenAI Whisper STT)",
15
+ "author": "onMars Tech",
16
+ "license": "MIT",
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "https://github.com/onmars-tech/lunar",
20
+ "directory": "packages/voice"
21
+ },
22
+ "homepage": "https://github.com/onmars-tech/lunar",
23
+ "bugs": "https://github.com/onmars-tech/lunar/issues",
24
+ "keywords": ["lunar", "ai", "voice", "tts", "elevenlabs", "bun"],
25
+ "publishConfig": {
26
+ "access": "public"
27
+ },
28
+ "engines": {
29
+ "bun": ">=1.2"
30
+ }
31
+ }
@@ -0,0 +1,281 @@
1
+ /**
2
+ * # ElevenLabs TTS Provider — Functional Specification
3
+ *
4
+ * ## Constructor
5
+ * Stores config: apiKey, voiceId, modelId, language, stability, similarityBoost, speed.
6
+ * Base URL: https://api.elevenlabs.io/v1
7
+ *
8
+ * ## init() / destroy()
9
+ * init() logs initialization (no-op beyond that).
10
+ * destroy() is a no-op.
11
+ *
12
+ * ## synthesize(text, options?)
13
+ * POST to /text-to-speech/{voiceId} with:
14
+ * - Headers: xi-api-key, Content-Type: application/json
15
+ * - Body: text, model_id (default 'eleven_multilingual_v2'),
16
+ * language_code, voice_settings (stability, similarity_boost, speed)
17
+ * - Options override config: voice (voiceId), speed, language
18
+ * Returns: { audio: Buffer, format: 'mp3' }
19
+ * Throws on non-OK response.
20
+ *
21
+ * ## health()
22
+ * GET /user with xi-api-key header. Returns { ok: boolean, error? }.
23
+ * Catches fetch errors gracefully.
24
+ */
25
+ import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
26
+ import { type ElevenLabsConfig, ElevenLabsTTS } from '../elevenlabs'
27
+
28
+ // ─── Mock fetch ──────────────────────────────────────────────────
29
+
30
/** The real global `fetch`, captured once at load time so every test can put it back. */
const originalFetch = globalThis.fetch

/**
 * Swaps the global `fetch` for `handler` until the next `afterEach` runs.
 *
 * @param handler Receives the URL and init object the code under test passed
 *   to `fetch` and returns (or resolves to) the canned `Response`.
 */
function mockFetch(handler: (url: string, init?: RequestInit) => Response | Promise<Response>) {
  Object.assign(globalThis, { fetch: handler })
}

// Put the real `fetch` back after every test so a stub never outlives the test that installed it.
afterEach(() => {
  Object.assign(globalThis, { fetch: originalFetch })
})
39
+
40
+ // ─── Shared fixtures ─────────────────────────────────────────────
41
+
42
/** Smallest valid configuration: just the two required fields. */
const baseConfig: ElevenLabsConfig = {
  apiKey: 'test-xi-api-key',
  voiceId: 'voice_abc123',
}

/**
 * Builds a provider from `baseConfig`, letting `overrides` win field by field.
 *
 * @param overrides Config fields to replace or add for a single test.
 * @returns A fresh `ElevenLabsTTS` instance.
 */
function createProvider(overrides: Partial<ElevenLabsConfig> = {}): ElevenLabsTTS {
  const merged: ElevenLabsConfig = Object.assign({}, baseConfig, overrides)
  return new ElevenLabsTTS(merged)
}
50
+
51
+ // ═══════════════════════════════════════════════════════════════════
52
+ // Constructor and metadata
53
+ // ═══════════════════════════════════════════════════════════════════
54
+
55
describe('ElevenLabsTTS — metadata', () => {
  // The provider exposes a fixed `id` and `name`, independent of its config.
  it('has correct id and name', () => {
    const provider = createProvider()
    const { id, name } = provider

    expect(id).toBe('elevenlabs')
    expect(name).toBe('ElevenLabs')
  })
})
62
+
63
+ // ═══════════════════════════════════════════════════════════════════
64
+ // init / destroy — lifecycle
65
+ // ═══════════════════════════════════════════════════════════════════
66
+
67
describe('ElevenLabsTTS — lifecycle', () => {
  // Both hooks only need to settle; a rejection fails the test by itself.
  it('init() resolves without error', async () => {
    const provider = createProvider()
    await provider.init()
  })

  it('destroy() resolves without error', async () => {
    const provider = createProvider()
    await provider.destroy()
  })
})
78
+
79
+ // ═══════════════════════════════════════════════════════════════════
80
+ // synthesize — TTS API call
81
+ // ═══════════════════════════════════════════════════════════════════
82
+
83
describe('ElevenLabsTTS — synthesize', () => {
  /** The arguments the stubbed `fetch` received on its latest call. */
  type Seen = { url: string; init?: RequestInit }

  /**
   * Stubs `fetch` with a recorder that answers 200 with `size` bytes and
   * returns the live record of what was requested.
   */
  function recordRequest(size = 10): Seen {
    const seen: Seen = { url: '' }
    mockFetch(async (url, init) => {
      seen.url = url
      seen.init = init
      return new Response(new ArrayBuffer(size), { status: 200 })
    })
    return seen
  }

  /** Decodes the JSON body of the recorded request. */
  const bodyOf = (seen: Seen): any => JSON.parse(seen.init?.body as string)

  it('POSTs to correct URL with correct headers and body', async () => {
    const seen = recordRequest(100)

    await createProvider().synthesize('Hello world')

    expect(seen.url).toBe('https://api.elevenlabs.io/v1/text-to-speech/voice_abc123')
    expect(seen.init?.method).toBe('POST')

    const sentHeaders = seen.init?.headers as Record<string, string>
    expect(sentHeaders['xi-api-key']).toBe('test-xi-api-key')
    expect(sentHeaders['Content-Type']).toBe('application/json')

    const payload = bodyOf(seen)
    expect(payload.text).toBe('Hello world')
    expect(payload.model_id).toBe('eleven_multilingual_v2')
  })

  it('uses custom modelId from config', async () => {
    const seen = recordRequest()

    await createProvider({ modelId: 'eleven_turbo_v2_5' }).synthesize('test')

    expect(bodyOf(seen).model_id).toBe('eleven_turbo_v2_5')
  })

  it('uses config voice_settings defaults', async () => {
    const seen = recordRequest()
    const provider = createProvider({
      stability: 0.8,
      similarityBoost: 0.9,
      speed: 1.2,
      language: 'es',
    })

    await provider.synthesize('test')

    const payload = bodyOf(seen)
    expect(payload.voice_settings.stability).toBe(0.8)
    expect(payload.voice_settings.similarity_boost).toBe(0.9)
    expect(payload.voice_settings.speed).toBe(1.2)
    expect(payload.language_code).toBe('es')
  })

  it('uses default voice_settings when config omits them', async () => {
    const seen = recordRequest()

    await createProvider().synthesize('test')

    const { voice_settings: settings } = bodyOf(seen)
    expect(settings.stability).toBe(0.5)
    expect(settings.similarity_boost).toBe(0.75)
    expect(settings.speed).toBe(1.0)
  })

  it('options.voice overrides config voiceId', async () => {
    const seen = recordRequest()

    await createProvider().synthesize('test', { voice: 'voice_override' })

    expect(seen.url).toBe('https://api.elevenlabs.io/v1/text-to-speech/voice_override')
  })

  it('options.speed overrides config speed', async () => {
    const seen = recordRequest()

    await createProvider({ speed: 1.0 }).synthesize('test', { speed: 2.0 })

    expect(bodyOf(seen).voice_settings.speed).toBe(2.0)
  })

  it('options.language overrides config language', async () => {
    const seen = recordRequest()

    await createProvider({ language: 'en' }).synthesize('test', { language: 'fr' })

    expect(bodyOf(seen).language_code).toBe('fr')
  })

  it('returns { audio: Buffer, format: "mp3" }', async () => {
    const id3Header = new Uint8Array([0x49, 0x44, 0x33]) // ID3 header
    mockFetch(async () => new Response(id3Header.buffer, { status: 200 }))

    const { audio, format } = await createProvider().synthesize('test')

    expect(format).toBe('mp3')
    expect(audio).toBeInstanceOf(Buffer)
    expect(audio.length).toBe(3)
  })

  it('throws on non-OK response', async () => {
    mockFetch(async () => new Response('{"error":"quota_exceeded"}', { status: 429 }))

    const attempt = createProvider().synthesize('test')

    await expect(attempt).rejects.toThrow('ElevenLabs API error 429')
  })

  it('includes response body in error message', async () => {
    mockFetch(async () => new Response('{"detail":"Invalid API key"}', { status: 401 }))

    const attempt = createProvider().synthesize('test')

    await expect(attempt).rejects.toThrow('Invalid API key')
  })
})
229
+
230
+ // ═══════════════════════════════════════════════════════════════════
231
+ // health — Provider health check
232
+ // ═══════════════════════════════════════════════════════════════════
233
+
234
describe('ElevenLabsTTS — health', () => {
  it('returns { ok: true } on successful API call', async () => {
    mockFetch(async (requestedUrl) => {
      expect(requestedUrl).toBe('https://api.elevenlabs.io/v1/user')
      return new Response('{}', { status: 200 })
    })

    const { ok } = await createProvider().health()

    expect(ok).toBe(true)
  })

  it('sends xi-api-key header', async () => {
    // Filled in by the stub with whatever headers health() passed to fetch.
    let sentHeaders: any
    mockFetch(async (_url, init) => {
      sentHeaders = init?.headers
      return new Response('{}', { status: 200 })
    })

    await createProvider().health()

    expect(sentHeaders['xi-api-key']).toBe('test-xi-api-key')
  })

  it('returns { ok: false } on non-OK response', async () => {
    mockFetch(async () => new Response('Unauthorized', { status: 401 }))

    const { ok } = await createProvider().health()

    expect(ok).toBe(false)
  })

  it('returns { ok: false, error } on network error', async () => {
    mockFetch(async () => {
      throw new Error('DNS resolution failed')
    })

    const report = await createProvider().health()

    expect(report.ok).toBe(false)
    expect(report.error).toBe('DNS resolution failed')
  })
})
@@ -0,0 +1,308 @@
1
+ /**
2
+ * # Whisper STT Provider — Functional Specification
3
+ *
4
+ * ## Constructor
5
+ * Stores config: apiKey, model (default 'whisper-1'), language (optional).
6
+ * Base URL: https://api.openai.com/v1
7
+ *
8
+ * ## init() / destroy()
9
+ * Both are no-ops (stateless HTTP API).
10
+ *
11
+ * ## transcribe(audio, options?)
12
+ * POST multipart/form-data to /audio/transcriptions with:
13
+ * - Authorization: Bearer <apiKey>
14
+ * - FormData: file (Blob), model, language (optional), response_format: verbose_json
15
+ * - Options override config: model, language
16
+ * Returns: { text, language?, durationMs? }
17
+ * Throws on non-OK response (truncated to 200 chars).
18
+ *
19
+ * ## health()
20
+ * GET /models/whisper-1 with Authorization header.
21
+ * Returns { ok: boolean, error?: string }.
22
+ * Catches fetch errors gracefully.
23
+ */
24
+ import { afterEach, describe, expect, it } from 'bun:test'
25
+ import { type WhisperConfig, WhisperSTT } from '../whisper'
26
+
27
+ // ─── Mock fetch ──────────────────────────────────────────────────
28
+
29
/** The real global `fetch`, captured once at load time so every test can put it back. */
const originalFetch = globalThis.fetch

/**
 * Swaps the global `fetch` for `handler` until the next `afterEach` runs.
 *
 * @param handler Receives the URL and init object the code under test passed
 *   to `fetch` and returns (or resolves to) the canned `Response`.
 */
function mockFetch(handler: (url: string, init?: RequestInit) => Response | Promise<Response>) {
  Object.assign(globalThis, { fetch: handler })
}

// Put the real `fetch` back after every test so a stub never outlives the test that installed it.
afterEach(() => {
  Object.assign(globalThis, { fetch: originalFetch })
})
38
+
39
+ // ─── Shared fixtures ─────────────────────────────────────────────
40
+
41
/** Smallest valid configuration: only the required API key. */
const baseConfig: WhisperConfig = {
  apiKey: 'sk-test-openai-key',
}

/**
 * Builds a provider from `baseConfig`, letting `overrides` win field by field.
 *
 * @param overrides Config fields to replace or add for a single test.
 * @returns A fresh `WhisperSTT` instance.
 */
function createProvider(overrides: Partial<WhisperConfig> = {}): WhisperSTT {
  const merged: WhisperConfig = Object.assign({}, baseConfig, overrides)
  return new WhisperSTT(merged)
}
48
+
49
+ // ═══════════════════════════════════════════════════════════════════
50
+ // Constructor and metadata
51
+ // ═══════════════════════════════════════════════════════════════════
52
+
53
describe('WhisperSTT — metadata', () => {
  // The provider exposes a fixed `id` and `name`, independent of its config.
  it('has correct id and name', () => {
    const provider = createProvider()
    const { id, name } = provider

    expect(id).toBe('whisper')
    expect(name).toBe('OpenAI Whisper')
  })
})
60
+
61
+ // ═══════════════════════════════════════════════════════════════════
62
+ // init / destroy — lifecycle
63
+ // ═══════════════════════════════════════════════════════════════════
64
+
65
describe('WhisperSTT — lifecycle', () => {
  // Both hooks only need to settle; a rejection fails the test by itself.
  it('init() resolves without error', async () => {
    const provider = createProvider()
    await provider.init()
  })

  it('destroy() resolves without error', async () => {
    const provider = createProvider()
    await provider.destroy()
  })
})
76
+
77
+ // ═══════════════════════════════════════════════════════════════════
78
+ // transcribe — STT API call
79
+ // ═══════════════════════════════════════════════════════════════════
80
+
81
describe('WhisperSTT — transcribe', () => {
  const fakeAudio = Buffer.from([0x00, 0x01, 0x02, 0x03])

  /** Holder for the multipart body the stubbed `fetch` received. */
  type Sent = { form?: FormData }

  /**
   * Stubs `fetch` to answer 200 with `{ text }` and records the request body
   * of the latest call in the returned holder.
   */
  function captureForm(text = 'test'): Sent {
    const sent: Sent = {}
    mockFetch(async (_url, init) => {
      sent.form = init?.body as any
      return new Response(JSON.stringify({ text }), { status: 200 })
    })
    return sent
  }

  /** Stubs `fetch` to answer 200 with `payload` serialized as JSON. */
  function replyWith(payload: Record<string, unknown>): void {
    mockFetch(async () => new Response(JSON.stringify(payload), { status: 200 }))
  }

  it('POSTs to correct URL with Authorization header', async () => {
    let capturedUrl = ''
    let capturedHeaders: Record<string, string> = {}

    mockFetch(async (url, init) => {
      capturedUrl = url
      const sentHeaders = init?.headers as Record<string, string> | undefined
      if (sentHeaders) capturedHeaders = sentHeaders
      return new Response(JSON.stringify({ text: 'Hello', language: 'en', duration: 1.5 }), {
        status: 200,
      })
    })

    await createProvider().transcribe(fakeAudio)

    expect(capturedUrl).toBe('https://api.openai.com/v1/audio/transcriptions')
    expect(capturedHeaders['Authorization']).toBe('Bearer sk-test-openai-key')
  })

  it('sends FormData with file, model, and response_format', async () => {
    const sent = captureForm('Hello')

    await createProvider().transcribe(fakeAudio)

    expect(sent.form).toBeInstanceOf(FormData)
    expect(sent.form!.get('model')).toBe('whisper-1')
    expect(sent.form!.get('response_format')).toBe('verbose_json')
    expect(sent.form!.get('file')).toBeInstanceOf(Blob)
  })

  it('uses custom model from config', async () => {
    const sent = captureForm()

    await createProvider({ model: 'whisper-large-v3' }).transcribe(fakeAudio)

    expect(sent.form!.get('model')).toBe('whisper-large-v3')
  })

  it('options.model overrides config model', async () => {
    const sent = captureForm()

    await createProvider({ model: 'whisper-1' }).transcribe(fakeAudio, {
      model: 'whisper-large-v3',
    })

    expect(sent.form!.get('model')).toBe('whisper-large-v3')
  })

  it('includes language from config when set', async () => {
    const sent = captureForm()

    await createProvider({ language: 'es' }).transcribe(fakeAudio)

    expect(sent.form!.get('language')).toBe('es')
  })

  it('options.language overrides config language', async () => {
    const sent = captureForm()

    await createProvider({ language: 'en' }).transcribe(fakeAudio, { language: 'ja' })

    expect(sent.form!.get('language')).toBe('ja')
  })

  it('omits language when not configured', async () => {
    const sent = captureForm()

    await createProvider().transcribe(fakeAudio)

    expect(sent.form!.get('language')).toBeNull()
  })

  it('returns text, language, and durationMs from response', async () => {
    replyWith({ text: 'Transcribed text', language: 'en', duration: 3.456 })

    const result = await createProvider().transcribe(fakeAudio)

    expect(result.text).toBe('Transcribed text')
    expect(result.language).toBe('en')
    expect(result.durationMs).toBe(3456)
  })

  it('durationMs is undefined when duration is not in response', async () => {
    replyWith({ text: 'No duration' })

    const result = await createProvider().transcribe(fakeAudio)

    expect(result.text).toBe('No duration')
    expect(result.durationMs).toBeUndefined()
  })

  it('rounds durationMs to nearest millisecond', async () => {
    replyWith({ text: 'test', duration: 1.2345 })

    const result = await createProvider().transcribe(fakeAudio)

    expect(result.durationMs).toBe(1235) // Math.round(1234.5)
  })

  it('throws on non-OK response with status and truncated body', async () => {
    mockFetch(async () => new Response('x'.repeat(500), { status: 400 }))

    const attempt = createProvider().transcribe(fakeAudio)

    await expect(attempt).rejects.toThrow('Whisper API error 400')
  })

  it('truncates error body to 200 characters', async () => {
    mockFetch(async () => new Response('e'.repeat(500), { status: 500 }))

    // Capture the rejection and assert OUTSIDE the try/catch: an assertion
    // placed inside the `try` would itself be caught, letting the test pass
    // even when transcribe() wrongly resolves.
    let failure: unknown
    try {
      await createProvider().transcribe(fakeAudio)
    } catch (err) {
      failure = err
    }

    expect(failure).toBeInstanceOf(Error)

    const prefix = 'Whisper API error 500: '
    const message = (failure as Error).message
    expect(message.startsWith(prefix)).toBe(true)

    // Everything after the prefix is the (truncated) response body.
    const bodyPart = message.slice(prefix.length)
    expect(bodyPart.length).toBeGreaterThan(0)
    expect(bodyPart.length).toBeLessThanOrEqual(200)
  })
})
254
+
255
+ // ═══════════════════════════════════════════════════════════════════
256
+ // health — Provider health check
257
+ // ═══════════════════════════════════════════════════════════════════
258
+
259
describe('WhisperSTT — health', () => {
  it('returns { ok: true } on successful API call', async () => {
    mockFetch(async (requestedUrl) => {
      expect(requestedUrl).toBe('https://api.openai.com/v1/models/whisper-1')
      return new Response('{}', { status: 200 })
    })

    const report = await createProvider().health()

    expect(report.ok).toBe(true)
    expect(report.error).toBeUndefined()
  })

  it('sends correct Authorization header', async () => {
    // Filled in by the stub with whatever headers health() passed to fetch.
    let sentHeaders: any
    mockFetch(async (_url, init) => {
      sentHeaders = init?.headers
      return new Response('{}', { status: 200 })
    })

    await createProvider().health()

    expect(sentHeaders['Authorization']).toBe('Bearer sk-test-openai-key')
  })

  it('returns { ok: false, error } on non-OK response', async () => {
    mockFetch(async () => new Response('Unauthorized', { status: 401 }))

    const report = await createProvider().health()

    expect(report.ok).toBe(false)
    expect(report.error).toBe('HTTP 401')
  })

  it('returns { ok: false, error } on network error', async () => {
    mockFetch(async () => {
      throw new Error('ECONNREFUSED')
    })

    const report = await createProvider().health()

    expect(report.ok).toBe(false)
    expect(report.error).toBe('ECONNREFUSED')
  })
})
@@ -0,0 +1,74 @@
1
+ import type { TTSOptions, TTSProvider, TTSResult } from '@onmars/lunar-core'
2
+ import { log } from '@onmars/lunar-core'
3
+
4
/** Settings accepted by the `ElevenLabsTTS` constructor. */
export interface ElevenLabsConfig {
  /** ElevenLabs API key; sent as the `xi-api-key` request header. */
  apiKey: string
  /** Voice used for synthesis unless a call passes `options.voice`. */
  voiceId: string
  /** Model sent as `model_id` (default: `eleven_multilingual_v2`). */
  modelId?: string
  /** Language sent as `language_code` unless a call passes `options.language`. */
  language?: string
  /** `voice_settings.stability` (default: 0.5). */
  stability?: number
  /** `voice_settings.similarity_boost` (default: 0.75). */
  similarityBoost?: number
  /** `voice_settings.speed` unless a call passes `options.speed` (default: 1.0). */
  speed?: number
}
13
+
14
/**
 * Text-to-speech provider backed by the ElevenLabs HTTP API.
 *
 * Each call issues one HTTP request authenticated with the configured API
 * key; the instance holds nothing but its configuration.
 */
export class ElevenLabsTTS implements TTSProvider {
  readonly id = 'elevenlabs'
  readonly name = 'ElevenLabs'

  private readonly baseUrl = 'https://api.elevenlabs.io/v1'

  /** @param config API key, default voice and optional synthesis defaults. */
  constructor(private readonly config: ElevenLabsConfig) {}

  /** Logs the configured voice ID; performs no other setup. */
  async init(): Promise<void> {
    log.info({ voiceId: this.config.voiceId }, 'ElevenLabs TTS initialized')
  }

  /** No-op. */
  async destroy(): Promise<void> {}

  /**
   * Converts `text` to speech with one POST to `/text-to-speech/{voiceId}`.
   *
   * @param text Text to speak.
   * @param options Per-call overrides: `voice` replaces the configured voice
   *   ID, `speed` and `language` replace the configured values.
   * @returns The response bytes as a `Buffer`, labelled as `mp3`.
   * @throws Error `ElevenLabs API error <status>: <body>` when the response
   *   status is not OK.
   */
  async synthesize(text: string, options?: TTSOptions): Promise<TTSResult> {
    const voiceId = options?.voice ?? this.config.voiceId
    // The voice ID becomes a path segment and may come from the caller, so
    // encode it: characters like `/`, `?` or `#` must not be able to change
    // which endpoint is requested.
    const url = `${this.baseUrl}/text-to-speech/${encodeURIComponent(voiceId)}`

    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'xi-api-key': this.config.apiKey,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        text,
        model_id: this.config.modelId ?? 'eleven_multilingual_v2',
        // `undefined` is dropped by JSON.stringify, so the field is omitted
        // when no language is configured or passed.
        language_code: options?.language ?? this.config.language,
        voice_settings: {
          stability: this.config.stability ?? 0.5,
          similarity_boost: this.config.similarityBoost ?? 0.75,
          speed: options?.speed ?? this.config.speed ?? 1.0,
        },
      }),
    })

    if (!response.ok) {
      const err = await response.text()
      throw new Error(`ElevenLabs API error ${response.status}: ${err}`)
    }

    const arrayBuffer = await response.arrayBuffer()
    const audio = Buffer.from(arrayBuffer)

    return {
      audio,
      format: 'mp3',
    }
  }

  /**
   * Checks the API key with a GET to `/user`.
   *
   * @returns `{ ok: true }` on an OK status, `{ ok: false, error: 'HTTP <status>' }`
   *   on any other status, and `{ ok: false, error: <message> }` when the
   *   request itself fails. Never rejects.
   */
  async health(): Promise<{ ok: boolean; error?: string }> {
    try {
      const res = await fetch(`${this.baseUrl}/user`, {
        headers: { 'xi-api-key': this.config.apiKey },
      })
      // Report the status on failure, matching WhisperSTT.health(), so callers
      // can tell an auth problem (401) from e.g. rate limiting (429).
      return res.ok ? { ok: true } : { ok: false, error: `HTTP ${res.status}` }
    } catch (err) {
      return { ok: false, error: err instanceof Error ? err.message : String(err) }
    }
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,2 @@
1
// Public entry point. The package `exports` map only exposes ".", so the
// config interfaces must be re-exported here for consumers to be able to
// type the constructor arguments.
export { type ElevenLabsConfig, ElevenLabsTTS } from './elevenlabs'
export { type WhisperConfig, WhisperSTT } from './whisper'
package/src/whisper.ts ADDED
@@ -0,0 +1,83 @@
1
+ import type { STTOptions, STTProvider, STTResult } from '@onmars/lunar-core'
2
+
3
/** Settings accepted by the `WhisperSTT` constructor. */
export interface WhisperConfig {
  /** OpenAI API key; sent as a `Bearer` token in the `Authorization` header. */
  apiKey: string
  /** Model ID used unless a call passes `options.model` (default: `whisper-1`). */
  model?: string
  /** Language hint used unless a call passes `options.language`; omitted from the request when unset. */
  language?: string
}
11
+
12
/**
 * OpenAI Whisper STT — speech to text via the OpenAI Audio Transcriptions API.
 *
 * Audio is uploaded through the multipart/form-data endpoint. The upload is
 * always labelled `audio/ogg` with the file name `audio.ogg`, whatever bytes
 * the caller passes.
 * NOTE(review): whether other containers (mp3, wav, webm, …) are still
 * accepted under that label depends on the API — confirm before relying on it.
 */
export class WhisperSTT implements STTProvider {
  readonly id = 'whisper'
  readonly name = 'OpenAI Whisper'

  private readonly baseUrl = 'https://api.openai.com/v1'

  /** @param config API key plus optional default model and language hint. */
  constructor(private readonly config: WhisperConfig) {}

  /** No-op — the provider only makes HTTP requests and keeps no state. */
  async init(): Promise<void> {}

  /** No-op. */
  async destroy(): Promise<void> {}

  /**
   * Transcribes `audio` with one POST to `/audio/transcriptions`.
   *
   * @param audio Raw audio bytes to upload.
   * @param options Per-call overrides for `model` and `language`.
   * @returns The transcript, plus the language and duration (in milliseconds,
   *   rounded) when the response reports them.
   * @throws Error `Whisper API error <status>: <body>` when the response
   *   status is not OK; the body is cut to its first 200 characters.
   */
  async transcribe(audio: Buffer, options?: STTOptions): Promise<STTResult> {
    const model = options?.model ?? this.config.model ?? 'whisper-1'
    const language = options?.language ?? this.config.language

    const form = new FormData()
    form.append('file', new Blob([audio], { type: 'audio/ogg' }), 'audio.ogg')
    form.append('model', model)
    if (language) {
      form.append('language', language)
    }
    // verbose_json is requested because the code reads `language` and
    // `duration` from the response in addition to `text`.
    form.append('response_format', 'verbose_json')

    // No Content-Type header is set here; only Authorization is sent explicitly.
    const response = await fetch(`${this.baseUrl}/audio/transcriptions`, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${this.config.apiKey}`,
      },
      body: form,
    })

    if (!response.ok) {
      const err = await response.text()
      throw new Error(`Whisper API error ${response.status}: ${err.slice(0, 200)}`)
    }

    const data = (await response.json()) as {
      text: string
      language?: string
      duration?: number
    }

    return {
      text: data.text,
      language: data.language,
      // Check the type, not truthiness: a reported duration of 0 is a real
      // value and must come back as 0 ms rather than `undefined`.
      durationMs: typeof data.duration === 'number' ? Math.round(data.duration * 1000) : undefined,
    }
  }

  /**
   * Checks the API key with a GET to `/models/whisper-1`.
   *
   * @returns `ok: true` on an OK status, otherwise `ok: false` with
   *   `error` set to `HTTP <status>` or to the message of the failed request.
   *   Never rejects.
   */
  async health(): Promise<{ ok: boolean; error?: string }> {
    try {
      const res = await fetch(`${this.baseUrl}/models/whisper-1`, {
        headers: { Authorization: `Bearer ${this.config.apiKey}` },
      })
      return { ok: res.ok, error: res.ok ? undefined : `HTTP ${res.status}` }
    } catch (err) {
      return { ok: false, error: err instanceof Error ? err.message : String(err) }
    }
  }
}