@framers/agentos 0.1.110 → 0.1.111
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/agency.d.ts.map +1 -1
- package/dist/api/agency.js +38 -2
- package/dist/api/agency.js.map +1 -1
- package/dist/api/agent.js +1 -1
- package/dist/api/agent.js.map +1 -1
- package/dist/api/strategies/debate.d.ts.map +1 -1
- package/dist/api/strategies/debate.js.map +1 -1
- package/dist/api/strategies/graph.d.ts.map +1 -1
- package/dist/api/strategies/graph.js +1 -2
- package/dist/api/strategies/graph.js.map +1 -1
- package/dist/api/strategies/hierarchical.d.ts.map +1 -1
- package/dist/api/strategies/hierarchical.js +1 -2
- package/dist/api/strategies/hierarchical.js.map +1 -1
- package/dist/api/strategies/index.d.ts +1 -9
- package/dist/api/strategies/index.d.ts.map +1 -1
- package/dist/api/strategies/index.js +1 -11
- package/dist/api/strategies/index.js.map +1 -1
- package/dist/api/strategies/parallel.d.ts.map +1 -1
- package/dist/api/strategies/parallel.js +23 -4
- package/dist/api/strategies/parallel.js.map +1 -1
- package/dist/api/strategies/review-loop.d.ts.map +1 -1
- package/dist/api/strategies/review-loop.js.map +1 -1
- package/dist/api/strategies/sequential.d.ts.map +1 -1
- package/dist/api/strategies/sequential.js +1 -2
- package/dist/api/strategies/sequential.js.map +1 -1
- package/dist/api/strategies/shared.d.ts +8 -0
- package/dist/api/strategies/shared.d.ts.map +1 -1
- package/dist/api/strategies/shared.js +10 -1
- package/dist/api/strategies/shared.js.map +1 -1
- package/dist/api/types.d.ts +6 -0
- package/dist/api/types.d.ts.map +1 -1
- package/dist/api/types.js.map +1 -1
- package/dist/memory/AgentMemory.d.ts +2 -1
- package/dist/memory/AgentMemory.d.ts.map +1 -1
- package/dist/memory/AgentMemory.js +1 -1
- package/dist/memory/AgentMemory.js.map +1 -1
- package/dist/memory/CognitiveMemoryManager.d.ts.map +1 -1
- package/dist/memory/CognitiveMemoryManager.js +7 -2
- package/dist/memory/CognitiveMemoryManager.js.map +1 -1
- package/dist/memory/facade/Memory.d.ts.map +1 -1
- package/dist/memory/facade/Memory.js +6 -9
- package/dist/memory/facade/Memory.js.map +1 -1
- package/dist/memory/store/MemoryStore.d.ts +9 -0
- package/dist/memory/store/MemoryStore.d.ts.map +1 -1
- package/dist/memory/store/MemoryStore.js +66 -6
- package/dist/memory/store/MemoryStore.js.map +1 -1
- package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -1
- package/dist/memory/store/SqliteMemoryGraph.js +27 -13
- package/dist/memory/store/SqliteMemoryGraph.js.map +1 -1
- package/dist/speech/FallbackProxy.d.ts +194 -41
- package/dist/speech/FallbackProxy.d.ts.map +1 -1
- package/dist/speech/FallbackProxy.js +155 -32
- package/dist/speech/FallbackProxy.js.map +1 -1
- package/dist/speech/SpeechProviderResolver.d.ts +278 -36
- package/dist/speech/SpeechProviderResolver.d.ts.map +1 -1
- package/dist/speech/SpeechProviderResolver.js +306 -40
- package/dist/speech/SpeechProviderResolver.js.map +1 -1
- package/dist/speech/providers/AssemblyAISTTProvider.d.ts +119 -19
- package/dist/speech/providers/AssemblyAISTTProvider.d.ts.map +1 -1
- package/dist/speech/providers/AssemblyAISTTProvider.js +153 -25
- package/dist/speech/providers/AssemblyAISTTProvider.js.map +1 -1
- package/dist/speech/providers/AzureSpeechSTTProvider.d.ts +121 -17
- package/dist/speech/providers/AzureSpeechSTTProvider.d.ts.map +1 -1
- package/dist/speech/providers/AzureSpeechSTTProvider.js +122 -14
- package/dist/speech/providers/AzureSpeechSTTProvider.js.map +1 -1
- package/dist/speech/providers/AzureSpeechTTSProvider.d.ts +130 -15
- package/dist/speech/providers/AzureSpeechTTSProvider.d.ts.map +1 -1
- package/dist/speech/providers/AzureSpeechTTSProvider.js +163 -18
- package/dist/speech/providers/AzureSpeechTTSProvider.js.map +1 -1
- package/dist/speech/providers/BuiltInAdaptiveVadProvider.d.ts +159 -0
- package/dist/speech/providers/BuiltInAdaptiveVadProvider.d.ts.map +1 -1
- package/dist/speech/providers/BuiltInAdaptiveVadProvider.js +119 -0
- package/dist/speech/providers/BuiltInAdaptiveVadProvider.js.map +1 -1
- package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts +102 -16
- package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts.map +1 -1
- package/dist/speech/providers/DeepgramBatchSTTProvider.js +108 -13
- package/dist/speech/providers/DeepgramBatchSTTProvider.js.map +1 -1
- package/dist/speech/providers/ElevenLabsTextToSpeechProvider.d.ts +149 -0
- package/dist/speech/providers/ElevenLabsTextToSpeechProvider.d.ts.map +1 -1
- package/dist/speech/providers/ElevenLabsTextToSpeechProvider.js +137 -2
- package/dist/speech/providers/ElevenLabsTextToSpeechProvider.js.map +1 -1
- package/dist/speech/providers/OpenAITextToSpeechProvider.d.ts +125 -0
- package/dist/speech/providers/OpenAITextToSpeechProvider.d.ts.map +1 -1
- package/dist/speech/providers/OpenAITextToSpeechProvider.js +128 -4
- package/dist/speech/providers/OpenAITextToSpeechProvider.js.map +1 -1
- package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.d.ts +110 -0
- package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.d.ts.map +1 -1
- package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.js +115 -0
- package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.js.map +1 -1
- package/package.json +1 -1
|
@@ -2,7 +2,18 @@
|
|
|
2
2
|
* Maps Deepgram word-level data to {@link SpeechTranscriptionSegment} objects.
|
|
3
3
|
*
|
|
4
4
|
* Each word is promoted to its own segment so that per-word timing and speaker
|
|
5
|
-
* information is preserved in the normalized result.
|
|
5
|
+
* information is preserved in the normalized result. This 1:1 word-to-segment
|
|
6
|
+
* mapping enables downstream consumers to reconstruct speaker-attributed
|
|
7
|
+
* timelines at the finest granularity Deepgram provides.
|
|
8
|
+
*
|
|
9
|
+
* Deepgram returns times in seconds (unlike AssemblyAI, which uses milliseconds),
|
|
10
|
+
* so no unit conversion is needed here.
|
|
11
|
+
*
|
|
12
|
+
* @param words - Array of Deepgram word objects from the API response.
|
|
13
|
+
* @returns An array of normalized transcription segments, one per word.
|
|
14
|
+
*
|
|
15
|
+
* @see {@link DeepgramWord} for the input shape
|
|
16
|
+
* @see {@link SpeechTranscriptionSegment} for the output shape
|
|
6
17
|
*/
|
|
7
18
|
function wordsToSegments(words) {
|
|
8
19
|
return words.map((w) => ({
|
|
@@ -24,46 +35,125 @@ function wordsToSegments(words) {
|
|
|
24
35
|
/**
|
|
25
36
|
* Speech-to-text provider that uses the Deepgram batch (pre-recorded) REST API.
|
|
26
37
|
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
38
|
+
* ## REST API Contract
|
|
39
|
+
*
|
|
40
|
+
* - **Endpoint:** `POST https://api.deepgram.com/v1/listen`
|
|
41
|
+
* - **Authentication:** `Authorization: Token <apiKey>` header
|
|
42
|
+
* - **Content-Type:** Set to the audio's MIME type (e.g. `audio/wav`)
|
|
43
|
+
* - **Body:** Raw audio bytes sent directly (no multipart form)
|
|
44
|
+
* - **Query parameters:** `model`, `punctuate`, `diarize`, `language`
|
|
45
|
+
* - **Response:** JSON containing `results.channels[].alternatives[]` with
|
|
46
|
+
* transcript text, confidence scores, and optional word-level timing
|
|
47
|
+
*
|
|
48
|
+
* ## Word-Level Diarization Mapping
|
|
49
|
+
*
|
|
50
|
+
* When `enableSpeakerDiarization` is `true`, the `diarize=true` query parameter
|
|
51
|
+
* is set. Deepgram then includes a `speaker` field (zero-based integer index) on
|
|
52
|
+
* each word in the response. These speaker indices are preserved through the
|
|
53
|
+
* {@link wordsToSegments} mapping into the normalized result.
|
|
54
|
+
*
|
|
55
|
+
* ## Error Handling
|
|
56
|
+
*
|
|
57
|
+
* Non-2xx responses from Deepgram trigger an `Error` with the HTTP status code
|
|
58
|
+
* and response body text included in the message for debugging. Network-level
|
|
59
|
+
* errors (DNS failures, timeouts) propagate as-is from the fetch implementation.
|
|
60
|
+
*
|
|
61
|
+
* Streaming is NOT supported by this provider — use a Deepgram WebSocket adapter
|
|
62
|
+
* for real-time transcription.
|
|
63
|
+
*
|
|
64
|
+
* @see {@link DeepgramBatchSTTProviderConfig} for configuration options
|
|
65
|
+
* @see {@link wordsToSegments} for the word-to-segment mapping logic
|
|
30
66
|
*
|
|
31
67
|
* @example
|
|
32
68
|
* ```ts
|
|
33
|
-
* const provider = new DeepgramBatchSTTProvider({
|
|
34
|
-
*
|
|
69
|
+
* const provider = new DeepgramBatchSTTProvider({
|
|
70
|
+
* apiKey: process.env.DEEPGRAM_API_KEY!,
|
|
71
|
+
* model: 'nova-2',
|
|
72
|
+
* });
|
|
73
|
+
* const result = await provider.transcribe(
|
|
74
|
+
* { data: audioBuffer, mimeType: 'audio/wav' },
|
|
75
|
+
* { enableSpeakerDiarization: true },
|
|
76
|
+
* );
|
|
35
77
|
* console.log(result.text);
|
|
78
|
+
* console.log(result.segments?.map(s => `[Speaker ${s.speaker}] ${s.text}`));
|
|
36
79
|
* ```
|
|
37
80
|
*/
|
|
38
81
|
export class DeepgramBatchSTTProvider {
|
|
82
|
+
/**
|
|
83
|
+
* Creates a new DeepgramBatchSTTProvider.
|
|
84
|
+
*
|
|
85
|
+
* @param config - Provider configuration including API key and optional defaults.
|
|
86
|
+
*
|
|
87
|
+
* @example
|
|
88
|
+
* ```ts
|
|
89
|
+
* const provider = new DeepgramBatchSTTProvider({
|
|
90
|
+
* apiKey: 'dg-xxxx',
|
|
91
|
+
* model: 'nova-2',
|
|
92
|
+
* language: 'en-US',
|
|
93
|
+
* });
|
|
94
|
+
* ```
|
|
95
|
+
*/
|
|
39
96
|
constructor(config) {
|
|
40
97
|
this.config = config;
|
|
98
|
+
/** Unique provider identifier used for registration and resolution. */
|
|
41
99
|
this.id = 'deepgram-batch';
|
|
100
|
+
/** Human-readable display name for UI and logging. */
|
|
42
101
|
this.displayName = 'Deepgram (Batch)';
|
|
102
|
+
/** This provider uses one-shot HTTP request/response (batch) calls, not WebSocket streaming. */
|
|
43
103
|
this.supportsStreaming = false;
|
|
44
104
|
this.fetchImpl = config.fetchImpl ?? fetch;
|
|
45
105
|
}
|
|
46
|
-
/**
|
|
106
|
+
/**
|
|
107
|
+
* Returns the human-readable provider name.
|
|
108
|
+
*
|
|
109
|
+
* @returns The display name string `'Deepgram (Batch)'`.
|
|
110
|
+
*
|
|
111
|
+
* @example
|
|
112
|
+
* ```ts
|
|
113
|
+
* provider.getProviderName(); // 'Deepgram (Batch)'
|
|
114
|
+
* ```
|
|
115
|
+
*/
|
|
47
116
|
getProviderName() {
|
|
48
117
|
return this.displayName;
|
|
49
118
|
}
|
|
50
119
|
/**
|
|
51
120
|
* Transcribes an audio buffer using the Deepgram pre-recorded API.
|
|
52
121
|
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
55
|
-
*
|
|
56
|
-
*
|
|
122
|
+
* Sends the raw audio bytes as the request body (not multipart form) with
|
|
123
|
+
* the appropriate Content-Type header. The response is parsed and normalized
|
|
124
|
+
* into a {@link SpeechTranscriptionResult}.
|
|
125
|
+
*
|
|
126
|
+
* @param audio - Raw audio data and associated metadata (buffer, MIME type,
|
|
127
|
+
* duration). The `data` buffer is sent directly as the request body.
|
|
128
|
+
* @param options - Optional transcription settings. Supports `model`,
|
|
129
|
+
* `language`, and `enableSpeakerDiarization` overrides.
|
|
130
|
+
* @returns A promise resolving to the normalized transcription result with
|
|
131
|
+
* text, confidence, timing, and optional speaker-attributed segments.
|
|
132
|
+
* @throws {Error} When the Deepgram API returns a non-2xx status code.
|
|
133
|
+
* The error message includes the HTTP status and response body for debugging.
|
|
134
|
+
*
|
|
135
|
+
* @example
|
|
136
|
+
* ```ts
|
|
137
|
+
* const result = await provider.transcribe(
|
|
138
|
+
* { data: wavBuffer, mimeType: 'audio/wav', durationSeconds: 5.2 },
|
|
139
|
+
* { language: 'fr-FR', enableSpeakerDiarization: true },
|
|
140
|
+
* );
|
|
141
|
+
* ```
|
|
57
142
|
*/
|
|
58
143
|
async transcribe(audio, options = {}) {
|
|
144
|
+
// Resolve model/language with fallback chain: options > config > defaults (diarization: options > false)
|
|
59
145
|
const model = options.model ?? this.config.model ?? 'nova-2';
|
|
60
146
|
const lang = options.language ?? this.config.language ?? 'en-US';
|
|
61
147
|
const diarize = options.enableSpeakerDiarization ?? false;
|
|
148
|
+
// Build the Deepgram REST API URL with query parameters.
|
|
149
|
+
// Punctuation is always enabled for better transcript readability.
|
|
62
150
|
const url = `https://api.deepgram.com/v1/listen` +
|
|
63
151
|
`?model=${encodeURIComponent(model)}` +
|
|
64
152
|
`&punctuate=true` +
|
|
65
153
|
`&diarize=${diarize}` +
|
|
66
154
|
`&language=${encodeURIComponent(lang)}`;
|
|
155
|
+
// Use the audio's actual MIME type so Deepgram can decode correctly.
|
|
156
|
+
// Deepgram supports wav, mp3, ogg, flac, webm, and many other formats.
|
|
67
157
|
const contentType = audio.mimeType ?? 'audio/wav';
|
|
68
158
|
const response = await this.fetchImpl(url, {
|
|
69
159
|
method: 'POST',
|
|
@@ -71,6 +161,8 @@ export class DeepgramBatchSTTProvider {
|
|
|
71
161
|
Authorization: `Token ${this.config.apiKey}`,
|
|
72
162
|
'Content-Type': contentType,
|
|
73
163
|
},
|
|
164
|
+
// The TypeScript source casts here (the cast is erased in this JS output) because SpeechAudioInput.data is typed as Buffer but
|
|
165
|
+
// fetch expects BodyInit (Blob | ArrayBuffer | string | etc.)
|
|
74
166
|
body: audio.data,
|
|
75
167
|
});
|
|
76
168
|
if (!response.ok) {
|
|
@@ -78,20 +170,23 @@ export class DeepgramBatchSTTProvider {
|
|
|
78
170
|
throw new Error(`Deepgram transcription failed (${response.status}): ${message}`);
|
|
79
171
|
}
|
|
80
172
|
const payload = (await response.json());
|
|
173
|
+
// Extract the first channel's first alternative — Deepgram always returns
|
|
174
|
+
// at least one channel with one alternative for valid audio input.
|
|
81
175
|
const firstAlternative = payload.results?.channels?.[0]?.alternatives?.[0];
|
|
82
176
|
const transcript = firstAlternative?.transcript ?? '';
|
|
83
177
|
const confidence = firstAlternative?.confidence;
|
|
84
178
|
const words = firstAlternative?.words ?? [];
|
|
179
|
+
// Prefer the API's reported duration over the client-provided estimate
|
|
85
180
|
const durationSeconds = payload.metadata?.duration ?? audio.durationSeconds;
|
|
86
181
|
return {
|
|
87
182
|
text: transcript,
|
|
88
183
|
language: lang,
|
|
89
184
|
durationSeconds,
|
|
90
185
|
confidence,
|
|
91
|
-
cost: 0,
|
|
186
|
+
cost: 0, // Cost tracking is handled at a higher layer
|
|
92
187
|
segments: words.length > 0 ? wordsToSegments(words) : undefined,
|
|
93
188
|
providerResponse: payload,
|
|
94
|
-
isFinal: true,
|
|
189
|
+
isFinal: true, // Batch API always returns final results
|
|
95
190
|
usage: {
|
|
96
191
|
durationMinutes: (durationSeconds ?? 0) / 60,
|
|
97
192
|
modelUsed: model,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"DeepgramBatchSTTProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/DeepgramBatchSTTProvider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"DeepgramBatchSTTProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/DeepgramBatchSTTProvider.ts"],"names":[],"mappings":"AAkGA;;;;;;;;;;;;;;;;GAgBG;AACH,SAAS,eAAe,CAAC,KAAqB;IAC5C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvB,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,SAAS,EAAE,CAAC,CAAC,KAAK;QAClB,OAAO,EAAE,CAAC,CAAC,GAAG;QACd,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,UAAU,EAAE,CAAC,CAAC,UAAU;aACzB;SACF;KACF,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AACH,MAAM,OAAO,wBAAwB;IAanC;;;;;;;;;;;;;OAaG;IACH,YAA6B,MAAsC;QAAtC,WAAM,GAAN,MAAM,CAAgC;QA1BnE,uEAAuE;QACvD,OAAE,GAAG,gBAAgB,CAAC;QAEtC,sDAAsD;QACtC,gBAAW,GAAG,kBAAkB,CAAC;QAEjD,6EAA6E;QAC7D,sBAAiB,GAAG,KAAK,CAAC;QAoBxC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,KAAK,CAAC;IAC7C,CAAC;IAED;;;;;;;;;OASG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,KAAK,CAAC,UAAU,CACd,KAAuB,EACvB,UAAsC,EAAE;QAExC,yEAAyE;QACzE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,QAAQ,CAAC;QAC7D,MAAM,IAAI,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,OAAO,CAAC;QACjE,MAAM,OAAO,GAAG,OAAO,CAAC,wBAAwB,IAAI,KAAK,CAAC;QAE1D,yDAAyD;QACzD,mEAAmE;QACnE,MAAM,GAAG,GACP,oCAAoC;YACpC,UAAU,kBAAkB,CAAC,KAAK,CAAC,EAAE;YACrC,iBAAiB;YACjB,YAAY,OAAO,EAAE;YACrB,aAAa,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC;QAE1C,qEAAqE;QACrE,uEAAuE;QACvE,MAAM,WAAW,GAAG,KAAK,CAAC,QAAQ,IAAI,WAAW,CAAC;QAElD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE;YACzC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,SAAS,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;gBAC5C,cAAc,EAAE,WAAW;aAC5B;YACD,mEAAmE;YACnE,8DAA8D;YAC9D,IAAI,EAAE,KAAK,CAAC,IAA2B;SACxC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,kCAAkC,QAAQ,CAAC,MAAM,MAAM,OAAO,EAAE,CAAC,CAAC;QACpF,CAAC;QAED,M
AAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAqB,CAAC;QAE5D,0EAA0E;QAC1E,mEAAmE;QACnE,MAAM,gBAAgB,GAAG,OAAO,CAAC,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3E,MAAM,UAAU,GAAG,gBAAgB,EAAE,UAAU,IAAI,EAAE,CAAC;QACtD,MAAM,UAAU,GAAG,gBAAgB,EAAE,UAAU,CAAC;QAChD,MAAM,KAAK,GAAG,gBAAgB,EAAE,KAAK,IAAI,EAAE,CAAC;QAE5C,uEAAuE;QACvE,MAAM,eAAe,GAAG,OAAO,CAAC,QAAQ,EAAE,QAAQ,IAAI,KAAK,CAAC,eAAe,CAAC;QAE5E,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;YACd,eAAe;YACf,UAAU;YACV,IAAI,EAAE,CAAC,EAAE,6CAA6C;YACtD,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;YAC/D,gBAAgB,EAAE,OAAO;YACzB,OAAO,EAAE,IAAI,EAAE,yCAAyC;YACxD,KAAK,EAAE;gBACL,eAAe,EAAE,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,EAAE;gBAC5C,SAAS,EAAE,KAAK;aACjB;SACF,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -1,20 +1,169 @@
|
|
|
1
1
|
import type { SpeechSynthesisOptions, SpeechSynthesisResult, SpeechVoice, TextToSpeechProvider } from '../types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Configuration for the {@link ElevenLabsTextToSpeechProvider}.
|
|
4
|
+
*
|
|
5
|
+
* @see {@link ElevenLabsTextToSpeechProvider} for usage examples
|
|
6
|
+
* @see https://docs.elevenlabs.io/api-reference/text-to-speech
|
|
7
|
+
*/
|
|
2
8
|
export interface ElevenLabsTextToSpeechProviderConfig {
|
|
9
|
+
/**
|
|
10
|
+
* ElevenLabs API key used for authentication.
|
|
11
|
+
* Sent as the `xi-api-key` header value (not Bearer-style auth).
|
|
12
|
+
*/
|
|
3
13
|
apiKey: string;
|
|
14
|
+
/**
|
|
15
|
+
* Base URL for the ElevenLabs API. Override for proxies or self-hosted instances.
|
|
16
|
+
* @default 'https://api.elevenlabs.io/v1'
|
|
17
|
+
*/
|
|
4
18
|
baseUrl?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Default voice ID. ElevenLabs uses opaque IDs (not human-readable names).
|
|
21
|
+
* @default 'EXAVITQu4vr4xnSDxMaL' (the "Sarah" voice)
|
|
22
|
+
*/
|
|
5
23
|
voiceId?: string;
|
|
24
|
+
/**
|
|
25
|
+
* Default model ID for synthesis.
|
|
26
|
+
* @default 'eleven_multilingual_v2'
|
|
27
|
+
*/
|
|
6
28
|
model?: string;
|
|
29
|
+
/**
|
|
30
|
+
* Custom fetch implementation for dependency injection in tests.
|
|
31
|
+
* @default globalThis.fetch
|
|
32
|
+
*/
|
|
7
33
|
fetchImpl?: typeof fetch;
|
|
8
34
|
}
|
|
35
|
+
/**
|
|
36
|
+
* Text-to-speech provider that uses the ElevenLabs TTS API.
|
|
37
|
+
*
|
|
38
|
+
* ## API Contract
|
|
39
|
+
*
|
|
40
|
+
* - **Endpoint:** `POST {baseUrl}/text-to-speech/{voiceId}`
|
|
41
|
+
* - **Authentication:** `xi-api-key: <apiKey>` header
|
|
42
|
+
* - **Content-Type:** `application/json`
|
|
43
|
+
* - **Accept:** `audio/mpeg` (MP3 response)
|
|
44
|
+
* - **Request body:** `{ text, model_id, voice_settings: { stability, similarity_boost, style, use_speaker_boost } }`
|
|
45
|
+
* - **Response:** Raw MP3 audio bytes
|
|
46
|
+
*
|
|
47
|
+
* ## Voice Settings
|
|
48
|
+
*
|
|
49
|
+
* ElevenLabs exposes fine-grained voice control via `voice_settings`:
|
|
50
|
+
* - **stability** (0.0–1.0) — Lower values = more expressive/variable, higher = more consistent
|
|
51
|
+
* - **similarity_boost** (0.0–1.0) — Higher values make output more similar to the original voice
|
|
52
|
+
* - **style** (0.0–1.0) — Style exaggeration (optional, only for v2+ models)
|
|
53
|
+
* - **use_speaker_boost** (boolean) — Enhances speaker similarity (default: true)
|
|
54
|
+
*
|
|
55
|
+
* These can be passed via `options.providerSpecificOptions`.
|
|
56
|
+
*
|
|
57
|
+
* ## Voice ID Resolution
|
|
58
|
+
*
|
|
59
|
+
* The voice ID is resolved with the following priority:
|
|
60
|
+
* 1. `options.voice` (per-call override)
|
|
61
|
+
* 2. `config.voiceId` (constructor default)
|
|
62
|
+
* 3. `options.providerSpecificOptions.voiceId` (legacy fallback path; consulted only when both of the above are null/undefined)
|
|
63
|
+
* 4. `'EXAVITQu4vr4xnSDxMaL'` (hardcoded fallback — the "Sarah" voice)
|
|
64
|
+
*
|
|
65
|
+
* ## Voice Listing
|
|
66
|
+
*
|
|
67
|
+
* {@link listAvailableVoices} fetches the user's voice library from the
|
|
68
|
+
* `/voices` endpoint and maps each entry to the normalized {@link SpeechVoice}
|
|
69
|
+
* shape. Returns an empty array on API errors (graceful degradation).
|
|
70
|
+
*
|
|
71
|
+
* @see {@link ElevenLabsTextToSpeechProviderConfig} for configuration options
|
|
72
|
+
*
|
|
73
|
+
* @example
|
|
74
|
+
* ```ts
|
|
75
|
+
* const provider = new ElevenLabsTextToSpeechProvider({
|
|
76
|
+
* apiKey: process.env.ELEVENLABS_API_KEY!,
|
|
77
|
+
* voiceId: 'pNInz6obpgDQGcFmaJgB', // "Adam"
|
|
78
|
+
* });
|
|
79
|
+
* const result = await provider.synthesize('Hello world', {
|
|
80
|
+
* providerSpecificOptions: { stability: 0.7, similarityBoost: 0.8 },
|
|
81
|
+
* });
|
|
82
|
+
* ```
|
|
83
|
+
*/
|
|
9
84
|
export declare class ElevenLabsTextToSpeechProvider implements TextToSpeechProvider {
|
|
10
85
|
private readonly config;
|
|
86
|
+
/** Unique provider identifier used for registration and resolution. */
|
|
11
87
|
readonly id = "elevenlabs";
|
|
88
|
+
/** Human-readable display name for UI and logging. */
|
|
12
89
|
readonly displayName = "ElevenLabs";
|
|
90
|
+
/**
|
|
91
|
+
* Streaming is supported — ElevenLabs offers a WebSocket streaming endpoint,
|
|
92
|
+
* and even the REST endpoint can be consumed as a stream.
|
|
93
|
+
*/
|
|
13
94
|
readonly supportsStreaming = true;
|
|
95
|
+
/** Fetch implementation — injected for testability, defaults to global fetch. */
|
|
14
96
|
private readonly fetchImpl;
|
|
97
|
+
/**
|
|
98
|
+
* Creates a new ElevenLabsTextToSpeechProvider.
|
|
99
|
+
*
|
|
100
|
+
* @param config - Provider configuration including API key and optional defaults.
|
|
101
|
+
*
|
|
102
|
+
* @example
|
|
103
|
+
* ```ts
|
|
104
|
+
* const provider = new ElevenLabsTextToSpeechProvider({
|
|
105
|
+
* apiKey: 'xi-xxxx',
|
|
106
|
+
* voiceId: 'pNInz6obpgDQGcFmaJgB',
|
|
107
|
+
* model: 'eleven_multilingual_v2',
|
|
108
|
+
* });
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
15
111
|
constructor(config: ElevenLabsTextToSpeechProviderConfig);
|
|
112
|
+
/**
|
|
113
|
+
* Returns the human-readable provider name.
|
|
114
|
+
*
|
|
115
|
+
* @returns The display name string `'ElevenLabs'`.
|
|
116
|
+
*
|
|
117
|
+
* @example
|
|
118
|
+
* ```ts
|
|
119
|
+
* provider.getProviderName(); // 'ElevenLabs'
|
|
120
|
+
* ```
|
|
121
|
+
*/
|
|
16
122
|
getProviderName(): string;
|
|
123
|
+
/**
|
|
124
|
+
* Synthesizes speech from text using the ElevenLabs TTS API.
|
|
125
|
+
*
|
|
126
|
+
* @param text - The text to convert to audio.
|
|
127
|
+
* @param options - Optional synthesis settings. Use `providerSpecificOptions`
|
|
128
|
+
* to control ElevenLabs-specific voice settings (stability, similarityBoost,
|
|
129
|
+
* style, useSpeakerBoost).
|
|
130
|
+
* @returns A promise resolving to the MP3 audio buffer and metadata.
|
|
131
|
+
* @throws {Error} When the ElevenLabs API returns a non-2xx status code.
|
|
132
|
+
* Common causes: invalid API key (401), voice not found (404),
|
|
133
|
+
* character limit exceeded (400), or rate limit (429).
|
|
134
|
+
*
|
|
135
|
+
* @example
|
|
136
|
+
* ```ts
|
|
137
|
+
* const result = await provider.synthesize('Hello there!', {
|
|
138
|
+
* voice: 'pNInz6obpgDQGcFmaJgB',
|
|
139
|
+
* providerSpecificOptions: {
|
|
140
|
+
* stability: 0.3, // More expressive
|
|
141
|
+
* similarityBoost: 0.9, // Closer to original voice
|
|
142
|
+
* style: 0.5, // Moderate style exaggeration
|
|
143
|
+
* },
|
|
144
|
+
* });
|
|
145
|
+
* ```
|
|
146
|
+
*/
|
|
17
147
|
synthesize(text: string, options?: SpeechSynthesisOptions): Promise<SpeechSynthesisResult>;
|
|
148
|
+
/**
|
|
149
|
+
* Fetches the user's voice library from the ElevenLabs API.
|
|
150
|
+
*
|
|
151
|
+
* Returns available voices mapped to the normalized {@link SpeechVoice} shape.
|
|
152
|
+
* Gracefully returns an empty array on API errors (e.g. network failure,
|
|
153
|
+
* invalid key) to avoid breaking voice selection UIs.
|
|
154
|
+
*
|
|
155
|
+
* The voice library includes both ElevenLabs' pre-made voices and any
|
|
156
|
+
* custom/cloned voices in the user's account.
|
|
157
|
+
*
|
|
158
|
+
* @returns A promise resolving to an array of available voices, or an empty
|
|
159
|
+
* array if the API call fails.
|
|
160
|
+
*
|
|
161
|
+
* @example
|
|
162
|
+
* ```ts
|
|
163
|
+
* const voices = await provider.listAvailableVoices();
|
|
164
|
+
* const rachel = voices.find(v => v.name === 'Rachel');
|
|
165
|
+
* ```
|
|
166
|
+
*/
|
|
18
167
|
listAvailableVoices(): Promise<SpeechVoice[]>;
|
|
19
168
|
}
|
|
20
169
|
//# sourceMappingURL=ElevenLabsTextToSpeechProvider.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ElevenLabsTextToSpeechProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/ElevenLabsTextToSpeechProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,sBAAsB,EACtB,qBAAqB,EACrB,WAAW,EACX,oBAAoB,EACrB,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,oCAAoC;IACnD,MAAM,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"ElevenLabsTextToSpeechProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/ElevenLabsTextToSpeechProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,sBAAsB,EACtB,qBAAqB,EACrB,WAAW,EACX,oBAAoB,EACrB,MAAM,aAAa,CAAC;AAErB;;;;;GAKG;AACH,MAAM,WAAW,oCAAoC;IACnD;;;OAGG;IACH,MAAM,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;CAC1B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgDG;AACH,qBAAa,8BAA+B,YAAW,oBAAoB;IA8B7D,OAAO,CAAC,QAAQ,CAAC,MAAM;IA7BnC,uEAAuE;IACvE,SAAgB,EAAE,gBAAgB;IAElC,sDAAsD;IACtD,SAAgB,WAAW,gBAAgB;IAE3C;;;OAGG;IACH,SAAgB,iBAAiB,QAAQ;IAEzC,iFAAiF;IACjF,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;IAEzC;;;;;;;;;;;;;OAaG;gBAC0B,MAAM,EAAE,oCAAoC;IAIzE;;;;;;;;;OASG;IACH,eAAe,IAAI,MAAM;IAIzB;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACG,UAAU,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,sBAA2B,GACnC,OAAO,CAAC,qBAAqB,CAAC;IAwEjC;;;;;;;;;;;;;;;;;;OAkBG;IACG,mBAAmB,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;CA6CpD"}
|
|
@@ -1,42 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text-to-speech provider that uses the ElevenLabs TTS API.
|
|
3
|
+
*
|
|
4
|
+
* ## API Contract
|
|
5
|
+
*
|
|
6
|
+
* - **Endpoint:** `POST {baseUrl}/text-to-speech/{voiceId}`
|
|
7
|
+
* - **Authentication:** `xi-api-key: <apiKey>` header
|
|
8
|
+
* - **Content-Type:** `application/json`
|
|
9
|
+
* - **Accept:** `audio/mpeg` (MP3 response)
|
|
10
|
+
* - **Request body:** `{ text, model_id, voice_settings: { stability, similarity_boost, style, use_speaker_boost } }`
|
|
11
|
+
* - **Response:** Raw MP3 audio bytes
|
|
12
|
+
*
|
|
13
|
+
* ## Voice Settings
|
|
14
|
+
*
|
|
15
|
+
* ElevenLabs exposes fine-grained voice control via `voice_settings`:
|
|
16
|
+
* - **stability** (0.0–1.0) — Lower values = more expressive/variable, higher = more consistent
|
|
17
|
+
* - **similarity_boost** (0.0–1.0) — Higher values make output more similar to the original voice
|
|
18
|
+
* - **style** (0.0–1.0) — Style exaggeration (optional, only for v2+ models)
|
|
19
|
+
* - **use_speaker_boost** (boolean) — Enhances speaker similarity (default: true)
|
|
20
|
+
*
|
|
21
|
+
* These can be passed via `options.providerSpecificOptions`.
|
|
22
|
+
*
|
|
23
|
+
* ## Voice ID Resolution
|
|
24
|
+
*
|
|
25
|
+
* The voice ID is resolved with the following priority:
|
|
26
|
+
* 1. `options.voice` (per-call override)
|
|
27
|
+
* 2. `config.voiceId` (constructor default)
|
|
28
|
+
* 3. `options.providerSpecificOptions.voiceId` (legacy fallback path; consulted only when both of the above are null/undefined)
|
|
29
|
+
* 4. `'EXAVITQu4vr4xnSDxMaL'` (hardcoded fallback — the "Sarah" voice)
|
|
30
|
+
*
|
|
31
|
+
* ## Voice Listing
|
|
32
|
+
*
|
|
33
|
+
* {@link listAvailableVoices} fetches the user's voice library from the
|
|
34
|
+
* `/voices` endpoint and maps each entry to the normalized {@link SpeechVoice}
|
|
35
|
+
* shape. Returns an empty array on API errors (graceful degradation).
|
|
36
|
+
*
|
|
37
|
+
* @see {@link ElevenLabsTextToSpeechProviderConfig} for configuration options
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* ```ts
|
|
41
|
+
* const provider = new ElevenLabsTextToSpeechProvider({
|
|
42
|
+
* apiKey: process.env.ELEVENLABS_API_KEY!,
|
|
43
|
+
* voiceId: 'pNInz6obpgDQGcFmaJgB', // "Adam"
|
|
44
|
+
* });
|
|
45
|
+
* const result = await provider.synthesize('Hello world', {
|
|
46
|
+
* providerSpecificOptions: { stability: 0.7, similarityBoost: 0.8 },
|
|
47
|
+
* });
|
|
48
|
+
* ```
|
|
49
|
+
*/
|
|
1
50
|
export class ElevenLabsTextToSpeechProvider {
|
|
51
|
+
/**
|
|
52
|
+
* Creates a new ElevenLabsTextToSpeechProvider.
|
|
53
|
+
*
|
|
54
|
+
* @param config - Provider configuration including API key and optional defaults.
|
|
55
|
+
*
|
|
56
|
+
* @example
|
|
57
|
+
* ```ts
|
|
58
|
+
* const provider = new ElevenLabsTextToSpeechProvider({
|
|
59
|
+
* apiKey: 'xi-xxxx',
|
|
60
|
+
* voiceId: 'pNInz6obpgDQGcFmaJgB',
|
|
61
|
+
* model: 'eleven_multilingual_v2',
|
|
62
|
+
* });
|
|
63
|
+
* ```
|
|
64
|
+
*/
|
|
2
65
|
constructor(config) {
|
|
3
66
|
this.config = config;
|
|
67
|
+
/** Unique provider identifier used for registration and resolution. */
|
|
4
68
|
this.id = 'elevenlabs';
|
|
69
|
+
/** Human-readable display name for UI and logging. */
|
|
5
70
|
this.displayName = 'ElevenLabs';
|
|
71
|
+
/**
|
|
72
|
+
* Streaming is supported — ElevenLabs offers a WebSocket streaming endpoint,
|
|
73
|
+
* and even the REST endpoint can be consumed as a stream.
|
|
74
|
+
*/
|
|
6
75
|
this.supportsStreaming = true;
|
|
7
76
|
this.fetchImpl = config.fetchImpl ?? fetch;
|
|
8
77
|
}
|
|
78
|
+
/**
|
|
79
|
+
* Returns the human-readable provider name.
|
|
80
|
+
*
|
|
81
|
+
* @returns The display name string `'ElevenLabs'`.
|
|
82
|
+
*
|
|
83
|
+
* @example
|
|
84
|
+
* ```ts
|
|
85
|
+
* provider.getProviderName(); // 'ElevenLabs'
|
|
86
|
+
* ```
|
|
87
|
+
*/
|
|
9
88
|
getProviderName() {
|
|
10
89
|
return this.displayName;
|
|
11
90
|
}
|
|
91
|
+
/**
|
|
92
|
+
* Synthesizes speech from text using the ElevenLabs TTS API.
|
|
93
|
+
*
|
|
94
|
+
* @param text - The text to convert to audio.
|
|
95
|
+
* @param options - Optional synthesis settings. Use `providerSpecificOptions`
|
|
96
|
+
* to control ElevenLabs-specific voice settings (stability, similarityBoost,
|
|
97
|
+
* style, useSpeakerBoost).
|
|
98
|
+
* @returns A promise resolving to the MP3 audio buffer and metadata.
|
|
99
|
+
* @throws {Error} When the ElevenLabs API returns a non-2xx status code.
|
|
100
|
+
* Common causes: invalid API key (401), voice not found (404),
|
|
101
|
+
* character limit exceeded (400), or rate limit (429).
|
|
102
|
+
*
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* const result = await provider.synthesize('Hello there!', {
|
|
106
|
+
* voice: 'pNInz6obpgDQGcFmaJgB',
|
|
107
|
+
* providerSpecificOptions: {
|
|
108
|
+
* stability: 0.3, // More expressive
|
|
109
|
+
* similarityBoost: 0.9, // Closer to original voice
|
|
110
|
+
* style: 0.5, // Moderate style exaggeration
|
|
111
|
+
* },
|
|
112
|
+
* });
|
|
113
|
+
* ```
|
|
114
|
+
*/
|
|
12
115
|
async synthesize(text, options = {}) {
|
|
116
|
+
// Voice ID resolution with 4-level fallback chain.
|
|
117
|
+
// The providerSpecificOptions.voiceId path exists for backwards compat.
|
|
13
118
|
const voiceId = options.voice ??
|
|
14
119
|
this.config.voiceId ??
|
|
15
120
|
(typeof options.providerSpecificOptions?.voiceId === 'string'
|
|
16
121
|
? options.providerSpecificOptions.voiceId
|
|
17
122
|
: undefined) ??
|
|
18
|
-
'EXAVITQu4vr4xnSDxMaL';
|
|
123
|
+
'EXAVITQu4vr4xnSDxMaL'; // Default "Sarah" voice
|
|
19
124
|
const model = options.model ?? this.config.model ?? 'eleven_multilingual_v2';
|
|
20
125
|
const response = await this.fetchImpl(`${this.config.baseUrl ?? 'https://api.elevenlabs.io/v1'}/text-to-speech/${voiceId}`, {
|
|
21
126
|
method: 'POST',
|
|
22
127
|
headers: {
|
|
128
|
+
// ElevenLabs uses its own header format instead of standard Authorization
|
|
23
129
|
'xi-api-key': this.config.apiKey,
|
|
24
130
|
'Content-Type': 'application/json',
|
|
131
|
+
// Request MP3 format in the response
|
|
25
132
|
Accept: 'audio/mpeg',
|
|
26
133
|
},
|
|
27
134
|
body: JSON.stringify({
|
|
28
135
|
text,
|
|
29
136
|
model_id: model,
|
|
30
137
|
voice_settings: {
|
|
138
|
+
// Extract provider-specific settings with sensible defaults.
|
|
139
|
+
// These defaults produce natural-sounding output for most voices.
|
|
31
140
|
stability: typeof options.providerSpecificOptions?.stability === 'number'
|
|
32
141
|
? options.providerSpecificOptions.stability
|
|
33
142
|
: 0.5,
|
|
34
143
|
similarity_boost: typeof options.providerSpecificOptions?.similarityBoost === 'number'
|
|
35
144
|
? options.providerSpecificOptions.similarityBoost
|
|
36
145
|
: 0.75,
|
|
146
|
+
// Style is only meaningful for v2+ models; omit if not specified
|
|
37
147
|
style: typeof options.providerSpecificOptions?.style === 'number'
|
|
38
148
|
? options.providerSpecificOptions.style
|
|
39
149
|
: undefined,
|
|
150
|
+
// Speaker boost enhances vocal clarity and similarity
|
|
40
151
|
use_speaker_boost: typeof options.providerSpecificOptions?.useSpeakerBoost === 'boolean'
|
|
41
152
|
? options.providerSpecificOptions.useSpeakerBoost
|
|
42
153
|
: true,
|
|
@@ -51,7 +162,7 @@ export class ElevenLabsTextToSpeechProvider {
|
|
|
51
162
|
return {
|
|
52
163
|
audioBuffer,
|
|
53
164
|
mimeType: 'audio/mpeg',
|
|
54
|
-
cost: 0,
|
|
165
|
+
cost: 0, // Cost tracking is handled at a higher layer
|
|
55
166
|
voiceUsed: voiceId,
|
|
56
167
|
providerName: this.displayName,
|
|
57
168
|
usage: {
|
|
@@ -60,6 +171,25 @@ export class ElevenLabsTextToSpeechProvider {
|
|
|
60
171
|
},
|
|
61
172
|
};
|
|
62
173
|
}
|
|
174
|
+
/**
|
|
175
|
+
* Fetches the user's voice library from the ElevenLabs API.
|
|
176
|
+
*
|
|
177
|
+
* Returns available voices mapped to the normalized {@link SpeechVoice} shape.
|
|
178
|
+
* Gracefully returns an empty array on API errors (e.g. network failure,
|
|
179
|
+
* invalid key) to avoid breaking voice selection UIs.
|
|
180
|
+
*
|
|
181
|
+
* The voice library includes both ElevenLabs' pre-made voices and any
|
|
182
|
+
* custom/cloned voices in the user's account.
|
|
183
|
+
*
|
|
184
|
+
* @returns A promise resolving to an array of available voices, or an empty
|
|
185
|
+
* array if the API call fails.
|
|
186
|
+
*
|
|
187
|
+
* @example
|
|
188
|
+
* ```ts
|
|
189
|
+
* const voices = await provider.listAvailableVoices();
|
|
190
|
+
* const rachel = voices.find(v => v.name === 'Rachel');
|
|
191
|
+
* ```
|
|
192
|
+
*/
|
|
63
193
|
async listAvailableVoices() {
|
|
64
194
|
const response = await this.fetchImpl(`${this.config.baseUrl ?? 'https://api.elevenlabs.io/v1'}/voices`, {
|
|
65
195
|
method: 'GET',
|
|
@@ -67,6 +197,9 @@ export class ElevenLabsTextToSpeechProvider {
|
|
|
67
197
|
'xi-api-key': this.config.apiKey,
|
|
68
198
|
},
|
|
69
199
|
});
|
|
200
|
+
// Graceful degradation: return empty list on API failure rather than
|
|
201
|
+
// throwing, since voice listing is typically used for UI population
|
|
202
|
+
// and should not block core functionality.
|
|
70
203
|
if (!response.ok) {
|
|
71
204
|
return [];
|
|
72
205
|
}
|
|
@@ -74,6 +207,7 @@ export class ElevenLabsTextToSpeechProvider {
|
|
|
74
207
|
return (payload.voices ?? [])
|
|
75
208
|
.filter((voice) => typeof voice === 'object' && voice !== null)
|
|
76
209
|
.map((voice) => {
|
|
210
|
+
// Extract labels object for accent/language metadata
|
|
77
211
|
const labels = typeof voice.labels === 'object' && voice.labels !== null
|
|
78
212
|
? voice.labels
|
|
79
213
|
: {};
|
|
@@ -89,6 +223,7 @@ export class ElevenLabsTextToSpeechProvider {
|
|
|
89
223
|
provider: this.id,
|
|
90
224
|
};
|
|
91
225
|
})
|
|
226
|
+
// Filter out entries with empty IDs (malformed API response entries)
|
|
92
227
|
.filter((voice) => voice.id);
|
|
93
228
|
}
|
|
94
229
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ElevenLabsTextToSpeechProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/ElevenLabsTextToSpeechProvider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"ElevenLabsTextToSpeechProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/ElevenLabsTextToSpeechProvider.ts"],"names":[],"mappings":"AA6CA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgDG;AACH,MAAM,OAAO,8BAA8B;IAgBzC;;;;;;;;;;;;;OAaG;IACH,YAA6B,MAA4C;QAA5C,WAAM,GAAN,MAAM,CAAsC;QA7BzE,uEAAuE;QACvD,OAAE,GAAG,YAAY,CAAC;QAElC,sDAAsD;QACtC,gBAAW,GAAG,YAAY,CAAC;QAE3C;;;WAGG;QACa,sBAAiB,GAAG,IAAI,CAAC;QAoBvC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,KAAK,CAAC;IAC7C,CAAC;IAED;;;;;;;;;OASG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,KAAK,CAAC,UAAU,CACd,IAAY,EACZ,UAAkC,EAAE;QAEpC,mDAAmD;QACnD,wEAAwE;QACxE,MAAM,OAAO,GACX,OAAO,CAAC,KAAK;YACb,IAAI,CAAC,MAAM,CAAC,OAAO;YACnB,CAAC,OAAO,OAAO,CAAC,uBAAuB,EAAE,OAAO,KAAK,QAAQ;gBAC3D,CAAC,CAAC,OAAO,CAAC,uBAAuB,CAAC,OAAO;gBACzC,CAAC,CAAC,SAAS,CAAC;YACd,sBAAsB,CAAC,CAAC,wBAAwB;QAElD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,KAAK,IAAI,wBAAwB,CAAC;QAE7E,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CACnC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,8BAA8B,mBAAmB,OAAO,EAAE,EACpF;YACE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,0EAA0E;gBAC1E,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBAChC,cAAc,EAAE,kBAAkB;gBAClC,qCAAqC;gBACrC,MAAM,EAAE,YAAY;aACrB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,IAAI;gBACJ,QAAQ,EAAE,KAAK;gBACf,cAAc,EAAE;oBACd,6DAA6D;oBAC7D,kEAAkE;oBAClE,SAAS,EACP,OAAO,OAAO,CAAC,uBAAuB,EAAE,SAAS,KAAK,QAAQ;wBAC5D,CAAC,CAAC,OAAO,CAAC,uBAAuB,CAAC,SAAS;wBAC3C,CAAC,CAAC,GAAG;oBACT,gBAAgB,EACd,OAAO,OAAO,CAAC,uBAAuB,EAAE,eAAe,KAAK,QAAQ;wBAClE,CAAC,CAAC,OAAO,CAAC,uBAAuB,CAAC,eAAe;wBACjD,CAAC,CAAC,IAAI;oBACV,iEAAiE;oBACjE,KAAK,EACH,OAAO,OAAO,CAAC,uBAAuB,EAAE,KAAK,KAAK,QAAQ;wBACxD,CAAC,CAAC,OAAO,CAAC,uBAAuB,CAAC,KAAK;wBACvC,CAAC,CAAC,SAAS;oBACf,sDAAsD;oBACtD,iBAAiB,EACf,OAAO,OAAO,CAAC,uBAAuB,EAAE,eAAe,KAAK,SAAS;wBACnE,CAAC,CAAC,OAAO,CAAC,uBAAuB,CAAC,eAAe;wBACjD,CAAC,CAAC,IAAI;iBACX;aACF,CAAC;SACH,CACF,CAAC;QAEF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC
,IAAI,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,gCAAgC,QAAQ,CAAC,MAAM,MAAM,OAAO,EAAE,CAAC,CAAC;QAClF,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;QAC9D,OAAO;YACL,WAAW;YACX,QAAQ,EAAE,YAAY;YACtB,IAAI,EAAE,CAAC,EAAE,6CAA6C;YACtD,SAAS,EAAE,OAAO;YAClB,YAAY,EAAE,IAAI,CAAC,WAAW;YAC9B,KAAK,EAAE;gBACL,UAAU,EAAE,IAAI,CAAC,MAAM;gBACvB,SAAS,EAAE,KAAK;aACjB;SACF,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;OAkBG;IACH,KAAK,CAAC,mBAAmB;QACvB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CACnC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,8BAA8B,SAAS,EACjE;YACE,MAAM,EAAE,KAAK;YACb,OAAO,EAAE;gBACP,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;aACjC;SACF,CACF,CAAC;QAEF,qEAAqE;QACrE,oEAAoE;QACpE,2CAA2C;QAC3C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAgD,CAAC;QACvF,OAAO,CAAC,OAAO,CAAC,MAAM,IAAI,EAAE,CAAC;aAC1B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC;aAC9D,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;YACb,qDAAqD;YACrD,MAAM,MAAM,GACV,OAAO,KAAK,CAAC,MAAM,KAAK,QAAQ,IAAI,KAAK,CAAC,MAAM,KAAK,IAAI;gBACvD,CAAC,CAAE,KAAK,CAAC,MAAkC;gBAC3C,CAAC,CAAC,EAAE,CAAC;YAET,OAAO;gBACL,EAAE,EAAE,OAAO,KAAK,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE;gBAC5D,IAAI,EAAE,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;gBAC7D,IAAI,EACF,OAAO,MAAM,CAAC,MAAM,KAAK,QAAQ;oBAC/B,CAAC,CAAC,MAAM,CAAC,MAAM;oBACf,CAAC,CAAC,OAAO,MAAM,CAAC,QAAQ,KAAK,QAAQ;wBACrC,CAAC,CAAC,MAAM,CAAC,QAAQ;wBACjB,CAAC,CAAC,SAAS;gBACf,WAAW,EACT,OAAO,KAAK,CAAC,WAAW,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS;gBACvE,QAAQ,EAAE,IAAI,CAAC,EAAE;aAClB,CAAC;QACJ,CAAC,CAAC;YACF,qEAAqE;aACpE,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACjC,CAAC;CACF"}
|