@livekit/agents-plugin-elevenlabs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -0
- package/api-extractor.json +20 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/models.d.ts +2 -0
- package/dist/models.d.ts.map +1 -0
- package/dist/models.js +5 -0
- package/dist/models.js.map +1 -0
- package/dist/tts.d.ts +51 -0
- package/dist/tts.d.ts.map +1 -0
- package/dist/tts.js +251 -0
- package/dist/tts.js.map +1 -0
- package/package.json +27 -0
- package/src/index.ts +5 -0
- package/src/models.ts +9 -0
- package/src/tts.ts +331 -0
- package/tsconfig.json +10 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config file for API Extractor. For more info, please visit: https://api-extractor.com
|
|
3
|
+
*/
|
|
4
|
+
{
|
|
5
|
+
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Optionally specifies another JSON config file that this file extends from. This provides a way for
|
|
9
|
+
* standard settings to be shared across multiple projects.
|
|
10
|
+
*
|
|
11
|
+
* If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains
|
|
12
|
+
* the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be
|
|
13
|
+
* resolved using NodeJS require().
|
|
14
|
+
*
|
|
15
|
+
* SUPPORTED TOKENS: none
|
|
16
|
+
* DEFAULT VALUE: ""
|
|
17
|
+
*/
|
|
18
|
+
"extends": "../../api-extractor-shared.json",
|
|
19
|
+
"mainEntryPointFilePath": "./dist/index.d.ts"
|
|
20
|
+
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAIA,cAAc,UAAU,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AAEtC,cAAc,UAAU,CAAC"}
|
package/dist/models.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GACjB,uBAAuB,GACvB,wBAAwB,GACxB,wBAAwB,GACxB,iBAAiB,CAAC"}
|
package/dist/models.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC"}
|
package/dist/tts.d.ts
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { tts } from '@livekit/agents';
|
|
2
|
+
import { WebSocket } from 'ws';
|
|
3
|
+
import type { TTSModels } from './models.js';
|
|
4
|
+
/**
 * Describes an ElevenLabs voice as used by this plugin.
 *
 * `id` is the value interpolated into the text-to-speech request path;
 * `settings` carries optional per-voice tuning values.
 */
type Voice = {
    /** Voice identifier used in the `/text-to-speech/<id>/...` request path. */
    id: string;
    /** Human-readable voice name. */
    name: string;
    /** Voice category string as reported by the service (e.g. `'premade'`). */
    category: string;
    /** Optional tuning values for this voice. */
    settings?: VoiceSettings;
};
|
|
10
|
+
/**
 * Tuning values attached to a voice and forwarded as `voice_settings`
 * in synthesis requests.
 */
type VoiceSettings = {
    /** Stability value for the voice. */
    stability: number;
    /** Similarity boost value for the voice. */
    similarity_boost: number;
    /** Optional style value for the voice. */
    style?: number;
    /** Whether speaker boost is enabled. */
    use_speaker_boost: boolean;
};
|
|
16
|
+
/**
 * Fully-resolved configuration held by a `TTS` instance and handed to every
 * stream it creates.
 */
type TTSOptions = {
    /** API key sent in the `xi-api-key` request header. */
    apiKey: string;
    /** Voice used for synthesis. */
    voice: Voice;
    /** Model identifier sent as `model_id`. */
    modelID: TTSModels;
    /** Base URL that request paths are appended to. */
    baseURL: string;
    /** Sample rate, in Hz, assigned to produced audio frames. */
    sampleRate: number;
    /** Value sent as the `optimize_streaming_latency` query parameter. */
    latency: number;
};
|
|
24
|
+
/**
 * ElevenLabs text-to-speech entry point.
 *
 * Holds the resolved configuration and creates one-shot (`synthesize`) or
 * incremental (`stream`) synthesis streams from it.
 */
export declare class TTS extends tts.TTS {
    /** Resolved options shared with the streams created by this instance. */
    config: TTSOptions;
    /**
     * @param voice - Voice to synthesize with; a built-in default is used when omitted.
     * @param modelID - Model identifier.
     * @param apiKey - API key; the implementation falls back to `$ELEVEN_API_KEY`.
     * @param baseURL - Override for the API base URL.
     * @param sampleRate - Sample rate, in Hz, of produced audio.
     * @param latency - Value for the `optimize_streaming_latency` request parameter.
     * @throws Error when no API key is available.
     */
    constructor(voice?: Voice, modelID?: TTSModels, apiKey?: string, baseURL?: string, sampleRate?: number, latency?: number);
    /** Fetches the voices available to the configured account. */
    listVoices(): Promise<Voice[]>;
    /** Creates a one-shot stream that synthesizes `text` in a single request. */
    synthesize(text: string): Promise<tts.ChunkedStream>;
    /** Creates an incremental stream that accepts text through `pushText`. */
    stream(): tts.SynthesizeStream;
}
|
|
31
|
+
/**
 * Incremental synthesis stream backed by a WebSocket connection.
 *
 * Text is buffered with `pushText`, released to the outgoing `queue` at
 * punctuation/whitespace boundaries, and resulting events are collected in
 * `eventQueue` for retrieval through `next`.
 */
export declare class SynthesizeStream extends tts.SynthesizeStream {
    /** When true, `pushText` refuses further input. */
    closed: boolean;
    /** Options inherited from the creating `TTS` instance. */
    config: TTSOptions;
    /** Text accumulated since the last segment was queued. */
    text: string;
    /** Handle on the background connection loop. */
    task: {
        /** Promise associated with the background `run` loop. */
        run: Promise<void>;
        /** Cancellation hook for the background loop. */
        cancel: () => void;
    };
    /** Text segments waiting to be sent to the service. */
    queue: string[];
    /** Synthesis events waiting to be consumed; `undefined` is an end marker. */
    eventQueue: (tts.SynthesisEvent | undefined)[];
    /** @param config - Resolved options to synthesize with. */
    constructor(config: TTSOptions);
    /** URL of the streaming-input endpoint for the configured voice and model. */
    get streamURL(): string;
    /**
     * Appends `token` to the buffered text and queues the buffer when the token
     * ends in a splitter character. Empty or missing tokens are ignored.
     * @throws Error when the stream is closed.
     */
    pushText(token?: string | undefined): void;
    /**
     * Connection loop that sends queued text to the service.
     * @param maxRetry - Failed connection attempts tolerated before the loop throws.
     */
    run(maxRetry: number): Promise<void>;
    /** Consumes messages from `ws` and enqueues an audio event for each audio payload. */
    listenTask(ws: WebSocket): Promise<void>;
    /** Queues any buffered text followed by the empty-string end-of-stream marker. */
    flush(): void;
    /** Pops the next pending event; reports `done` when none is available. */
    next(): IteratorResult<tts.SynthesisEvent>;
    /**
     * Waits for the background task and then enqueues the end marker.
     * @param wait - Not yet supported; the implementation logs a warning when true.
     */
    close(wait: boolean): Promise<void>;
}
|
|
50
|
+
export {};
|
|
51
|
+
//# sourceMappingURL=tts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAAO,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAG3C,OAAO,EAAgB,SAAS,EAAE,MAAM,IAAI,CAAC;AAC7C,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAE7C,KAAK,KAAK,GAAG;IACX,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B,CAAC;AAEF,KAAK,aAAa,GAAG;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iBAAiB,EAAE,OAAO,CAAC;CAC5B,CAAC;AAkBF,KAAK,UAAU,GAAG;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,EAAE,SAAS,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,MAAM,EAAE,UAAU,CAAC;gBAGjB,KAAK,QAAgB,EACrB,OAAO,GAAE,SAAoC,EAC7C,MAAM,CAAC,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,MAAM,EAChB,UAAU,SAAQ,EAClB,OAAO,SAAI;IAoBP,UAAU,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;IAuB9B,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC;IAI1D,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;IACxD,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,UAAU,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE;QACJ,GAAG,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;QACnB,MAAM,EAAE,MAAM,IAAI,CAAC;KACpB,CAAC;IACF,KAAK,EAAE,MAAM,EAAE,CAAM;IACrB,UAAU,EAAE,CAAC,GAAG,CAAC,cAAc,GAAG,SAAS,CAAC,EAAE,CAAM;gBAExC,MAAM,EAAE,UAAU;IAa9B,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI;IAYpC,GAAG,CAAC,QAAQ,EAAE,MAAM;IAgEpB,UAAU,CAAC,EAAE,EAAE,SAAS;IA2B9B,KAAK;IAML,IAAI,IAAI,cAAc,CAAC,GAAG,CAAC,cAAc,CAAC;IASpC,KAAK,CAAC,IAAI,EAAE,OAAO;CAW1B"}
|
package/dist/tts.js
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { log, tts } from '@livekit/agents';
|
|
5
|
+
import { AudioFrame } from '@livekit/rtc-node';
|
|
6
|
+
import { URL } from 'url';
|
|
7
|
+
import { WebSocket } from 'ws';
|
|
8
|
+
/**
 * Voice used when the `TTS` constructor is called without one:
 * the premade "Bella" voice together with its tuning values.
 */
const DEFAULT_VOICE = {
  id: 'EXAVITQu4vr4xnSDxMaL',
  name: 'Bella',
  category: 'premade',
  // Tuning values forwarded as `voice_settings` in synthesis requests.
  settings: {
    stability: 0.71,
    similarity_boost: 0.5,
    style: 0.0,
    use_speaker_boost: true,
  },
};
|
|
19
|
+
/** Base URL used when the caller does not supply one. */
const API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1';

/** Name of the HTTP header that carries the API key. */
const AUTHORIZATION_HEADER = 'xi-api-key';

/** Text value that marks the end of a streamed segment (an empty string). */
const STREAM_EOS = '';
|
|
22
|
+
/**
 * ElevenLabs text-to-speech entry point.
 *
 * Resolves the configuration once and hands it to the one-shot
 * (`synthesize`) and incremental (`stream`) stream implementations.
 */
export class TTS extends tts.TTS {
  /** Resolved options shared with every stream created by this instance. */
  config;

  /**
   * @param voice - Voice to synthesize with (defaults to `DEFAULT_VOICE`).
   * @param modelID - Model identifier sent to the service.
   * @param apiKey - API key; falls back to `$ELEVEN_API_KEY` when empty or omitted.
   * @param baseURL - Base URL override; falls back to `API_BASE_URL_V1`.
   * @param sampleRate - Sample rate, in Hz, of the produced audio.
   * @param latency - Value for the `optimize_streaming_latency` request parameter.
   * @throws Error when neither the argument nor the environment provides a non-empty key.
   */
  constructor(
    voice = DEFAULT_VOICE,
    modelID = 'eleven_multilingual_v2',
    apiKey,
    baseURL,
    sampleRate = 24000,
    latency = 2,
  ) {
    // NOTE(review): the meaning of this flag is defined by tts.TTS (presumably
    // "streaming supported") — confirm against the base class.
    super(true);

    // A falsy check (rather than `=== undefined`) also rejects an empty
    // ELEVEN_API_KEY, which would otherwise be sent as a blank header.
    const resolvedKey = apiKey || process.env.ELEVEN_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',
      );
    }

    this.config = {
      voice,
      modelID,
      apiKey: resolvedKey,
      baseURL: baseURL || API_BASE_URL_V1,
      sampleRate,
      latency,
    };
  }

  /**
   * Fetches the voices available to the configured account.
   *
   * @returns One entry per voice; `settings` is left undefined.
   * @throws Error when the request fails or the payload has no `voices` array.
   */
  async listVoices() {
    const response = await fetch(`${this.config.baseURL}/voices`, {
      headers: {
        [AUTHORIZATION_HEADER]: this.config.apiKey,
      },
    });
    if (!response.ok) {
      throw new Error(
        `failed to list ElevenLabs voices: HTTP ${response.status} ${response.statusText}`,
      );
    }

    const payload = await response.json();
    if (!Array.isArray(payload?.voices)) {
      throw new Error('failed to list ElevenLabs voices: response has no "voices" array');
    }

    return payload.voices.map((voice) => ({
      id: voice.voice_id,
      name: voice.name,
      category: voice.category,
      settings: undefined,
    }));
  }

  /** Creates a one-shot stream that synthesizes `text` in a single request. */
  async synthesize(text) {
    return new ChunkedStream(text, this.config);
  }

  /** Creates an incremental stream that accepts text through `pushText`. */
  stream() {
    return new SynthesizeStream(this.config);
  }
}
|
|
66
|
+
/**
 * Incremental synthesis stream backed by the ElevenLabs WebSocket API.
 *
 * Text pushed with `pushText` is buffered and released to `queue` at
 * punctuation/whitespace boundaries. A background loop (`run`) sends queued
 * text over a WebSocket, while `listenTask` turns incoming audio into
 * events on `eventQueue`, which callers drain with `next`.
 */
export class SynthesizeStream extends tts.SynthesizeStream {
  /** When true, no more text is accepted and the background loop stops. */
  closed;
  /** Options inherited from the creating `TTS` instance. */
  config;
  /** Text accumulated since the last segment was queued. */
  text;
  /** Handle on the background loop: its promise and a cancellation hook. */
  task;
  /** Text segments waiting to be sent. */
  queue = [];
  /** Events waiting to be consumed; `undefined` marks the end. */
  eventQueue = [];

  /** @param config - Resolved options to synthesize with. */
  constructor(config) {
    super();
    this.config = config;
    this.closed = false;
    this.text = '';

    // Expose the loop's own promise so that awaiting it (see `close`) settles
    // when the loop ends; wrapping it in an executor that never resolves would
    // make `close` hang forever.
    const run = this.run(32);
    // Mark a failure as observed so it is reported rather than becoming an
    // unhandled rejection; `close` still sees the rejection on `task.run`.
    run.catch((e) => log.warn(`ElevenLabs synthesis stream stopped: ${e}`));
    this.task = {
      run,
      cancel: () => {
        this.closed = true;
      },
    };
  }

  /**
   * URL of the streaming-input endpoint. `output_format` requests raw PCM at
   * the configured sample rate, matching how `listenTask` decodes the audio.
   */
  get streamURL() {
    return `${this.config.baseURL}/text-to-speech/${this.config.voice.id}/stream-input?model_id=${this.config.modelID}&output_format=pcm_${this.config.sampleRate}&optimize_streaming_latency=${this.config.latency}`;
  }

  /**
   * Appends `token` to the buffered text and queues the buffer when the token
   * ends in a splitter character. Empty or missing tokens are ignored.
   *
   * @throws Error when the stream is closed.
   */
  pushText(token) {
    if (this.closed)
      throw new Error('cannot push to a closed stream');
    if (!token || token.length === 0)
      return;
    const splitters = '.,?!;:—-()[]} ';
    this.text += token;
    if (splitters.includes(token[token.length - 1])) {
      this.queue.push(this.text);
      this.text = '';
    }
  }

  /**
   * Background loop: connects when text is pending, sends queued text and
   * waits for the remaining audio once the end-of-stream marker was sent.
   *
   * @param maxRetry - Consecutive failed connection attempts tolerated before throwing.
   * @throws Error when the connection cannot be established after `maxRetry` retries.
   */
  async run(maxRetry) {
    const idlePollMs = 10;
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    // Kept across connections so text that failed to send is not lost on reconnect.
    const retryQueue = [];
    let retries = 0;

    try {
      while (!this.closed) {
        // Do not hold an idle connection open (the service closes those);
        // yield to the event loop until there is something to send.
        if (retryQueue.length === 0 && this.queue.length === 0) {
          await sleep(idlePollMs);
          continue;
        }

        const url = new URL(this.streamURL);
        url.protocol = url.protocol.replace('http', 'ws');
        const ws = new WebSocket(url, {
          headers: { [AUTHORIZATION_HEADER]: this.config.apiKey },
        });

        try {
          await new Promise((resolve, reject) => {
            ws.on('open', resolve);
            // Stays attached for the socket's lifetime so later errors never
            // surface as an unhandled 'error' event.
            ws.on('error', (error) => reject(error));
            ws.on('close', (code) => reject(new Error(`WebSocket returned ${code}`)));
          });
          retries = 0;

          // Opening message: only the tuning values belong in `voice_settings`.
          ws.send(JSON.stringify({ text: ' ', voice_settings: this.config.voice.settings }));
          let started = false;
          const listener = this.listenTask(ws);

          // Check the socket state *before* dequeuing so no text is consumed
          // for a connection that can no longer carry it.
          while (!this.closed && ws.readyState === ws.OPEN) {
            const text = retryQueue.length > 0 ? retryQueue.shift() : this.queue.shift();
            if (text === undefined) {
              await sleep(idlePollMs);
              continue;
            }

            if (!started) {
              this.eventQueue.push(new tts.SynthesisEvent(tts.SynthesisEventType.STARTED));
              started = true;
            }

            try {
              ws.send(JSON.stringify({ text, try_trigger_generation: true }));
            } catch (e) {
              // XI closes idle connections after a while; resend on the next one.
              retryQueue.unshift(text);
              break;
            }

            if (text === STREAM_EOS) {
              // Resolves on the final message or when the socket closes.
              await listener;
              this.eventQueue.push(new tts.SynthesisEvent(tts.SynthesisEventType.FINISHED));
              break;
            }
          }
        } catch (e) {
          if (retries >= maxRetry) {
            throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);
          }
          const delay = Math.min(retries * 5, 5);
          retries++;
          log.warn(`failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`);
          await sleep(delay * 1000);
        } finally {
          // Never leave a socket behind, whatever path ended this connection.
          if (ws.readyState !== ws.CLOSED) {
            ws.close();
          }
        }
      }
    } finally {
      this.closed = true;
    }
  }

  /**
   * Consumes messages from `ws`, enqueueing an AUDIO event for every audio
   * payload. Resolves when the service reports the final message or the
   * socket closes; it never rejects.
   */
  async listenTask(ws) {
    await new Promise((resolve) => {
      const finish = () => {
        ws.off('message', onMessage);
        ws.off('close', finish);
        resolve();
      };
      const onMessage = (raw) => {
        let json;
        try {
          json = JSON.parse(raw.toString());
        } catch (e) {
          log.warn(`ignoring malformed ElevenLabs message: ${e}`);
          return;
        }
        if (json === null || typeof json !== 'object') {
          return;
        }

        // The final message may carry `audio: null`, so require a real string.
        if (typeof json.audio === 'string' && json.audio.length > 0) {
          const bytes = Buffer.from(json.audio, 'base64');
          // Reinterpret byte pairs as little-endian 16-bit samples; passing the
          // Buffer straight to a typed-array constructor would copy one byte
          // per element instead.
          const sampleCount = Math.trunc(bytes.length / 2);
          const samples = new Uint16Array(sampleCount);
          for (let i = 0; i < sampleCount; i++) {
            samples[i] = bytes.readUInt16LE(i * 2);
          }
          const audioFrame = new AudioFrame(samples, this.config.sampleRate, 1, sampleCount);
          this.eventQueue.push(
            new tts.SynthesisEvent(tts.SynthesisEventType.AUDIO, { text: '', data: audioFrame }),
          );
        }

        if (json.isFinal) {
          finish();
        }
      };
      // One listener pair for the socket's lifetime (no per-message re-registration).
      ws.on('message', onMessage);
      ws.on('close', finish);
    });
  }

  /** Queues any buffered text followed by the end-of-stream marker. */
  flush() {
    this.queue.push(this.text + ' ');
    this.text = '';
    this.queue.push('');
  }

  /** Pops the next pending event; reports `done` when none is available. */
  next() {
    const event = this.eventQueue.shift();
    if (event) {
      return { done: false, value: event };
    }
    else {
      return { done: true, value: undefined };
    }
  }

  /**
   * Stops the background loop, waits for it to end and enqueues the end marker.
   *
   * @param wait - Not yet supported; a warning is logged when true.
   */
  async close(wait) {
    if (wait) {
      log.warn('wait is not yet supported for ElevenLabs TTS');
    }
    // Signal the loop to exit; without this the awaited task would never settle.
    this.closed = true;
    try {
      await this.task.run;
    }
    finally {
      this.eventQueue.push(undefined);
    }
  }
}
|
|
203
|
+
/**
 * One-shot synthesis stream: sends the whole text in a single HTTP request
 * and yields the resulting audio as one frame followed by the end marker.
 */
class ChunkedStream extends tts.ChunkedStream {
  /** Options inherited from the creating `TTS` instance. */
  config;
  /** Text to synthesize. */
  text;
  /** Synthesized audio waiting to be consumed; `undefined` marks the end. */
  queue = [];
  /** Memoized request, so the service is called once however often `next` runs. */
  pending = undefined;

  /**
   * @param text - Text to synthesize.
   * @param config - Resolved options to synthesize with.
   */
  constructor(text, config) {
    super();
    this.config = config;
    this.text = text;
  }

  /** Returns the synthesized audio on the first call and `done` afterwards. */
  async next() {
    await this.run();
    const audio = this.queue.shift();
    if (audio) {
      return { done: false, value: audio };
    }
    else {
      return { done: true, value: undefined };
    }
  }

  /** Enqueues the end marker. */
  async close() {
    this.queue.push(undefined);
  }

  /**
   * Ensures the synthesis request has been issued and has completed.
   * Repeated calls share the same request instead of re-synthesizing.
   */
  async run() {
    if (!this.pending) {
      this.pending = this.fetchAudio();
    }
    await this.pending;
  }

  /**
   * Performs the HTTP request and enqueues the audio frame followed by the
   * end marker. Failures are logged and end the stream without audio.
   */
  async fetchAudio() {
    try {
      const voice = this.config.voice;
      const url = new URL(`${this.config.baseURL}/text-to-speech/${voice.id}/stream`);
      url.searchParams.append('output_format', 'pcm_' + this.config.sampleRate);
      url.searchParams.append('optimize_streaming_latency', this.config.latency.toString());

      const response = await fetch(url.toString(), {
        method: 'POST',
        headers: {
          [AUTHORIZATION_HEADER]: this.config.apiKey,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          text: this.text,
          model_id: this.config.modelID,
          voice_settings: this.config.voice.settings || undefined,
        }),
      });
      // An error response carries a JSON body, not PCM; never decode it as audio.
      if (!response.ok) {
        throw new Error(`HTTP ${response.status} ${response.statusText}`);
      }

      const audio = await response.arrayBuffer();
      // View whole 16-bit samples only; a trailing odd byte is dropped.
      const sampleCount = Math.trunc(audio.byteLength / 2);
      const samples = new Uint16Array(audio, 0, sampleCount);
      this.queue.push({
        text: this.text,
        data: new AudioFrame(samples, this.config.sampleRate, 1, sampleCount),
      });
    } catch (e) {
      log.warn(`ElevenLabs synthesis failed: ${e}`);
    } finally {
      this.queue.push(undefined);
    }
  }
}
|
|
251
|
+
//# sourceMappingURL=tts.js.map
|
package/dist/tts.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tts.js","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC;AAC1B,OAAO,EAAgB,SAAS,EAAE,MAAM,IAAI,CAAC;AAiB7C,MAAM,aAAa,GAAU;IAC3B,EAAE,EAAE,sBAAsB;IAC1B,IAAI,EAAE,OAAO;IACb,QAAQ,EAAE,SAAS;IACnB,QAAQ,EAAE;QACR,SAAS,EAAE,IAAI;QACf,gBAAgB,EAAE,GAAG;QACrB,KAAK,EAAE,GAAG;QACV,iBAAiB,EAAE,IAAI;KACxB;CACF,CAAC;AAEF,MAAM,eAAe,GAAG,8BAA8B,CAAC;AACvD,MAAM,oBAAoB,GAAG,YAAY,CAAC;AAC1C,MAAM,UAAU,GAAG,EAAE,CAAC;AAWtB,MAAM,OAAO,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,MAAM,CAAa;IAEnB,YACE,KAAK,GAAG,aAAa,EACrB,UAAqB,wBAAwB,EAC7C,MAAe,EACf,OAAgB,EAChB,UAAU,GAAG,KAAK,EAClB,OAAO,GAAG,CAAC;QAEX,KAAK,CAAC,IAAI,CAAC,CAAC;QACZ,MAAM,GAAG,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;QAC9C,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CACb,8EAA8E,CAC/E,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,MAAM,GAAG;YACZ,KAAK;YACL,OAAO;YACP,MAAM;YACN,OAAO,EAAE,OAAO,IAAI,eAAe;YACnC,UAAU;YACV,OAAO;SACR,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU;QACd,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,GAAG,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,CAAC,oBAAoB,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;aAC3C;SACF,CAAC;aACC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aAC3B,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;YACb,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,KAAK,MAAM,KAAK,IACd,IACD,CAAC,MAAM,EAAE,CAAC;gBACT,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,KAAK,CAAC,QAAQ;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,QAAQ,EAAE,SAAS;iBACpB,CAAC,CAAC;YACL,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,IAAY;QAC3B,OAAO,IAAI,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9C,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3C,CAAC;CACF;AAED,MAAM,OAAO,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;IACxD,MAAM,CAAU;IAChB,MAAM,CAAa;IACnB,IAAI,CAAS;IACb,IAAI,CAGF;IACF,KAAK,GAAa,EAAE,CAAC;IACrB,UAAU,GAAuC,EAAE,CAAC;IAEpD,YAAY,MAAkB;
QAC5B,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC;QACf,IAAI,CAAC,IAAI,GAAG;YACV,GAAG,EAAE,IAAI,OAAO,CAAC,GAAG,EAAE;gBACpB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACf,CAAC,CAAC;YACF,MAAM,EAAE,GAAG,EAAE,GAAE,CAAC;SACjB,CAAC;IACJ,CAAC;IAED,IAAI,SAAS;QACX,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,mBAAmB,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,0BAA0B,IAAI,CAAC,MAAM,CAAC,OAAO,+BAA+B,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;IACxK,CAAC;IAED,QAAQ,CAAC,KAA0B;QACjC,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACnE,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEzC,MAAM,SAAS,GAAG,gBAAgB,CAAC;QACnC,IAAI,CAAC,IAAI,IAAI,KAAK,CAAC;QACnB,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAChD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC3B,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC;QACjB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,QAAgB;QACxB,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACpB,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACpC,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAClD,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,GAAG,EAAE;gBAC5B,OAAO,EAAE,EAAE,CAAC,oBAAoB,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;aACxD,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;oBACpC,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBACvB,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;oBACzC,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC,CAAC,CAAC;gBACjE,CAAC,CAAC,CAAC;gBAEH,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;gBAC1E,IAAI,OAAO,GAAG,KAAK,CAAC;gBACpB,MAAM,UAAU,GAAa,EAAE,CAAC;gBAChC,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;gBACjC,OAAO,EAAE,CAAC,UAAU,KAAK,EAAE,CAAC,MAAM,EAAE,CAAC;oBACnC,IAAI,IAAI,GAAG,SAAS,CAAC;oBACrB,IAAI,UAAU,CAAC,MAAM,KAAK
,CAAC,EAAE,CAAC;wBAC5B,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;oBAC5B,CAAC;yBAAM,CAAC;wBACN,IAAI,GAAG,UAAU,CAAC,KAAK,EAAE,CAAC;oBAC5B,CAAC;oBAED,IAAI,CAAC,OAAO,EAAE,CAAC;wBACb,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC;wBAC7E,OAAO,GAAG,IAAI,CAAC;oBACjB,CAAC;oBAED,IAAI,CAAC;wBACH,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;oBAClE,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,4CAA4C;wBAC5C,UAAU,CAAC,IAAI,CAAC,IAAK,CAAC,CAAC;wBACvB,MAAM;oBACR,CAAC;oBAED,IAAI,IAAI,IAAI,UAAU,EAAE,CAAC;wBACvB,MAAM,IAAI,CAAC;wBACX,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC,CAAC;wBAC9E,MAAM;oBACR,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;oBACxB,MAAM,IAAI,KAAK,CAAC,yCAAyC,OAAO,cAAc,CAAC,EAAE,CAAC,CAAC;gBACrF,CAAC;gBAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;gBACvC,OAAO,EAAE,CAAC;gBAEV,GAAG,CAAC,IAAI,CACN,gDAAgD,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ,GAAG,CAC/F,CAAC;gBACF,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QACD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,EAAa;QAC5B,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,IAAI,OAAO,CAAU,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;oBAC7C,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBAC1C,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,8BAA8B,IAAI,KAAK,MAAM,EAAE,CAAC,CAAC,CAAC;gBAC5F,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;oBACd,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;oBACxC,IAAI,OAAO,IAAI,IAAI,EAAE,CAAC;wBACpB,MAAM,IAAI,GAAG,IAAI,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC;wBAChE,MAAM,UAAU,GAAG,IAAI,UAAU,CAC/B,IAAI,EACJ,IAAI,CAAC,MAAM,CAAC,UAAU,EACtB,CAAC,EACD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAC5B,
CAAC;wBACF,IAAI,CAAC,UAAU,CAAC,IAAI,CAClB,IAAI,GAAG,CAAC,cAAc,CAAC,GAAG,CAAC,kBAAkB,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CACrF,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,KAAK;QACH,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC;QACjC,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC;QACf,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACtB,CAAC;IAED,IAAI;QACF,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;QACtC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;QACvC,CAAC;aAAM,CAAC;YACN,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QAC1C,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAa;QACvB,IAAI,IAAI,EAAE,CAAC;YACT,GAAG,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;QAC3D,CAAC;QAED,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;QACtB,CAAC;gBAAS,CAAC;YACT,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;CACF;AAED,MAAM,aAAc,SAAQ,GAAG,CAAC,aAAa;IAC3C,MAAM,CAAa;IACnB,IAAI,CAAS;IACb,KAAK,GAAyC,EAAE,CAAC;IAEjD,YAAY,IAAY,EAAE,MAAkB;QAC1C,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,IAAI,CAAC,GAAG,EAAE,CAAC;QACjB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACjC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;QACvC,CAAC;aAAM,CAAC;YACN,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QAC1C,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,GAAG;QACP,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;QAEhC,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,mBAAmB,KAAK,CAAC,EAAE,SAAS,CAAC,CAAC;QAChF,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAC1E,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,4BAA4B,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;QAEtF,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YAC1B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBA
CP,CAAC,oBAAoB,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBAC1C,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;gBAC7B,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,IAAI,SAAS;aACxD,CAAC;SACH,CAAC;aACC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;aAClC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;aACtD,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CACb,IAAI,CAAC,KAAK,CAAC,IAAI,CACb;YACE,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,IAAI,UAAU,CAClB,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,EAC5B,IAAI,CAAC,MAAM,CAAC,UAAU,EACtB,CAAC,EACD,IAAI,CAAC,UAAU,GAAG,CAAC,CACpB;SACF,EACD,SAAS,CACV,CACF;aACA,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF"}
|
package/package.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@livekit/agents-plugin-elevenlabs",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "ElevenLabs plugin for LiveKit Node Agents",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"author": "aoife cassidy <aoife@cassidy.sh>",
|
|
8
|
+
"type": "module",
|
|
9
|
+
"devDependencies": {
|
|
10
|
+
"@microsoft/api-extractor": "^7.35.0",
|
|
11
|
+
"@types/ws": "^8.5.10"
|
|
12
|
+
},
|
|
13
|
+
"peerDependencies": {
|
|
14
|
+
"typescript": "^5.0.0"
|
|
15
|
+
},
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"@livekit/rtc-node": "^0.1.0",
|
|
18
|
+
"ws": "^8.16.0",
|
|
19
|
+
"@livekit/agents": "0.1.0"
|
|
20
|
+
},
|
|
21
|
+
"scripts": {
|
|
22
|
+
"build": "tsc",
|
|
23
|
+
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
|
|
24
|
+
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript",
|
|
25
|
+
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose"
|
|
26
|
+
}
|
|
27
|
+
}
|
package/src/index.ts
ADDED
package/src/models.ts
ADDED
package/src/tts.ts
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { log, tts } from '@livekit/agents';
|
|
5
|
+
import { AudioFrame } from '@livekit/rtc-node';
|
|
6
|
+
import { URL } from 'url';
|
|
7
|
+
import { type RawData, WebSocket } from 'ws';
|
|
8
|
+
import type { TTSModels } from './models.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Describes an ElevenLabs voice as used by this plugin.
 *
 * `id` is the value interpolated into the text-to-speech request path;
 * `settings` carries optional per-voice tuning values.
 */
type Voice = {
  /** Voice identifier used in the `/text-to-speech/<id>/...` request path. */
  id: string;
  /** Human-readable voice name. */
  name: string;
  /** Voice category string as reported by the service (e.g. `'premade'`). */
  category: string;
  /** Optional tuning values for this voice. */
  settings?: VoiceSettings;
};
|
|
16
|
+
|
|
17
|
+
/**
 * Tuning values attached to a voice and forwarded as `voice_settings`
 * in synthesis requests.
 */
type VoiceSettings = {
  /** Stability, in the range 0..1. */
  stability: number;
  /** Similarity boost, in the range 0..1. */
  similarity_boost: number;
  /** Optional style value, in the range 0..1. */
  style?: number;
  /** Whether speaker boost is enabled. */
  use_speaker_boost: boolean;
};
|
|
23
|
+
|
|
24
|
+
/**
 * Voice used when the `TTS` constructor is called without one:
 * the premade "Bella" voice together with its tuning values.
 */
const DEFAULT_VOICE: Voice = {
  id: 'EXAVITQu4vr4xnSDxMaL',
  name: 'Bella',
  category: 'premade',
  // Tuning values forwarded as `voice_settings` in synthesis requests.
  settings: {
    stability: 0.71,
    similarity_boost: 0.5,
    style: 0.0,
    use_speaker_boost: true,
  },
};
|
|
35
|
+
|
|
36
|
+
/** Base URL used when the caller does not supply one. */
const API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1';

/** Name of the HTTP header that carries the API key. */
const AUTHORIZATION_HEADER = 'xi-api-key';

/** Text value that marks the end of a streamed segment (an empty string). */
const STREAM_EOS = '';
|
|
39
|
+
|
|
40
|
+
/**
 * Fully-resolved configuration held by a `TTS` instance and handed to every
 * stream it creates.
 */
type TTSOptions = {
  /** API key sent in the `xi-api-key` request header. */
  apiKey: string;
  /** Voice used for synthesis. */
  voice: Voice;
  /** Model identifier sent as `model_id`. */
  modelID: TTSModels;
  /** Base URL that request paths are appended to. */
  baseURL: string;
  /** Sample rate, in Hz, assigned to produced audio frames. */
  sampleRate: number;
  /** Value sent as the `optimize_streaming_latency` query parameter. */
  latency: number;
};
|
|
48
|
+
|
|
49
|
+
/**
 * ElevenLabs text-to-speech entry point.
 *
 * Resolves the configuration once and hands it to the one-shot
 * (`synthesize`) and incremental (`stream`) stream implementations.
 */
export class TTS extends tts.TTS {
  /** Resolved options shared with every stream created by this instance. */
  config: TTSOptions;

  /**
   * @param voice - Voice to synthesize with (defaults to `DEFAULT_VOICE`).
   * @param modelID - Model identifier sent to the service.
   * @param apiKey - API key; falls back to `$ELEVEN_API_KEY` when empty or omitted.
   * @param baseURL - Base URL override; falls back to `API_BASE_URL_V1`.
   * @param sampleRate - Sample rate, in Hz, of the produced audio.
   * @param latency - Value for the `optimize_streaming_latency` request parameter.
   * @throws Error when neither the argument nor the environment provides a non-empty key.
   */
  constructor(
    voice = DEFAULT_VOICE,
    modelID: TTSModels = 'eleven_multilingual_v2',
    apiKey?: string,
    baseURL?: string,
    sampleRate = 24000,
    latency = 2,
  ) {
    // NOTE(review): the meaning of this flag is defined by tts.TTS (presumably
    // "streaming supported") — confirm against the base class.
    super(true);

    // A falsy check (rather than `=== undefined`) also rejects an empty
    // ELEVEN_API_KEY, which would otherwise be sent as a blank header.
    const resolvedKey = apiKey || process.env.ELEVEN_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        'ElevenLabs API key is required, whether as an argument or as $ELEVEN_API_KEY',
      );
    }

    this.config = {
      voice,
      modelID,
      apiKey: resolvedKey,
      baseURL: baseURL || API_BASE_URL_V1,
      sampleRate,
      latency,
    };
  }

  /**
   * Fetches the voices available to the configured account.
   *
   * @returns One entry per voice; `settings` is left undefined.
   * @throws Error when the request fails or the payload has no `voices` array.
   */
  async listVoices(): Promise<Voice[]> {
    const response = await fetch(`${this.config.baseURL}/voices`, {
      headers: {
        [AUTHORIZATION_HEADER]: this.config.apiKey,
      },
    });
    if (!response.ok) {
      throw new Error(
        `failed to list ElevenLabs voices: HTTP ${response.status} ${response.statusText}`,
      );
    }

    const payload = (await response.json()) as {
      voices?: { voice_id: string; name: string; category: string }[];
    } | null;
    const listed = payload?.voices;
    if (!Array.isArray(listed)) {
      throw new Error('failed to list ElevenLabs voices: response has no "voices" array');
    }

    return listed.map((voice) => ({
      id: voice.voice_id,
      name: voice.name,
      category: voice.category,
      settings: undefined,
    }));
  }

  /** Creates a one-shot stream that synthesizes `text` in a single request. */
  async synthesize(text: string): Promise<tts.ChunkedStream> {
    return new ChunkedStream(text, this.config);
  }

  /** Creates an incremental stream that accepts text through `pushText`. */
  stream(): tts.SynthesizeStream {
    return new SynthesizeStream(this.config);
  }
}
|
|
109
|
+
|
|
110
|
+
export class SynthesizeStream extends tts.SynthesizeStream {
|
|
111
|
+
closed: boolean;
|
|
112
|
+
config: TTSOptions;
|
|
113
|
+
text: string;
|
|
114
|
+
task: {
|
|
115
|
+
run: Promise<void>;
|
|
116
|
+
cancel: () => void;
|
|
117
|
+
};
|
|
118
|
+
queue: string[] = [];
|
|
119
|
+
eventQueue: (tts.SynthesisEvent | undefined)[] = [];
|
|
120
|
+
|
|
121
|
+
constructor(config: TTSOptions) {
|
|
122
|
+
super();
|
|
123
|
+
this.config = config;
|
|
124
|
+
this.closed = false;
|
|
125
|
+
this.text = '';
|
|
126
|
+
this.task = {
|
|
127
|
+
run: new Promise(() => {
|
|
128
|
+
this.run(32);
|
|
129
|
+
}),
|
|
130
|
+
cancel: () => {},
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
get streamURL(): string {
|
|
135
|
+
return `${this.config.baseURL}/text-to-speech/${this.config.voice.id}/stream-input?model_id=${this.config.modelID}&optimize_streaming_latency=${this.config.latency}`;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
pushText(token?: string | undefined): void {
|
|
139
|
+
if (this.closed) throw new Error('cannot push to a closed stream');
|
|
140
|
+
if (!token || token.length === 0) return;
|
|
141
|
+
|
|
142
|
+
const splitters = '.,?!;:—-()[]} ';
|
|
143
|
+
this.text += token;
|
|
144
|
+
if (splitters.includes(token[token.length - 1])) {
|
|
145
|
+
this.queue.push(this.text);
|
|
146
|
+
this.text = '';
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
 * Drives the streaming session: opens a WebSocket to `streamURL`, forwards queued
 * text to it, and reconnects when the connection fails.
 *
 * Each pass of the outer loop opens a fresh socket. Text whose send failed is kept
 * in a retry queue that outlives the socket, so it is re-sent on the next connection
 * instead of being dropped.
 *
 * @param maxRetry - number of failed attempts tolerated before giving up
 * @throws Error once a failure occurs after `maxRetry` retries have been used
 */
async run(maxRetry: number) {
  // How long to sleep when there is nothing to send. The wait is what lets the
  // event loop deliver socket events (messages, close) while the queue is empty;
  // without it this loop would spin synchronously and starve them.
  const idlePollMs = 10;

  let retries = 0;
  // Declared outside the connection loop so pending text survives a reconnect.
  const retryQueue: string[] = [];

  while (!this.closed) {
    const url = new URL(this.streamURL);
    url.protocol = url.protocol.replace('http', 'ws');
    const ws = new WebSocket(url, {
      headers: { [AUTHORIZATION_HEADER]: this.config.apiKey },
    });

    try {
      await new Promise((resolve, reject) => {
        ws.on('open', resolve);
        ws.on('error', (error) => reject(error));
        ws.on('close', (code) => reject(`WebSocket returned ${code}`));
      });

      // Initial message carrying the voice settings.
      ws.send(JSON.stringify({ text: ' ', voice_settings: this.config.voice }));
      let started = false;
      const task = this.listenTask(ws);

      while (ws.readyState !== ws.CLOSED) {
        // Previously failed text takes priority over newly queued text.
        const text = retryQueue.length === 0 ? this.queue.shift() : retryQueue.shift();

        if (!started) {
          this.eventQueue.push(new tts.SynthesisEvent(tts.SynthesisEventType.STARTED));
          started = true;
        }

        if (text === undefined) {
          // Nothing to send yet: yield to the event loop and poll again.
          await new Promise((resolve) => setTimeout(resolve, idlePollMs));
          continue;
        }

        try {
          ws.send(JSON.stringify({ text, try_trigger_generation: true }));
        } catch (e) {
          // XI closes idle connections after a while.
          retryQueue.push(text);
          break;
        }

        if (text === STREAM_EOS) {
          // End of segment: wait for the listener to finish before reporting FINISHED.
          await task;
          this.eventQueue.push(new tts.SynthesisEvent(tts.SynthesisEventType.FINISHED));
          break;
        }
      }
    } catch (e) {
      if (retries >= maxRetry) {
        throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);
      }

      const delay = Math.min(retries * 5, 5);
      retries++;

      log.warn(
        `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay * 1000));
    } finally {
      // Release the socket on every exit path so abandoned connections do not linger.
      if (ws.readyState !== ws.CLOSED) {
        ws.close();
      }
    }
  }
  this.closed = true;
}
|
|
213
|
+
|
|
214
|
+
/**
 * Consumes messages from `ws` and turns every payload that carries an `audio`
 * field into an AUDIO synthesis event on `eventQueue`.
 *
 * The returned promise settles (never rejects) when the socket closes, when a
 * message cannot be parsed or decoded, or when the stream has been marked closed.
 *
 * @param ws - an open WebSocket delivering JSON messages with base64-encoded audio
 */
async listenTask(ws: WebSocket) {
  if (this.closed) {
    return;
  }

  await new Promise<void>((resolve) => {
    // One listener pair for the whole task, removed on completion. Registering
    // new listeners per message would accumulate handlers and could miss frames
    // emitted back-to-back before the next registration.
    const stop = () => {
      ws.off('message', onMessage);
      ws.off('close', stop);
      resolve();
    };

    const onMessage = (msg: RawData) => {
      try {
        const json = JSON.parse(msg.toString());
        if ('audio' in json) {
          const bytes = Buffer.from(json.audio, 'base64');
          // Two bytes per 16-bit sample. Passing the Buffer straight to the
          // typed-array constructor would copy one *byte* per element instead.
          const sampleCount = Math.trunc(bytes.length / 2);
          const samples = new Uint16Array(sampleCount);
          for (let i = 0; i < sampleCount; i++) {
            // assumes little-endian PCM from the service — TODO confirm
            samples[i] = bytes.readUInt16LE(i * 2);
          }
          const audioFrame = new AudioFrame(samples, this.config.sampleRate, 1, sampleCount);
          this.eventQueue.push(
            new tts.SynthesisEvent(tts.SynthesisEventType.AUDIO, { text: '', data: audioFrame }),
          );
        }
      } catch {
        // An unparsable message or undecodable `audio` value ends the task.
        stop();
        return;
      }

      if (this.closed) {
        stop();
      }
    };

    ws.on('message', onMessage);
    ws.on('close', stop);
  });
}
|
|
240
|
+
|
|
241
|
+
/**
 * Queues the buffered text followed by a single space, clears the buffer, then
 * queues an empty string.
 */
flush() {
  const buffered = this.text;
  this.queue.push(buffered + ' ');
  this.text = '';
  // An empty string is queued after the text.
  this.queue.push('');
}
|
|
246
|
+
|
|
247
|
+
/**
 * Removes and returns the oldest entry of `eventQueue`.
 *
 * @returns `{ done: false, value }` for a queued event; `{ done: true }` when the
 * shifted entry is falsy (the queue is empty or the entry is `undefined`).
 */
next(): IteratorResult<tts.SynthesisEvent> {
  const head = this.eventQueue.shift();
  if (!head) {
    return { done: true, value: undefined };
  }
  return { done: false, value: head };
}
|
|
255
|
+
|
|
256
|
+
/**
 * Awaits `this.task.run`, then appends `undefined` to `eventQueue` whether or not
 * the await threw.
 *
 * @param wait - not supported yet; a truthy value only logs a warning
 */
async close(wait: boolean) {
  if (wait) {
    log.warn('wait is not yet supported for ElevenLabs TTS');
  }

  // Appended in `finally` so the marker is queued even if the await rejects.
  const pushEndMarker = () => this.eventQueue.push(undefined);

  try {
    await this.task.run;
  } finally {
    pushEndMarker();
  }
}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
/**
 * One-shot synthesis: posts the whole text to the `/text-to-speech/{voice}/stream`
 * endpoint and exposes the result as a single audio frame followed by
 * end-of-iteration.
 */
class ChunkedStream extends tts.ChunkedStream {
  config: TTSOptions;
  text: string;
  /** Synthesized audio awaiting consumption; `undefined` marks the end. */
  queue: (tts.SynthesizedAudio | undefined)[] = [];
  /** The request started by the first `next()` call, reused by later calls. */
  private firstRun?: Promise<void>;

  constructor(text: string, config: TTSOptions) {
    super();
    this.config = config;
    this.text = text;
  }

  /**
   * Returns the next synthesized chunk.
   *
   * The HTTP request is issued once, on the first call. Later calls only drain
   * the queue, so they neither repeat the request nor yield audio again after
   * the end of the stream has been reported.
   */
  async next(): Promise<IteratorResult<tts.SynthesizedAudio>> {
    if (!this.firstRun) {
      this.firstRun = this.run();
    }
    await this.firstRun;

    const audio = this.queue.shift();
    if (audio) {
      return { done: false, value: audio };
    }
    return { done: true, value: undefined };
  }

  /** Queues the end marker so the next `next()` that reaches it reports `done`. */
  async close() {
    this.queue.push(undefined);
  }

  /**
   * Performs the synthesis request and queues the resulting frame followed by
   * the end marker. On any failure (network error or non-2xx response) only the
   * end marker is queued, so iteration finishes without audio.
   */
  async run(): Promise<void> {
    const voice = this.config.voice;

    const url = new URL(`${this.config.baseURL}/text-to-speech/${voice.id}/stream`);
    url.searchParams.append('output_format', 'pcm_' + this.config.sampleRate);
    url.searchParams.append('optimize_streaming_latency', this.config.latency.toString());

    try {
      const response = await fetch(url.toString(), {
        method: 'POST',
        headers: {
          [AUTHORIZATION_HEADER]: this.config.apiKey,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          text: this.text,
          model_id: this.config.modelID,
          voice_settings: this.config.voice.settings || undefined,
        }),
      });
      if (!response.ok) {
        // The body of an error response is not PCM; do not decode it as audio.
        throw new Error(`ElevenLabs returned HTTP ${response.status}`);
      }

      const pcm = await response.arrayBuffer();
      // Two bytes per 16-bit sample; a trailing odd byte is ignored rather than
      // letting the typed-array constructor throw.
      const sampleCount = Math.trunc(pcm.byteLength / 2);
      this.queue.push(
        {
          text: this.text,
          data: new AudioFrame(
            new Uint16Array(pcm, 0, sampleCount),
            this.config.sampleRate,
            1,
            sampleCount,
          ),
        },
        undefined,
      );
    } catch {
      this.queue.push(undefined);
    }
  }
}
|
package/tsconfig.json
ADDED