@livekit/agents-plugin-deepgram 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/index.cjs +23 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +1 -4
- package/dist/index.js.map +1 -1
- package/dist/models.cjs +17 -0
- package/dist/models.cjs.map +1 -0
- package/dist/models.js +0 -4
- package/dist/models.js.map +1 -1
- package/dist/stt.cjs +272 -0
- package/dist/stt.cjs.map +1 -0
- package/dist/stt.js +232 -243
- package/dist/stt.js.map +1 -1
- package/package.json +23 -7
- package/src/stt.ts +3 -3
- package/.turbo/turbo-build.log +0 -4
- package/CHANGELOG.md +0 -53
- package/api-extractor.json +0 -20
- package/tsconfig.json +0 -16
- package/tsconfig.tsbuildinfo +0 -1
package/dist/stt.js
CHANGED
|
@@ -1,258 +1,247 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
5
|
-
import { AudioByteStream, AudioEnergyFilter, log, stt } from '@livekit/agents';
|
|
6
|
-
import { WebSocket } from 'ws';
|
|
7
|
-
const API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';
|
|
1
|
+
import { AudioByteStream, AudioEnergyFilter, log, stt } from "@livekit/agents";
|
|
2
|
+
import { WebSocket } from "ws";
|
|
3
|
+
const API_BASE_URL_V1 = "wss://api.deepgram.com/v1/listen";
|
|
8
4
|
const defaultSTTOptions = {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
5
|
+
apiKey: process.env.DEEPGRAM_API_KEY,
|
|
6
|
+
language: "en-US",
|
|
7
|
+
detectLanguage: false,
|
|
8
|
+
interimResults: true,
|
|
9
|
+
punctuate: true,
|
|
10
|
+
model: "nova-2-general",
|
|
11
|
+
smartFormat: true,
|
|
12
|
+
noDelay: true,
|
|
13
|
+
endpointing: 25,
|
|
14
|
+
fillerWords: false,
|
|
15
|
+
sampleRate: 16e3,
|
|
16
|
+
numChannels: 1,
|
|
17
|
+
keywords: [],
|
|
18
|
+
profanityFilter: false
|
|
23
19
|
};
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if (this.#opts.detectLanguage) {
|
|
37
|
-
this.#opts.language = undefined;
|
|
38
|
-
}
|
|
39
|
-
else if (this.#opts.language &&
|
|
40
|
-
!['en-US', 'en'].includes(this.#opts.language) &&
|
|
41
|
-
[
|
|
42
|
-
'nova-2-meeting',
|
|
43
|
-
'nova-2-phonecall',
|
|
44
|
-
'nova-2-finance',
|
|
45
|
-
'nova-2-conversationalai',
|
|
46
|
-
'nova-2-voicemail',
|
|
47
|
-
'nova-2-video',
|
|
48
|
-
'nova-2-medical',
|
|
49
|
-
'nova-2-drivethru',
|
|
50
|
-
'nova-2-automotive',
|
|
51
|
-
].includes(this.#opts.model)) {
|
|
52
|
-
this.#logger.warn(`${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`);
|
|
53
|
-
this.#opts.model = 'nova-2-general';
|
|
54
|
-
}
|
|
20
|
+
class STT extends stt.STT {
|
|
21
|
+
#opts;
|
|
22
|
+
#logger = log();
|
|
23
|
+
constructor(opts = defaultSTTOptions) {
|
|
24
|
+
super({
|
|
25
|
+
streaming: true,
|
|
26
|
+
interimResults: opts.interimResults ?? defaultSTTOptions.interimResults
|
|
27
|
+
});
|
|
28
|
+
if (opts.apiKey === void 0 && defaultSTTOptions.apiKey === void 0) {
|
|
29
|
+
throw new Error(
|
|
30
|
+
"Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY"
|
|
31
|
+
);
|
|
55
32
|
}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
|
|
33
|
+
this.#opts = { ...defaultSTTOptions, ...opts };
|
|
34
|
+
if (this.#opts.detectLanguage) {
|
|
35
|
+
this.#opts.language = void 0;
|
|
36
|
+
} else if (this.#opts.language && !["en-US", "en"].includes(this.#opts.language) && [
|
|
37
|
+
"nova-2-meeting",
|
|
38
|
+
"nova-2-phonecall",
|
|
39
|
+
"nova-2-finance",
|
|
40
|
+
"nova-2-conversationalai",
|
|
41
|
+
"nova-2-voicemail",
|
|
42
|
+
"nova-2-video",
|
|
43
|
+
"nova-2-medical",
|
|
44
|
+
"nova-2-drivethru",
|
|
45
|
+
"nova-2-automotive"
|
|
46
|
+
].includes(this.#opts.model)) {
|
|
47
|
+
this.#logger.warn(
|
|
48
|
+
`${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`
|
|
49
|
+
);
|
|
50
|
+
this.#opts.model = "nova-2-general";
|
|
62
51
|
}
|
|
52
|
+
}
|
|
53
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
54
|
+
async recognize(_) {
|
|
55
|
+
throw new Error("Recognize is not supported on Deepgram STT");
|
|
56
|
+
}
|
|
57
|
+
stream() {
|
|
58
|
+
return new SpeechStream(this.#opts);
|
|
59
|
+
}
|
|
63
60
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
});
|
|
107
|
-
ws = new WebSocket(streamURL, {
|
|
108
|
-
headers: { Authorization: `Token ${this.#opts.apiKey}` },
|
|
109
|
-
});
|
|
110
|
-
try {
|
|
111
|
-
await new Promise((resolve, reject) => {
|
|
112
|
-
ws.on('open', resolve);
|
|
113
|
-
ws.on('error', (error) => reject(error));
|
|
114
|
-
ws.on('close', (code) => reject(`WebSocket returned ${code}`));
|
|
115
|
-
});
|
|
116
|
-
await this.#runWS(ws);
|
|
117
|
-
}
|
|
118
|
-
catch (e) {
|
|
119
|
-
if (retries >= maxRetry) {
|
|
120
|
-
throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
|
|
121
|
-
}
|
|
122
|
-
const delay = Math.min(retries * 5, 10);
|
|
123
|
-
retries++;
|
|
124
|
-
this.#logger.warn(`failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`);
|
|
125
|
-
await new Promise((resolve) => setTimeout(resolve, delay * 1000));
|
|
126
|
-
}
|
|
61
|
+
class SpeechStream extends stt.SpeechStream {
|
|
62
|
+
#opts;
|
|
63
|
+
#audioEnergyFilter;
|
|
64
|
+
#logger = log();
|
|
65
|
+
#speaking = false;
|
|
66
|
+
constructor(opts) {
|
|
67
|
+
super();
|
|
68
|
+
this.#opts = opts;
|
|
69
|
+
this.closed = false;
|
|
70
|
+
this.#audioEnergyFilter = new AudioEnergyFilter();
|
|
71
|
+
this.#run();
|
|
72
|
+
}
|
|
73
|
+
async #run(maxRetry = 32) {
|
|
74
|
+
let retries = 0;
|
|
75
|
+
let ws;
|
|
76
|
+
while (!this.input.closed) {
|
|
77
|
+
const streamURL = new URL(API_BASE_URL_V1);
|
|
78
|
+
const params = {
|
|
79
|
+
model: this.#opts.model,
|
|
80
|
+
punctuate: this.#opts.punctuate,
|
|
81
|
+
smart_format: this.#opts.smartFormat,
|
|
82
|
+
no_delay: this.#opts.noDelay,
|
|
83
|
+
interim_results: this.#opts.interimResults,
|
|
84
|
+
encoding: "linear16",
|
|
85
|
+
vad_events: true,
|
|
86
|
+
sample_rate: this.#opts.sampleRate,
|
|
87
|
+
channels: this.#opts.numChannels,
|
|
88
|
+
endpointing: this.#opts.endpointing || false,
|
|
89
|
+
filler_words: this.#opts.fillerWords,
|
|
90
|
+
keywords: this.#opts.keywords.map((x) => x.join(":")),
|
|
91
|
+
profanity_filter: this.#opts.profanityFilter,
|
|
92
|
+
language: this.#opts.language
|
|
93
|
+
};
|
|
94
|
+
Object.entries(params).forEach(([k, v]) => {
|
|
95
|
+
if (v !== void 0) {
|
|
96
|
+
if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
|
|
97
|
+
streamURL.searchParams.append(k, encodeURIComponent(v));
|
|
98
|
+
} else {
|
|
99
|
+
v.forEach((x) => streamURL.searchParams.append("keywords", encodeURIComponent(x)));
|
|
100
|
+
}
|
|
127
101
|
}
|
|
128
|
-
|
|
102
|
+
});
|
|
103
|
+
ws = new WebSocket(streamURL, {
|
|
104
|
+
headers: { Authorization: `Token ${this.#opts.apiKey}` }
|
|
105
|
+
});
|
|
106
|
+
try {
|
|
107
|
+
await new Promise((resolve, reject) => {
|
|
108
|
+
ws.on("open", resolve);
|
|
109
|
+
ws.on("error", (error) => reject(error));
|
|
110
|
+
ws.on("close", (code) => reject(`WebSocket returned ${code}`));
|
|
111
|
+
});
|
|
112
|
+
await this.#runWS(ws);
|
|
113
|
+
} catch (e) {
|
|
114
|
+
if (retries >= maxRetry) {
|
|
115
|
+
throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
|
|
116
|
+
}
|
|
117
|
+
const delay = Math.min(retries * 5, 10);
|
|
118
|
+
retries++;
|
|
119
|
+
this.#logger.warn(
|
|
120
|
+
`failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
|
|
121
|
+
);
|
|
122
|
+
await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
|
|
123
|
+
}
|
|
129
124
|
}
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
})
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
if (isFinal) {
|
|
206
|
-
this.queue.put({
|
|
207
|
-
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
208
|
-
alternatives: [alternatives[0], ...alternatives.splice(0)],
|
|
209
|
-
});
|
|
210
|
-
}
|
|
211
|
-
else {
|
|
212
|
-
this.queue.put({
|
|
213
|
-
type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
214
|
-
alternatives: [alternatives[0], ...alternatives.splice(0)],
|
|
215
|
-
});
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
// if we receive an endpoint, only end the speech if
|
|
219
|
-
// we either had a SpeechStarted event or we have a seen
|
|
220
|
-
// a non-empty transcript (deepgram doesn't have a SpeechEnded event)
|
|
221
|
-
if (isEndpoint && this.#speaking) {
|
|
222
|
-
this.#speaking = false;
|
|
223
|
-
this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
|
|
224
|
-
}
|
|
225
|
-
break;
|
|
226
|
-
}
|
|
227
|
-
case 'Metadata': {
|
|
228
|
-
break;
|
|
229
|
-
}
|
|
230
|
-
default: {
|
|
231
|
-
this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');
|
|
232
|
-
break;
|
|
233
|
-
}
|
|
234
|
-
}
|
|
125
|
+
this.closed = true;
|
|
126
|
+
}
|
|
127
|
+
async #runWS(ws) {
|
|
128
|
+
let closing = false;
|
|
129
|
+
const keepalive = setInterval(() => {
|
|
130
|
+
try {
|
|
131
|
+
ws.send(JSON.stringify({ type: "KeepAlive" }));
|
|
132
|
+
} catch {
|
|
133
|
+
clearInterval(keepalive);
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
}, 5e3);
|
|
137
|
+
const sendTask = async () => {
|
|
138
|
+
const samples100Ms = Math.floor(this.#opts.sampleRate / 10);
|
|
139
|
+
const stream = new AudioByteStream(
|
|
140
|
+
this.#opts.sampleRate,
|
|
141
|
+
this.#opts.numChannels,
|
|
142
|
+
samples100Ms
|
|
143
|
+
);
|
|
144
|
+
for await (const data of this.input) {
|
|
145
|
+
let frames;
|
|
146
|
+
if (data === SpeechStream.FLUSH_SENTINEL) {
|
|
147
|
+
frames = stream.flush();
|
|
148
|
+
} else if (data.sampleRate === this.#opts.sampleRate || data.channels === this.#opts.numChannels) {
|
|
149
|
+
frames = stream.write(data.data.buffer);
|
|
150
|
+
} else {
|
|
151
|
+
throw new Error(`sample rate or channel count of frame does not match`);
|
|
152
|
+
}
|
|
153
|
+
for await (const frame of frames) {
|
|
154
|
+
if (this.#audioEnergyFilter.pushFrame(frame)) {
|
|
155
|
+
ws.send(frame.data.buffer);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
closing = true;
|
|
160
|
+
ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
161
|
+
};
|
|
162
|
+
const listenTask = async () => {
|
|
163
|
+
new Promise(
|
|
164
|
+
(_, reject) => ws.once("close", (code, reason) => {
|
|
165
|
+
if (!closing) {
|
|
166
|
+
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
167
|
+
reject();
|
|
168
|
+
}
|
|
169
|
+
})
|
|
170
|
+
);
|
|
171
|
+
while (!this.closed) {
|
|
172
|
+
try {
|
|
173
|
+
await new Promise((resolve) => {
|
|
174
|
+
ws.once("message", (data) => resolve(data));
|
|
175
|
+
}).then((msg) => {
|
|
176
|
+
const json = JSON.parse(msg.toString());
|
|
177
|
+
switch (json["type"]) {
|
|
178
|
+
case "SpeechStarted": {
|
|
179
|
+
if (this.#speaking) return;
|
|
180
|
+
this.#speaking = true;
|
|
181
|
+
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
182
|
+
break;
|
|
183
|
+
}
|
|
184
|
+
// see this page:
|
|
185
|
+
// https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final
|
|
186
|
+
// for more information about the different types of events
|
|
187
|
+
case "Results": {
|
|
188
|
+
const isFinal = json["is_final"];
|
|
189
|
+
const isEndpoint = json["speech_final"];
|
|
190
|
+
const alternatives = liveTranscriptionToSpeechData(this.#opts.language, json);
|
|
191
|
+
if (alternatives[0] && alternatives[0].text) {
|
|
192
|
+
if (!this.#speaking) {
|
|
193
|
+
this.#speaking = true;
|
|
194
|
+
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
195
|
+
}
|
|
196
|
+
if (isFinal) {
|
|
197
|
+
this.queue.put({
|
|
198
|
+
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
199
|
+
alternatives: [alternatives[0], ...alternatives.slice(1)]
|
|
235
200
|
});
|
|
201
|
+
} else {
|
|
202
|
+
this.queue.put({
|
|
203
|
+
type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
204
|
+
alternatives: [alternatives[0], ...alternatives.slice(1)]
|
|
205
|
+
});
|
|
206
|
+
}
|
|
236
207
|
}
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
208
|
+
if (isEndpoint && this.#speaking) {
|
|
209
|
+
this.#speaking = false;
|
|
210
|
+
this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
|
|
240
211
|
}
|
|
212
|
+
break;
|
|
213
|
+
}
|
|
214
|
+
case "Metadata": {
|
|
215
|
+
break;
|
|
216
|
+
}
|
|
217
|
+
default: {
|
|
218
|
+
this.#logger.child({ msg: json }).warn("received unexpected message from Deepgram");
|
|
219
|
+
break;
|
|
220
|
+
}
|
|
241
221
|
}
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
222
|
+
});
|
|
223
|
+
} catch (error) {
|
|
224
|
+
this.#logger.child({ error }).warn("unrecoverable error, exiting");
|
|
225
|
+
break;
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
};
|
|
229
|
+
await Promise.all([sendTask(), listenTask()]);
|
|
230
|
+
clearInterval(keepalive);
|
|
231
|
+
}
|
|
246
232
|
}
|
|
247
|
-
_a = SpeechStream;
|
|
248
233
|
const liveTranscriptionToSpeechData = (language, data) => {
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
234
|
+
const alts = data["channel"]["alternatives"];
|
|
235
|
+
return alts.map((alt) => ({
|
|
236
|
+
language,
|
|
237
|
+
startTime: alt["words"].length ? alt["words"][0]["start"] : 0,
|
|
238
|
+
endTime: alt["words"].length ? alt["words"][alt["words"].length - 1]["end"] : 0,
|
|
239
|
+
confidence: alt["confidence"],
|
|
240
|
+
text: alt["transcript"]
|
|
241
|
+
}));
|
|
242
|
+
};
|
|
243
|
+
export {
|
|
244
|
+
STT,
|
|
245
|
+
SpeechStream
|
|
257
246
|
};
|
|
258
247
|
//# sourceMappingURL=stt.js.map
|
package/dist/stt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stt.js","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":";AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,EAAoB,eAAe,EAAE,iBAAiB,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAEjG,OAAO,EAAgB,SAAS,EAAE,MAAM,IAAI,CAAC;AAG7C,MAAM,eAAe,GAAG,kCAAkC,CAAC;AAmB3D,MAAM,iBAAiB,GAAe;IACpC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB;IACpC,QAAQ,EAAE,OAAO;IACjB,cAAc,EAAE,KAAK;IACrB,cAAc,EAAE,IAAI;IACpB,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,gBAAgB;IACvB,WAAW,EAAE,IAAI;IACjB,OAAO,EAAE,IAAI;IACb,WAAW,EAAE,EAAE;IACf,WAAW,EAAE,KAAK;IAClB,UAAU,EAAE,KAAK;IACjB,WAAW,EAAE,CAAC;IACd,QAAQ,EAAE,EAAE;IACZ,eAAe,EAAE,KAAK;CACvB,CAAC;AAEF,MAAM,OAAO,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,KAAK,CAAa;IAClB,OAAO,GAAG,GAAG,EAAE,CAAC;IAEhB,YAAY,OAA4B,iBAAiB;QACvD,KAAK,CAAC;YACJ,SAAS,EAAE,IAAI;YACf,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,iBAAiB,CAAC,cAAc;SACxE,CAAC,CAAC;QACH,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,iBAAiB,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YACxE,MAAM,IAAI,KAAK,CACb,8EAA8E,CAC/E,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,KAAK,GAAG,EAAE,GAAG,iBAAiB,EAAE,GAAG,IAAI,EAAE,CAAC;QAE/C,IAAI,IAAI,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC;YAC9B,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,SAAS,CAAC;QAClC,CAAC;aAAM,IACL,IAAI,CAAC,KAAK,CAAC,QAAQ;YACnB,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC;YAC9C;gBACE,gBAAgB;gBAChB,kBAAkB;gBAClB,gBAAgB;gBAChB,yBAAyB;gBACzB,kBAAkB;gBAClB,cAAc;gBACd,gBAAgB;gBAChB,kBAAkB;gBAClB,mBAAmB;aACpB,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAC5B,CAAC;YACD,IAAI,CAAC,OAAO,CAAC,IAAI,CACf,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,8BAA8B,IAAI,CAAC,KAAK,CAAC,QAAQ,kCAAkC,CACvG,CAAC;YACF,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,gBAAgB,CAAC;QACtC,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,SAAS,CAAC,CAAc;QAC5B,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAChE,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtC,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,GAAG,CAAC,YAAY;IAChD,KAAK,CAAa;IAClB,kBAAkB,CAAoB;IACtC,OAAO,GAAG,GAAG,EAAE,CAAC;IAChB,SAAS,GAAG,KAAK,CAAC;IAElB,YAAY,IAAgB;QAC1B,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,kBAAkB,GAAG,IAAI,iBAAiB,EAAE,CAAC;QAElD,IAAI,CAAC,IAAI,EAAE,CAAC;IACd,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,QAAQ,GAAG,EAAE;QACtB,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,EAAa,CAAC;QAClB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YAC1B,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,CAAC;YAC3C,MAAM,MAAM,GAAG;gBACb,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK;gBACvB,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS;gBAC/B,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW;gBACpC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO;gBAC5B,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,cAAc;gBAC1C,QAAQ,EAAE,UAAU;gBACpB,UAAU,EAAE,IAAI;gBAChB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU;gBAClC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW;gBAChC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,IAAI,KAAK;gBAC5C,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW;gBACpC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACrD,gBAAgB,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe;gBAC5C,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ;aAC9B,CAAC;YACF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE;gBACxC,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;oBACpB,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;wBAC7E,SAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAE,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC1D,CAAC;yBAAM,CAAC;wBACN,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,YAAY,CAAC,MAAM,CAAC,UAAU,EAAE,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBACrF,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,EAAE,GAAG,IAAI,SAAS,CAAC,SAAS,EAAE;gBAC5B,OAAO,EAAE,EAAE,aAAa,EAAE,SAAS,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE;aACzD,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;oBACpC,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBACvB,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;oBACzC,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC,CAAC,CAAC;gBACjE,CAAC,CAAC,CAAC;gBAEH,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YACxB,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;oBACxB,MAAM,IAAI,KAAK,CAAC,uCAAuC,OAAO,cAAc,CAAC,EAAE,CAAC,CAAC;gBACnF,CAAC;gBAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;gBACxC,OAAO,EAAE,CAAC;gBAEV,IAAI,CAAC,OAAO,CAAC,IAAI,CACf,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ,GAAG,CAC7F,CAAC;gBACF,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QAED,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,EAAa;QACxB,IAAI,OAAO,GAAG,KAAK,CAAC;QAEpB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE;YACjC,IAAI,CAAC;gBACH,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;YACjD,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,CAAC,SAAS,CAAC,CAAC;gBACzB,OAAO;YACT,CAAC;QACH,CAAC,EAAE,IAAI,CAAC,CAAC;QAET,MAAM,QAAQ,GAAG,KAAK,IAAI,EAAE;YAC1B,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;YAC5D,MAAM,MAAM,GAAG,IAAI,eAAe,CAChC,IAAI,CAAC,KAAK,CAAC,UAAU,EACrB,IAAI,CAAC,KAAK,CAAC,WAAW,EACtB,YAAY,CACb,CAAC;YAEF,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBACpC,IAAI,MAAoB,CAAC;gBACzB,IAAI,IAAI,KAAK,EAAY,CAAC,cAAc,EAAE,CAAC;oBACzC,MAAM,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;gBAC1B,CAAC;qBAAM,IACL,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,KAAK,CAAC,UAAU;oBACzC,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,KAAK,CAAC,WAAW,EACxC,CAAC;oBACD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC1C,CAAC;qBAAM,CAAC;oBACN,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;gBAC1E,CAAC;gBAED,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBACjC,IAAI,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;wBAC7C,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;oBAC7B,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO,GAAG,IAAI,CAAC;YACf,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;QACnD,CAAC,CAAC;QAEF,MAAM,UAAU,GAAG,KAAK,IAAI,EAAE;YAC5B,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAC9B,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE;gBAChC,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,8BAA8B,IAAI,KAAK,MAAM,EAAE,CAAC,CAAC;oBACpE,MAAM,EAAE,CAAC;gBACX,CAAC;YACH,CAAC,CAAC,CACH,CAAC;YAEF,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACpB,IAAI,CAAC;oBACH,MAAM,IAAI,OAAO,CAAU,CAAC,OAAO,EAAE,EAAE;wBACrC,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBAC9C,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;wBACd,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;wBACxC,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;4BACrB,KAAK,eAAe,CAAC,CAAC,CAAC;gCACrB,yDAAyD;gCACzD,yDAAyD;gCACzD,iEAAiE;gCACjE,4EAA4E;gCAC5E,IAAI,IAAI,CAAC,SAAS;oCAAE,OAAO;gCAC3B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;gCACtB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,eAAe,EAAE,CAAC,CAAC;gCAC9D,MAAM;4BACR,CAAC;4BACD,iBAAiB;4BACjB,6GAA6G;4BAC7G,2DAA2D;4BAC3D,KAAK,SAAS,CAAC,CAAC,CAAC;gCACf,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;gCACjC,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC;gCAExC,MAAM,YAAY,GAAG,6BAA6B,CAAC,IAAI,CAAC,KAAK,CAAC,QAAS,EAAE,IAAI,CAAC,CAAC;gCAE/E,sEAAsE;gCACtE,sEAAsE;gCACtE,iBAAiB;gCACjB,IAAI,YAAY,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;oCAC5C,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;wCACpB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;wCACtB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,eAAe,EAAE,CAAC,CAAC;oCAChE,CAAC;oCAED,IAAI,OAAO,EAAE,CAAC;wCACZ,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;4CACb,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,gBAAgB;4CAC1C,YAAY,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;yCAC3D,CAAC,CAAC;oCACL,CAAC;yCAAM,CAAC;wCACN,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;4CACb,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,kBAAkB;4CAC5C,YAAY,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;yCAC3D,CAAC,CAAC;oCACL,CAAC;gCACH,CAAC;gCAED,oDAAoD;gCACpD,wDAAwD;gCACxD,qEAAqE;gCACrE,IAAI,UAAU,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oCACjC,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;oCACvB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC,CAAC;gCAC9D,CAAC;gCAED,MAAM;4BACR,CAAC;4BACD,KAAK,UAAU,CAAC,CAAC,CAAC;gCAChB,MAAM;4BACR,CAAC;4BACD,OAAO,CAAC,CAAC,CAAC;gCACR,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;gCACpF,MAAM;4BACR,CAAC;wBACH,CAAC;oBACH,CAAC,CAAC,CAAC;gBACL,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;oBACnE,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;QAC9C,aAAa,CAAC,SAAS,CAAC,CAAC;IAC3B,CAAC;CACF;;AAED,MAAM,6BAA6B,GAAG,CACpC,QAA+B,EAC/B,IAA2B,EACT,EAAE;IACpB,MAAM,IAAI,GAAU,IAAI,CAAC,SAAS,CAAC,CAAC,cAAc,CAAC,CAAC;IAEpD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACxB,QAAQ;QACR,SAAS,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7D,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/E,UAAU,EAAE,GAAG,CAAC,YAAY,CAAC;QAC7B,IAAI,EAAE,GAAG,CAAC,YAAY,CAAC;KACxB,CAAC,CAAC,CAAC;AACN,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, AudioByteStream, AudioEnergyFilter, log, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n profanityFilter: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-2-general',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n profanityFilter: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n stream(): stt.SpeechStream {\n return new SpeechStream(this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n\n constructor(opts: STTOptions) {\n super();\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n\n this.#run();\n }\n\n async #run(maxRetry = 32) {\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append('keywords', encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n async #runWS(ws: WebSocket) {\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const listenTask = async () => {\n new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject();\n }\n }),\n );\n\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n clearInterval(keepalive);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA,SAA2B,iBAAiB,mBAAmB,KAAK,WAAW;AAE/E,SAAuB,iBAAiB;AAGxC,MAAM,kBAAkB;AAmBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,iBAAiB;AACnB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,UAAU,GAA0C;AACxD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,aAAa,KAAK,KAAK;AAAA,EACpC;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EAEZ,YAAY,MAAkB;AAC5B,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAEhD,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,KAAK,WAAW,IAAI;AACxB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,YAAY,mBAAmB,CAAC,CAAC,CAAC;AAAA,UACnF;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AAAA,QACxB,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI;AAAA,QAAc,CAAC,GAAG,WACpB,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO;AAAA,UACT;AAAA,QACF,CAAC;AAAA,MACH;AAEA,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AAEtC,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAC5C,kBAAc,SAAS;AAAA,EACzB;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,27 +1,43 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-deepgram",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Deepgram plugin for LiveKit Agents for Node.js",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
|
+
"require": "dist/index.cjs",
|
|
6
7
|
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"require": "./dist/index.cjs"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
7
15
|
"author": "LiveKit",
|
|
8
16
|
"type": "module",
|
|
17
|
+
"repository": "git@github.com:livekit/agents-js.git",
|
|
18
|
+
"license": "Apache-2.0",
|
|
19
|
+
"files": [
|
|
20
|
+
"dist",
|
|
21
|
+
"src",
|
|
22
|
+
"README.md"
|
|
23
|
+
],
|
|
9
24
|
"devDependencies": {
|
|
25
|
+
"@livekit/agents": "^x",
|
|
26
|
+
"@livekit/rtc-node": "^0.12.1",
|
|
10
27
|
"@microsoft/api-extractor": "^7.35.0",
|
|
11
|
-
"@livekit/rtc-node": "^0.11.1",
|
|
12
28
|
"@types/ws": "^8.5.10",
|
|
13
|
-
"
|
|
14
|
-
"
|
|
29
|
+
"tsup": "^8.3.5",
|
|
30
|
+
"typescript": "^5.0.0"
|
|
15
31
|
},
|
|
16
32
|
"dependencies": {
|
|
17
33
|
"ws": "^8.16.0"
|
|
18
34
|
},
|
|
19
35
|
"peerDependencies": {
|
|
20
|
-
"@livekit/rtc-node": "^0.
|
|
21
|
-
"@livekit/agents": "^0.
|
|
36
|
+
"@livekit/rtc-node": "^0.12.1",
|
|
37
|
+
"@livekit/agents": "^0.5.0x"
|
|
22
38
|
},
|
|
23
39
|
"scripts": {
|
|
24
|
-
"build": "tsc",
|
|
40
|
+
"build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
|
|
25
41
|
"clean": "rm -rf dist",
|
|
26
42
|
"clean:build": "pnpm clean && pnpm build",
|
|
27
43
|
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
|
package/src/stt.ts
CHANGED
|
@@ -49,7 +49,7 @@ export class STT extends stt.STT {
|
|
|
49
49
|
constructor(opts: Partial<STTOptions> = defaultSTTOptions) {
|
|
50
50
|
super({
|
|
51
51
|
streaming: true,
|
|
52
|
-
interimResults: opts.interimResults
|
|
52
|
+
interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,
|
|
53
53
|
});
|
|
54
54
|
if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {
|
|
55
55
|
throw new Error(
|
|
@@ -261,12 +261,12 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
261
261
|
if (isFinal) {
|
|
262
262
|
this.queue.put({
|
|
263
263
|
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
264
|
-
alternatives: [alternatives[0], ...alternatives.
|
|
264
|
+
alternatives: [alternatives[0], ...alternatives.slice(1)],
|
|
265
265
|
});
|
|
266
266
|
} else {
|
|
267
267
|
this.queue.put({
|
|
268
268
|
type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
269
|
-
alternatives: [alternatives[0], ...alternatives.
|
|
269
|
+
alternatives: [alternatives[0], ...alternatives.slice(1)],
|
|
270
270
|
});
|
|
271
271
|
}
|
|
272
272
|
}
|
package/.turbo/turbo-build.log
DELETED