@livekit/agents-plugin-deepgram 0.4.6 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/index.cjs +23 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +1 -4
- package/dist/index.js.map +1 -1
- package/dist/models.cjs +17 -0
- package/dist/models.cjs.map +1 -0
- package/dist/models.js +0 -4
- package/dist/models.js.map +1 -1
- package/dist/stt.cjs +274 -0
- package/dist/stt.cjs.map +1 -0
- package/dist/stt.d.ts +4 -2
- package/dist/stt.d.ts.map +1 -1
- package/dist/stt.js +234 -243
- package/dist/stt.js.map +1 -1
- package/dist/stt.test.cjs +11 -0
- package/dist/stt.test.cjs.map +1 -0
- package/dist/stt.test.d.ts +2 -0
- package/dist/stt.test.d.ts.map +1 -0
- package/dist/stt.test.js +10 -0
- package/dist/stt.test.js.map +1 -0
- package/package.json +20 -8
- package/src/stt.test.ts +13 -0
- package/src/stt.ts +9 -7
package/README.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
<!--
|
|
2
|
+
SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
3
|
+
|
|
4
|
+
SPDX-License-Identifier: Apache-2.0
|
|
5
|
+
-->
|
|
6
|
+
# Deepgram plugin for LiveKit Agents
|
|
7
|
+
|
|
8
|
+
The Agents Framework is designed for building realtime, programmable
|
|
9
|
+
participants that run on servers. Use it to create conversational, multi-modal
|
|
10
|
+
voice agents that can see, hear, and understand.
|
|
11
|
+
|
|
12
|
+
This package contains the Deepgram plugin, which allows for speech recognition.
|
|
13
|
+
Refer to the [documentation](https://docs.livekit.io/agents/overview/) for
|
|
14
|
+
information on how to use it, or browse the [API
|
|
15
|
+
reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_deepgram.html).
|
|
16
|
+
See the [repository](https://github.com/livekit/agents-js) for more information
|
|
17
|
+
about the framework as a whole.
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __copyProps = (to, from, except, desc) => {
|
|
7
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
8
|
+
for (let key of __getOwnPropNames(from))
|
|
9
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
10
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
11
|
+
}
|
|
12
|
+
return to;
|
|
13
|
+
};
|
|
14
|
+
var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
|
|
15
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
16
|
+
var src_exports = {};
|
|
17
|
+
module.exports = __toCommonJS(src_exports);
|
|
18
|
+
__reExport(src_exports, require("./stt.cjs"), module.exports);
|
|
19
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
20
|
+
0 && (module.exports = {
|
|
21
|
+
...require("./stt.cjs")
|
|
22
|
+
});
|
|
23
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './stt.js';\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAIA,wBAAc,qBAJd;","names":[]}
|
package/dist/index.js
CHANGED
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './stt.js';\n"],"mappings":"AAIA,cAAc;","names":[]}
|
package/dist/models.cjs
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __copyProps = (to, from, except, desc) => {
|
|
7
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
8
|
+
for (let key of __getOwnPropNames(from))
|
|
9
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
10
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
11
|
+
}
|
|
12
|
+
return to;
|
|
13
|
+
};
|
|
14
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
15
|
+
var models_exports = {};
|
|
16
|
+
module.exports = __toCommonJS(models_exports);
|
|
17
|
+
//# sourceMappingURL=models.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type STTModels =\n | 'nova-general'\n | 'nova-phonecall'\n | 'nova-meeting'\n | 'nova-2-general'\n | 'nova-2-meeting'\n | 'nova-2-phonecall'\n | 'nova-2-finance'\n | 'nova-2-conversationalai'\n | 'nova-2-voicemail'\n | 'nova-2-video'\n | 'nova-2-medical'\n | 'nova-2-drivethru'\n | 'nova-2-automotive'\n | 'enhanced-general'\n | 'enhanced-meeting'\n | 'enhanced-phonecall'\n | 'enhanced-finance'\n | 'base'\n | 'meeting'\n | 'phonecall'\n | 'finance'\n | 'conversationalai'\n | 'voicemail'\n | 'video'\n | 'whisper-tiny'\n | 'whisper-base'\n | 'whisper-small'\n | 'whisper-medium'\n | 'whisper-large';\n\nexport type STTLanguages =\n | 'da'\n | 'de'\n | 'en'\n | 'en-AU'\n | 'en-GB'\n | 'en-IN'\n | 'en-NZ'\n | 'en-US'\n | 'es'\n | 'es-419'\n | 'es-LATAM'\n | 'fr'\n | 'fr-CA'\n | 'hi'\n | 'hi-Latn'\n | 'id'\n | 'it'\n | 'ja'\n | 'ko'\n | 'nl'\n | 'no'\n | 'pl'\n | 'pt'\n | 'pt-BR'\n | 'ru'\n | 'sv'\n | 'ta'\n | 'taq'\n | 'th'\n | 'tr'\n | 'uk'\n | 'zh'\n | 'zh-CN'\n | 'zh-TW';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
|
package/dist/models.js
CHANGED
package/dist/models.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
package/dist/stt.cjs
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
var stt_exports = {};
|
|
20
|
+
__export(stt_exports, {
|
|
21
|
+
STT: () => STT,
|
|
22
|
+
SpeechStream: () => SpeechStream
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(stt_exports);
|
|
25
|
+
var import_agents = require("@livekit/agents");
|
|
26
|
+
var import_ws = require("ws");
|
|
27
|
+
// WebSocket endpoint every streaming session connects to.
const API_BASE_URL_V1 = "wss://api.deepgram.com/v1/listen";

/**
 * Baseline options; the STT constructor spreads caller-supplied options over these.
 * `apiKey` is read from the environment once, when this module is evaluated.
 */
const defaultSTTOptions = {
  // Credentials.
  apiKey: process.env.DEEPGRAM_API_KEY,

  // Recognition settings.
  language: "en-US",
  detectLanguage: false,
  interimResults: true,
  punctuate: true,
  model: "nova-2-general",
  smartFormat: true,
  noDelay: true,
  endpointing: 25,
  fillerWords: false,

  // Audio format of the frames pushed to the stream.
  sampleRate: 16000,
  numChannels: 1,

  // Each keyword is a pair that gets joined with ":" when the stream URL is built.
  keywords: [],
  profanityFilter: false
};
|
|
44
|
+
/**
 * Deepgram speech-to-text plugin. Only streaming recognition is supported:
 * `stream()` opens a SpeechStream, while `_recognize()` always throws.
 */
class STT extends import_agents.stt.STT {
  #opts;
  #logger = (0, import_agents.log)();
  label = "deepgram.STT";

  /**
   * @param opts - Partial options; anything omitted falls back to `defaultSTTOptions`.
   * @throws Error when no API key is available from `opts` or `defaultSTTOptions`.
   */
  constructor(opts = defaultSTTOptions) {
    super({
      streaming: true,
      interimResults: opts.interimResults ?? defaultSTTOptions.interimResults
    });

    // Resolve the key before merging: an explicit `apiKey: undefined` in `opts` must not
    // wipe out the environment-provided default (the spread below would otherwise do so
    // and the stream would authenticate with "Token undefined").
    const apiKey = opts.apiKey ?? defaultSTTOptions.apiKey;
    if (apiKey === void 0) {
      throw new Error(
        "Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY"
      );
    }
    this.#opts = { ...defaultSTTOptions, ...opts, apiKey };

    if (this.#opts.detectLanguage) {
      // With language detection on, no explicit language is sent.
      this.#opts.language = void 0;
    } else if (this.#opts.language && !["en-US", "en"].includes(this.#opts.language) && [
      "nova-2-meeting",
      "nova-2-phonecall",
      "nova-2-finance",
      "nova-2-conversationalai",
      "nova-2-voicemail",
      "nova-2-video",
      "nova-2-medical",
      "nova-2-drivethru",
      "nova-2-automotive"
    ].includes(this.#opts.model)) {
      // The models listed above are treated as English-only; any other language
      // is served by the general model instead.
      this.#logger.warn(
        `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`
      );
      this.#opts.model = "nova-2-general";
    }
  }

  /**
   * Non-streaming recognition is not implemented for this plugin.
   * @throws Error always.
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  async _recognize(_) {
    throw new Error("Recognize is not supported on Deepgram STT");
  }

  /** Creates a new streaming session that uses this instance's resolved options. */
  stream() {
    return new SpeechStream(this, this.#opts);
  }
}
|
|
86
|
+
/**
 * Streaming Deepgram session: pushes audio taken from `this.input` over a
 * WebSocket and turns the server's JSON messages into speech events on `this.queue`.
 * The connection loop starts as soon as the stream is constructed.
 */
class SpeechStream extends import_agents.stt.SpeechStream {
  #opts;
  #audioEnergyFilter;
  #logger = (0, import_agents.log)();
  // True between an emitted START_OF_SPEECH and the matching END_OF_SPEECH.
  #speaking = false;
  label = "deepgram.SpeechStream";

  /**
   * @param stt2 - The STT instance that owns this stream.
   * @param opts - Fully resolved options (no defaults are applied here).
   */
  constructor(stt2, opts) {
    super(stt2);
    this.#opts = opts;
    this.closed = false;
    this.#audioEnergyFilter = new import_agents.AudioEnergyFilter();
    this.#run();
  }

  /** Builds the listen URL, mapping the options onto Deepgram's query parameters. */
  #buildStreamURL() {
    const streamURL = new URL(API_BASE_URL_V1);
    const params = {
      model: this.#opts.model,
      punctuate: this.#opts.punctuate,
      smart_format: this.#opts.smartFormat,
      no_delay: this.#opts.noDelay,
      interim_results: this.#opts.interimResults,
      encoding: "linear16",
      vad_events: true,
      sample_rate: this.#opts.sampleRate,
      channels: this.#opts.numChannels,
      endpointing: this.#opts.endpointing || false,
      filler_words: this.#opts.fillerWords,
      keywords: this.#opts.keywords.map((x) => x.join(":")),
      profanity_filter: this.#opts.profanityFilter,
      language: this.#opts.language
    };
    for (const [k, v] of Object.entries(params)) {
      if (v === void 0) continue;
      // URLSearchParams percent-encodes values itself; encoding them beforehand as well
      // would double-encode (e.g. the ":" in "word:2" would arrive as the literal "%3A").
      if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
        streamURL.searchParams.append(k, String(v));
      } else {
        v.forEach((x) => streamURL.searchParams.append("keywords", String(x)));
      }
    }
    return streamURL;
  }

  /**
   * Connects and runs sessions until the input is closed, retrying after failures.
   * @param maxRetry - Number of retries allowed before giving up.
   * @throws Error once `maxRetry` retries have been used up.
   */
  async #run(maxRetry = 32) {
    let retries = 0;
    let ws;
    while (!this.input.closed) {
      ws = new import_ws.WebSocket(this.#buildStreamURL(), {
        headers: { Authorization: `Token ${this.#opts.apiKey}` }
      });
      try {
        await new Promise((resolve, reject) => {
          ws.on("open", resolve);
          ws.on("error", (error) => reject(error));
          ws.on("close", (code) => reject(`WebSocket returned ${code}`));
        });
        await this.#runWS(ws);
      } catch (e) {
        if (retries >= maxRetry) {
          throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
        }
        // 0s before the first retry, 5s before the second, then capped at 10s.
        const delay = Math.min(retries * 5, 10);
        retries++;
        this.#logger.warn(
          `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
        );
        await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
      }
    }
    this.closed = true;
  }

  /**
   * Drives one open socket: a sender forwarding audio and a listener handling messages.
   * Resolves after the input is exhausted and the socket has closed; rejects when the
   * socket closes unexpectedly or the sender fails.
   */
  async #runWS(ws) {
    // Set once we have asked the server to close, so the close event is expected.
    let closing = false;
    const keepalive = setInterval(() => {
      try {
        ws.send(JSON.stringify({ type: "KeepAlive" }));
      } catch {
        clearInterval(keepalive);
        return;
      }
    }, 5e3);

    // NOTE(review): if the listener rejects first, this sender keeps iterating
    // `this.input` in the background and can compete with the next session — confirm
    // whether the input queue needs an explicit hand-over.
    const sendTask = async () => {
      const samples100Ms = Math.floor(this.#opts.sampleRate / 10);
      const stream = new import_agents.AudioByteStream(
        this.#opts.sampleRate,
        this.#opts.numChannels,
        samples100Ms
      );
      for await (const data of this.input) {
        let frames;
        if (data === SpeechStream.FLUSH_SENTINEL) {
          frames = stream.flush();
        } else if (data.sampleRate === this.#opts.sampleRate && data.channels === this.#opts.numChannels) {
          // Both properties must match; a frame that differs in either one is rejected.
          frames = stream.write(data.data.buffer);
        } else {
          throw new Error(`sample rate or channel count of frame does not match`);
        }
        for await (const frame of frames) {
          if (this.#audioEnergyFilter.pushFrame(frame)) {
            ws.send(frame.data.buffer);
          }
        }
      }
      closing = true;
      ws.send(JSON.stringify({ type: "CloseStream" }));
    };

    // Persistent listeners instead of re-arming `once("message")` per message: nothing is
    // dropped when several messages are emitted back to back, and the task settles as soon
    // as the socket closes instead of waiting forever for a message that cannot arrive.
    const listenTask = () => new Promise((resolve, reject) => {
      const stop = () => {
        ws.off("message", onMessage);
        ws.off("close", onClose);
      };
      const onMessage = (data) => {
        if (this.closed) {
          stop();
          resolve();
          return;
        }
        try {
          this.#handleMessage(data);
        } catch (error) {
          this.#logger.child({ error }).warn("unrecoverable error, exiting");
          stop();
          resolve();
        }
      };
      const onClose = (code, reason) => {
        stop();
        if (closing) {
          resolve();
          return;
        }
        this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
        reject(new Error(`WebSocket closed with code ${code}: ${reason}`));
      };
      ws.on("message", onMessage);
      ws.on("close", onClose);
    });

    try {
      await Promise.all([sendTask(), listenTask()]);
    } finally {
      // Stop pinging the socket whether the session ended normally or with an error.
      clearInterval(keepalive);
    }
  }

  /**
   * Parses one server message and emits the corresponding speech events.
   * @throws SyntaxError when the payload is not valid JSON.
   */
  #handleMessage(msg) {
    const json = JSON.parse(msg.toString());
    switch (json["type"]) {
      case "SpeechStarted": {
        // This is a normal case. Deepgram's SpeechStarted events are not correlated with
        // speech_final or utterance end: two may arrive in a row without an endpoint, and
        // a transcript may arrive without any SpeechStarted event.
        if (this.#speaking) return;
        this.#speaking = true;
        this.queue.put({ type: import_agents.stt.SpeechEventType.START_OF_SPEECH });
        break;
      }
      // see this page:
      // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final
      // for more information about the different types of events
      case "Results": {
        const isFinal = json["is_final"];
        const isEndpoint = json["speech_final"];
        const alternatives = liveTranscriptionToSpeechData(this.#opts.language, json);
        // A transcript with text also starts speech when no SpeechStarted event was seen.
        if (alternatives[0] && alternatives[0].text) {
          if (!this.#speaking) {
            this.#speaking = true;
            this.queue.put({ type: import_agents.stt.SpeechEventType.START_OF_SPEECH });
          }
          if (isFinal) {
            this.queue.put({
              type: import_agents.stt.SpeechEventType.FINAL_TRANSCRIPT,
              alternatives: [alternatives[0], ...alternatives.slice(1)]
            });
          } else {
            this.queue.put({
              type: import_agents.stt.SpeechEventType.INTERIM_TRANSCRIPT,
              alternatives: [alternatives[0], ...alternatives.slice(1)]
            });
          }
        }
        // Only end speech that was actually started (there is no SpeechEnded event).
        if (isEndpoint && this.#speaking) {
          this.#speaking = false;
          this.queue.put({ type: import_agents.stt.SpeechEventType.END_OF_SPEECH });
        }
        break;
      }
      case "Metadata": {
        break;
      }
      default: {
        this.#logger.child({ msg: json }).warn("received unexpected message from Deepgram");
        break;
      }
    }
  }
}
|
|
259
|
+
/**
 * Converts a "Results" payload into one speech-data entry per alternative.
 *
 * @param language - Language tag copied onto every returned entry.
 * @param data - Parsed message; alternatives are read from `data.channel.alternatives`.
 * @returns One `{ language, startTime, endTime, confidence, text }` object per alternative.
 *   `startTime`/`endTime` come from the first/last word and are 0 when there are no words.
 *   A payload without an alternatives array yields an empty list.
 */
const liveTranscriptionToSpeechData = (language, data) => {
  const rawAlternatives = data?.["channel"]?.["alternatives"];
  // Tolerate payloads that carry no alternatives instead of throwing a TypeError.
  const alts = Array.isArray(rawAlternatives) ? rawAlternatives : [];
  return alts.map((alt) => {
    // An alternative without a `words` list is treated like one with no words.
    const words = Array.isArray(alt["words"]) ? alt["words"] : [];
    return {
      language,
      startTime: words.length ? words[0]["start"] : 0,
      endTime: words.length ? words[words.length - 1]["end"] : 0,
      confidence: alt["confidence"],
      text: alt["transcript"]
    };
  });
};
|
|
269
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
270
|
+
0 && (module.exports = {
|
|
271
|
+
STT,
|
|
272
|
+
SpeechStream
|
|
273
|
+
});
|
|
274
|
+
//# sourceMappingURL=stt.cjs.map
|
package/dist/stt.cjs.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, AudioByteStream, AudioEnergyFilter, log, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n profanityFilter: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-2-general',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n profanityFilter: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? 
defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n stream(): stt.SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions) {\n super(stt);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n\n this.#run();\n }\n\n async #run(maxRetry = 32) {\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: 
this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append('keywords', encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n async #runWS(ws: WebSocket) {\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = 
stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const listenTask = async () => {\n new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject();\n }\n }),\n );\n\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. 
It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n clearInterval(keepalive);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? 
alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA+E;AAE/E,gBAAwC;AAGxC,MAAM,kBAAkB;AAmBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,iBAAiB;AACnB;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAEhD,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,KAAK,WAAW,IAAI;AACxB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,K
AAK,GAAG,CAAC;AAAA,QACpD,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,YAAY,mBAAmB,CAAC,CAAC,CAAC;AAAA,UACnF;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AAAA,QACxB,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI;AAAA,QAAc,CAAC,GAAG,WACpB,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO;AAAA,UACT;AAAA,QACF,CAAC;AAAA,MACH;AAEA
,aAAO,CAAC,KAAK,QAAQ;AACnB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AAEtC,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAC5C,kBAAc,SAAS;AAAA,EACzB;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
package/dist/stt.d.ts
CHANGED
|
@@ -18,12 +18,14 @@ export interface STTOptions {
|
|
|
18
18
|
}
|
|
19
19
|
export declare class STT extends stt.STT {
|
|
20
20
|
#private;
|
|
21
|
+
label: string;
|
|
21
22
|
constructor(opts?: Partial<STTOptions>);
|
|
22
|
-
|
|
23
|
+
_recognize(_: AudioBuffer): Promise<stt.SpeechEvent>;
|
|
23
24
|
stream(): stt.SpeechStream;
|
|
24
25
|
}
|
|
25
26
|
export declare class SpeechStream extends stt.SpeechStream {
|
|
26
27
|
#private;
|
|
27
|
-
|
|
28
|
+
label: string;
|
|
29
|
+
constructor(stt: STT, opts: STTOptions);
|
|
28
30
|
}
|
|
29
31
|
//# sourceMappingURL=stt.d.ts.map
|
package/dist/stt.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,WAAW,EAA2C,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAGjG,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,eAAe,EAAE,OAAO,CAAC;CAC1B;AAmBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;
|
|
1
|
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,KAAK,WAAW,EAA2C,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAGjG,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,eAAe,EAAE,OAAO,CAAC;CAC1B;AAmBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAsCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,MAAM,IAAI,GAAG,CAAC,YAAY;CAG3B;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAKhD,KAAK,SAA2B;gBAEpB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;CAyMvC"}
|
package/dist/stt.js
CHANGED
|
@@ -1,258 +1,249 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
// SPDX-License-Identifier: Apache-2.0
|
|
5
|
-
import { AudioByteStream, AudioEnergyFilter, log, stt } from '@livekit/agents';
|
|
6
|
-
import { WebSocket } from 'ws';
|
|
7
|
-
const API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';
|
|
1
|
+
import { AudioByteStream, AudioEnergyFilter, log, stt } from "@livekit/agents";
|
|
2
|
+
import { WebSocket } from "ws";
|
|
3
|
+
const API_BASE_URL_V1 = "wss://api.deepgram.com/v1/listen";
|
|
8
4
|
const defaultSTTOptions = {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
5
|
+
apiKey: process.env.DEEPGRAM_API_KEY,
|
|
6
|
+
language: "en-US",
|
|
7
|
+
detectLanguage: false,
|
|
8
|
+
interimResults: true,
|
|
9
|
+
punctuate: true,
|
|
10
|
+
model: "nova-2-general",
|
|
11
|
+
smartFormat: true,
|
|
12
|
+
noDelay: true,
|
|
13
|
+
endpointing: 25,
|
|
14
|
+
fillerWords: false,
|
|
15
|
+
sampleRate: 16e3,
|
|
16
|
+
numChannels: 1,
|
|
17
|
+
keywords: [],
|
|
18
|
+
profanityFilter: false
|
|
23
19
|
};
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
this.#opts.language = undefined;
|
|
38
|
-
}
|
|
39
|
-
else if (this.#opts.language &&
|
|
40
|
-
!['en-US', 'en'].includes(this.#opts.language) &&
|
|
41
|
-
[
|
|
42
|
-
'nova-2-meeting',
|
|
43
|
-
'nova-2-phonecall',
|
|
44
|
-
'nova-2-finance',
|
|
45
|
-
'nova-2-conversationalai',
|
|
46
|
-
'nova-2-voicemail',
|
|
47
|
-
'nova-2-video',
|
|
48
|
-
'nova-2-medical',
|
|
49
|
-
'nova-2-drivethru',
|
|
50
|
-
'nova-2-automotive',
|
|
51
|
-
].includes(this.#opts.model)) {
|
|
52
|
-
this.#logger.warn(`${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`);
|
|
53
|
-
this.#opts.model = 'nova-2-general';
|
|
54
|
-
}
|
|
20
|
+
class STT extends stt.STT {
|
|
21
|
+
#opts;
|
|
22
|
+
#logger = log();
|
|
23
|
+
label = "deepgram.STT";
|
|
24
|
+
constructor(opts = defaultSTTOptions) {
|
|
25
|
+
super({
|
|
26
|
+
streaming: true,
|
|
27
|
+
interimResults: opts.interimResults ?? defaultSTTOptions.interimResults
|
|
28
|
+
});
|
|
29
|
+
if (opts.apiKey === void 0 && defaultSTTOptions.apiKey === void 0) {
|
|
30
|
+
throw new Error(
|
|
31
|
+
"Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY"
|
|
32
|
+
);
|
|
55
33
|
}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
|
|
34
|
+
this.#opts = { ...defaultSTTOptions, ...opts };
|
|
35
|
+
if (this.#opts.detectLanguage) {
|
|
36
|
+
this.#opts.language = void 0;
|
|
37
|
+
} else if (this.#opts.language && !["en-US", "en"].includes(this.#opts.language) && [
|
|
38
|
+
"nova-2-meeting",
|
|
39
|
+
"nova-2-phonecall",
|
|
40
|
+
"nova-2-finance",
|
|
41
|
+
"nova-2-conversationalai",
|
|
42
|
+
"nova-2-voicemail",
|
|
43
|
+
"nova-2-video",
|
|
44
|
+
"nova-2-medical",
|
|
45
|
+
"nova-2-drivethru",
|
|
46
|
+
"nova-2-automotive"
|
|
47
|
+
].includes(this.#opts.model)) {
|
|
48
|
+
this.#logger.warn(
|
|
49
|
+
`${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`
|
|
50
|
+
);
|
|
51
|
+
this.#opts.model = "nova-2-general";
|
|
62
52
|
}
|
|
53
|
+
}
|
|
54
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
55
|
+
async _recognize(_) {
|
|
56
|
+
throw new Error("Recognize is not supported on Deepgram STT");
|
|
57
|
+
}
|
|
58
|
+
stream() {
|
|
59
|
+
return new SpeechStream(this, this.#opts);
|
|
60
|
+
}
|
|
63
61
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
}
|
|
106
|
-
});
|
|
107
|
-
ws = new WebSocket(streamURL, {
|
|
108
|
-
headers: { Authorization: `Token ${this.#opts.apiKey}` },
|
|
109
|
-
});
|
|
110
|
-
try {
|
|
111
|
-
await new Promise((resolve, reject) => {
|
|
112
|
-
ws.on('open', resolve);
|
|
113
|
-
ws.on('error', (error) => reject(error));
|
|
114
|
-
ws.on('close', (code) => reject(`WebSocket returned ${code}`));
|
|
115
|
-
});
|
|
116
|
-
await this.#runWS(ws);
|
|
117
|
-
}
|
|
118
|
-
catch (e) {
|
|
119
|
-
if (retries >= maxRetry) {
|
|
120
|
-
throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
|
|
121
|
-
}
|
|
122
|
-
const delay = Math.min(retries * 5, 10);
|
|
123
|
-
retries++;
|
|
124
|
-
this.#logger.warn(`failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`);
|
|
125
|
-
await new Promise((resolve) => setTimeout(resolve, delay * 1000));
|
|
126
|
-
}
|
|
62
|
+
class SpeechStream extends stt.SpeechStream {
|
|
63
|
+
#opts;
|
|
64
|
+
#audioEnergyFilter;
|
|
65
|
+
#logger = log();
|
|
66
|
+
#speaking = false;
|
|
67
|
+
label = "deepgram.SpeechStream";
|
|
68
|
+
constructor(stt2, opts) {
|
|
69
|
+
super(stt2);
|
|
70
|
+
this.#opts = opts;
|
|
71
|
+
this.closed = false;
|
|
72
|
+
this.#audioEnergyFilter = new AudioEnergyFilter();
|
|
73
|
+
this.#run();
|
|
74
|
+
}
|
|
75
|
+
async #run(maxRetry = 32) {
|
|
76
|
+
let retries = 0;
|
|
77
|
+
let ws;
|
|
78
|
+
while (!this.input.closed) {
|
|
79
|
+
const streamURL = new URL(API_BASE_URL_V1);
|
|
80
|
+
const params = {
|
|
81
|
+
model: this.#opts.model,
|
|
82
|
+
punctuate: this.#opts.punctuate,
|
|
83
|
+
smart_format: this.#opts.smartFormat,
|
|
84
|
+
no_delay: this.#opts.noDelay,
|
|
85
|
+
interim_results: this.#opts.interimResults,
|
|
86
|
+
encoding: "linear16",
|
|
87
|
+
vad_events: true,
|
|
88
|
+
sample_rate: this.#opts.sampleRate,
|
|
89
|
+
channels: this.#opts.numChannels,
|
|
90
|
+
endpointing: this.#opts.endpointing || false,
|
|
91
|
+
filler_words: this.#opts.fillerWords,
|
|
92
|
+
keywords: this.#opts.keywords.map((x) => x.join(":")),
|
|
93
|
+
profanity_filter: this.#opts.profanityFilter,
|
|
94
|
+
language: this.#opts.language
|
|
95
|
+
};
|
|
96
|
+
Object.entries(params).forEach(([k, v]) => {
|
|
97
|
+
if (v !== void 0) {
|
|
98
|
+
if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
|
|
99
|
+
streamURL.searchParams.append(k, encodeURIComponent(v));
|
|
100
|
+
} else {
|
|
101
|
+
v.forEach((x) => streamURL.searchParams.append("keywords", encodeURIComponent(x)));
|
|
102
|
+
}
|
|
127
103
|
}
|
|
128
|
-
|
|
104
|
+
});
|
|
105
|
+
ws = new WebSocket(streamURL, {
|
|
106
|
+
headers: { Authorization: `Token ${this.#opts.apiKey}` }
|
|
107
|
+
});
|
|
108
|
+
try {
|
|
109
|
+
await new Promise((resolve, reject) => {
|
|
110
|
+
ws.on("open", resolve);
|
|
111
|
+
ws.on("error", (error) => reject(error));
|
|
112
|
+
ws.on("close", (code) => reject(`WebSocket returned ${code}`));
|
|
113
|
+
});
|
|
114
|
+
await this.#runWS(ws);
|
|
115
|
+
} catch (e) {
|
|
116
|
+
if (retries >= maxRetry) {
|
|
117
|
+
throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
|
|
118
|
+
}
|
|
119
|
+
const delay = Math.min(retries * 5, 10);
|
|
120
|
+
retries++;
|
|
121
|
+
this.#logger.warn(
|
|
122
|
+
`failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
|
|
123
|
+
);
|
|
124
|
+
await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
|
|
125
|
+
}
|
|
129
126
|
}
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
})
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
if (isFinal) {
|
|
206
|
-
this.queue.put({
|
|
207
|
-
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
208
|
-
alternatives: [alternatives[0], ...alternatives.splice(0)],
|
|
209
|
-
});
|
|
210
|
-
}
|
|
211
|
-
else {
|
|
212
|
-
this.queue.put({
|
|
213
|
-
type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
214
|
-
alternatives: [alternatives[0], ...alternatives.splice(0)],
|
|
215
|
-
});
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
// if we receive an endpoint, only end the speech if
|
|
219
|
-
// we either had a SpeechStarted event or we have a seen
|
|
220
|
-
// a non-empty transcript (deepgram doesn't have a SpeechEnded event)
|
|
221
|
-
if (isEndpoint && this.#speaking) {
|
|
222
|
-
this.#speaking = false;
|
|
223
|
-
this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
|
|
224
|
-
}
|
|
225
|
-
break;
|
|
226
|
-
}
|
|
227
|
-
case 'Metadata': {
|
|
228
|
-
break;
|
|
229
|
-
}
|
|
230
|
-
default: {
|
|
231
|
-
this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');
|
|
232
|
-
break;
|
|
233
|
-
}
|
|
234
|
-
}
|
|
127
|
+
this.closed = true;
|
|
128
|
+
}
|
|
129
|
+
async #runWS(ws) {
|
|
130
|
+
let closing = false;
|
|
131
|
+
const keepalive = setInterval(() => {
|
|
132
|
+
try {
|
|
133
|
+
ws.send(JSON.stringify({ type: "KeepAlive" }));
|
|
134
|
+
} catch {
|
|
135
|
+
clearInterval(keepalive);
|
|
136
|
+
return;
|
|
137
|
+
}
|
|
138
|
+
}, 5e3);
|
|
139
|
+
const sendTask = async () => {
|
|
140
|
+
const samples100Ms = Math.floor(this.#opts.sampleRate / 10);
|
|
141
|
+
const stream = new AudioByteStream(
|
|
142
|
+
this.#opts.sampleRate,
|
|
143
|
+
this.#opts.numChannels,
|
|
144
|
+
samples100Ms
|
|
145
|
+
);
|
|
146
|
+
for await (const data of this.input) {
|
|
147
|
+
let frames;
|
|
148
|
+
if (data === SpeechStream.FLUSH_SENTINEL) {
|
|
149
|
+
frames = stream.flush();
|
|
150
|
+
} else if (data.sampleRate === this.#opts.sampleRate || data.channels === this.#opts.numChannels) {
|
|
151
|
+
frames = stream.write(data.data.buffer);
|
|
152
|
+
} else {
|
|
153
|
+
throw new Error(`sample rate or channel count of frame does not match`);
|
|
154
|
+
}
|
|
155
|
+
for await (const frame of frames) {
|
|
156
|
+
if (this.#audioEnergyFilter.pushFrame(frame)) {
|
|
157
|
+
ws.send(frame.data.buffer);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
closing = true;
|
|
162
|
+
ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
163
|
+
};
|
|
164
|
+
const listenTask = async () => {
|
|
165
|
+
new Promise(
|
|
166
|
+
(_, reject) => ws.once("close", (code, reason) => {
|
|
167
|
+
if (!closing) {
|
|
168
|
+
this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
|
|
169
|
+
reject();
|
|
170
|
+
}
|
|
171
|
+
})
|
|
172
|
+
);
|
|
173
|
+
while (!this.closed) {
|
|
174
|
+
try {
|
|
175
|
+
await new Promise((resolve) => {
|
|
176
|
+
ws.once("message", (data) => resolve(data));
|
|
177
|
+
}).then((msg) => {
|
|
178
|
+
const json = JSON.parse(msg.toString());
|
|
179
|
+
switch (json["type"]) {
|
|
180
|
+
case "SpeechStarted": {
|
|
181
|
+
if (this.#speaking) return;
|
|
182
|
+
this.#speaking = true;
|
|
183
|
+
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
184
|
+
break;
|
|
185
|
+
}
|
|
186
|
+
// see this page:
|
|
187
|
+
// https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final
|
|
188
|
+
// for more information about the different types of events
|
|
189
|
+
case "Results": {
|
|
190
|
+
const isFinal = json["is_final"];
|
|
191
|
+
const isEndpoint = json["speech_final"];
|
|
192
|
+
const alternatives = liveTranscriptionToSpeechData(this.#opts.language, json);
|
|
193
|
+
if (alternatives[0] && alternatives[0].text) {
|
|
194
|
+
if (!this.#speaking) {
|
|
195
|
+
this.#speaking = true;
|
|
196
|
+
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
197
|
+
}
|
|
198
|
+
if (isFinal) {
|
|
199
|
+
this.queue.put({
|
|
200
|
+
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
201
|
+
alternatives: [alternatives[0], ...alternatives.slice(1)]
|
|
235
202
|
});
|
|
203
|
+
} else {
|
|
204
|
+
this.queue.put({
|
|
205
|
+
type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
206
|
+
alternatives: [alternatives[0], ...alternatives.slice(1)]
|
|
207
|
+
});
|
|
208
|
+
}
|
|
236
209
|
}
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
210
|
+
if (isEndpoint && this.#speaking) {
|
|
211
|
+
this.#speaking = false;
|
|
212
|
+
this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
|
|
240
213
|
}
|
|
214
|
+
break;
|
|
215
|
+
}
|
|
216
|
+
case "Metadata": {
|
|
217
|
+
break;
|
|
218
|
+
}
|
|
219
|
+
default: {
|
|
220
|
+
this.#logger.child({ msg: json }).warn("received unexpected message from Deepgram");
|
|
221
|
+
break;
|
|
222
|
+
}
|
|
241
223
|
}
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
224
|
+
});
|
|
225
|
+
} catch (error) {
|
|
226
|
+
this.#logger.child({ error }).warn("unrecoverable error, exiting");
|
|
227
|
+
break;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
};
|
|
231
|
+
await Promise.all([sendTask(), listenTask()]);
|
|
232
|
+
clearInterval(keepalive);
|
|
233
|
+
}
|
|
246
234
|
}
|
|
247
|
-
_a = SpeechStream;
|
|
248
235
|
const liveTranscriptionToSpeechData = (language, data) => {
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
236
|
+
const alts = data["channel"]["alternatives"];
|
|
237
|
+
return alts.map((alt) => ({
|
|
238
|
+
language,
|
|
239
|
+
startTime: alt["words"].length ? alt["words"][0]["start"] : 0,
|
|
240
|
+
endTime: alt["words"].length ? alt["words"][alt["words"].length - 1]["end"] : 0,
|
|
241
|
+
confidence: alt["confidence"],
|
|
242
|
+
text: alt["transcript"]
|
|
243
|
+
}));
|
|
244
|
+
};
|
|
245
|
+
export {
|
|
246
|
+
STT,
|
|
247
|
+
SpeechStream
|
|
257
248
|
};
|
|
258
249
|
//# sourceMappingURL=stt.js.map
|
package/dist/stt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stt.js","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":";AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,EAAoB,eAAe,EAAE,iBAAiB,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAEjG,OAAO,EAAgB,SAAS,EAAE,MAAM,IAAI,CAAC;AAG7C,MAAM,eAAe,GAAG,kCAAkC,CAAC;AAmB3D,MAAM,iBAAiB,GAAe;IACpC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB;IACpC,QAAQ,EAAE,OAAO;IACjB,cAAc,EAAE,KAAK;IACrB,cAAc,EAAE,IAAI;IACpB,SAAS,EAAE,IAAI;IACf,KAAK,EAAE,gBAAgB;IACvB,WAAW,EAAE,IAAI;IACjB,OAAO,EAAE,IAAI;IACb,WAAW,EAAE,EAAE;IACf,WAAW,EAAE,KAAK;IAClB,UAAU,EAAE,KAAK;IACjB,WAAW,EAAE,CAAC;IACd,QAAQ,EAAE,EAAE;IACZ,eAAe,EAAE,KAAK;CACvB,CAAC;AAEF,MAAM,OAAO,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,KAAK,CAAa;IAClB,OAAO,GAAG,GAAG,EAAE,CAAC;IAEhB,YAAY,OAA4B,iBAAiB;QACvD,KAAK,CAAC;YACJ,SAAS,EAAE,IAAI;YACf,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,iBAAiB,CAAC,cAAc;SACxE,CAAC,CAAC;QACH,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,iBAAiB,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YACxE,MAAM,IAAI,KAAK,CACb,8EAA8E,CAC/E,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,KAAK,GAAG,EAAE,GAAG,iBAAiB,EAAE,GAAG,IAAI,EAAE,CAAC;QAE/C,IAAI,IAAI,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC;YAC9B,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,SAAS,CAAC;QAClC,CAAC;aAAM,IACL,IAAI,CAAC,KAAK,CAAC,QAAQ;YACnB,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC;YAC9C;gBACE,gBAAgB;gBAChB,kBAAkB;gBAClB,gBAAgB;gBAChB,yBAAyB;gBACzB,kBAAkB;gBAClB,cAAc;gBACd,gBAAgB;gBAChB,kBAAkB;gBAClB,mBAAmB;aACpB,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAC5B,CAAC;YACD,IAAI,CAAC,OAAO,CAAC,IAAI,CACf,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,8BAA8B,IAAI,CAAC,KAAK,CAAC,QAAQ,kCAAkC,CACvG,CAAC;YACF,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,gBAAgB,CAAC;QACtC,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,SAAS,CAAC,CAAc;QAC5B,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAChE,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtC,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAQ,GAAG,CAAC,YAAY;IAChD,KAAK,CAAa;IAClB,kBAAkB,CAAoB;IACtC,OAAO,GAAG,GAAG,EAAE,CAAC;IAChB,SAAS,GAAG,KAAK,CAAC;IAElB,YAAY,IAAgB;QAC1B,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,KAAK,G
AAG,IAAI,CAAC;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,kBAAkB,GAAG,IAAI,iBAAiB,EAAE,CAAC;QAElD,IAAI,CAAC,IAAI,EAAE,CAAC;IACd,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,QAAQ,GAAG,EAAE;QACtB,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,EAAa,CAAC;QAClB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YAC1B,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,CAAC;YAC3C,MAAM,MAAM,GAAG;gBACb,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK;gBACvB,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS;gBAC/B,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW;gBACpC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO;gBAC5B,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,cAAc;gBAC1C,QAAQ,EAAE,UAAU;gBACpB,UAAU,EAAE,IAAI;gBAChB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU;gBAClC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW;gBAChC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,IAAI,KAAK;gBAC5C,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW;gBACpC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACrD,gBAAgB,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe;gBAC5C,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ;aAC9B,CAAC;YACF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE;gBACxC,IAAI,CAAC,KAAK,SAAS,EAAE,CAAC;oBACpB,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;wBAC7E,SAAS,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAE,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC1D,CAAC;yBAAM,CAAC;wBACN,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,YAAY,CAAC,MAAM,CAAC,UAAU,EAAE,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBACrF,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,EAAE,GAAG,IAAI,SAAS,CAAC,SAAS,EAAE;gBAC5B,OAAO,EAAE,EAAE,aAAa,EAAE,SAAS,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE;aACzD,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;oBACpC,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBACvB,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;oBACzC,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC,CAAC,CAAC;gBACjE,CAAC,CAAC,CAAC;gBAEH,MAAM,IAAI,CAAC,MAAM,CAAC,EAAE,CA
AC,CAAC;YACxB,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;oBACxB,MAAM,IAAI,KAAK,CAAC,uCAAuC,OAAO,cAAc,CAAC,EAAE,CAAC,CAAC;gBACnF,CAAC;gBAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;gBACxC,OAAO,EAAE,CAAC;gBAEV,IAAI,CAAC,OAAO,CAAC,IAAI,CACf,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ,GAAG,CAC7F,CAAC;gBACF,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC;YACpE,CAAC;QACH,CAAC;QAED,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,EAAa;QACxB,IAAI,OAAO,GAAG,KAAK,CAAC;QAEpB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE;YACjC,IAAI,CAAC;gBACH,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;YACjD,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,CAAC,SAAS,CAAC,CAAC;gBACzB,OAAO;YACT,CAAC;QACH,CAAC,EAAE,IAAI,CAAC,CAAC;QAET,MAAM,QAAQ,GAAG,KAAK,IAAI,EAAE;YAC1B,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;YAC5D,MAAM,MAAM,GAAG,IAAI,eAAe,CAChC,IAAI,CAAC,KAAK,CAAC,UAAU,EACrB,IAAI,CAAC,KAAK,CAAC,WAAW,EACtB,YAAY,CACb,CAAC;YAEF,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBACpC,IAAI,MAAoB,CAAC;gBACzB,IAAI,IAAI,KAAK,EAAY,CAAC,cAAc,EAAE,CAAC;oBACzC,MAAM,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;gBAC1B,CAAC;qBAAM,IACL,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,KAAK,CAAC,UAAU;oBACzC,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,KAAK,CAAC,WAAW,EACxC,CAAC;oBACD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC1C,CAAC;qBAAM,CAAC;oBACN,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;gBAC1E,CAAC;gBAED,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBACjC,IAAI,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;wBAC7C,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;oBAC7B,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO,GAAG,IAAI,CAAC;YACf,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;QACnD,CAAC,CAAC;QAEF,MAAM,UAAU,GAAG,KAAK,IAAI,EAAE;YAC5B,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAC9B,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,E
AAE;gBAChC,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,8BAA8B,IAAI,KAAK,MAAM,EAAE,CAAC,CAAC;oBACpE,MAAM,EAAE,CAAC;gBACX,CAAC;YACH,CAAC,CAAC,CACH,CAAC;YAEF,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACpB,IAAI,CAAC;oBACH,MAAM,IAAI,OAAO,CAAU,CAAC,OAAO,EAAE,EAAE;wBACrC,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBAC9C,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;wBACd,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;wBACxC,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;4BACrB,KAAK,eAAe,CAAC,CAAC,CAAC;gCACrB,yDAAyD;gCACzD,yDAAyD;gCACzD,iEAAiE;gCACjE,4EAA4E;gCAC5E,IAAI,IAAI,CAAC,SAAS;oCAAE,OAAO;gCAC3B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;gCACtB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,eAAe,EAAE,CAAC,CAAC;gCAC9D,MAAM;4BACR,CAAC;4BACD,iBAAiB;4BACjB,6GAA6G;4BAC7G,2DAA2D;4BAC3D,KAAK,SAAS,CAAC,CAAC,CAAC;gCACf,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;gCACjC,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC;gCAExC,MAAM,YAAY,GAAG,6BAA6B,CAAC,IAAI,CAAC,KAAK,CAAC,QAAS,EAAE,IAAI,CAAC,CAAC;gCAE/E,sEAAsE;gCACtE,sEAAsE;gCACtE,iBAAiB;gCACjB,IAAI,YAAY,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;oCAC5C,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;wCACpB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;wCACtB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,eAAe,EAAE,CAAC,CAAC;oCAChE,CAAC;oCAED,IAAI,OAAO,EAAE,CAAC;wCACZ,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;4CACb,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,gBAAgB;4CAC1C,YAAY,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;yCAC3D,CAAC,CAAC;oCACL,CAAC;yCAAM,CAAC;wCACN,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;4CACb,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,kBAAkB;4CAC5C,YAAY,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;yCAC3D,CAAC,CAAC;oCACL,CAAC;gCACH,CAAC;gCAED,oDAAoD;gCACpD,wDAAwD;gCACxD,qEAAqE;gCACrE,IAAI,UAAU,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oCACjC,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;oCACvB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC,
CAAC;gCAC9D,CAAC;gCAED,MAAM;4BACR,CAAC;4BACD,KAAK,UAAU,CAAC,CAAC,CAAC;gCAChB,MAAM;4BACR,CAAC;4BACD,OAAO,CAAC,CAAC,CAAC;gCACR,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;gCACpF,MAAM;4BACR,CAAC;wBACH,CAAC;oBACH,CAAC,CAAC,CAAC;gBACL,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;oBACnE,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC;QAC9C,aAAa,CAAC,SAAS,CAAC,CAAC;IAC3B,CAAC;CACF;;AAED,MAAM,6BAA6B,GAAG,CACpC,QAA+B,EAC/B,IAA2B,EACT,EAAE;IACpB,MAAM,IAAI,GAAU,IAAI,CAAC,SAAS,CAAC,CAAC,cAAc,CAAC,CAAC;IAEpD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACxB,QAAQ;QACR,SAAS,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7D,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/E,UAAU,EAAE,GAAG,CAAC,YAAY,CAAC;QAC7B,IAAI,EAAE,GAAG,CAAC,YAAY,CAAC;KACxB,CAAC,CAAC,CAAC;AACN,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioBuffer, AudioByteStream, AudioEnergyFilter, log, stt } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n profanityFilter: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-2-general',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n profanityFilter: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? 
defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n stream(): stt.SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions) {\n super(stt);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n\n this.#run();\n }\n\n async #run(maxRetry = 32) {\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: 
this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append('keywords', encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n async #runWS(ws: WebSocket) {\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = 
stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const listenTask = async () => {\n new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject();\n }\n }),\n );\n\n while (!this.closed) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. 
It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.all([sendTask(), listenTask()]);\n clearInterval(keepalive);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? 
alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA,SAA2B,iBAAiB,mBAAmB,KAAK,WAAW;AAE/E,SAAuB,iBAAiB;AAGxC,MAAM,kBAAkB;AAmBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,iBAAiB;AACnB;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,SAA2B;AACzB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAEhD,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,KAAK,WAAW,IAAI;AACxB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACp
D,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,YAAY,mBAAmB,CAAC,CAAC,CAAC;AAAA,UACnF;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AAAA,QACxB,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,aAAa,YAAY;AAC7B,UAAI;AAAA,QAAc,CAAC,GAAG,WACpB,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO;AAAA,UACT;AAAA,QACF,CAAC;AAAA,MACH;AAEA,aAAO,CAAC,KAAK,QAAQ;AAC
nB,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AAEtC,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC;AAC5C,kBAAc,SAAS;AAAA,EACzB;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var import_agents = require("@livekit/agents");
|
|
3
|
+
var import_agents_plugin_silero = require("@livekit/agents-plugin-silero");
|
|
4
|
+
var import_agents_plugins_test = require("@livekit/agents-plugins-test");
|
|
5
|
+
var import_vitest = require("vitest");
|
|
6
|
+
var import_stt = require("./stt.cjs");
|
|
7
|
+
(0, import_vitest.describe)("Deepgram", async () => {
|
|
8
|
+
(0, import_agents.initializeLogger)({ pretty: false });
|
|
9
|
+
await (0, import_agents_plugins_test.stt)(new import_stt.STT(), await import_agents_plugin_silero.VAD.load(), { nonStreaming: false });
|
|
10
|
+
});
|
|
11
|
+
//# sourceMappingURL=stt.test.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/stt.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { initializeLogger } from '@livekit/agents';\nimport { VAD } from '@livekit/agents-plugin-silero';\nimport { stt } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { STT } from './stt.js';\n\ndescribe('Deepgram', async () => {\n initializeLogger({ pretty: false });\n await stt(new STT(), await VAD.load(), { nonStreaming: false });\n});\n"],"mappings":";AAGA,oBAAiC;AACjC,kCAAoB;AACpB,iCAAoB;AACpB,oBAAyB;AACzB,iBAAoB;AAAA,IAEpB,wBAAS,YAAY,YAAY;AAC/B,sCAAiB,EAAE,QAAQ,MAAM,CAAC;AAClC,YAAM,gCAAI,IAAI,eAAI,GAAG,MAAM,gCAAI,KAAK,GAAG,EAAE,cAAc,MAAM,CAAC;AAChE,CAAC;","names":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stt.test.d.ts","sourceRoot":"","sources":["../src/stt.test.ts"],"names":[],"mappings":""}
|
package/dist/stt.test.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { initializeLogger } from "@livekit/agents";
|
|
2
|
+
import { VAD } from "@livekit/agents-plugin-silero";
|
|
3
|
+
import { stt } from "@livekit/agents-plugins-test";
|
|
4
|
+
import { describe } from "vitest";
|
|
5
|
+
import { STT } from "./stt.js";
|
|
6
|
+
describe("Deepgram", async () => {
|
|
7
|
+
initializeLogger({ pretty: false });
|
|
8
|
+
await stt(new STT(), await VAD.load(), { nonStreaming: false });
|
|
9
|
+
});
|
|
10
|
+
//# sourceMappingURL=stt.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/stt.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { initializeLogger } from '@livekit/agents';\nimport { VAD } from '@livekit/agents-plugin-silero';\nimport { stt } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { STT } from './stt.js';\n\ndescribe('Deepgram', async () => {\n initializeLogger({ pretty: false });\n await stt(new STT(), await VAD.load(), { nonStreaming: false });\n});\n"],"mappings":"AAGA,SAAS,wBAAwB;AACjC,SAAS,WAAW;AACpB,SAAS,WAAW;AACpB,SAAS,gBAAgB;AACzB,SAAS,WAAW;AAEpB,SAAS,YAAY,YAAY;AAC/B,mBAAiB,EAAE,QAAQ,MAAM,CAAC;AAClC,QAAM,IAAI,IAAI,IAAI,GAAG,MAAM,IAAI,KAAK,GAAG,EAAE,cAAc,MAAM,CAAC;AAChE,CAAC;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,33 +1,45 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-deepgram",
|
|
3
|
-
"version": "0.4.6",
|
|
3
|
+
"version": "0.5.1",
|
|
4
4
|
"description": "Deepgram plugin for LiveKit Agents for Node.js",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
|
+
"require": "dist/index.cjs",
|
|
6
7
|
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"require": "./dist/index.cjs"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
7
15
|
"author": "LiveKit",
|
|
8
16
|
"type": "module",
|
|
9
17
|
"repository": "git@github.com:livekit/agents-js.git",
|
|
10
18
|
"license": "Apache-2.0",
|
|
11
19
|
"files": [
|
|
12
20
|
"dist",
|
|
13
|
-
"src"
|
|
21
|
+
"src",
|
|
22
|
+
"README.md"
|
|
14
23
|
],
|
|
15
24
|
"devDependencies": {
|
|
25
|
+
"@livekit/agents": "^x",
|
|
26
|
+
"@livekit/agents-plugin-silero": "^x",
|
|
27
|
+
"@livekit/agents-plugins-test": "^x",
|
|
28
|
+
"@livekit/rtc-node": "^0.12.1",
|
|
16
29
|
"@microsoft/api-extractor": "^7.35.0",
|
|
17
|
-
"@livekit/rtc-node": "^0.11.1",
|
|
18
30
|
"@types/ws": "^8.5.10",
|
|
19
|
-
"
|
|
20
|
-
"
|
|
31
|
+
"tsup": "^8.3.5",
|
|
32
|
+
"typescript": "^5.0.0"
|
|
21
33
|
},
|
|
22
34
|
"dependencies": {
|
|
23
35
|
"ws": "^8.16.0"
|
|
24
36
|
},
|
|
25
37
|
"peerDependencies": {
|
|
26
|
-
"@livekit/rtc-node": "^0.
|
|
27
|
-
"@livekit/agents": "^0.
|
|
38
|
+
"@livekit/rtc-node": "^0.12.1",
|
|
39
|
+
"@livekit/agents": "^0.6.0x"
|
|
28
40
|
},
|
|
29
41
|
"scripts": {
|
|
30
|
-
"build": "tsc",
|
|
42
|
+
"build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"",
|
|
31
43
|
"clean": "rm -rf dist",
|
|
32
44
|
"clean:build": "pnpm clean && pnpm build",
|
|
33
45
|
"lint": "eslint -f unix \"src/**/*.{ts,js}\"",
|
package/src/stt.test.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { initializeLogger } from '@livekit/agents';
|
|
5
|
+
import { VAD } from '@livekit/agents-plugin-silero';
|
|
6
|
+
import { stt } from '@livekit/agents-plugins-test';
|
|
7
|
+
import { describe } from 'vitest';
|
|
8
|
+
import { STT } from './stt.js';
|
|
9
|
+
|
|
10
|
+
describe('Deepgram', async () => {
|
|
11
|
+
initializeLogger({ pretty: false });
|
|
12
|
+
await stt(new STT(), await VAD.load(), { nonStreaming: false });
|
|
13
|
+
});
|
package/src/stt.ts
CHANGED
|
@@ -45,11 +45,12 @@ const defaultSTTOptions: STTOptions = {
|
|
|
45
45
|
export class STT extends stt.STT {
|
|
46
46
|
#opts: STTOptions;
|
|
47
47
|
#logger = log();
|
|
48
|
+
label = 'deepgram.STT';
|
|
48
49
|
|
|
49
50
|
constructor(opts: Partial<STTOptions> = defaultSTTOptions) {
|
|
50
51
|
super({
|
|
51
52
|
streaming: true,
|
|
52
|
-
interimResults: opts.interimResults
|
|
53
|
+
interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,
|
|
53
54
|
});
|
|
54
55
|
if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {
|
|
55
56
|
throw new Error(
|
|
@@ -84,12 +85,12 @@ export class STT extends stt.STT {
|
|
|
84
85
|
}
|
|
85
86
|
|
|
86
87
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
87
|
-
async
|
|
88
|
+
async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {
|
|
88
89
|
throw new Error('Recognize is not supported on Deepgram STT');
|
|
89
90
|
}
|
|
90
91
|
|
|
91
92
|
stream(): stt.SpeechStream {
|
|
92
|
-
return new SpeechStream(this.#opts);
|
|
93
|
+
return new SpeechStream(this, this.#opts);
|
|
93
94
|
}
|
|
94
95
|
}
|
|
95
96
|
|
|
@@ -98,9 +99,10 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
98
99
|
#audioEnergyFilter: AudioEnergyFilter;
|
|
99
100
|
#logger = log();
|
|
100
101
|
#speaking = false;
|
|
102
|
+
label = 'deepgram.SpeechStream';
|
|
101
103
|
|
|
102
|
-
constructor(opts: STTOptions) {
|
|
103
|
-
super();
|
|
104
|
+
constructor(stt: STT, opts: STTOptions) {
|
|
105
|
+
super(stt);
|
|
104
106
|
this.#opts = opts;
|
|
105
107
|
this.closed = false;
|
|
106
108
|
this.#audioEnergyFilter = new AudioEnergyFilter();
|
|
@@ -261,12 +263,12 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
261
263
|
if (isFinal) {
|
|
262
264
|
this.queue.put({
|
|
263
265
|
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
264
|
-
alternatives: [alternatives[0], ...alternatives.
|
|
266
|
+
alternatives: [alternatives[0], ...alternatives.slice(1)],
|
|
265
267
|
});
|
|
266
268
|
} else {
|
|
267
269
|
this.queue.put({
|
|
268
270
|
type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
269
|
-
alternatives: [alternatives[0], ...alternatives.
|
|
271
|
+
alternatives: [alternatives[0], ...alternatives.slice(1)],
|
|
270
272
|
});
|
|
271
273
|
}
|
|
272
274
|
}
|