@livekit/agents-plugin-deepgram 0.5.7 → 1.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var utils_exports = {};
20
+ __export(utils_exports, {
21
+ PeriodicCollector: () => PeriodicCollector
22
+ });
23
+ module.exports = __toCommonJS(utils_exports);
24
+ class PeriodicCollector {
25
+ duration;
26
+ callback;
27
+ lastFlushTime;
28
+ total = null;
29
+ constructor(callback, options) {
30
+ this.duration = options.duration;
31
+ this.callback = callback;
32
+ this.lastFlushTime = performance.now() / 1e3;
33
+ }
34
+ push(value) {
35
+ if (this.total === null) {
36
+ this.total = value;
37
+ } else {
38
+ this.total = this.total + value;
39
+ }
40
+ if (performance.now() / 1e3 - this.lastFlushTime >= this.duration) {
41
+ this.flush();
42
+ }
43
+ }
44
+ flush() {
45
+ if (this.total !== null) {
46
+ this.callback(this.total);
47
+ this.total = null;
48
+ }
49
+ this.lastFlushTime = performance.now() / 1e3;
50
+ }
51
+ }
52
+ // Annotate the CommonJS export names for ESM import in node:
53
+ 0 && (module.exports = {
54
+ PeriodicCollector
55
+ });
56
+ //# sourceMappingURL=_utils.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/_utils.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport class PeriodicCollector<T> {\n private duration: number;\n private callback: (value: T) => void;\n private lastFlushTime: number;\n private total: T | null = null;\n\n constructor(callback: (value: T) => void, options: { duration: number }) {\n /**\n * Create a new periodic collector that accumulates values and calls the callback\n * after the specified duration if there are values to report.\n *\n * @param callback Function to call with accumulated value when duration expires\n * @param options.duration Time in seconds between callback invocations\n */\n this.duration = options.duration;\n this.callback = callback;\n this.lastFlushTime = performance.now() / 1000; // Convert to seconds\n }\n\n push(value: T): void {\n /**\n * Add a value to the accumulator\n */\n if (this.total === null) {\n this.total = value;\n } else {\n // Type assertion needed for generic addition\n this.total = (this.total as any) + (value as any);\n }\n\n if (performance.now() / 1000 - this.lastFlushTime >= this.duration) {\n this.flush();\n }\n }\n\n flush(): void {\n /**\n * Force callback to be called with current total if non-zero\n */\n if (this.total !== null) {\n this.callback(this.total);\n this.total = null;\n }\n this.lastFlushTime = performance.now() / 1000;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAIO,MAAM,kBAAqB;AAAA,EACxB;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAkB;AAAA,EAE1B,YAAY,UAA8B,SAA+B;AAQvE,SAAK,WAAW,QAAQ;AACxB,SAAK,WAAW;AAChB,SAAK,gBAAgB,YAAY,IAAI,IAAI;AAAA,EAC3C;AAAA,EAEA,KAAK,OAAgB;AAInB,QAAI,KAAK,UAAU,MAAM;AACvB,WAAK,QAAQ;AAAA,IACf,OAAO;AAEL,WAAK,QAAS,KAAK,QAAiB;AAAA,IACtC;AAEA,QAAI,YAAY,IAAI,IAAI,MAAO,KAAK,iBAAiB,KAAK,UAAU;AAClE,WAAK,MAAM;AAAA,IACb;AAAA,EACF;AAAA,EAEA,QAAc;AAIZ,QAAI,KAAK,UAAU,MAAM;AACvB,WAAK,SAAS,KAAK,KAAK;AACxB,WAAK,QAAQ;AAAA,IACf;AACA,SAAK,gBAAgB,YAAY,IAAI,IAAI;AAAA,EAC3C;AACF;","names":[]}
@@ -0,0 +1,12 @@
1
+ export declare class PeriodicCollector<T> {
2
+ private duration;
3
+ private callback;
4
+ private lastFlushTime;
5
+ private total;
6
+ constructor(callback: (value: T) => void, options: {
7
+ duration: number;
8
+ });
9
+ push(value: T): void;
10
+ flush(): void;
11
+ }
12
+ //# sourceMappingURL=_utils.d.ts.map
@@ -0,0 +1,12 @@
1
+ export declare class PeriodicCollector<T> {
2
+ private duration;
3
+ private callback;
4
+ private lastFlushTime;
5
+ private total;
6
+ constructor(callback: (value: T) => void, options: {
7
+ duration: number;
8
+ });
9
+ push(value: T): void;
10
+ flush(): void;
11
+ }
12
+ //# sourceMappingURL=_utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"_utils.d.ts","sourceRoot":"","sources":["../src/_utils.ts"],"names":[],"mappings":"AAIA,qBAAa,iBAAiB,CAAC,CAAC;IAC9B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,KAAK,CAAkB;gBAEnB,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC,KAAK,IAAI,EAAE,OAAO,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE;IAavE,IAAI,CAAC,KAAK,EAAE,CAAC,GAAG,IAAI;IAgBpB,KAAK,IAAI,IAAI;CAUd"}
package/dist/_utils.js ADDED
@@ -0,0 +1,32 @@
1
+ class PeriodicCollector {
2
+ duration;
3
+ callback;
4
+ lastFlushTime;
5
+ total = null;
6
+ constructor(callback, options) {
7
+ this.duration = options.duration;
8
+ this.callback = callback;
9
+ this.lastFlushTime = performance.now() / 1e3;
10
+ }
11
+ push(value) {
12
+ if (this.total === null) {
13
+ this.total = value;
14
+ } else {
15
+ this.total = this.total + value;
16
+ }
17
+ if (performance.now() / 1e3 - this.lastFlushTime >= this.duration) {
18
+ this.flush();
19
+ }
20
+ }
21
+ flush() {
22
+ if (this.total !== null) {
23
+ this.callback(this.total);
24
+ this.total = null;
25
+ }
26
+ this.lastFlushTime = performance.now() / 1e3;
27
+ }
28
+ }
29
+ export {
30
+ PeriodicCollector
31
+ };
32
+ //# sourceMappingURL=_utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/_utils.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport class PeriodicCollector<T> {\n private duration: number;\n private callback: (value: T) => void;\n private lastFlushTime: number;\n private total: T | null = null;\n\n constructor(callback: (value: T) => void, options: { duration: number }) {\n /**\n * Create a new periodic collector that accumulates values and calls the callback\n * after the specified duration if there are values to report.\n *\n * @param callback Function to call with accumulated value when duration expires\n * @param options.duration Time in seconds between callback invocations\n */\n this.duration = options.duration;\n this.callback = callback;\n this.lastFlushTime = performance.now() / 1000; // Convert to seconds\n }\n\n push(value: T): void {\n /**\n * Add a value to the accumulator\n */\n if (this.total === null) {\n this.total = value;\n } else {\n // Type assertion needed for generic addition\n this.total = (this.total as any) + (value as any);\n }\n\n if (performance.now() / 1000 - this.lastFlushTime >= this.duration) {\n this.flush();\n }\n }\n\n flush(): void {\n /**\n * Force callback to be called with current total if non-zero\n */\n if (this.total !== null) {\n this.callback(this.total);\n this.total = null;\n }\n this.lastFlushTime = performance.now() / 1000;\n }\n}\n"],"mappings":"AAIO,MAAM,kBAAqB;AAAA,EACxB;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAkB;AAAA,EAE1B,YAAY,UAA8B,SAA+B;AAQvE,SAAK,WAAW,QAAQ;AACxB,SAAK,WAAW;AAChB,SAAK,gBAAgB,YAAY,IAAI,IAAI;AAAA,EAC3C;AAAA,EAEA,KAAK,OAAgB;AAInB,QAAI,KAAK,UAAU,MAAM;AACvB,WAAK,QAAQ;AAAA,IACf,OAAO;AAEL,WAAK,QAAS,KAAK,QAAiB;AAAA,IACtC;AAEA,QAAI,YAAY,IAAI,IAAI,MAAO,KAAK,iBAAiB,KAAK,UAAU;AAClE,WAAK,MAAM;AAAA,IACb;AAAA,EACF;AAAA,EAEA,QAAc;AAIZ,QAAI,KAAK,UAAU,MAAM;AACvB,WAAK,SAAS,KAAK,KAAK;AACxB,WAAK,QAAQ;AAAA,IACf;AACA,SAAK,gBAAgB,YAAY,IAAI,IAAI;AAAA,EAC3C;AACF;","names":[]}
package/dist/index.cjs CHANGED
@@ -13,9 +13,20 @@ var __copyProps = (to, from, except, desc) => {
13
13
  };
14
14
  var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
15
15
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
16
- var src_exports = {};
17
- module.exports = __toCommonJS(src_exports);
18
- __reExport(src_exports, require("./stt.cjs"), module.exports);
16
+ var index_exports = {};
17
+ module.exports = __toCommonJS(index_exports);
18
+ var import_agents = require("@livekit/agents");
19
+ __reExport(index_exports, require("./stt.cjs"), module.exports);
20
+ class DeepgramPlugin extends import_agents.Plugin {
21
+ constructor() {
22
+ super({
23
+ title: "deepgram",
24
+ version: "0.5.6",
25
+ package: "@livekit/agents-plugin-deepgram"
26
+ });
27
+ }
28
+ }
29
+ import_agents.Plugin.registerPlugin(new DeepgramPlugin());
19
30
  // Annotate the CommonJS export names for ESM import in node:
20
31
  0 && (module.exports = {
21
32
  ...require("./stt.cjs")
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './stt.js';\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAIA,wBAAc,qBAJd;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './stt.js';\n\nclass DeepgramPlugin extends Plugin {\n constructor() {\n super({\n title: 'deepgram',\n version: '0.5.6',\n package: '@livekit/agents-plugin-deepgram',\n });\n }\n}\n\nPlugin.registerPlugin(new DeepgramPlugin());\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAGA,oBAAuB;AAEvB,0BAAc,qBALd;AAOA,MAAM,uBAAuB,qBAAO;AAAA,EAClC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,qBAAO,eAAe,IAAI,eAAe,CAAC;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAIA,cAAc,UAAU,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,cAAc,UAAU,CAAC"}
package/dist/index.js CHANGED
@@ -1,2 +1,13 @@
1
+ import { Plugin } from "@livekit/agents";
1
2
  export * from "./stt.js";
3
+ class DeepgramPlugin extends Plugin {
4
+ constructor() {
5
+ super({
6
+ title: "deepgram",
7
+ version: "0.5.6",
8
+ package: "@livekit/agents-plugin-deepgram"
9
+ });
10
+ }
11
+ }
12
+ Plugin.registerPlugin(new DeepgramPlugin());
2
13
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport * from './stt.js';\n"],"mappings":"AAIA,cAAc;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport * from './stt.js';\n\nclass DeepgramPlugin extends Plugin {\n constructor() {\n super({\n title: 'deepgram',\n version: '0.5.6',\n package: '@livekit/agents-plugin-deepgram',\n });\n }\n}\n\nPlugin.registerPlugin(new DeepgramPlugin());\n"],"mappings":"AAGA,SAAS,cAAc;AAEvB,cAAc;AAEd,MAAM,uBAAuB,OAAO;AAAA,EAClC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,OAAO,eAAe,IAAI,eAAe,CAAC;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type STTModels =\n | 'nova-general'\n | 'nova-phonecall'\n | 'nova-meeting'\n | 'nova-2-general'\n | 'nova-2-meeting'\n | 'nova-2-phonecall'\n | 'nova-2-finance'\n | 'nova-2-conversationalai'\n | 'nova-2-voicemail'\n | 'nova-2-video'\n | 'nova-2-medical'\n | 'nova-2-drivethru'\n | 'nova-2-automotive'\n | 'nova-3-general'\n | 'enhanced-general'\n | 'enhanced-meeting'\n | 'enhanced-phonecall'\n | 'enhanced-finance'\n | 'base'\n | 'meeting'\n | 'phonecall'\n | 'finance'\n | 'conversationalai'\n | 'voicemail'\n | 'video'\n | 'whisper-tiny'\n | 'whisper-base'\n | 'whisper-small'\n | 'whisper-medium'\n | 'whisper-large';\n\nexport type STTLanguages =\n | 'da'\n | 'de'\n | 'en'\n | 'en-AU'\n | 'en-GB'\n | 'en-IN'\n | 'en-NZ'\n | 'en-US'\n | 'es'\n | 'es-419'\n | 'es-LATAM'\n | 'fr'\n | 'fr-CA'\n | 'hi'\n | 'hi-Latn'\n | 'id'\n | 'it'\n | 'ja'\n | 'ko'\n | 'nl'\n | 'no'\n | 'pl'\n | 'pt'\n | 'pt-BR'\n | 'ru'\n | 'sv'\n | 'ta'\n | 'taq'\n | 'th'\n | 'tr'\n | 'uk'\n | 'zh'\n | 'zh-CN'\n | 'zh-TW';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
1
+ {"version":3,"sources":["../src/models.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport type STTModels =\n | 'nova-general'\n | 'nova-phonecall'\n | 'nova-meeting'\n | 'nova-2-general'\n | 'nova-2-meeting'\n | 'nova-2-phonecall'\n | 'nova-2-finance'\n | 'nova-2-conversationalai'\n | 'nova-2-voicemail'\n | 'nova-2-video'\n | 'nova-2-medical'\n | 'nova-2-drivethru'\n | 'nova-2-automotive'\n | 'nova-3'\n | 'nova-3-general'\n | 'nova-3-medical'\n | 'enhanced-general'\n | 'enhanced-meeting'\n | 'enhanced-phonecall'\n | 'enhanced-finance'\n | 'base'\n | 'meeting'\n | 'phonecall'\n | 'finance'\n | 'conversationalai'\n | 'voicemail'\n | 'video'\n | 'whisper-tiny'\n | 'whisper-base'\n | 'whisper-small'\n | 'whisper-medium'\n | 'whisper-large';\n\nexport type STTLanguages =\n | 'da'\n | 'de'\n | 'en'\n | 'en-AU'\n | 'en-GB'\n | 'en-IN'\n | 'en-NZ'\n | 'en-US'\n | 'es'\n | 'es-419'\n | 'es-LATAM'\n | 'fr'\n | 'fr-CA'\n | 'hi'\n | 'hi-Latn'\n | 'id'\n | 'it'\n | 'ja'\n | 'ko'\n | 'nl'\n | 'no'\n | 'pl'\n | 'pt'\n | 'pt-BR'\n | 'ru'\n | 'sv'\n | 'ta'\n | 'taq'\n | 'th'\n | 'tr'\n | 'uk'\n | 'zh'\n | 'zh-CN'\n | 'zh-TW'\n | 'multi';\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
package/dist/models.d.cts CHANGED
@@ -1,3 +1,3 @@
1
- export type STTModels = 'nova-general' | 'nova-phonecall' | 'nova-meeting' | 'nova-2-general' | 'nova-2-meeting' | 'nova-2-phonecall' | 'nova-2-finance' | 'nova-2-conversationalai' | 'nova-2-voicemail' | 'nova-2-video' | 'nova-2-medical' | 'nova-2-drivethru' | 'nova-2-automotive' | 'nova-3-general' | 'enhanced-general' | 'enhanced-meeting' | 'enhanced-phonecall' | 'enhanced-finance' | 'base' | 'meeting' | 'phonecall' | 'finance' | 'conversationalai' | 'voicemail' | 'video' | 'whisper-tiny' | 'whisper-base' | 'whisper-small' | 'whisper-medium' | 'whisper-large';
2
- export type STTLanguages = 'da' | 'de' | 'en' | 'en-AU' | 'en-GB' | 'en-IN' | 'en-NZ' | 'en-US' | 'es' | 'es-419' | 'es-LATAM' | 'fr' | 'fr-CA' | 'hi' | 'hi-Latn' | 'id' | 'it' | 'ja' | 'ko' | 'nl' | 'no' | 'pl' | 'pt' | 'pt-BR' | 'ru' | 'sv' | 'ta' | 'taq' | 'th' | 'tr' | 'uk' | 'zh' | 'zh-CN' | 'zh-TW';
1
+ export type STTModels = 'nova-general' | 'nova-phonecall' | 'nova-meeting' | 'nova-2-general' | 'nova-2-meeting' | 'nova-2-phonecall' | 'nova-2-finance' | 'nova-2-conversationalai' | 'nova-2-voicemail' | 'nova-2-video' | 'nova-2-medical' | 'nova-2-drivethru' | 'nova-2-automotive' | 'nova-3' | 'nova-3-general' | 'nova-3-medical' | 'enhanced-general' | 'enhanced-meeting' | 'enhanced-phonecall' | 'enhanced-finance' | 'base' | 'meeting' | 'phonecall' | 'finance' | 'conversationalai' | 'voicemail' | 'video' | 'whisper-tiny' | 'whisper-base' | 'whisper-small' | 'whisper-medium' | 'whisper-large';
2
+ export type STTLanguages = 'da' | 'de' | 'en' | 'en-AU' | 'en-GB' | 'en-IN' | 'en-NZ' | 'en-US' | 'es' | 'es-419' | 'es-LATAM' | 'fr' | 'fr-CA' | 'hi' | 'hi-Latn' | 'id' | 'it' | 'ja' | 'ko' | 'nl' | 'no' | 'pl' | 'pt' | 'pt-BR' | 'ru' | 'sv' | 'ta' | 'taq' | 'th' | 'tr' | 'uk' | 'zh' | 'zh-CN' | 'zh-TW' | 'multi';
3
3
  //# sourceMappingURL=models.d.ts.map
package/dist/models.d.ts CHANGED
@@ -1,3 +1,3 @@
1
- export type STTModels = 'nova-general' | 'nova-phonecall' | 'nova-meeting' | 'nova-2-general' | 'nova-2-meeting' | 'nova-2-phonecall' | 'nova-2-finance' | 'nova-2-conversationalai' | 'nova-2-voicemail' | 'nova-2-video' | 'nova-2-medical' | 'nova-2-drivethru' | 'nova-2-automotive' | 'nova-3-general' | 'enhanced-general' | 'enhanced-meeting' | 'enhanced-phonecall' | 'enhanced-finance' | 'base' | 'meeting' | 'phonecall' | 'finance' | 'conversationalai' | 'voicemail' | 'video' | 'whisper-tiny' | 'whisper-base' | 'whisper-small' | 'whisper-medium' | 'whisper-large';
2
- export type STTLanguages = 'da' | 'de' | 'en' | 'en-AU' | 'en-GB' | 'en-IN' | 'en-NZ' | 'en-US' | 'es' | 'es-419' | 'es-LATAM' | 'fr' | 'fr-CA' | 'hi' | 'hi-Latn' | 'id' | 'it' | 'ja' | 'ko' | 'nl' | 'no' | 'pl' | 'pt' | 'pt-BR' | 'ru' | 'sv' | 'ta' | 'taq' | 'th' | 'tr' | 'uk' | 'zh' | 'zh-CN' | 'zh-TW';
1
+ export type STTModels = 'nova-general' | 'nova-phonecall' | 'nova-meeting' | 'nova-2-general' | 'nova-2-meeting' | 'nova-2-phonecall' | 'nova-2-finance' | 'nova-2-conversationalai' | 'nova-2-voicemail' | 'nova-2-video' | 'nova-2-medical' | 'nova-2-drivethru' | 'nova-2-automotive' | 'nova-3' | 'nova-3-general' | 'nova-3-medical' | 'enhanced-general' | 'enhanced-meeting' | 'enhanced-phonecall' | 'enhanced-finance' | 'base' | 'meeting' | 'phonecall' | 'finance' | 'conversationalai' | 'voicemail' | 'video' | 'whisper-tiny' | 'whisper-base' | 'whisper-small' | 'whisper-medium' | 'whisper-large';
2
+ export type STTLanguages = 'da' | 'de' | 'en' | 'en-AU' | 'en-GB' | 'en-IN' | 'en-NZ' | 'en-US' | 'es' | 'es-419' | 'es-LATAM' | 'fr' | 'fr-CA' | 'hi' | 'hi-Latn' | 'id' | 'it' | 'ja' | 'ko' | 'nl' | 'no' | 'pl' | 'pt' | 'pt-BR' | 'ru' | 'sv' | 'ta' | 'taq' | 'th' | 'tr' | 'uk' | 'zh' | 'zh-CN' | 'zh-TW' | 'multi';
3
3
  //# sourceMappingURL=models.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GACjB,cAAc,GACd,gBAAgB,GAChB,cAAc,GACd,gBAAgB,GAChB,gBAAgB,GAChB,kBAAkB,GAClB,gBAAgB,GAChB,yBAAyB,GACzB,kBAAkB,GAClB,cAAc,GACd,gBAAgB,GAChB,kBAAkB,GAClB,mBAAmB,GACnB,gBAAgB,GAChB,kBAAkB,GAClB,kBAAkB,GAClB,oBAAoB,GACpB,kBAAkB,GAClB,MAAM,GACN,SAAS,GACT,WAAW,GACX,SAAS,GACT,kBAAkB,GAClB,WAAW,GACX,OAAO,GACP,cAAc,GACd,cAAc,GACd,eAAe,GACf,gBAAgB,GAChB,eAAe,CAAC;AAEpB,MAAM,MAAM,YAAY,GACpB,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,OAAO,GACP,OAAO,GACP,OAAO,GACP,OAAO,GACP,OAAO,GACP,IAAI,GACJ,QAAQ,GACR,UAAU,GACV,IAAI,GACJ,OAAO,GACP,IAAI,GACJ,SAAS,GACT,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,OAAO,GACP,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,KAAK,GACL,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,OAAO,GACP,OAAO,CAAC"}
1
+ {"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAIA,MAAM,MAAM,SAAS,GACjB,cAAc,GACd,gBAAgB,GAChB,cAAc,GACd,gBAAgB,GAChB,gBAAgB,GAChB,kBAAkB,GAClB,gBAAgB,GAChB,yBAAyB,GACzB,kBAAkB,GAClB,cAAc,GACd,gBAAgB,GAChB,kBAAkB,GAClB,mBAAmB,GACnB,QAAQ,GACR,gBAAgB,GAChB,gBAAgB,GAChB,kBAAkB,GAClB,kBAAkB,GAClB,oBAAoB,GACpB,kBAAkB,GAClB,MAAM,GACN,SAAS,GACT,WAAW,GACX,SAAS,GACT,kBAAkB,GAClB,WAAW,GACX,OAAO,GACP,cAAc,GACd,cAAc,GACd,eAAe,GACf,gBAAgB,GAChB,eAAe,CAAC;AAEpB,MAAM,MAAM,YAAY,GACpB,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,OAAO,GACP,OAAO,GACP,OAAO,GACP,OAAO,GACP,OAAO,GACP,IAAI,GACJ,QAAQ,GACR,UAAU,GACV,IAAI,GACJ,OAAO,GACP,IAAI,GACJ,SAAS,GACT,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,OAAO,GACP,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,KAAK,GACL,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,OAAO,GACP,OAAO,GACP,OAAO,CAAC"}
package/dist/stt.cjs CHANGED
@@ -24,6 +24,7 @@ __export(stt_exports, {
24
24
  module.exports = __toCommonJS(stt_exports);
25
25
  var import_agents = require("@livekit/agents");
26
26
  var import_ws = require("ws");
27
+ var import_utils = require("./_utils.cjs");
27
28
  const API_BASE_URL_V1 = "wss://api.deepgram.com/v1/listen";
28
29
  const defaultSTTOptions = {
29
30
  apiKey: process.env.DEEPGRAM_API_KEY,
@@ -31,7 +32,7 @@ const defaultSTTOptions = {
31
32
  detectLanguage: false,
32
33
  interimResults: true,
33
34
  punctuate: true,
34
- model: "nova-2-general",
35
+ model: "nova-3",
35
36
  smartFormat: true,
36
37
  noDelay: true,
37
38
  endpointing: 25,
@@ -97,15 +98,21 @@ class SpeechStream extends import_agents.stt.SpeechStream {
97
98
  #logger = (0, import_agents.log)();
98
99
  #speaking = false;
99
100
  #resetWS = new import_agents.Future();
101
+ #requestId = "";
102
+ #audioDurationCollector;
100
103
  label = "deepgram.SpeechStream";
101
104
  constructor(stt2, opts) {
102
- super(stt2);
105
+ super(stt2, opts.sampleRate);
103
106
  this.#opts = opts;
104
107
  this.closed = false;
105
108
  this.#audioEnergyFilter = new import_agents.AudioEnergyFilter();
106
- this.#run();
109
+ this.#audioDurationCollector = new import_utils.PeriodicCollector(
110
+ (duration) => this.onAudioDurationReport(duration),
111
+ { duration: 5 }
112
+ );
107
113
  }
108
- async #run(maxRetry = 32) {
114
+ async run() {
115
+ const maxRetry = 32;
109
116
  let retries = 0;
110
117
  let ws;
111
118
  while (!this.input.closed) {
@@ -189,6 +196,7 @@ class SpeechStream extends import_agents.stt.SpeechStream {
189
196
  let frames;
190
197
  if (data === SpeechStream.FLUSH_SENTINEL) {
191
198
  frames = stream.flush();
199
+ this.#audioDurationCollector.flush();
192
200
  } else if (data.sampleRate === this.#opts.sampleRate || data.channels === this.#opts.numChannels) {
193
201
  frames = stream.write(data.data.buffer);
194
202
  } else {
@@ -196,6 +204,8 @@ class SpeechStream extends import_agents.stt.SpeechStream {
196
204
  }
197
205
  for await (const frame of frames) {
198
206
  if (this.#audioEnergyFilter.pushFrame(frame)) {
207
+ const frameDuration = frame.samplesPerChannel / frame.sampleRate;
208
+ this.#audioDurationCollector.push(frameDuration);
199
209
  ws.send(frame.data.buffer);
200
210
  }
201
211
  }
@@ -207,7 +217,7 @@ class SpeechStream extends import_agents.stt.SpeechStream {
207
217
  (_, reject) => ws.once("close", (code, reason) => {
208
218
  if (!closing) {
209
219
  this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
210
- reject();
220
+ reject(new Error("WebSocket closed"));
211
221
  }
212
222
  })
213
223
  );
@@ -229,8 +239,11 @@ class SpeechStream extends import_agents.stt.SpeechStream {
229
239
  // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final
230
240
  // for more information about the different types of events
231
241
  case "Results": {
242
+ const metadata = json["metadata"];
243
+ const requestId = metadata["request_id"];
232
244
  const isFinal = json["is_final"];
233
245
  const isEndpoint = json["speech_final"];
246
+ this.#requestId = requestId;
234
247
  const alternatives = liveTranscriptionToSpeechData(this.#opts.language, json);
235
248
  if (alternatives[0] && alternatives[0].text) {
236
249
  if (!this.#speaking) {
@@ -275,6 +288,16 @@ class SpeechStream extends import_agents.stt.SpeechStream {
275
288
  ws.close();
276
289
  clearInterval(keepalive);
277
290
  }
291
+ onAudioDurationReport(duration) {
292
+ const usageEvent = {
293
+ type: import_agents.stt.SpeechEventType.RECOGNITION_USAGE,
294
+ requestId: this.#requestId,
295
+ recognitionUsage: {
296
+ audioDuration: duration
297
+ }
298
+ };
299
+ this.queue.put(usageEvent);
300
+ }
278
301
  }
279
302
  const liveTranscriptionToSpeechData = (language, data) => {
280
303
  const alts = data["channel"]["alternatives"];
package/dist/stt.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-2-general',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions) {\n super(stt);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n\n this.#run();\n }\n\n async #run(maxRetry = 32) {\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const wsMonitor = new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject();\n }\n }),\n );\n\n const listenTask = async () => {\n while (!this.closed && !closing) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,gBAAwC;AAGxC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAEhD,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,KAAK,WAAW,IAAI;AACxB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AAAA,QACxB,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,YAAY,IAAI;AAAA,MAAc,CAAC,GAAG,WACtC,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,iBAAO;AAAA,QACT;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,UAAU,CAAC,SAAS;AAC/B,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AAEtC,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,CAAC,KAAK,SAAS,OAAO,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC;AAC5F,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const wsMonitor = new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n }),\n );\n\n const listenTask = async () => {\n while (!this.closed && !closing) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,gBAAwC;AACxC,mBAAkC;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,MAAK,KAAK,UAAU;AAC1B,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AACtB,eAAK,wBAAwB,MAAM;AAAA,QACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,kBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,iBAAK,wBAAwB,KAAK,aAAa;AAC/C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,YAAY,IAAI;AAAA,MAAc,CAAC,GAAG,WACtC,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,iBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,QACtC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,UAAU,CAAC,SAAS;AAC/B,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,CAAC,KAAK,SAAS,OAAO,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC;AAC5F,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
package/dist/stt.d.cts CHANGED
@@ -32,6 +32,8 @@ export declare class SpeechStream extends stt.SpeechStream {
32
32
  #private;
33
33
  label: string;
34
34
  constructor(stt: STT, opts: STTOptions);
35
+ protected run(): Promise<void>;
35
36
  updateOptions(opts: Partial<STTOptions>): void;
37
+ private onAudioDurationReport;
36
38
  }
37
39
  //# sourceMappingURL=stt.d.ts.map
package/dist/stt.d.ts CHANGED
@@ -32,6 +32,8 @@ export declare class SpeechStream extends stt.SpeechStream {
32
32
  #private;
33
33
  label: string;
34
34
  constructor(stt: STT, opts: STTOptions);
35
+ protected run(): Promise<void>;
35
36
  updateOptions(opts: Partial<STTOptions>): void;
37
+ private onAudioDurationReport;
36
38
  }
37
39
  //# sourceMappingURL=stt.d.ts.map
package/dist/stt.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,WAAW,EAKhB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAGzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAuBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,IAAI,YAAY;CAGvB;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAMhD,KAAK,SAA2B;gBAEpB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;IA0EtC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;CA2IxC"}
1
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,WAAW,EAKhB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAuBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,IAAI,YAAY;CAGvB;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAQhD,KAAK,SAA2B;gBAEpB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;cAWtB,GAAG;IAkEnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAkJvC,OAAO,CAAC,qBAAqB;CAU9B"}
package/dist/stt.js CHANGED
@@ -6,6 +6,7 @@ import {
6
6
  stt
7
7
  } from "@livekit/agents";
8
8
  import { WebSocket } from "ws";
9
+ import { PeriodicCollector } from "./_utils.js";
9
10
  const API_BASE_URL_V1 = "wss://api.deepgram.com/v1/listen";
10
11
  const defaultSTTOptions = {
11
12
  apiKey: process.env.DEEPGRAM_API_KEY,
@@ -13,7 +14,7 @@ const defaultSTTOptions = {
13
14
  detectLanguage: false,
14
15
  interimResults: true,
15
16
  punctuate: true,
16
- model: "nova-2-general",
17
+ model: "nova-3",
17
18
  smartFormat: true,
18
19
  noDelay: true,
19
20
  endpointing: 25,
@@ -79,15 +80,21 @@ class SpeechStream extends stt.SpeechStream {
79
80
  #logger = log();
80
81
  #speaking = false;
81
82
  #resetWS = new Future();
83
+ #requestId = "";
84
+ #audioDurationCollector;
82
85
  label = "deepgram.SpeechStream";
83
86
  constructor(stt2, opts) {
84
- super(stt2);
87
+ super(stt2, opts.sampleRate);
85
88
  this.#opts = opts;
86
89
  this.closed = false;
87
90
  this.#audioEnergyFilter = new AudioEnergyFilter();
88
- this.#run();
91
+ this.#audioDurationCollector = new PeriodicCollector(
92
+ (duration) => this.onAudioDurationReport(duration),
93
+ { duration: 5 }
94
+ );
89
95
  }
90
- async #run(maxRetry = 32) {
96
+ async run() {
97
+ const maxRetry = 32;
91
98
  let retries = 0;
92
99
  let ws;
93
100
  while (!this.input.closed) {
@@ -171,6 +178,7 @@ class SpeechStream extends stt.SpeechStream {
171
178
  let frames;
172
179
  if (data === SpeechStream.FLUSH_SENTINEL) {
173
180
  frames = stream.flush();
181
+ this.#audioDurationCollector.flush();
174
182
  } else if (data.sampleRate === this.#opts.sampleRate || data.channels === this.#opts.numChannels) {
175
183
  frames = stream.write(data.data.buffer);
176
184
  } else {
@@ -178,6 +186,8 @@ class SpeechStream extends stt.SpeechStream {
178
186
  }
179
187
  for await (const frame of frames) {
180
188
  if (this.#audioEnergyFilter.pushFrame(frame)) {
189
+ const frameDuration = frame.samplesPerChannel / frame.sampleRate;
190
+ this.#audioDurationCollector.push(frameDuration);
181
191
  ws.send(frame.data.buffer);
182
192
  }
183
193
  }
@@ -189,7 +199,7 @@ class SpeechStream extends stt.SpeechStream {
189
199
  (_, reject) => ws.once("close", (code, reason) => {
190
200
  if (!closing) {
191
201
  this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
192
- reject();
202
+ reject(new Error("WebSocket closed"));
193
203
  }
194
204
  })
195
205
  );
@@ -211,8 +221,11 @@ class SpeechStream extends stt.SpeechStream {
211
221
  // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final
212
222
  // for more information about the different types of events
213
223
  case "Results": {
224
+ const metadata = json["metadata"];
225
+ const requestId = metadata["request_id"];
214
226
  const isFinal = json["is_final"];
215
227
  const isEndpoint = json["speech_final"];
228
+ this.#requestId = requestId;
216
229
  const alternatives = liveTranscriptionToSpeechData(this.#opts.language, json);
217
230
  if (alternatives[0] && alternatives[0].text) {
218
231
  if (!this.#speaking) {
@@ -257,6 +270,16 @@ class SpeechStream extends stt.SpeechStream {
257
270
  ws.close();
258
271
  clearInterval(keepalive);
259
272
  }
273
+ onAudioDurationReport(duration) {
274
+ const usageEvent = {
275
+ type: stt.SpeechEventType.RECOGNITION_USAGE,
276
+ requestId: this.#requestId,
277
+ recognitionUsage: {
278
+ audioDuration: duration
279
+ }
280
+ };
281
+ this.queue.put(usageEvent);
282
+ }
260
283
  }
261
284
  const liveTranscriptionToSpeechData = (language, data) => {
262
285
  const alts = data["channel"]["alternatives"];
package/dist/stt.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-2-general',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions) {\n super(stt);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n\n this.#run();\n }\n\n async #run(maxRetry = 32) {\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const wsMonitor = new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject();\n }\n }),\n );\n\n const listenTask = async () => {\n while (!this.closed && !closing) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAuB,iBAAiB;AAGxC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAEhD,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,KAAK,WAAW,IAAI;AACxB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AAAA,QACxB,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,YAAY,IAAI;AAAA,MAAc,CAAC,GAAG,WACtC,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,iBAAO;AAAA,QACT;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,UAAU,CAAC,SAAS;AAC/B,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AAEtC,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,CAAC,KAAK,SAAS,OAAO,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC;AAC5F,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const wsMonitor = new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n }),\n );\n\n const listenTask = async () => {\n while (!this.closed && !closing) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAuB,iBAAiB;AACxC,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,MAAK,KAAK,UAAU;AAC1B,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AACtB,eAAK,wBAAwB,MAAM;AAAA,QACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,kBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,iBAAK,wBAAwB,KAAK,aAAa;AAC/C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,YAAY,IAAI;AAAA,MAAc,CAAC,GAAG,WACtC,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,iBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,QACtC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,UAAU,CAAC,SAAS;AAC/B,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,CAAC,KAAK,SAAS,OAAO,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC;AAC5F,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-deepgram",
3
- "version": "0.5.7",
3
+ "version": "1.0.0-next.1",
4
4
  "description": "Deepgram plugin for LiveKit Agents for Node.js",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -25,10 +25,10 @@
25
25
  "README.md"
26
26
  ],
27
27
  "devDependencies": {
28
- "@livekit/agents": "^x",
29
- "@livekit/agents-plugin-silero": "^x",
30
- "@livekit/agents-plugins-test": "^x",
31
- "@livekit/rtc-node": "^0.13.11",
28
+ "@livekit/agents": "^1.0.0-next.1",
29
+ "@livekit/agents-plugin-silero": "^1.0.0-next.1",
30
+ "@livekit/agents-plugins-test": "^1.0.0-next.1",
31
+ "@livekit/rtc-node": "^0.13.12",
32
32
  "@microsoft/api-extractor": "^7.35.0",
33
33
  "@types/ws": "^8.5.10",
34
34
  "tsup": "^8.3.5",
@@ -38,8 +38,8 @@
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
- "@livekit/rtc-node": "^0.13.11",
42
- "@livekit/agents": "^0.7.7x"
41
+ "@livekit/rtc-node": "^0.13.12",
42
+ "@livekit/agents": "^1.0.0-next.11.0.0-next.1"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/_utils.ts ADDED
@@ -0,0 +1,50 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ export class PeriodicCollector<T> {
6
+ private duration: number;
7
+ private callback: (value: T) => void;
8
+ private lastFlushTime: number;
9
+ private total: T | null = null;
10
+
11
+ constructor(callback: (value: T) => void, options: { duration: number }) {
12
+ /**
13
+ * Create a new periodic collector that accumulates values and calls the callback
14
+ * after the specified duration if there are values to report.
15
+ *
16
+ * @param callback Function to call with accumulated value when duration expires
17
+ * @param options.duration Time in seconds between callback invocations
18
+ */
19
+ this.duration = options.duration;
20
+ this.callback = callback;
21
+ this.lastFlushTime = performance.now() / 1000; // Convert to seconds
22
+ }
23
+
24
+ push(value: T): void {
25
+ /**
26
+ * Add a value to the accumulator
27
+ */
28
+ if (this.total === null) {
29
+ this.total = value;
30
+ } else {
31
+ // Type assertion needed for generic addition
32
+ this.total = (this.total as any) + (value as any);
33
+ }
34
+
35
+ if (performance.now() / 1000 - this.lastFlushTime >= this.duration) {
36
+ this.flush();
37
+ }
38
+ }
39
+
40
+ flush(): void {
41
+ /**
42
+ * Force callback to be called with current total if non-zero
43
+ */
44
+ if (this.total !== null) {
45
+ this.callback(this.total);
46
+ this.total = null;
47
+ }
48
+ this.lastFlushTime = performance.now() / 1000;
49
+ }
50
+ }
package/src/index.ts CHANGED
@@ -1,5 +1,18 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import { Plugin } from '@livekit/agents';
4
5
 
5
6
  export * from './stt.js';
7
+
8
+ class DeepgramPlugin extends Plugin {
9
+ constructor() {
10
+ super({
11
+ title: 'deepgram',
12
+ version: '0.5.6',
13
+ package: '@livekit/agents-plugin-deepgram',
14
+ });
15
+ }
16
+ }
17
+
18
+ Plugin.registerPlugin(new DeepgramPlugin());
package/src/models.ts CHANGED
@@ -16,7 +16,9 @@ export type STTModels =
16
16
  | 'nova-2-medical'
17
17
  | 'nova-2-drivethru'
18
18
  | 'nova-2-automotive'
19
+ | 'nova-3'
19
20
  | 'nova-3-general'
21
+ | 'nova-3-medical'
20
22
  | 'enhanced-general'
21
23
  | 'enhanced-meeting'
22
24
  | 'enhanced-phonecall'
@@ -68,4 +70,5 @@ export type STTLanguages =
68
70
  | 'uk'
69
71
  | 'zh'
70
72
  | 'zh-CN'
71
- | 'zh-TW';
73
+ | 'zh-TW'
74
+ | 'multi';
package/src/stt.ts CHANGED
@@ -11,6 +11,7 @@ import {
11
11
  } from '@livekit/agents';
12
12
  import type { AudioFrame } from '@livekit/rtc-node';
13
13
  import { type RawData, WebSocket } from 'ws';
14
+ import { PeriodicCollector } from './_utils.js';
14
15
  import type { STTLanguages, STTModels } from './models.js';
15
16
 
16
17
  const API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';
@@ -42,7 +43,7 @@ const defaultSTTOptions: STTOptions = {
42
43
  detectLanguage: false,
43
44
  interimResults: true,
44
45
  punctuate: true,
45
- model: 'nova-2-general',
46
+ model: 'nova-3',
46
47
  smartFormat: true,
47
48
  noDelay: true,
48
49
  endpointing: 25,
@@ -120,18 +121,23 @@ export class SpeechStream extends stt.SpeechStream {
120
121
  #logger = log();
121
122
  #speaking = false;
122
123
  #resetWS = new Future();
124
+ #requestId = '';
125
+ #audioDurationCollector: PeriodicCollector<number>;
123
126
  label = 'deepgram.SpeechStream';
124
127
 
125
128
  constructor(stt: STT, opts: STTOptions) {
126
- super(stt);
129
+ super(stt, opts.sampleRate);
127
130
  this.#opts = opts;
128
131
  this.closed = false;
129
132
  this.#audioEnergyFilter = new AudioEnergyFilter();
130
-
131
- this.#run();
133
+ this.#audioDurationCollector = new PeriodicCollector(
134
+ (duration) => this.onAudioDurationReport(duration),
135
+ { duration: 5.0 },
136
+ );
132
137
  }
133
138
 
134
- async #run(maxRetry = 32) {
139
+ protected async run() {
140
+ const maxRetry = 32;
135
141
  let retries = 0;
136
142
  let ws: WebSocket;
137
143
  while (!this.input.closed) {
@@ -226,6 +232,7 @@ export class SpeechStream extends stt.SpeechStream {
226
232
  let frames: AudioFrame[];
227
233
  if (data === SpeechStream.FLUSH_SENTINEL) {
228
234
  frames = stream.flush();
235
+ this.#audioDurationCollector.flush();
229
236
  } else if (
230
237
  data.sampleRate === this.#opts.sampleRate ||
231
238
  data.channels === this.#opts.numChannels
@@ -237,6 +244,8 @@ export class SpeechStream extends stt.SpeechStream {
237
244
 
238
245
  for await (const frame of frames) {
239
246
  if (this.#audioEnergyFilter.pushFrame(frame)) {
247
+ const frameDuration = frame.samplesPerChannel / frame.sampleRate;
248
+ this.#audioDurationCollector.push(frameDuration);
240
249
  ws.send(frame.data.buffer);
241
250
  }
242
251
  }
@@ -250,7 +259,7 @@ export class SpeechStream extends stt.SpeechStream {
250
259
  ws.once('close', (code, reason) => {
251
260
  if (!closing) {
252
261
  this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
253
- reject();
262
+ reject(new Error('WebSocket closed'));
254
263
  }
255
264
  }),
256
265
  );
@@ -277,8 +286,11 @@ export class SpeechStream extends stt.SpeechStream {
277
286
  // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final
278
287
  // for more information about the different types of events
279
288
  case 'Results': {
289
+ const metadata = json['metadata'];
290
+ const requestId = metadata['request_id'];
280
291
  const isFinal = json['is_final'];
281
292
  const isEndpoint = json['speech_final'];
293
+ this.#requestId = requestId;
282
294
 
283
295
  const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);
284
296
 
@@ -335,6 +347,17 @@ export class SpeechStream extends stt.SpeechStream {
335
347
  ws.close();
336
348
  clearInterval(keepalive);
337
349
  }
350
+
351
+ private onAudioDurationReport(duration: number) {
352
+ const usageEvent: stt.SpeechEvent = {
353
+ type: stt.SpeechEventType.RECOGNITION_USAGE,
354
+ requestId: this.#requestId,
355
+ recognitionUsage: {
356
+ audioDuration: duration,
357
+ },
358
+ };
359
+ this.queue.put(usageEvent);
360
+ }
338
361
  }
339
362
 
340
363
  const liveTranscriptionToSpeechData = (