@livekit/agents 0.4.6 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/README.md +17 -0
  2. package/dist/audio.cjs +77 -0
  3. package/dist/audio.cjs.map +1 -0
  4. package/dist/audio.js +48 -37
  5. package/dist/audio.js.map +1 -1
  6. package/dist/cli.cjs +131 -0
  7. package/dist/cli.cjs.map +1 -0
  8. package/dist/cli.js +96 -122
  9. package/dist/cli.js.map +1 -1
  10. package/dist/generator.cjs +36 -0
  11. package/dist/generator.cjs.map +1 -0
  12. package/dist/generator.js +8 -22
  13. package/dist/generator.js.map +1 -1
  14. package/dist/http_server.cjs +72 -0
  15. package/dist/http_server.cjs.map +1 -0
  16. package/dist/http_server.d.ts +1 -1
  17. package/dist/http_server.js +44 -47
  18. package/dist/http_server.js.map +1 -1
  19. package/dist/index.cjs +78 -0
  20. package/dist/index.cjs.map +1 -0
  21. package/dist/index.js +26 -28
  22. package/dist/index.js.map +1 -1
  23. package/dist/ipc/job_executor.cjs +33 -0
  24. package/dist/ipc/job_executor.cjs.map +1 -0
  25. package/dist/ipc/job_executor.js +7 -4
  26. package/dist/ipc/job_executor.js.map +1 -1
  27. package/dist/ipc/job_main.cjs +147 -0
  28. package/dist/ipc/job_main.cjs.map +1 -0
  29. package/dist/ipc/job_main.d.ts +1 -1
  30. package/dist/ipc/job_main.js +103 -103
  31. package/dist/ipc/job_main.js.map +1 -1
  32. package/dist/ipc/message.cjs +17 -0
  33. package/dist/ipc/message.cjs.map +1 -0
  34. package/dist/ipc/message.js +0 -1
  35. package/dist/ipc/message.js.map +1 -1
  36. package/dist/ipc/proc_job_executor.cjs +174 -0
  37. package/dist/ipc/proc_job_executor.cjs.map +1 -0
  38. package/dist/ipc/proc_job_executor.js +130 -126
  39. package/dist/ipc/proc_job_executor.js.map +1 -1
  40. package/dist/ipc/proc_pool.cjs +126 -0
  41. package/dist/ipc/proc_pool.cjs.map +1 -0
  42. package/dist/ipc/proc_pool.js +93 -96
  43. package/dist/ipc/proc_pool.js.map +1 -1
  44. package/dist/job.cjs +230 -0
  45. package/dist/job.cjs.map +1 -0
  46. package/dist/job.d.ts +6 -1
  47. package/dist/job.d.ts.map +1 -1
  48. package/dist/job.js +195 -198
  49. package/dist/job.js.map +1 -1
  50. package/dist/llm/chat_context.cjs +131 -0
  51. package/dist/llm/chat_context.cjs.map +1 -0
  52. package/dist/llm/chat_context.js +98 -86
  53. package/dist/llm/chat_context.js.map +1 -1
  54. package/dist/llm/function_context.cjs +103 -0
  55. package/dist/llm/function_context.cjs.map +1 -0
  56. package/dist/llm/function_context.js +72 -81
  57. package/dist/llm/function_context.js.map +1 -1
  58. package/dist/llm/function_context.test.cjs +218 -0
  59. package/dist/llm/function_context.test.cjs.map +1 -0
  60. package/dist/llm/function_context.test.js +209 -210
  61. package/dist/llm/function_context.test.js.map +1 -1
  62. package/dist/llm/index.cjs +43 -0
  63. package/dist/llm/index.cjs.map +1 -0
  64. package/dist/llm/index.js +22 -6
  65. package/dist/llm/index.js.map +1 -1
  66. package/dist/llm/llm.cjs +76 -0
  67. package/dist/llm/llm.cjs.map +1 -0
  68. package/dist/llm/llm.js +48 -42
  69. package/dist/llm/llm.js.map +1 -1
  70. package/dist/log.cjs +57 -0
  71. package/dist/log.cjs.map +1 -0
  72. package/dist/log.js +27 -26
  73. package/dist/log.js.map +1 -1
  74. package/dist/multimodal/agent_playout.cjs +228 -0
  75. package/dist/multimodal/agent_playout.cjs.map +1 -0
  76. package/dist/multimodal/agent_playout.d.ts +1 -1
  77. package/dist/multimodal/agent_playout.js +193 -180
  78. package/dist/multimodal/agent_playout.js.map +1 -1
  79. package/dist/multimodal/index.cjs +25 -0
  80. package/dist/multimodal/index.cjs.map +1 -0
  81. package/dist/multimodal/index.js +2 -5
  82. package/dist/multimodal/index.js.map +1 -1
  83. package/dist/multimodal/multimodal_agent.cjs +404 -0
  84. package/dist/multimodal/multimodal_agent.cjs.map +1 -0
  85. package/dist/multimodal/multimodal_agent.d.ts +1 -1
  86. package/dist/multimodal/multimodal_agent.js +351 -330
  87. package/dist/multimodal/multimodal_agent.js.map +1 -1
  88. package/dist/pipeline/agent_output.cjs +172 -0
  89. package/dist/pipeline/agent_output.cjs.map +1 -0
  90. package/dist/pipeline/agent_output.js +136 -138
  91. package/dist/pipeline/agent_output.js.map +1 -1
  92. package/dist/pipeline/agent_playout.cjs +169 -0
  93. package/dist/pipeline/agent_playout.cjs.map +1 -0
  94. package/dist/pipeline/agent_playout.js +126 -136
  95. package/dist/pipeline/agent_playout.js.map +1 -1
  96. package/dist/pipeline/human_input.cjs +158 -0
  97. package/dist/pipeline/human_input.cjs.map +1 -0
  98. package/dist/pipeline/human_input.js +124 -125
  99. package/dist/pipeline/human_input.js.map +1 -1
  100. package/dist/pipeline/index.cjs +31 -0
  101. package/dist/pipeline/index.cjs.map +1 -0
  102. package/dist/pipeline/index.js +8 -4
  103. package/dist/pipeline/index.js.map +1 -1
  104. package/dist/pipeline/pipeline_agent.cjs +642 -0
  105. package/dist/pipeline/pipeline_agent.cjs.map +1 -0
  106. package/dist/pipeline/pipeline_agent.js +595 -651
  107. package/dist/pipeline/pipeline_agent.js.map +1 -1
  108. package/dist/pipeline/speech_handle.cjs +128 -0
  109. package/dist/pipeline/speech_handle.cjs.map +1 -0
  110. package/dist/pipeline/speech_handle.js +102 -100
  111. package/dist/pipeline/speech_handle.js.map +1 -1
  112. package/dist/plugin.cjs +46 -0
  113. package/dist/plugin.cjs.map +1 -0
  114. package/dist/plugin.js +20 -20
  115. package/dist/plugin.js.map +1 -1
  116. package/dist/stt/index.cjs +38 -0
  117. package/dist/stt/index.cjs.map +1 -0
  118. package/dist/stt/index.js +13 -5
  119. package/dist/stt/index.js.map +1 -1
  120. package/dist/stt/stream_adapter.cjs +87 -0
  121. package/dist/stt/stream_adapter.cjs.map +1 -0
  122. package/dist/stt/stream_adapter.js +58 -55
  123. package/dist/stt/stream_adapter.js.map +1 -1
  124. package/dist/stt/stt.cjs +98 -0
  125. package/dist/stt/stt.cjs.map +1 -0
  126. package/dist/stt/stt.js +63 -98
  127. package/dist/stt/stt.js.map +1 -1
  128. package/dist/tokenize/basic/basic.cjs +98 -0
  129. package/dist/tokenize/basic/basic.cjs.map +1 -0
  130. package/dist/tokenize/basic/basic.d.ts +1 -1
  131. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  132. package/dist/tokenize/basic/basic.js +56 -45
  133. package/dist/tokenize/basic/basic.js.map +1 -1
  134. package/dist/tokenize/basic/hyphenator.cjs +425 -0
  135. package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
  136. package/dist/tokenize/basic/hyphenator.js +66 -82
  137. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  138. package/dist/tokenize/basic/index.cjs +35 -0
  139. package/dist/tokenize/basic/index.cjs.map +1 -0
  140. package/dist/tokenize/basic/index.js +7 -4
  141. package/dist/tokenize/basic/index.js.map +1 -1
  142. package/dist/tokenize/basic/paragraph.cjs +57 -0
  143. package/dist/tokenize/basic/paragraph.cjs.map +1 -0
  144. package/dist/tokenize/basic/paragraph.js +30 -35
  145. package/dist/tokenize/basic/paragraph.js.map +1 -1
  146. package/dist/tokenize/basic/sentence.cjs +89 -0
  147. package/dist/tokenize/basic/sentence.cjs.map +1 -0
  148. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  149. package/dist/tokenize/basic/sentence.js +62 -57
  150. package/dist/tokenize/basic/sentence.js.map +1 -1
  151. package/dist/tokenize/basic/word.cjs +44 -0
  152. package/dist/tokenize/basic/word.cjs.map +1 -0
  153. package/dist/tokenize/basic/word.js +17 -20
  154. package/dist/tokenize/basic/word.js.map +1 -1
  155. package/dist/tokenize/index.cjs +55 -0
  156. package/dist/tokenize/index.cjs.map +1 -0
  157. package/dist/tokenize/index.js +18 -7
  158. package/dist/tokenize/index.js.map +1 -1
  159. package/dist/tokenize/token_stream.cjs +164 -0
  160. package/dist/tokenize/token_stream.cjs.map +1 -0
  161. package/dist/tokenize/token_stream.js +133 -139
  162. package/dist/tokenize/token_stream.js.map +1 -1
  163. package/dist/tokenize/tokenizer.cjs +184 -0
  164. package/dist/tokenize/tokenizer.cjs.map +1 -0
  165. package/dist/tokenize/tokenizer.js +138 -99
  166. package/dist/tokenize/tokenizer.js.map +1 -1
  167. package/dist/tokenize/tokenizer.test.cjs +220 -0
  168. package/dist/tokenize/tokenizer.test.cjs.map +1 -0
  169. package/dist/tokenize/tokenizer.test.d.ts +2 -0
  170. package/dist/tokenize/tokenizer.test.d.ts.map +1 -0
  171. package/dist/tokenize/tokenizer.test.js +219 -0
  172. package/dist/tokenize/tokenizer.test.js.map +1 -0
  173. package/dist/transcription.cjs +131 -0
  174. package/dist/transcription.cjs.map +1 -0
  175. package/dist/transcription.js +99 -96
  176. package/dist/transcription.js.map +1 -1
  177. package/dist/tts/index.cjs +38 -0
  178. package/dist/tts/index.cjs.map +1 -0
  179. package/dist/tts/index.js +13 -5
  180. package/dist/tts/index.js.map +1 -1
  181. package/dist/tts/stream_adapter.cjs +78 -0
  182. package/dist/tts/stream_adapter.cjs.map +1 -0
  183. package/dist/tts/stream_adapter.js +50 -47
  184. package/dist/tts/stream_adapter.js.map +1 -1
  185. package/dist/tts/tts.cjs +127 -0
  186. package/dist/tts/tts.cjs.map +1 -0
  187. package/dist/tts/tts.js +90 -120
  188. package/dist/tts/tts.js.map +1 -1
  189. package/dist/utils.cjs +284 -0
  190. package/dist/utils.cjs.map +1 -0
  191. package/dist/utils.js +242 -247
  192. package/dist/utils.js.map +1 -1
  193. package/dist/vad.cjs +92 -0
  194. package/dist/vad.cjs.map +1 -0
  195. package/dist/vad.js +57 -52
  196. package/dist/vad.js.map +1 -1
  197. package/dist/version.cjs +29 -0
  198. package/dist/version.cjs.map +1 -0
  199. package/dist/version.js +4 -4
  200. package/dist/version.js.map +1 -1
  201. package/dist/worker.cjs +577 -0
  202. package/dist/worker.cjs.map +1 -0
  203. package/dist/worker.d.ts +1 -1
  204. package/dist/worker.d.ts.map +1 -1
  205. package/dist/worker.js +512 -484
  206. package/dist/worker.js.map +1 -1
  207. package/package.json +18 -8
  208. package/src/ipc/job_main.ts +66 -64
  209. package/src/job.ts +3 -2
  210. package/src/pipeline/pipeline_agent.ts +23 -23
  211. package/src/tokenize/basic/basic.ts +1 -1
  212. package/src/tokenize/basic/sentence.ts +14 -8
  213. package/src/tokenize/tokenizer.test.ts +255 -0
  214. package/src/worker.ts +1 -0
@@ -1 +1 @@
1
- {"version":3,"file":"plugin.js","sourceRoot":"","sources":["../src/plugin.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AAEtC,MAAM,OAAgB,MAAM;IAC1B,iBAAiB,GAAa,EAAE,CAAC;IACjC,MAAM,CAAS;IACf,QAAQ,CAAS;IAEjB,YAAY,KAAa,EAAE,OAAe;QACxC,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;IAC1B,CAAC;IAEM,MAAM,CAAC,eAAe,CAAC,MAAc;QAC1C,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACxC,CAAC;IAID,IAAI,KAAK;QACP,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;CACF"}
1
+ {"version":3,"sources":["../src/plugin.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport abstract class Plugin {\n registeredPlugins: Plugin[] = [];\n #title: string;\n #version: string;\n\n constructor(title: string, version: string) {\n this.#title = title;\n this.#version = version;\n }\n\n public static registerPlugins(plugin: Plugin) {\n plugin.registeredPlugins.push(plugin);\n }\n\n abstract downloadFiles(): void;\n\n get title(): string {\n return this.#title;\n }\n\n get version(): string {\n return this.#version;\n }\n}\n"],"mappings":"AAIO,MAAe,OAAO;AAAA,EAC3B,oBAA8B,CAAC;AAAA,EAC/B;AAAA,EACA;AAAA,EAEA,YAAY,OAAe,SAAiB;AAC1C,SAAK,SAAS;AACd,SAAK,WAAW;AAAA,EAClB;AAAA,EAEA,OAAc,gBAAgB,QAAgB;AAC5C,WAAO,kBAAkB,KAAK,MAAM;AAAA,EACtC;AAAA,EAIA,IAAI,QAAgB;AAClB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAkB;AACpB,WAAO,KAAK;AAAA,EACd;AACF;","names":[]}
@@ -0,0 +1,38 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var stt_exports = {};
20
+ __export(stt_exports, {
21
+ STT: () => import_stt.STT,
22
+ SpeechEventType: () => import_stt.SpeechEventType,
23
+ SpeechStream: () => import_stt.SpeechStream,
24
+ StreamAdapter: () => import_stream_adapter.StreamAdapter,
25
+ StreamAdapterWrapper: () => import_stream_adapter.StreamAdapterWrapper
26
+ });
27
+ module.exports = __toCommonJS(stt_exports);
28
+ var import_stt = require("./stt.cjs");
29
+ var import_stream_adapter = require("./stream_adapter.cjs");
30
+ // Annotate the CommonJS export names for ESM import in node:
31
+ 0 && (module.exports = {
32
+ STT,
33
+ SpeechEventType,
34
+ SpeechStream,
35
+ StreamAdapter,
36
+ StreamAdapterWrapper
37
+ });
38
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/stt/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type SpeechEvent,\n type SpeechData,\n type STTCapabilities,\n SpeechEventType,\n STT,\n SpeechStream,\n} from './stt.js';\nexport { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,iBAOO;AACP,4BAAoD;","names":[]}
package/dist/stt/index.js CHANGED
@@ -1,6 +1,14 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- export { SpeechEventType, STT, SpeechStream, } from './stt.js';
5
- export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
1
+ import {
2
+ SpeechEventType,
3
+ STT,
4
+ SpeechStream
5
+ } from "./stt.js";
6
+ import { StreamAdapter, StreamAdapterWrapper } from "./stream_adapter.js";
7
+ export {
8
+ STT,
9
+ SpeechEventType,
10
+ SpeechStream,
11
+ StreamAdapter,
12
+ StreamAdapterWrapper
13
+ };
6
14
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/stt/index.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AAEtC,OAAO,EAIL,eAAe,EACf,GAAG,EACH,YAAY,GACb,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC"}
1
+ {"version":3,"sources":["../../src/stt/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type SpeechEvent,\n type SpeechData,\n type STTCapabilities,\n SpeechEventType,\n STT,\n SpeechStream,\n} from './stt.js';\nexport { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';\n"],"mappings":"AAIA;AAAA,EAIE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,eAAe,4BAA4B;","names":[]}
@@ -0,0 +1,87 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var stream_adapter_exports = {};
20
+ __export(stream_adapter_exports, {
21
+ StreamAdapter: () => StreamAdapter,
22
+ StreamAdapterWrapper: () => StreamAdapterWrapper
23
+ });
24
+ module.exports = __toCommonJS(stream_adapter_exports);
25
+ var import_vad = require("../vad.cjs");
26
+ var import_stt = require("./stt.cjs");
27
+ class StreamAdapter extends import_stt.STT {
28
+ #stt;
29
+ #vad;
30
+ constructor(stt, vad) {
31
+ super({ streaming: true, interimResults: false });
32
+ this.#stt = stt;
33
+ this.#vad = vad;
34
+ }
35
+ recognize(frame) {
36
+ return this.#stt.recognize(frame);
37
+ }
38
+ stream() {
39
+ return new StreamAdapterWrapper(this.#stt, this.#vad);
40
+ }
41
+ }
42
+ class StreamAdapterWrapper extends import_stt.SpeechStream {
43
+ #stt;
44
+ #vadStream;
45
+ constructor(stt, vad) {
46
+ super();
47
+ this.#stt = stt;
48
+ this.#vadStream = vad.stream();
49
+ this.#run();
50
+ }
51
+ async #run() {
52
+ const forwardInput = async () => {
53
+ for await (const input of this.input) {
54
+ if (input === import_stt.SpeechStream.FLUSH_SENTINEL) {
55
+ this.#vadStream.flush();
56
+ } else {
57
+ this.#vadStream.pushFrame(input);
58
+ }
59
+ }
60
+ this.#vadStream.endInput();
61
+ };
62
+ const recognize = async () => {
63
+ for await (const ev of this.#vadStream) {
64
+ switch (ev.type) {
65
+ case import_vad.VADEventType.START_OF_SPEECH:
66
+ this.queue.put({ type: import_stt.SpeechEventType.START_OF_SPEECH });
67
+ break;
68
+ case import_vad.VADEventType.END_OF_SPEECH:
69
+ this.queue.put({ type: import_stt.SpeechEventType.END_OF_SPEECH });
70
+ const event = await this.#stt.recognize(ev.frames);
71
+ if (!event.alternatives[0].text) {
72
+ continue;
73
+ }
74
+ this.queue.put(event);
75
+ break;
76
+ }
77
+ }
78
+ };
79
+ Promise.all([forwardInput(), recognize()]);
80
+ }
81
+ }
82
+ // Annotate the CommonJS export names for ESM import in node:
83
+ 0 && (module.exports = {
84
+ StreamAdapter,
85
+ StreamAdapterWrapper
86
+ });
87
+ //# sourceMappingURL=stream_adapter.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/stt/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { VAD, VADStream } from '../vad.js';\nimport { VADEventType } from '../vad.js';\nimport type { SpeechEvent } from './stt.js';\nimport { STT, SpeechEventType, SpeechStream } from './stt.js';\n\nexport class StreamAdapter extends STT {\n #stt: STT;\n #vad: VAD;\n\n constructor(stt: STT, vad: VAD) {\n super({ streaming: true, interimResults: false });\n this.#stt = stt;\n this.#vad = vad;\n }\n\n recognize(frame: AudioFrame): Promise<SpeechEvent> {\n return this.#stt.recognize(frame);\n }\n\n stream(): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#stt, this.#vad);\n }\n}\n\nexport class StreamAdapterWrapper extends SpeechStream {\n #stt: STT;\n #vadStream: VADStream;\n\n constructor(stt: STT, vad: VAD) {\n super();\n this.#stt = stt;\n this.#vadStream = vad.stream();\n\n this.#run();\n }\n\n async #run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (input === SpeechStream.FLUSH_SENTINEL) {\n this.#vadStream.flush();\n } else {\n this.#vadStream.pushFrame(input);\n }\n }\n this.#vadStream.endInput();\n };\n\n const recognize = async () => {\n for await (const ev of this.#vadStream) {\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n break;\n case VADEventType.END_OF_SPEECH:\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n\n const event = await this.#stt.recognize(ev.frames);\n if (!event.alternatives![0].text) {\n continue;\n }\n\n this.queue.put(event);\n break;\n }\n }\n };\n\n Promise.all([forwardInput(), recognize()]);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,iBAA6B;AAE7B,iBAAmD;AAE5C,MAAM,sBAAsB,eAAI;AAAA,EACrC;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,EAAE,WAAW,MAAM,gBAAgB,MAAM,CAAC;AAChD,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,UAAU,OAAyC;AACjD,WAAO,KAAK,KAAK,UAAU,KAAK;AAAA,EAClC;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,IAAI;AAAA,EACtD;AACF;AAEO,MAAM,6BAA6B,wBAAa;AAAA,EACrD;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,aAAa,IAAI,OAAO;AAE7B,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,UAAU,wBAAa,gBAAgB;AACzC,eAAK,WAAW,MAAM;AAAA,QACxB,OAAO;AACL,eAAK,WAAW,UAAU,KAAK;AAAA,QACjC;AAAA,MACF;AACA,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,MAAM,KAAK,YAAY;AACtC,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,wBAAa;AAChB,iBAAK,MAAM,IAAI,EAAE,MAAM,2BAAgB,gBAAgB,CAAC;AACxD;AAAA,UACF,KAAK,wBAAa;AAChB,iBAAK,MAAM,IAAI,EAAE,MAAM,2BAAgB,cAAc,CAAC;AAEtD,kBAAM,QAAQ,MAAM,KAAK,KAAK,UAAU,GAAG,MAAM;AACjD,gBAAI,CAAC,MAAM,aAAc,CAAC,EAAE,MAAM;AAChC;AAAA,YACF;AAEA,iBAAK,MAAM,IAAI,KAAK;AACpB;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAEA,YAAQ,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC,CAAC;AAAA,EAC3C;AACF;","names":[]}
@@ -1,59 +1,62 @@
1
- import { VADEventType } from '../vad.js';
2
- import { STT, SpeechEventType, SpeechStream } from './stt.js';
3
- export class StreamAdapter extends STT {
4
- #stt;
5
- #vad;
6
- constructor(stt, vad) {
7
- super({ streaming: true, interimResults: false });
8
- this.#stt = stt;
9
- this.#vad = vad;
10
- }
11
- recognize(frame) {
12
- return this.#stt.recognize(frame);
13
- }
14
- stream() {
15
- return new StreamAdapterWrapper(this.#stt, this.#vad);
16
- }
1
+ import { VADEventType } from "../vad.js";
2
+ import { STT, SpeechEventType, SpeechStream } from "./stt.js";
3
+ class StreamAdapter extends STT {
4
+ #stt;
5
+ #vad;
6
+ constructor(stt, vad) {
7
+ super({ streaming: true, interimResults: false });
8
+ this.#stt = stt;
9
+ this.#vad = vad;
10
+ }
11
+ recognize(frame) {
12
+ return this.#stt.recognize(frame);
13
+ }
14
+ stream() {
15
+ return new StreamAdapterWrapper(this.#stt, this.#vad);
16
+ }
17
17
  }
18
- export class StreamAdapterWrapper extends SpeechStream {
19
- #stt;
20
- #vadStream;
21
- constructor(stt, vad) {
22
- super();
23
- this.#stt = stt;
24
- this.#vadStream = vad.stream();
25
- this.#run();
26
- }
27
- async #run() {
28
- const forwardInput = async () => {
29
- for await (const input of this.input) {
30
- if (input === SpeechStream.FLUSH_SENTINEL) {
31
- this.#vadStream.flush();
32
- }
33
- else {
34
- this.#vadStream.pushFrame(input);
35
- }
18
+ class StreamAdapterWrapper extends SpeechStream {
19
+ #stt;
20
+ #vadStream;
21
+ constructor(stt, vad) {
22
+ super();
23
+ this.#stt = stt;
24
+ this.#vadStream = vad.stream();
25
+ this.#run();
26
+ }
27
+ async #run() {
28
+ const forwardInput = async () => {
29
+ for await (const input of this.input) {
30
+ if (input === SpeechStream.FLUSH_SENTINEL) {
31
+ this.#vadStream.flush();
32
+ } else {
33
+ this.#vadStream.pushFrame(input);
34
+ }
35
+ }
36
+ this.#vadStream.endInput();
37
+ };
38
+ const recognize = async () => {
39
+ for await (const ev of this.#vadStream) {
40
+ switch (ev.type) {
41
+ case VADEventType.START_OF_SPEECH:
42
+ this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
43
+ break;
44
+ case VADEventType.END_OF_SPEECH:
45
+ this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
46
+ const event = await this.#stt.recognize(ev.frames);
47
+ if (!event.alternatives[0].text) {
48
+ continue;
36
49
  }
37
- this.#vadStream.endInput();
38
- };
39
- const recognize = async () => {
40
- for await (const ev of this.#vadStream) {
41
- switch (ev.type) {
42
- case VADEventType.START_OF_SPEECH:
43
- this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
44
- break;
45
- case VADEventType.END_OF_SPEECH:
46
- this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
47
- const event = await this.#stt.recognize(ev.frames);
48
- if (!event.alternatives[0].text) {
49
- continue;
50
- }
51
- this.queue.put(event);
52
- break;
53
- }
54
- }
55
- };
56
- Promise.all([forwardInput(), recognize()]);
57
- }
50
+ this.queue.put(event);
51
+ break;
52
+ }
53
+ }
54
+ };
55
+ Promise.all([forwardInput(), recognize()]);
56
+ }
58
57
  }
58
+ export {
59
+ StreamAdapter,
60
+ StreamAdapterWrapper
61
+ };
59
62
  //# sourceMappingURL=stream_adapter.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"stream_adapter.js","sourceRoot":"","sources":["../../src/stt/stream_adapter.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC,OAAO,EAAE,GAAG,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAE9D,MAAM,OAAO,aAAc,SAAQ,GAAG;IACpC,IAAI,CAAM;IACV,IAAI,CAAM;IAEV,YAAY,GAAQ,EAAE,GAAQ;QAC5B,KAAK,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;QAClD,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;QAChB,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,SAAS,CAAC,KAAiB;QACzB,OAAO,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IACxD,CAAC;CACF;AAED,MAAM,OAAO,oBAAqB,SAAQ,YAAY;IACpD,IAAI,CAAM;IACV,UAAU,CAAY;IAEtB,YAAY,GAAQ,EAAE,GAAQ;QAC5B,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,IAAI,GAAG,GAAG,CAAC;QAChB,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;QAE/B,IAAI,CAAC,IAAI,EAAE,CAAC;IACd,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,YAAY,GAAG,KAAK,IAAI,EAAE;YAC9B,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBACrC,IAAI,KAAK,KAAK,YAAY,CAAC,cAAc,EAAE,CAAC;oBAC1C,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;gBAC1B,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;gBACnC,CAAC;YACH,CAAC;YACD,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC;QAC7B,CAAC,CAAC;QAEF,MAAM,SAAS,GAAG,KAAK,IAAI,EAAE;YAC3B,IAAI,KAAK,EAAE,MAAM,EAAE,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;gBACvC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;oBAChB,KAAK,YAAY,CAAC,eAAe;wBAC/B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,eAAe,CAAC,eAAe,EAAE,CAAC,CAAC;wBAC1D,MAAM;oBACR,KAAK,YAAY,CAAC,aAAa;wBAC7B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,eAAe,CAAC,aAAa,EAAE,CAAC,CAAC;wBAExD,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;wBACnD,IAAI,CAAC,KAAK,CAAC,YAAa,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;4BACjC,SAAS;wBACX,CAAC;wBAED,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;wBACtB,MAAM;gBACV,CAAC;YACH,CAAC;QACH,CAAC,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,CAAC,YAAY,EAAE,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC;IAC7C,CAAC;CACF"}
1
+ {"version":3,"sources":["../../src/stt/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { VAD, VADStream } from '../vad.js';\nimport { VADEventType } from '../vad.js';\nimport type { SpeechEvent } from './stt.js';\nimport { STT, SpeechEventType, SpeechStream } from './stt.js';\n\nexport class StreamAdapter extends STT {\n #stt: STT;\n #vad: VAD;\n\n constructor(stt: STT, vad: VAD) {\n super({ streaming: true, interimResults: false });\n this.#stt = stt;\n this.#vad = vad;\n }\n\n recognize(frame: AudioFrame): Promise<SpeechEvent> {\n return this.#stt.recognize(frame);\n }\n\n stream(): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#stt, this.#vad);\n }\n}\n\nexport class StreamAdapterWrapper extends SpeechStream {\n #stt: STT;\n #vadStream: VADStream;\n\n constructor(stt: STT, vad: VAD) {\n super();\n this.#stt = stt;\n this.#vadStream = vad.stream();\n\n this.#run();\n }\n\n async #run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (input === SpeechStream.FLUSH_SENTINEL) {\n this.#vadStream.flush();\n } else {\n this.#vadStream.pushFrame(input);\n }\n }\n this.#vadStream.endInput();\n };\n\n const recognize = async () => {\n for await (const ev of this.#vadStream) {\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n break;\n case VADEventType.END_OF_SPEECH:\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n\n const event = await this.#stt.recognize(ev.frames);\n if (!event.alternatives![0].text) {\n continue;\n }\n\n this.queue.put(event);\n break;\n }\n }\n };\n\n Promise.all([forwardInput(), recognize()]);\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAE7B,SAAS,KAAK,iBAAiB,oBAAoB;AAE5C,MAAM,sBAAsB,IAAI;AAAA,EACrC;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,EAAE,WAAW,MAAM,gBAAgB,MAAM,CAAC;AAChD,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,UAAU,OAAyC;AACjD,WAAO,KAAK,KAAK,UAAU,KAAK;AAAA,EAClC;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,IAAI;AAAA,EACtD;AACF;AAEO,MAAM,6BAA6B,aAAa;AAAA,EACrD;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM;AACN,SAAK,OAAO;AACZ,SAAK,aAAa,IAAI,OAAO;AAE7B,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,UAAU,aAAa,gBAAgB;AACzC,eAAK,WAAW,MAAM;AAAA,QACxB,OAAO;AACL,eAAK,WAAW,UAAU,KAAK;AAAA,QACjC;AAAA,MACF;AACA,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,MAAM,KAAK,YAAY;AACtC,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,cAAc,CAAC;AAEtD,kBAAM,QAAQ,MAAM,KAAK,KAAK,UAAU,GAAG,MAAM;AACjD,gBAAI,CAAC,MAAM,aAAc,CAAC,EAAE,MAAM;AAChC;AAAA,YACF;AAEA,iBAAK,MAAM,IAAI,KAAK;AACpB;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAEA,YAAQ,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC,CAAC;AAAA,EAC3C;AACF;","names":[]}
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var stt_exports = {};
20
+ __export(stt_exports, {
21
+ STT: () => STT,
22
+ SpeechEventType: () => SpeechEventType,
23
+ SpeechStream: () => SpeechStream
24
+ });
25
+ module.exports = __toCommonJS(stt_exports);
26
+ var import_utils = require("../utils.cjs");
27
+ var SpeechEventType = /* @__PURE__ */ ((SpeechEventType2) => {
28
+ SpeechEventType2[SpeechEventType2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
29
+ SpeechEventType2[SpeechEventType2["INTERIM_TRANSCRIPT"] = 1] = "INTERIM_TRANSCRIPT";
30
+ SpeechEventType2[SpeechEventType2["FINAL_TRANSCRIPT"] = 2] = "FINAL_TRANSCRIPT";
31
+ SpeechEventType2[SpeechEventType2["END_OF_SPEECH"] = 3] = "END_OF_SPEECH";
32
+ return SpeechEventType2;
33
+ })(SpeechEventType || {});
34
+ class STT {
35
+ #capabilities;
36
+ constructor(capabilities) {
37
+ this.#capabilities = capabilities;
38
+ }
39
+ /** Returns this STT's capabilities */
40
+ get capabilities() {
41
+ return this.#capabilities;
42
+ }
43
+ }
44
+ class SpeechStream {
45
+ static FLUSH_SENTINEL = Symbol("FLUSH_SENTINEL");
46
+ input = new import_utils.AsyncIterableQueue();
47
+ queue = new import_utils.AsyncIterableQueue();
48
+ closed = false;
49
+ /** Push an audio frame to the STT */
50
+ pushFrame(frame) {
51
+ if (this.input.closed) {
52
+ throw new Error("Input is closed");
53
+ }
54
+ if (this.closed) {
55
+ throw new Error("Stream is closed");
56
+ }
57
+ this.input.put(frame);
58
+ }
59
+ /** Flush the STT, causing it to process all pending text */
60
+ flush() {
61
+ if (this.input.closed) {
62
+ throw new Error("Input is closed");
63
+ }
64
+ if (this.closed) {
65
+ throw new Error("Stream is closed");
66
+ }
67
+ this.input.put(SpeechStream.FLUSH_SENTINEL);
68
+ }
69
+ /** Mark the input as ended and forbid additional pushes */
70
+ endInput() {
71
+ if (this.input.closed) {
72
+ throw new Error("Input is closed");
73
+ }
74
+ if (this.closed) {
75
+ throw new Error("Stream is closed");
76
+ }
77
+ this.input.close();
78
+ }
79
+ next() {
80
+ return this.queue.next();
81
+ }
82
+ /** Close both the input and output of the STT stream */
83
+ close() {
84
+ this.input.close();
85
+ this.queue.close();
86
+ this.closed = true;
87
+ }
88
+ [Symbol.asyncIterator]() {
89
+ return this;
90
+ }
91
+ }
92
+ // Annotate the CommonJS export names for ESM import in node:
93
+ 0 && (module.exports = {
94
+ STT,
95
+ SpeechEventType,
96
+ SpeechStream
97
+ });
98
+ //# sourceMappingURL=stt.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT {\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n abstract recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected closed = false;\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(frame);\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.queue.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,mBAAmC;AAG5B,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AApBU,SAAAA;AAAA,GAAA;AAwDL,MAAe,IAAI;AAAA,EACxB;AAAA,EAEA,YAAY,cAA+B;AACzC,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAUF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,QAAQ,IAAI,gCAAgC;AAAA,EAC5C,SAAS;AAAA;AAAA,EAGnB,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,KAAK;AAAA,EACtB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,MAAM,KAAK;AAAA,EACzB;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType"]}
package/dist/stt/stt.js CHANGED
@@ -1,107 +1,72 @@
1
- import { AsyncIterableQueue } from '../utils.js';
2
- /** Indicates start/middle/end of speech */
3
- export var SpeechEventType;
4
- (function (SpeechEventType) {
5
- /**
6
- * Indicate the start of speech.
7
- * If the STT doesn't support this event, this will be emitted at the same time
8
- * as the first INTERIM_TRANSCRIPT.
9
- */
10
- SpeechEventType[SpeechEventType["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
11
- /**
12
- * Interim transcript, useful for real-time transcription.
13
- */
14
- SpeechEventType[SpeechEventType["INTERIM_TRANSCRIPT"] = 1] = "INTERIM_TRANSCRIPT";
15
- /**
16
- * Final transcript, emitted when the STT is confident enough that a certain
17
- * portion of the speech will not change.
18
- */
19
- SpeechEventType[SpeechEventType["FINAL_TRANSCRIPT"] = 2] = "FINAL_TRANSCRIPT";
20
- /**
21
- * Indicate the end of speech, emitted when the user stops speaking.
22
- * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.
23
- */
24
- SpeechEventType[SpeechEventType["END_OF_SPEECH"] = 3] = "END_OF_SPEECH";
25
- })(SpeechEventType || (SpeechEventType = {}));
26
- /**
27
- * An instance of a speech-to-text adapter.
28
- *
29
- * @remarks
30
- * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
31
- * exports its own child STT class, which inherits this class's methods.
32
- */
33
- export class STT {
34
- #capabilities;
35
- constructor(capabilities) {
36
- this.#capabilities = capabilities;
37
- }
38
- /** Returns this STT's capabilities */
39
- get capabilities() {
40
- return this.#capabilities;
41
- }
1
+ import { AsyncIterableQueue } from "../utils.js";
2
+ var SpeechEventType = /* @__PURE__ */ ((SpeechEventType2) => {
3
+ SpeechEventType2[SpeechEventType2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
4
+ SpeechEventType2[SpeechEventType2["INTERIM_TRANSCRIPT"] = 1] = "INTERIM_TRANSCRIPT";
5
+ SpeechEventType2[SpeechEventType2["FINAL_TRANSCRIPT"] = 2] = "FINAL_TRANSCRIPT";
6
+ SpeechEventType2[SpeechEventType2["END_OF_SPEECH"] = 3] = "END_OF_SPEECH";
7
+ return SpeechEventType2;
8
+ })(SpeechEventType || {});
9
+ class STT {
10
+ #capabilities;
11
+ constructor(capabilities) {
12
+ this.#capabilities = capabilities;
13
+ }
14
+ /** Returns this STT's capabilities */
15
+ get capabilities() {
16
+ return this.#capabilities;
17
+ }
42
18
  }
43
- /**
44
- * An instance of a speech-to-text stream, as an asynchronous iterable iterator.
45
- *
46
- * @example Looping through frames
47
- * ```ts
48
- * for await (const event of stream) {
49
- * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {
50
- * console.log(event.alternatives[0].text)
51
- * }
52
- * }
53
- * ```
54
- *
55
- * @remarks
56
- * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
57
- * exports its own child SpeechStream class, which inherits this class's methods.
58
- */
59
- export class SpeechStream {
60
- static FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
61
- input = new AsyncIterableQueue();
62
- queue = new AsyncIterableQueue();
63
- closed = false;
64
- /** Push an audio frame to the STT */
65
- pushFrame(frame) {
66
- if (this.input.closed) {
67
- throw new Error('Input is closed');
68
- }
69
- if (this.closed) {
70
- throw new Error('Stream is closed');
71
- }
72
- this.input.put(frame);
19
+ class SpeechStream {
20
+ static FLUSH_SENTINEL = Symbol("FLUSH_SENTINEL");
21
+ input = new AsyncIterableQueue();
22
+ queue = new AsyncIterableQueue();
23
+ closed = false;
24
+ /** Push an audio frame to the STT */
25
+ pushFrame(frame) {
26
+ if (this.input.closed) {
27
+ throw new Error("Input is closed");
73
28
  }
74
- /** Flush the STT, causing it to process all pending text */
75
- flush() {
76
- if (this.input.closed) {
77
- throw new Error('Input is closed');
78
- }
79
- if (this.closed) {
80
- throw new Error('Stream is closed');
81
- }
82
- this.input.put(SpeechStream.FLUSH_SENTINEL);
29
+ if (this.closed) {
30
+ throw new Error("Stream is closed");
83
31
  }
84
- /** Mark the input as ended and forbid additional pushes */
85
- endInput() {
86
- if (this.input.closed) {
87
- throw new Error('Input is closed');
88
- }
89
- if (this.closed) {
90
- throw new Error('Stream is closed');
91
- }
92
- this.input.close();
32
+ this.input.put(frame);
33
+ }
34
+ /** Flush the STT, causing it to process all pending text */
35
+ flush() {
36
+ if (this.input.closed) {
37
+ throw new Error("Input is closed");
93
38
  }
94
- next() {
95
- return this.queue.next();
39
+ if (this.closed) {
40
+ throw new Error("Stream is closed");
96
41
  }
97
- /** Close both the input and output of the STT stream */
98
- close() {
99
- this.input.close();
100
- this.queue.close();
101
- this.closed = true;
42
+ this.input.put(SpeechStream.FLUSH_SENTINEL);
43
+ }
44
+ /** Mark the input as ended and forbid additional pushes */
45
+ endInput() {
46
+ if (this.input.closed) {
47
+ throw new Error("Input is closed");
102
48
  }
103
- [Symbol.asyncIterator]() {
104
- return this;
49
+ if (this.closed) {
50
+ throw new Error("Stream is closed");
105
51
  }
52
+ this.input.close();
53
+ }
54
+ next() {
55
+ return this.queue.next();
56
+ }
57
+ /** Close both the input and output of the STT stream */
58
+ close() {
59
+ this.input.close();
60
+ this.queue.close();
61
+ this.closed = true;
62
+ }
63
+ [Symbol.asyncIterator]() {
64
+ return this;
65
+ }
106
66
  }
67
+ export {
68
+ STT,
69
+ SpeechEventType,
70
+ SpeechStream
71
+ };
107
72
  //# sourceMappingURL=stt.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"stt.js","sourceRoot":"","sources":["../../src/stt/stt.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEjD,2CAA2C;AAC3C,MAAM,CAAN,IAAY,eAqBX;AArBD,WAAY,eAAe;IACzB;;;;OAIG;IACH,2EAAmB,CAAA;IACnB;;OAEG;IACH,iFAAsB,CAAA;IACtB;;;OAGG;IACH,6EAAoB,CAAA;IACpB;;;OAGG;IACH,uEAAiB,CAAA;AACnB,CAAC,EArBW,eAAe,KAAf,eAAe,QAqB1B;AA4BD;;;;;;GAMG;AACH,MAAM,OAAgB,GAAG;IACvB,aAAa,CAAkB;IAE/B,YAAY,YAA6B;QACvC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACpC,CAAC;IAED,sCAAsC;IACtC,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;CAUF;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,OAAgB,YAAY;IACtB,MAAM,CAAU,cAAc,GAAG,MAAM,CAAC,gBAAgB,CAAC,CAAC;IAC1D,KAAK,GAAG,IAAI,kBAAkB,EAAmD,CAAC;IAClF,KAAK,GAAG,IAAI,kBAAkB,EAAe,CAAC;IAC9C,MAAM,GAAG,KAAK,CAAC;IAEzB,qCAAqC;IACrC,SAAS,CAAC,KAAiB;QACzB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACrC,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACtC,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACxB,CAAC;IAED,4DAA4D;IAC5D,KAAK;QACH,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACrC,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACtC,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,CAAC,cAAc,CAAC,CAAC;IAC9C,CAAC;IAED,2DAA2D;IAC3D,QAAQ;QACN,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACrC,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACtC,CAAC;QACD,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;IAED,IAAI;QACF,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IAC3B,CAAC;IAED,wDAAwD;IACxD,KAAK;QACH,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACnB,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACnB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,CAAC,MAAM,CAAC,aAAa,CAAC;QACpB,OAAO,IAAI,CAAC;IACd,CAAC"}
1
+ {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT {\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n abstract recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected closed = false;\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(frame);\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.queue.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,0BAA0B;AAG5B,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AApBU,SAAAA;AAAA,GAAA;AAwDL,MAAe,IAAI;AAAA,EACxB;AAAA,EAEA,YAAY,cAA+B;AACzC,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAUF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,QAAQ,IAAI,mBAAgC;AAAA,EAC5C,SAAS;AAAA;AAAA,EAGnB,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,KAAK;AAAA,EACtB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,MAAM,KAAK;AAAA,EACzB;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType"]}