@livekit/agents 0.4.6 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/README.md +17 -0
  2. package/dist/audio.cjs +77 -0
  3. package/dist/audio.cjs.map +1 -0
  4. package/dist/audio.js +48 -37
  5. package/dist/audio.js.map +1 -1
  6. package/dist/cli.cjs +131 -0
  7. package/dist/cli.cjs.map +1 -0
  8. package/dist/cli.js +96 -122
  9. package/dist/cli.js.map +1 -1
  10. package/dist/generator.cjs +36 -0
  11. package/dist/generator.cjs.map +1 -0
  12. package/dist/generator.js +8 -22
  13. package/dist/generator.js.map +1 -1
  14. package/dist/http_server.cjs +72 -0
  15. package/dist/http_server.cjs.map +1 -0
  16. package/dist/http_server.d.ts +1 -1
  17. package/dist/http_server.js +44 -47
  18. package/dist/http_server.js.map +1 -1
  19. package/dist/index.cjs +78 -0
  20. package/dist/index.cjs.map +1 -0
  21. package/dist/index.js +26 -28
  22. package/dist/index.js.map +1 -1
  23. package/dist/ipc/job_executor.cjs +33 -0
  24. package/dist/ipc/job_executor.cjs.map +1 -0
  25. package/dist/ipc/job_executor.js +7 -4
  26. package/dist/ipc/job_executor.js.map +1 -1
  27. package/dist/ipc/job_main.cjs +147 -0
  28. package/dist/ipc/job_main.cjs.map +1 -0
  29. package/dist/ipc/job_main.d.ts +1 -1
  30. package/dist/ipc/job_main.js +103 -103
  31. package/dist/ipc/job_main.js.map +1 -1
  32. package/dist/ipc/message.cjs +17 -0
  33. package/dist/ipc/message.cjs.map +1 -0
  34. package/dist/ipc/message.js +0 -1
  35. package/dist/ipc/message.js.map +1 -1
  36. package/dist/ipc/proc_job_executor.cjs +174 -0
  37. package/dist/ipc/proc_job_executor.cjs.map +1 -0
  38. package/dist/ipc/proc_job_executor.js +130 -126
  39. package/dist/ipc/proc_job_executor.js.map +1 -1
  40. package/dist/ipc/proc_pool.cjs +126 -0
  41. package/dist/ipc/proc_pool.cjs.map +1 -0
  42. package/dist/ipc/proc_pool.js +93 -96
  43. package/dist/ipc/proc_pool.js.map +1 -1
  44. package/dist/job.cjs +230 -0
  45. package/dist/job.cjs.map +1 -0
  46. package/dist/job.d.ts +6 -1
  47. package/dist/job.d.ts.map +1 -1
  48. package/dist/job.js +195 -198
  49. package/dist/job.js.map +1 -1
  50. package/dist/llm/chat_context.cjs +131 -0
  51. package/dist/llm/chat_context.cjs.map +1 -0
  52. package/dist/llm/chat_context.js +98 -86
  53. package/dist/llm/chat_context.js.map +1 -1
  54. package/dist/llm/function_context.cjs +103 -0
  55. package/dist/llm/function_context.cjs.map +1 -0
  56. package/dist/llm/function_context.js +72 -81
  57. package/dist/llm/function_context.js.map +1 -1
  58. package/dist/llm/function_context.test.cjs +218 -0
  59. package/dist/llm/function_context.test.cjs.map +1 -0
  60. package/dist/llm/function_context.test.js +209 -210
  61. package/dist/llm/function_context.test.js.map +1 -1
  62. package/dist/llm/index.cjs +43 -0
  63. package/dist/llm/index.cjs.map +1 -0
  64. package/dist/llm/index.js +22 -6
  65. package/dist/llm/index.js.map +1 -1
  66. package/dist/llm/llm.cjs +76 -0
  67. package/dist/llm/llm.cjs.map +1 -0
  68. package/dist/llm/llm.js +48 -42
  69. package/dist/llm/llm.js.map +1 -1
  70. package/dist/log.cjs +57 -0
  71. package/dist/log.cjs.map +1 -0
  72. package/dist/log.js +27 -26
  73. package/dist/log.js.map +1 -1
  74. package/dist/multimodal/agent_playout.cjs +228 -0
  75. package/dist/multimodal/agent_playout.cjs.map +1 -0
  76. package/dist/multimodal/agent_playout.d.ts +1 -1
  77. package/dist/multimodal/agent_playout.js +193 -180
  78. package/dist/multimodal/agent_playout.js.map +1 -1
  79. package/dist/multimodal/index.cjs +25 -0
  80. package/dist/multimodal/index.cjs.map +1 -0
  81. package/dist/multimodal/index.js +2 -5
  82. package/dist/multimodal/index.js.map +1 -1
  83. package/dist/multimodal/multimodal_agent.cjs +404 -0
  84. package/dist/multimodal/multimodal_agent.cjs.map +1 -0
  85. package/dist/multimodal/multimodal_agent.d.ts +1 -1
  86. package/dist/multimodal/multimodal_agent.js +351 -330
  87. package/dist/multimodal/multimodal_agent.js.map +1 -1
  88. package/dist/pipeline/agent_output.cjs +172 -0
  89. package/dist/pipeline/agent_output.cjs.map +1 -0
  90. package/dist/pipeline/agent_output.js +136 -138
  91. package/dist/pipeline/agent_output.js.map +1 -1
  92. package/dist/pipeline/agent_playout.cjs +169 -0
  93. package/dist/pipeline/agent_playout.cjs.map +1 -0
  94. package/dist/pipeline/agent_playout.js +126 -136
  95. package/dist/pipeline/agent_playout.js.map +1 -1
  96. package/dist/pipeline/human_input.cjs +158 -0
  97. package/dist/pipeline/human_input.cjs.map +1 -0
  98. package/dist/pipeline/human_input.js +124 -125
  99. package/dist/pipeline/human_input.js.map +1 -1
  100. package/dist/pipeline/index.cjs +31 -0
  101. package/dist/pipeline/index.cjs.map +1 -0
  102. package/dist/pipeline/index.js +8 -4
  103. package/dist/pipeline/index.js.map +1 -1
  104. package/dist/pipeline/pipeline_agent.cjs +642 -0
  105. package/dist/pipeline/pipeline_agent.cjs.map +1 -0
  106. package/dist/pipeline/pipeline_agent.js +595 -651
  107. package/dist/pipeline/pipeline_agent.js.map +1 -1
  108. package/dist/pipeline/speech_handle.cjs +128 -0
  109. package/dist/pipeline/speech_handle.cjs.map +1 -0
  110. package/dist/pipeline/speech_handle.js +102 -100
  111. package/dist/pipeline/speech_handle.js.map +1 -1
  112. package/dist/plugin.cjs +46 -0
  113. package/dist/plugin.cjs.map +1 -0
  114. package/dist/plugin.js +20 -20
  115. package/dist/plugin.js.map +1 -1
  116. package/dist/stt/index.cjs +38 -0
  117. package/dist/stt/index.cjs.map +1 -0
  118. package/dist/stt/index.js +13 -5
  119. package/dist/stt/index.js.map +1 -1
  120. package/dist/stt/stream_adapter.cjs +87 -0
  121. package/dist/stt/stream_adapter.cjs.map +1 -0
  122. package/dist/stt/stream_adapter.js +58 -55
  123. package/dist/stt/stream_adapter.js.map +1 -1
  124. package/dist/stt/stt.cjs +98 -0
  125. package/dist/stt/stt.cjs.map +1 -0
  126. package/dist/stt/stt.js +63 -98
  127. package/dist/stt/stt.js.map +1 -1
  128. package/dist/tokenize/basic/basic.cjs +98 -0
  129. package/dist/tokenize/basic/basic.cjs.map +1 -0
  130. package/dist/tokenize/basic/basic.d.ts +1 -1
  131. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  132. package/dist/tokenize/basic/basic.js +56 -45
  133. package/dist/tokenize/basic/basic.js.map +1 -1
  134. package/dist/tokenize/basic/hyphenator.cjs +425 -0
  135. package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
  136. package/dist/tokenize/basic/hyphenator.js +66 -82
  137. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  138. package/dist/tokenize/basic/index.cjs +35 -0
  139. package/dist/tokenize/basic/index.cjs.map +1 -0
  140. package/dist/tokenize/basic/index.js +7 -4
  141. package/dist/tokenize/basic/index.js.map +1 -1
  142. package/dist/tokenize/basic/paragraph.cjs +57 -0
  143. package/dist/tokenize/basic/paragraph.cjs.map +1 -0
  144. package/dist/tokenize/basic/paragraph.js +30 -35
  145. package/dist/tokenize/basic/paragraph.js.map +1 -1
  146. package/dist/tokenize/basic/sentence.cjs +89 -0
  147. package/dist/tokenize/basic/sentence.cjs.map +1 -0
  148. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  149. package/dist/tokenize/basic/sentence.js +62 -57
  150. package/dist/tokenize/basic/sentence.js.map +1 -1
  151. package/dist/tokenize/basic/word.cjs +44 -0
  152. package/dist/tokenize/basic/word.cjs.map +1 -0
  153. package/dist/tokenize/basic/word.js +17 -20
  154. package/dist/tokenize/basic/word.js.map +1 -1
  155. package/dist/tokenize/index.cjs +55 -0
  156. package/dist/tokenize/index.cjs.map +1 -0
  157. package/dist/tokenize/index.js +18 -7
  158. package/dist/tokenize/index.js.map +1 -1
  159. package/dist/tokenize/token_stream.cjs +164 -0
  160. package/dist/tokenize/token_stream.cjs.map +1 -0
  161. package/dist/tokenize/token_stream.js +133 -139
  162. package/dist/tokenize/token_stream.js.map +1 -1
  163. package/dist/tokenize/tokenizer.cjs +184 -0
  164. package/dist/tokenize/tokenizer.cjs.map +1 -0
  165. package/dist/tokenize/tokenizer.js +138 -99
  166. package/dist/tokenize/tokenizer.js.map +1 -1
  167. package/dist/tokenize/tokenizer.test.cjs +220 -0
  168. package/dist/tokenize/tokenizer.test.cjs.map +1 -0
  169. package/dist/tokenize/tokenizer.test.d.ts +2 -0
  170. package/dist/tokenize/tokenizer.test.d.ts.map +1 -0
  171. package/dist/tokenize/tokenizer.test.js +219 -0
  172. package/dist/tokenize/tokenizer.test.js.map +1 -0
  173. package/dist/transcription.cjs +131 -0
  174. package/dist/transcription.cjs.map +1 -0
  175. package/dist/transcription.js +99 -96
  176. package/dist/transcription.js.map +1 -1
  177. package/dist/tts/index.cjs +38 -0
  178. package/dist/tts/index.cjs.map +1 -0
  179. package/dist/tts/index.js +13 -5
  180. package/dist/tts/index.js.map +1 -1
  181. package/dist/tts/stream_adapter.cjs +78 -0
  182. package/dist/tts/stream_adapter.cjs.map +1 -0
  183. package/dist/tts/stream_adapter.js +50 -47
  184. package/dist/tts/stream_adapter.js.map +1 -1
  185. package/dist/tts/tts.cjs +127 -0
  186. package/dist/tts/tts.cjs.map +1 -0
  187. package/dist/tts/tts.js +90 -120
  188. package/dist/tts/tts.js.map +1 -1
  189. package/dist/utils.cjs +284 -0
  190. package/dist/utils.cjs.map +1 -0
  191. package/dist/utils.js +242 -247
  192. package/dist/utils.js.map +1 -1
  193. package/dist/vad.cjs +92 -0
  194. package/dist/vad.cjs.map +1 -0
  195. package/dist/vad.js +57 -52
  196. package/dist/vad.js.map +1 -1
  197. package/dist/version.cjs +29 -0
  198. package/dist/version.cjs.map +1 -0
  199. package/dist/version.js +4 -4
  200. package/dist/version.js.map +1 -1
  201. package/dist/worker.cjs +577 -0
  202. package/dist/worker.cjs.map +1 -0
  203. package/dist/worker.d.ts +1 -1
  204. package/dist/worker.d.ts.map +1 -1
  205. package/dist/worker.js +512 -484
  206. package/dist/worker.js.map +1 -1
  207. package/package.json +18 -8
  208. package/src/ipc/job_main.ts +66 -64
  209. package/src/job.ts +3 -2
  210. package/src/pipeline/pipeline_agent.ts +23 -23
  211. package/src/tokenize/basic/basic.ts +1 -1
  212. package/src/tokenize/basic/sentence.ts +14 -8
  213. package/src/tokenize/tokenizer.test.ts +255 -0
  214. package/src/worker.ts +1 -0
@@ -0,0 +1,169 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+ var agent_playout_exports = {};
30
+ __export(agent_playout_exports, {
31
+ AgentPlayout: () => AgentPlayout,
32
+ AgentPlayoutEvent: () => AgentPlayoutEvent,
33
+ PlayoutHandle: () => PlayoutHandle
34
+ });
35
+ module.exports = __toCommonJS(agent_playout_exports);
36
+ var import_node_events = __toESM(require("node:events"), 1);
37
+ var import_log = require("../log.cjs");
38
+ var import_utils = require("../utils.cjs");
39
+ var import_agent_output = require("./agent_output.cjs");
40
+ var AgentPlayoutEvent = /* @__PURE__ */ ((AgentPlayoutEvent2) => {
41
+ AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STARTED"] = 0] = "PLAYOUT_STARTED";
42
+ AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STOPPED"] = 1] = "PLAYOUT_STOPPED";
43
+ return AgentPlayoutEvent2;
44
+ })(AgentPlayoutEvent || {});
45
+ class PlayoutHandle {
46
+ #speechId;
47
+ #audioSource;
48
+ playoutSource;
49
+ totalPlayedTime;
50
+ #interrupted = false;
51
+ pushedDuration = 0;
52
+ intFut = new import_utils.Future();
53
+ doneFut = new import_utils.Future();
54
+ constructor(speechId, audioSource, playoutSource) {
55
+ this.#speechId = speechId;
56
+ this.#audioSource = audioSource;
57
+ this.playoutSource = playoutSource;
58
+ }
59
+ get speechId() {
60
+ return this.#speechId;
61
+ }
62
+ get interrupted() {
63
+ return this.#interrupted;
64
+ }
65
+ get timePlayed() {
66
+ return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;
67
+ }
68
+ get done() {
69
+ return this.doneFut.done || this.#interrupted;
70
+ }
71
+ interrupt() {
72
+ if (this.done) {
73
+ return;
74
+ }
75
+ this.intFut.resolve();
76
+ this.#interrupted = true;
77
+ }
78
+ join() {
79
+ return this.doneFut;
80
+ }
81
+ }
82
+ class AgentPlayout extends import_node_events.default {
83
+ #closed = false;
84
+ #audioSource;
85
+ #targetVolume = 1;
86
+ #playoutTask;
87
+ #logger = (0, import_log.log)();
88
+ constructor(audioSource) {
89
+ super();
90
+ this.#audioSource = audioSource;
91
+ }
92
+ get targetVolume() {
93
+ return this.#targetVolume;
94
+ }
95
+ set targetVolume(vol) {
96
+ this.#targetVolume = vol;
97
+ }
98
+ play(speechId, playoutSource) {
99
+ if (this.#closed) {
100
+ throw new Error("source closed");
101
+ }
102
+ const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);
103
+ this.#playoutTask = this.#playout(handle, this.#playoutTask);
104
+ return handle;
105
+ }
106
+ #playout(handle, oldTask) {
107
+ return new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
108
+ const cancel = () => {
109
+ captureTask.cancel();
110
+ handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
111
+ if (handle.interrupted || captureTask.error) {
112
+ this.#audioSource.clearQueue();
113
+ }
114
+ if (!firstFrame) {
115
+ this.emit(1 /* PLAYOUT_STOPPED */, handle.interrupted);
116
+ }
117
+ handle.doneFut.resolve();
118
+ this.#logger.child({ speechId: handle.speechId, interrupted: handle.interrupted }).debug("playout finished");
119
+ };
120
+ onCancel(() => {
121
+ cancel();
122
+ });
123
+ if (oldTask) {
124
+ await (0, import_utils.gracefullyCancel)(oldTask);
125
+ }
126
+ if (this.#audioSource.queuedDuration > 0) {
127
+ this.#logger.child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration }).warn("new playout while the source is still playing");
128
+ }
129
+ let firstFrame = true;
130
+ const captureTask = new import_utils.CancellablePromise(async (resolve2, _2, onCancel2) => {
131
+ let cancelled = false;
132
+ onCancel2(() => {
133
+ cancelled = true;
134
+ });
135
+ for await (const frame of handle.playoutSource) {
136
+ if (cancelled || frame === import_agent_output.SynthesisHandle.FLUSH_SENTINEL) {
137
+ break;
138
+ }
139
+ if (firstFrame) {
140
+ this.#logger.child({ speechId: handle.speechId }).debug("started playing the first time");
141
+ this.emit(0 /* PLAYOUT_STARTED */);
142
+ firstFrame = false;
143
+ }
144
+ handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
145
+ await this.#audioSource.captureFrame(frame);
146
+ await this.#audioSource.waitForPlayout();
147
+ }
148
+ resolve2();
149
+ });
150
+ try {
151
+ await Promise.any([captureTask, handle.intFut.await]);
152
+ } finally {
153
+ cancel();
154
+ resolve();
155
+ }
156
+ });
157
+ }
158
+ async close() {
159
+ this.#closed = true;
160
+ await this.#playoutTask;
161
+ }
162
+ }
163
+ // Annotate the CommonJS export names for ESM import in node:
164
+ 0 && (module.exports = {
165
+ AgentPlayout,
166
+ AgentPlayoutEvent,
167
+ PlayoutHandle
168
+ });
169
+ //# sourceMappingURL=agent_playout.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷‍♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAAyB;AACzB,iBAAoB;AACpB,mBAA6D;AAC7D,0BAAgC;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,oBAAO;AAAA,EACpB,UAAU,IAAI,oBAAO;AAAA,EAErB,YACE,UACA,aACA,eACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,mBAAAC,QAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,aAAa;AAE3E,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,gCAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,oCAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","EventEmitter","resolve","_","onCancel"]}
@@ -1,143 +1,133 @@
1
- import EventEmitter from 'node:events';
2
- import { log } from '../log.js';
3
- import { CancellablePromise, Future, gracefullyCancel } from '../utils.js';
4
- import { SynthesisHandle } from './agent_output.js';
5
- export var AgentPlayoutEvent;
6
- (function (AgentPlayoutEvent) {
7
- AgentPlayoutEvent[AgentPlayoutEvent["PLAYOUT_STARTED"] = 0] = "PLAYOUT_STARTED";
8
- AgentPlayoutEvent[AgentPlayoutEvent["PLAYOUT_STOPPED"] = 1] = "PLAYOUT_STOPPED";
9
- })(AgentPlayoutEvent || (AgentPlayoutEvent = {}));
10
- export class PlayoutHandle {
11
- #speechId;
12
- #audioSource;
13
- playoutSource;
14
- totalPlayedTime;
15
- #interrupted = false;
16
- pushedDuration = 0;
17
- intFut = new Future();
18
- doneFut = new Future();
19
- constructor(speechId, audioSource, playoutSource) {
20
- this.#speechId = speechId;
21
- this.#audioSource = audioSource;
22
- this.playoutSource = playoutSource;
23
- }
24
- get speechId() {
25
- return this.#speechId;
26
- }
27
- get interrupted() {
28
- return this.#interrupted;
29
- }
30
- get timePlayed() {
31
- return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;
32
- }
33
- get done() {
34
- return this.doneFut.done || this.#interrupted;
35
- }
36
- interrupt() {
37
- if (this.done) {
38
- return;
39
- }
40
- this.intFut.resolve();
41
- this.#interrupted = true;
42
- }
43
- join() {
44
- return this.doneFut;
1
+ import EventEmitter from "node:events";
2
+ import { log } from "../log.js";
3
+ import { CancellablePromise, Future, gracefullyCancel } from "../utils.js";
4
+ import { SynthesisHandle } from "./agent_output.js";
5
+ var AgentPlayoutEvent = /* @__PURE__ */ ((AgentPlayoutEvent2) => {
6
+ AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STARTED"] = 0] = "PLAYOUT_STARTED";
7
+ AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STOPPED"] = 1] = "PLAYOUT_STOPPED";
8
+ return AgentPlayoutEvent2;
9
+ })(AgentPlayoutEvent || {});
10
+ class PlayoutHandle {
11
+ #speechId;
12
+ #audioSource;
13
+ playoutSource;
14
+ totalPlayedTime;
15
+ #interrupted = false;
16
+ pushedDuration = 0;
17
+ intFut = new Future();
18
+ doneFut = new Future();
19
+ constructor(speechId, audioSource, playoutSource) {
20
+ this.#speechId = speechId;
21
+ this.#audioSource = audioSource;
22
+ this.playoutSource = playoutSource;
23
+ }
24
+ get speechId() {
25
+ return this.#speechId;
26
+ }
27
+ get interrupted() {
28
+ return this.#interrupted;
29
+ }
30
+ get timePlayed() {
31
+ return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;
32
+ }
33
+ get done() {
34
+ return this.doneFut.done || this.#interrupted;
35
+ }
36
+ interrupt() {
37
+ if (this.done) {
38
+ return;
45
39
  }
40
+ this.intFut.resolve();
41
+ this.#interrupted = true;
42
+ }
43
+ join() {
44
+ return this.doneFut;
45
+ }
46
46
  }
47
- export class AgentPlayout extends EventEmitter {
48
- #closed = false;
49
- #audioSource;
50
- #targetVolume = 1;
51
- #playoutTask;
52
- #logger = log();
53
- constructor(audioSource) {
54
- super();
55
- this.#audioSource = audioSource;
47
+ class AgentPlayout extends EventEmitter {
48
+ #closed = false;
49
+ #audioSource;
50
+ #targetVolume = 1;
51
+ #playoutTask;
52
+ #logger = log();
53
+ constructor(audioSource) {
54
+ super();
55
+ this.#audioSource = audioSource;
56
+ }
57
+ get targetVolume() {
58
+ return this.#targetVolume;
59
+ }
60
+ set targetVolume(vol) {
61
+ this.#targetVolume = vol;
62
+ }
63
+ play(speechId, playoutSource) {
64
+ if (this.#closed) {
65
+ throw new Error("source closed");
56
66
  }
57
- get targetVolume() {
58
- return this.#targetVolume;
59
- }
60
- set targetVolume(vol) {
61
- this.#targetVolume = vol;
62
- }
63
- play(speechId, playoutSource) {
64
- if (this.#closed) {
65
- throw new Error('source closed');
67
+ const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);
68
+ this.#playoutTask = this.#playout(handle, this.#playoutTask);
69
+ return handle;
70
+ }
71
+ #playout(handle, oldTask) {
72
+ return new CancellablePromise(async (resolve, _, onCancel) => {
73
+ const cancel = () => {
74
+ captureTask.cancel();
75
+ handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
76
+ if (handle.interrupted || captureTask.error) {
77
+ this.#audioSource.clearQueue();
66
78
  }
67
- const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);
68
- this.#playoutTask = this.#playout(handle, this.#playoutTask);
69
- return handle;
70
- }
71
- #playout(handle, oldTask) {
72
- return new CancellablePromise(async (resolve, _, onCancel) => {
73
- const cancel = () => {
74
- captureTask.cancel();
75
- handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
76
- if (handle.interrupted || captureTask.error) {
77
- this.#audioSource.clearQueue(); // make sure to remove any queued frames
78
- }
79
- if (!firstFrame) {
80
- this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);
81
- }
82
- handle.doneFut.resolve();
83
- this.#logger
84
- .child({ speechId: handle.speechId, interrupted: handle.interrupted })
85
- .debug('playout finished');
86
- };
87
- onCancel(() => {
88
- cancel();
89
- });
90
- if (oldTask) {
91
- await gracefullyCancel(oldTask);
92
- }
93
- if (this.#audioSource.queuedDuration > 0) {
94
- // this should not happen, but log it just in case
95
- this.#logger
96
- .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })
97
- .warn('new playout while the source is still playing');
98
- }
99
- let firstFrame = true;
100
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
101
- const captureTask = new CancellablePromise(async (resolve, _, onCancel) => {
102
- let cancelled = false;
103
- onCancel(() => {
104
- cancelled = true;
105
- });
106
- for await (const frame of handle.playoutSource) {
107
- if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
108
- break;
109
- }
110
- if (firstFrame) {
111
- this.#logger
112
- .child({ speechId: handle.speechId })
113
- .debug('started playing the first time');
114
- this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);
115
- firstFrame = false;
116
- }
117
- handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;
118
- await this.#audioSource.captureFrame(frame);
119
- await this.#audioSource.waitForPlayout();
120
- }
121
- // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,
122
- // but for some reason too many TTS frames can gunk up the buffer and lead to
123
- // FFI errors. this works 🤷‍♀️
124
- // if (this.#audioSource.queuedDuration > 0) {
125
- // await this.#audioSource.waitForPlayout();
126
- // }
127
- resolve();
128
- });
129
- try {
130
- await Promise.any([captureTask, handle.intFut.await]);
131
- }
132
- finally {
133
- cancel();
134
- resolve();
135
- }
79
+ if (!firstFrame) {
80
+ this.emit(1 /* PLAYOUT_STOPPED */, handle.interrupted);
81
+ }
82
+ handle.doneFut.resolve();
83
+ this.#logger.child({ speechId: handle.speechId, interrupted: handle.interrupted }).debug("playout finished");
84
+ };
85
+ onCancel(() => {
86
+ cancel();
87
+ });
88
+ if (oldTask) {
89
+ await gracefullyCancel(oldTask);
90
+ }
91
+ if (this.#audioSource.queuedDuration > 0) {
92
+ this.#logger.child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration }).warn("new playout while the source is still playing");
93
+ }
94
+ let firstFrame = true;
95
+ const captureTask = new CancellablePromise(async (resolve2, _2, onCancel2) => {
96
+ let cancelled = false;
97
+ onCancel2(() => {
98
+ cancelled = true;
136
99
  });
137
- }
138
- async close() {
139
- this.#closed = true;
140
- await this.#playoutTask;
141
- }
100
+ for await (const frame of handle.playoutSource) {
101
+ if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
102
+ break;
103
+ }
104
+ if (firstFrame) {
105
+ this.#logger.child({ speechId: handle.speechId }).debug("started playing the first time");
106
+ this.emit(0 /* PLAYOUT_STARTED */);
107
+ firstFrame = false;
108
+ }
109
+ handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
110
+ await this.#audioSource.captureFrame(frame);
111
+ await this.#audioSource.waitForPlayout();
112
+ }
113
+ resolve2();
114
+ });
115
+ try {
116
+ await Promise.any([captureTask, handle.intFut.await]);
117
+ } finally {
118
+ cancel();
119
+ resolve();
120
+ }
121
+ });
122
+ }
123
+ async close() {
124
+ this.#closed = true;
125
+ await this.#playoutTask;
126
+ }
142
127
  }
128
+ export {
129
+ AgentPlayout,
130
+ AgentPlayoutEvent,
131
+ PlayoutHandle
132
+ };
143
133
  //# sourceMappingURL=agent_playout.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"agent_playout.js","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAKA,OAAO,YAAY,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,GAAG,EAAE,MAAM,WAAW,CAAC;AAChC,OAAO,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,MAAM,CAAN,IAAY,iBAGX;AAHD,WAAY,iBAAiB;IAC3B,+EAAe,CAAA;IACf,+EAAe,CAAA;AACjB,CAAC,EAHW,iBAAiB,KAAjB,iBAAiB,QAG5B;AAOD,MAAM,OAAO,aAAa;IACxB,SAAS,CAAS;IAClB,YAAY,CAAc;IAC1B,aAAa,CAAoE;IACjF,eAAe,CAAU;IACzB,YAAY,GAAG,KAAK,CAAC;IACrB,cAAc,GAAG,CAAC,CAAC;IACnB,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;IACtB,OAAO,GAAG,IAAI,MAAM,EAAE,CAAC;IAEvB,YACE,QAAgB,EAChB,WAAwB,EACxB,aAAgF;QAEhF,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAC;QAC1B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;QAChC,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED,IAAI,QAAQ;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,IAAI,WAAW;QACb,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED,IAAI,UAAU;QACZ,OAAO,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC;IACxF,CAAC;IAED,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,IAAI,CAAC,YAAY,CAAC;IAChD,CAAC;IAED,SAAS;QACP,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACd,OAAO;QACT,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACtB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;IAC3B,CAAC;IAED,IAAI;QACF,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF;AAED,MAAM,OAAO,YAAa,SAAS,YAA8D;IAC/F,OAAO,GAAG,KAAK,CAAC;IAChB,YAAY,CAAc;IAC1B,aAAa,GAAG,CAAC,CAAC;IAClB,YAAY,CAA4B;IACxC,OAAO,GAAG,GAAG,EAAE,CAAC;IAEhB,YAAY,WAAwB;QAClC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;IAClC,CAAC;IAED,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;IAED,IAAI,YAAY,CAAC,GAAW;QAC1B,IAAI,CAAC,aAAa,GAAG,GAAG,CAAC;IAC3B,CAAC;IAED,IAAI,CACF,QAAgB,EAChB,aAAgF;QAEhF,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;QACnC,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,YAAY,EAAE,aAAa,CAAC,CAAC;QAE7E,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7D,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,QAAQ,CAAC,MAAqB,EAAE,OAAkC;QAChE,OAAO,IAAI,kBAAkB,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE;YAC3D,MAAM,MAAM,GAAG,GAAG,EAAE;gBAClB,WAAW,CAAC,MAAM,EAAE,CAAC;gBACrB,MAAM,CAAC,eAAe,GAAG,MAAM,CAAC,cAAc,GAAG,IAAI,CAAC,YAAY,CAAC,cAAc,CAAC;gBAElF,IAAI,MAAM,CAAC,WAAW,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;oBAC5C,IAAI,CAAC,YAAY,CAAC,UAAU,EAAE,CAAC,CAAC,wCAAwC;gBAC1E,CAAC;gBAED,IAAI,CAAC,UAAU,EAAE,CAAC;oBAChB,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,eAAe,EAAE,MAAM,CAAC,WAAW,CAAC,CAAC;gBACnE,CAAC;gBAED,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;gBAEzB,IAAI,CAAC,OAAO;qBACT,KAAK,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,CAAC;qBACrE,KAAK,CAAC,kBAAkB,CAAC,CAAC;YAC/B,CAAC,CAAC;YAEF,QAAQ,CAAC,GAAG,EAAE;gBACZ,MAAM,EAAE,CAAC;YACX,CAAC,CAAC,CAAC;YAEH,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAClC,CAAC;YAED,IAAI,IAAI,CAAC,YAAY,CAAC,cAAc,GAAG,CAAC,EAAE,CAAC;gBACzC,kDAAkD;gBAClD,IAAI,CAAC,OAAO;qBACT,KAAK,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,cAAc,EAAE,IAAI,CAAC,YAAY,CAAC,cAAc,EAAE,CAAC;qBACtF,IAAI,CAAC,+CAA+C,CAAC,CAAC;YAC3D,CAAC;YAED,IAAI,UAAU,GAAG,IAAI,CAAC;YAEtB,6DAA6D;YAC7D,MAAM,WAAW,GAAG,IAAI,kBAAkB,CAAO,KAAK,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE;gBAC9E,IAAI,SAAS,GAAG,KAAK,CAAC;gBACtB,QAAQ,CAAC,GAAG,EAAE;oBACZ,SAAS,GAAG,IAAI,CAAC;gBACnB,CAAC,CAAC,CAAC;gBAEH,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;oBAC/C,IAAI,SAAS,IAAI,KAAK,KAAK,eAAe,CAAC,cAAc,EAAE,CAAC;wBAC1D,MAAM;oBACR,CAAC;oBACD,IAAI,UAAU,EAAE,CAAC;wBACf,IAAI,CAAC,OAAO;6BACT,KAAK,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC;6BACpC,KAAK,CAAC,gCAAgC,CAAC,CAAC;wBAC3C,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,eAAe,CAAC,CAAC;wBAC7C,UAAU,GAAG,KAAK,CAAC;oBACrB,CAAC;oBACD,MAAM,CAAC,cAAc,IAAI,CAAC,KAAK,CAAC,iBAAiB,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC;oBAC7E,MAAM,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;oBAC5C,MAAM,IAAI,CAAC,YAAY,CAAC,cAAc,EAAE,CAAC;gBAC3C,CAAC;gBAED,oFAAoF;gBACpF,wFAAwF;gBACxF,0CAA0C;gBAC1C,8CAA8C;gBAC9C,8CAA8C;gBAC9C,IAAI;gBAEJ,OAAO,EAAE,CAAC;YACZ,CAAC,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACxD,CAAC;oBAAS,CAAC;gBACT,MAAM,EAAE,CAAC;gBACT,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACpB,MAAM,IAAI,CAAC,YAAY,CAAC;IAC1B,CAAC;CACF"}
1
+ {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷‍♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AACpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,aAAa;AAE3E,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
@@ -0,0 +1,158 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var human_input_exports = {};
20
+ __export(human_input_exports, {
21
+ HumanInput: () => HumanInput,
22
+ HumanInputEvent: () => HumanInputEvent
23
+ });
24
+ module.exports = __toCommonJS(human_input_exports);
25
+ var import_rtc_node = require("@livekit/rtc-node");
26
+ var import_node_events = require("node:events");
27
+ var import_log = require("../log.cjs");
28
+ var import_stt = require("../stt/stt.cjs");
29
+ var import_utils = require("../utils.cjs");
30
+ var import_vad = require("../vad.cjs");
31
+ var HumanInputEvent = /* @__PURE__ */ ((HumanInputEvent2) => {
32
+ HumanInputEvent2[HumanInputEvent2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
33
+ HumanInputEvent2[HumanInputEvent2["VAD_INFERENCE_DONE"] = 1] = "VAD_INFERENCE_DONE";
34
+ HumanInputEvent2[HumanInputEvent2["END_OF_SPEECH"] = 2] = "END_OF_SPEECH";
35
+ HumanInputEvent2[HumanInputEvent2["FINAL_TRANSCRIPT"] = 3] = "FINAL_TRANSCRIPT";
36
+ HumanInputEvent2[HumanInputEvent2["INTERIM_TRANSCRIPT"] = 4] = "INTERIM_TRANSCRIPT";
37
+ return HumanInputEvent2;
38
+ })(HumanInputEvent || {});
39
+ class HumanInput extends import_node_events.EventEmitter {
40
+ #closed = false;
41
+ #room;
42
+ #vad;
43
+ #stt;
44
+ #participant;
45
+ #subscribedTrack;
46
+ #recognizeTask;
47
+ #speaking = false;
48
+ #speechProbability = 0;
49
+ #logger = (0, import_log.log)();
50
+ constructor(room, vad, stt, participant) {
51
+ super();
52
+ this.#room = room;
53
+ this.#vad = vad;
54
+ this.#stt = stt;
55
+ this.#participant = participant;
56
+ this.#room.on(import_rtc_node.RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));
57
+ this.#room.on(import_rtc_node.RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
58
+ this.#subscribeToMicrophone();
59
+ }
60
+ #subscribeToMicrophone() {
61
+ if (!this.#participant) {
62
+ this.#logger.error("Participant is not set");
63
+ return;
64
+ }
65
+ let microphonePublication = void 0;
66
+ for (const publication of this.#participant.trackPublications.values()) {
67
+ if (publication.source === import_rtc_node.TrackSource.SOURCE_MICROPHONE) {
68
+ microphonePublication = publication;
69
+ break;
70
+ }
71
+ }
72
+ if (!microphonePublication) {
73
+ return;
74
+ }
75
+ if (!microphonePublication.subscribed) {
76
+ microphonePublication.setSubscribed(true);
77
+ }
78
+ const track = microphonePublication.track;
79
+ if (track && track !== this.#subscribedTrack) {
80
+ this.#subscribedTrack = track;
81
+ if (this.#recognizeTask) {
82
+ this.#recognizeTask.cancel();
83
+ }
84
+ const audioStream = new import_rtc_node.AudioStream(track, 16e3);
85
+ this.#recognizeTask = new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
86
+ let cancelled = false;
87
+ onCancel(() => {
88
+ cancelled = true;
89
+ });
90
+ const sttStream = this.#stt.stream();
91
+ const vadStream = this.#vad.stream();
92
+ const audioStreamCo = async () => {
93
+ for await (const ev of audioStream) {
94
+ if (cancelled) return;
95
+ sttStream.pushFrame(ev);
96
+ vadStream.pushFrame(ev);
97
+ }
98
+ };
99
+ const vadStreamCo = async () => {
100
+ for await (const ev of vadStream) {
101
+ if (cancelled) return;
102
+ switch (ev.type) {
103
+ case import_vad.VADEventType.START_OF_SPEECH:
104
+ this.#speaking = true;
105
+ this.emit(0 /* START_OF_SPEECH */, ev);
106
+ break;
107
+ case import_vad.VADEventType.INFERENCE_DONE:
108
+ this.#speechProbability = ev.probability;
109
+ this.emit(1 /* VAD_INFERENCE_DONE */, ev);
110
+ break;
111
+ case import_vad.VADEventType.END_OF_SPEECH:
112
+ this.#speaking = false;
113
+ this.emit(2 /* END_OF_SPEECH */, ev);
114
+ break;
115
+ }
116
+ }
117
+ };
118
+ const sttStreamCo = async () => {
119
+ for await (const ev of sttStream) {
120
+ if (cancelled) return;
121
+ if (ev.type === import_stt.SpeechEventType.FINAL_TRANSCRIPT) {
122
+ this.emit(3 /* FINAL_TRANSCRIPT */, ev);
123
+ } else if (ev.type == import_stt.SpeechEventType.INTERIM_TRANSCRIPT) {
124
+ this.emit(4 /* INTERIM_TRANSCRIPT */, ev);
125
+ }
126
+ }
127
+ };
128
+ await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);
129
+ sttStream.close();
130
+ vadStream.close();
131
+ resolve();
132
+ });
133
+ }
134
+ }
135
+ get speaking() {
136
+ return this.#speaking;
137
+ }
138
+ get speakingProbability() {
139
+ return this.#speechProbability;
140
+ }
141
+ async close() {
142
+ if (this.#closed) {
143
+ throw new Error("HumanInput already closed");
144
+ }
145
+ this.#closed = true;
146
+ this.#room.removeAllListeners();
147
+ this.#speaking = false;
148
+ if (this.#recognizeTask) {
149
+ await (0, import_utils.gracefullyCancel)(this.#recognizeTask);
150
+ }
151
+ }
152
+ }
153
+ // Annotate the CommonJS export names for ESM import in node:
154
+ 0 && (module.exports = {
155
+ HumanInput,
156
+ HumanInputEvent
157
+ });
158
+ //# sourceMappingURL=human_input.cjs.map