@livekit/agents 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +6 -4
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +6 -4
- package/dist/cli.js.map +1 -1
- package/dist/http_server.cjs +4 -1
- package/dist/http_server.cjs.map +1 -1
- package/dist/http_server.d.ts +7 -1
- package/dist/http_server.d.ts.map +1 -1
- package/dist/http_server.js +4 -1
- package/dist/http_server.js.map +1 -1
- package/dist/index.cjs +14 -14
- package/dist/index.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.cjs +3 -1
- package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.js +3 -1
- package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +4 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +4 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/multimodal/agent_playout.cjs +9 -6
- package/dist/multimodal/agent_playout.cjs.map +1 -1
- package/dist/multimodal/agent_playout.d.ts.map +1 -1
- package/dist/multimodal/agent_playout.js +9 -6
- package/dist/multimodal/agent_playout.js.map +1 -1
- package/dist/multimodal/multimodal_agent.cjs +10 -2
- package/dist/multimodal/multimodal_agent.cjs.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts +3 -2
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +10 -2
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/human_input.cjs +9 -2
- package/dist/pipeline/human_input.cjs.map +1 -1
- package/dist/pipeline/human_input.d.ts +2 -2
- package/dist/pipeline/human_input.d.ts.map +1 -1
- package/dist/pipeline/human_input.js +9 -2
- package/dist/pipeline/human_input.js.map +1 -1
- package/dist/pipeline/pipeline_agent.cjs +7 -1
- package/dist/pipeline/pipeline_agent.cjs.map +1 -1
- package/dist/pipeline/pipeline_agent.d.ts +3 -1
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
- package/dist/pipeline/pipeline_agent.js +7 -1
- package/dist/pipeline/pipeline_agent.js.map +1 -1
- package/dist/worker.cjs +21 -9
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +21 -9
- package/dist/worker.js.map +1 -1
- package/package.json +4 -4
- package/src/cli.ts +6 -4
- package/src/http_server.ts +10 -1
- package/src/ipc/inference_proc_lazy_main.ts +7 -3
- package/src/ipc/job_proc_lazy_main.ts +8 -3
- package/src/multimodal/agent_playout.ts +10 -7
- package/src/multimodal/multimodal_agent.ts +11 -3
- package/src/pipeline/human_input.ts +17 -3
- package/src/pipeline/pipeline_agent.ts +15 -2
- package/src/worker.ts +23 -9
- package/dist/llm/function_context.test.d.ts +0 -2
- package/dist/llm/function_context.test.d.ts.map +0 -1
- package/dist/tokenize/tokenizer.test.d.ts +0 -2
- package/dist/tokenize/tokenizer.test.d.ts.map +0 -1
|
@@ -23,6 +23,7 @@ __export(agent_playout_exports, {
|
|
|
23
23
|
proto: () => proto
|
|
24
24
|
});
|
|
25
25
|
module.exports = __toCommonJS(agent_playout_exports);
|
|
26
|
+
var import_rtc_node = require("@livekit/rtc-node");
|
|
26
27
|
var import_node_events = require("node:events");
|
|
27
28
|
var import_audio = require("../audio.cjs");
|
|
28
29
|
var import_utils = require("../utils.cjs");
|
|
@@ -142,7 +143,9 @@ class AgentPlayout extends import_node_events.EventEmitter {
|
|
|
142
143
|
}
|
|
143
144
|
handle.synchronizer.pushText(text);
|
|
144
145
|
}
|
|
145
|
-
|
|
146
|
+
if (!cancelled) {
|
|
147
|
+
handle.synchronizer.markTextSegmentEnd();
|
|
148
|
+
}
|
|
146
149
|
resolveText();
|
|
147
150
|
} catch (error) {
|
|
148
151
|
rejectText(error);
|
|
@@ -199,19 +202,19 @@ class AgentPlayout extends import_node_events.EventEmitter {
|
|
|
199
202
|
if (!captureTask.isCancelled) {
|
|
200
203
|
await (0, import_utils.gracefullyCancel)(captureTask);
|
|
201
204
|
}
|
|
205
|
+
if (!readTextTask.isCancelled) {
|
|
206
|
+
await (0, import_utils.gracefullyCancel)(readTextTask);
|
|
207
|
+
}
|
|
202
208
|
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
|
|
203
209
|
if (handle.interrupted || captureTask.error) {
|
|
204
|
-
await handle.synchronizer.close(true);
|
|
205
210
|
this.#audioSource.clearQueue();
|
|
206
211
|
}
|
|
207
|
-
if (!readTextTask.isCancelled) {
|
|
208
|
-
await (0, import_utils.gracefullyCancel)(readTextTask);
|
|
209
|
-
}
|
|
210
212
|
if (!firstFrame) {
|
|
211
213
|
this.emit("playout_stopped", handle.interrupted);
|
|
212
214
|
}
|
|
213
215
|
handle.doneFut.resolve();
|
|
214
|
-
|
|
216
|
+
const isInterrupted = handle.interrupted || !!captureTask.error;
|
|
217
|
+
await handle.synchronizer.close(isInterrupted);
|
|
215
218
|
}
|
|
216
219
|
resolve();
|
|
217
220
|
} catch (error) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/multimodal/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type AudioSource } from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\n\nexport const proto = {};\n\nexport class PlayoutHandle extends EventEmitter {\n #audioSource: AudioSource;\n #sampleRate: number;\n #itemId: string;\n #contentIndex: number;\n /** @internal */\n synchronizer: TextAudioSynchronizer;\n /** @internal */\n doneFut: Future;\n /** @internal */\n intFut: Future;\n /** @internal */\n #interrupted: boolean;\n /** @internal */\n pushedDuration: number;\n /** @internal */\n totalPlayedTime: number | undefined; // Set when playout is done\n\n constructor(\n audioSource: AudioSource,\n sampleRate: number,\n itemId: string,\n contentIndex: number,\n synchronizer: TextAudioSynchronizer,\n ) {\n super();\n this.#audioSource = audioSource;\n this.#sampleRate = sampleRate;\n this.#itemId = itemId;\n this.#contentIndex = contentIndex;\n this.synchronizer = synchronizer;\n this.doneFut = new Future();\n this.intFut = new Future();\n this.#interrupted = false;\n this.pushedDuration = 0;\n this.totalPlayedTime = undefined;\n }\n\n get itemId(): string {\n return this.#itemId;\n }\n\n get audioSamples(): number {\n if (this.totalPlayedTime !== undefined) {\n return Math.floor(this.totalPlayedTime * this.#sampleRate);\n }\n\n return Math.max(\n 0,\n Math.floor(\n (this.pushedDuration - this.#audioSource.queuedDuration) * (this.#sampleRate / 1000),\n ),\n );\n }\n\n get textChars(): number {\n return this.synchronizer.playedText.length;\n }\n\n get contentIndex(): number {\n return this.#contentIndex;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.doneFut.done) return;\n this.intFut.resolve();\n this.#interrupted = true;\n }\n}\n\nexport class AgentPlayout extends EventEmitter {\n #audioSource: AudioSource;\n #playoutTask: CancellablePromise<void> | null;\n #sampleRate: number;\n #numChannels: number;\n #inFrameSize: number;\n #outFrameSize: number;\n constructor(\n audioSource: AudioSource,\n sampleRate: number,\n numChannels: number,\n inFrameSize: number,\n outFrameSize: number,\n ) {\n super();\n this.#audioSource = audioSource;\n this.#playoutTask = null;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n this.#inFrameSize = inFrameSize;\n this.#outFrameSize = outFrameSize;\n }\n\n play(\n itemId: string,\n contentIndex: number,\n synchronizer: TextAudioSynchronizer,\n textStream: AsyncIterableQueue<string>,\n audioStream: AsyncIterableQueue<AudioFrame>,\n ): PlayoutHandle {\n const handle = new PlayoutHandle(\n this.#audioSource,\n this.#sampleRate,\n itemId,\n contentIndex,\n synchronizer,\n );\n this.#playoutTask = this.#makePlayoutTask(this.#playoutTask, handle, textStream, audioStream);\n return handle;\n }\n\n #makePlayoutTask(\n oldTask: CancellablePromise<void> | null,\n handle: PlayoutHandle,\n textStream: AsyncIterableQueue<string>,\n audioStream: AsyncIterableQueue<AudioFrame>,\n ): CancellablePromise<void> {\n return new CancellablePromise<void>((resolve, reject, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n (async () => {\n try {\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n let firstFrame = true;\n\n const readText = () =>\n new CancellablePromise<void>((resolveText, rejectText, onCancelText) => {\n let cancelledText = false;\n onCancelText(() => {\n cancelledText = true;\n });\n\n (async () => {\n try {\n for await (const text of textStream) {\n if (cancelledText || cancelled) {\n break;\n }\n handle.synchronizer.pushText(text);\n }\n handle.synchronizer.markTextSegmentEnd();\n resolveText();\n } catch (error) {\n rejectText(error);\n }\n })();\n });\n\n const capture = () =>\n new CancellablePromise<void>((resolveCapture, rejectCapture, onCancelCapture) => {\n let cancelledCapture = false;\n onCancelCapture(() => {\n cancelledCapture = true;\n });\n\n (async () => {\n try {\n const samplesPerChannel = this.#outFrameSize;\n const bstream = new AudioByteStream(\n this.#sampleRate,\n this.#numChannels,\n samplesPerChannel,\n );\n\n for await (const frame of audioStream) {\n if (cancelledCapture || cancelled) {\n break;\n }\n if (firstFrame) {\n handle.synchronizer.segmentPlayoutStarted();\n this.emit('playout_started');\n firstFrame = false;\n }\n\n handle.synchronizer.pushAudio(frame);\n\n for (const f of bstream.write(frame.data.buffer)) {\n handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;\n await this.#audioSource.captureFrame(f);\n }\n }\n\n if (!cancelledCapture && !cancelled) {\n for (const f of bstream.flush()) {\n handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;\n await this.#audioSource.captureFrame(f);\n }\n\n handle.synchronizer.markAudioSegmentEnd();\n\n await this.#audioSource.waitForPlayout();\n }\n\n resolveCapture();\n } catch (error) {\n rejectCapture(error);\n }\n })();\n });\n\n const readTextTask = readText();\n const captureTask = capture();\n\n try {\n await Promise.race([captureTask, handle.intFut.await]);\n } finally {\n if (!captureTask.isCancelled) {\n await gracefullyCancel(captureTask);\n }\n\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n await handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!readTextTask.isCancelled) {\n await gracefullyCancel(readTextTask);\n }\n\n if (!firstFrame) {\n this.emit('playout_stopped', handle.interrupted);\n }\n\n handle.doneFut.resolve();\n await handle.synchronizer.close(false);\n }\n\n resolve();\n } catch (error) {\n reject(error);\n }\n })();\n });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAC7B,mBAAgC;AAEhC,mBAAsF;AAE/E,MAAM,QAAQ,CAAC;AAEf,MAAM,sBAAsB,gCAAa;AAAA,EAC9C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA,YACE,aACA,YACA,QACA,cACA,cACA;AACA,UAAM;AACN,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,SAAK,UAAU;AACf,SAAK,gBAAgB;AACrB,SAAK,eAAe;AACpB,SAAK,UAAU,IAAI,oBAAO;AAC1B,SAAK,SAAS,IAAI,oBAAO;AACzB,SAAK,eAAe;AACpB,SAAK,iBAAiB;AACtB,SAAK,kBAAkB;AAAA,EACzB;AAAA,EAEA,IAAI,SAAiB;AACnB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,eAAuB;AACzB,QAAI,KAAK,oBAAoB,QAAW;AACtC,aAAO,KAAK,MAAM,KAAK,kBAAkB,KAAK,WAAW;AAAA,IAC3D;AAEA,WAAO,KAAK;AAAA,MACV;AAAA,MACA,KAAK;AAAA,SACF,KAAK,iBAAiB,KAAK,aAAa,mBAAmB,KAAK,cAAc;AAAA,MACjF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,YAAoB;AACtB,WAAO,KAAK,aAAa,WAAW;AAAA,EACtC;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,QAAQ,KAAM;AACvB,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AACF;AAEO,MAAM,qBAAqB,gCAAa;AAAA,EAC7C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YACE,aACA,YACA,aACA,aACA,cACA;AACA,UAAM;AACN,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,QACA,cACA,cACA,YACA,aACe;AACf,UAAM,SAAS,IAAI;AAAA,MACjB,KAAK;AAAA,MACL,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,SAAK,eAAe,KAAK,iBAAiB,KAAK,cAAc,QAAQ,YAAY,WAAW;AAC5F,WAAO;AAAA,EACT;AAAA,EAEA,iBACE,SACA,QACA,YACA,aAC0B;AAC1B,WAAO,IAAI,gCAAyB,CAAC,SAAS,QAAQ,aAAa;AACjE,UAAI,YAAY;AAChB,eAAS,MAAM;AACb,oBAAY;AAAA,MACd,CAAC;AAED,OAAC,YAAY;AACX,YAAI;AACF,cAAI,SAAS;AACX,sBAAM,+BAAiB,OAAO;AAAA,UAChC;AAEA,cAAI,aAAa;AAEjB,gBAAM,WAAW,MACf,IAAI,gCAAyB,CAAC,aAAa,YAAY,iBAAiB;AACtE,gBAAI,gBAAgB;AACpB,yBAAa,MAAM;AACjB,8BAAgB;AAAA,YAClB,CAAC;AAED,aAAC,YAAY;AACX,kBAAI;AACF,iCAAiB,QAAQ,YAAY;AACnC,sBAAI,iBAAiB,WAAW;AAC9B;AAAA,kBACF;AACA,yBAAO,aAAa,SAAS,IAAI;AAAA,gBACnC;AACA,uBAAO,aAAa,mBAAmB;AACvC,4BAAY;AAAA,cACd,SAAS,OAAO;AACd,2BAAW,KAAK;AAAA,cAClB;AAAA,YACF,GAAG;AAAA,UACL,CAAC;AAEH,gBAAM,UAAU,MACd,IAAI,gCAAyB,CAAC,gBAAgB,eAAe,oBAAoB;AAC/E,gBAAI,mBAAmB;AACvB,4BAAgB,MAAM;AACpB,iCAAmB;AAAA,YACrB,CAAC;AAED,aAAC,YAAY;AACX,kBAAI;AACF,sBAAM,oBAAoB,KAAK;AAC/B,sBAAM,UAAU,IAAI;AAAA,kBAClB,KAAK;AAAA,kBACL,KAAK;AAAA,kBACL;AAAA,gBACF;AAEA,iCAAiB,SAAS,aAAa;AACrC,sBAAI,oBAAoB,WAAW;AACjC;AAAA,kBACF;AACA,sBAAI,YAAY;AACd,2BAAO,aAAa,sBAAsB;AAC1C,yBAAK,KAAK,iBAAiB;AAC3B,iCAAa;AAAA,kBACf;AAEA,yBAAO,aAAa,UAAU,KAAK;AAEnC,6BAAW,KAAK,QAAQ,MAAM,MAAM,KAAK,MAAM,GAAG;AAChD,2BAAO,kBAAmB,EAAE,oBAAoB,EAAE,aAAc;AAChE,0BAAM,KAAK,aAAa,aAAa,CAAC;AAAA,kBACxC;AAAA,gBACF;AAEA,oBAAI,CAAC,oBAAoB,CAAC,WAAW;AACnC,6BAAW,KAAK,QAAQ,MAAM,GAAG;AAC/B,2BAAO,kBAAmB,EAAE,oBAAoB,EAAE,aAAc;AAChE,0BAAM,KAAK,aAAa,aAAa,CAAC;AAAA,kBACxC;AAEA,yBAAO,aAAa,oBAAoB;AAExC,wBAAM,KAAK,aAAa,eAAe;AAAA,gBACzC;AAEA,+BAAe;AAAA,cACjB,SAAS,OAAO;AACd,8BAAc,KAAK;AAAA,cACrB;AAAA,YACF,GAAG;AAAA,UACL,CAAC;AAEH,gBAAM,eAAe,SAAS;AAC9B,gBAAM,cAAc,QAAQ;AAE5B,cAAI;AACF,kBAAM,QAAQ,KAAK,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,UACvD,UAAE;AACA,gBAAI,CAAC,YAAY,aAAa;AAC5B,wBAAM,+BAAiB,WAAW;AAAA,YACpC;AAEA,mBAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,gBAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,oBAAM,OAAO,aAAa,MAAM,IAAI;AACpC,mBAAK,aAAa,WAAW;AAAA,YAC/B;AAEA,gBAAI,CAAC,aAAa,aAAa;AAC7B,wBAAM,+BAAiB,YAAY;AAAA,YACrC;AAEA,gBAAI,CAAC,YAAY;AACf,mBAAK,KAAK,mBAAmB,OAAO,WAAW;AAAA,YACjD;AAEA,mBAAO,QAAQ,QAAQ;AACvB,kBAAM,OAAO,aAAa,MAAM,KAAK;AAAA,UACvC;AAEA,kBAAQ;AAAA,QACV,SAAS,OAAO;AACd,iBAAO,KAAK;AAAA,QACd;AAAA,MACF,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/multimodal/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type AudioSource } from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\n\nexport const proto = {};\n\nexport class PlayoutHandle extends EventEmitter {\n #audioSource: AudioSource;\n #sampleRate: number;\n #itemId: string;\n #contentIndex: number;\n /** @internal */\n synchronizer: TextAudioSynchronizer;\n /** @internal */\n doneFut: Future;\n /** @internal */\n intFut: Future;\n /** @internal */\n #interrupted: boolean;\n /** @internal */\n pushedDuration: number;\n /** @internal */\n totalPlayedTime: number | undefined; // Set when playout is done\n\n constructor(\n audioSource: AudioSource,\n sampleRate: number,\n itemId: string,\n contentIndex: number,\n synchronizer: TextAudioSynchronizer,\n ) {\n super();\n this.#audioSource = audioSource;\n this.#sampleRate = sampleRate;\n this.#itemId = itemId;\n this.#contentIndex = contentIndex;\n this.synchronizer = synchronizer;\n this.doneFut = new Future();\n this.intFut = new Future();\n this.#interrupted = false;\n this.pushedDuration = 0;\n this.totalPlayedTime = undefined;\n }\n\n get itemId(): string {\n return this.#itemId;\n }\n\n get audioSamples(): number {\n if (this.totalPlayedTime !== undefined) {\n return Math.floor(this.totalPlayedTime * this.#sampleRate);\n }\n\n return Math.max(\n 0,\n Math.floor(\n (this.pushedDuration - this.#audioSource.queuedDuration) * (this.#sampleRate / 1000),\n ),\n );\n }\n\n get textChars(): number {\n return this.synchronizer.playedText.length;\n }\n\n get contentIndex(): number {\n return this.#contentIndex;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.doneFut.done) return;\n this.intFut.resolve();\n this.#interrupted = true;\n }\n}\n\nexport class AgentPlayout extends EventEmitter {\n #audioSource: AudioSource;\n #playoutTask: CancellablePromise<void> | null;\n #sampleRate: number;\n #numChannels: number;\n #inFrameSize: number;\n #outFrameSize: number;\n constructor(\n audioSource: AudioSource,\n sampleRate: number,\n numChannels: number,\n inFrameSize: number,\n outFrameSize: number,\n ) {\n super();\n this.#audioSource = audioSource;\n this.#playoutTask = null;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n this.#inFrameSize = inFrameSize;\n this.#outFrameSize = outFrameSize;\n }\n\n play(\n itemId: string,\n contentIndex: number,\n synchronizer: TextAudioSynchronizer,\n textStream: AsyncIterableQueue<string>,\n audioStream: AsyncIterableQueue<AudioFrame>,\n ): PlayoutHandle {\n const handle = new PlayoutHandle(\n this.#audioSource,\n this.#sampleRate,\n itemId,\n contentIndex,\n synchronizer,\n );\n this.#playoutTask = this.#makePlayoutTask(this.#playoutTask, handle, textStream, audioStream);\n return handle;\n }\n\n #makePlayoutTask(\n oldTask: CancellablePromise<void> | null,\n handle: PlayoutHandle,\n textStream: AsyncIterableQueue<string>,\n audioStream: AsyncIterableQueue<AudioFrame>,\n ): CancellablePromise<void> {\n return new CancellablePromise<void>((resolve, reject, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n (async () => {\n try {\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n let firstFrame = true;\n\n const readText = () =>\n new CancellablePromise<void>((resolveText, rejectText, onCancelText) => {\n let cancelledText = false;\n onCancelText(() => {\n cancelledText = true;\n });\n\n (async () => {\n try {\n for await (const text of textStream) {\n if (cancelledText || cancelled) {\n break;\n }\n handle.synchronizer.pushText(text);\n }\n if (!cancelled) {\n handle.synchronizer.markTextSegmentEnd();\n }\n resolveText();\n } catch (error) {\n rejectText(error);\n }\n })();\n });\n\n const capture = () =>\n new CancellablePromise<void>((resolveCapture, rejectCapture, onCancelCapture) => {\n let cancelledCapture = false;\n onCancelCapture(() => {\n cancelledCapture = true;\n });\n\n (async () => {\n try {\n const samplesPerChannel = this.#outFrameSize;\n const bstream = new AudioByteStream(\n this.#sampleRate,\n this.#numChannels,\n samplesPerChannel,\n );\n\n for await (const frame of audioStream) {\n if (cancelledCapture || cancelled) {\n break;\n }\n if (firstFrame) {\n handle.synchronizer.segmentPlayoutStarted();\n this.emit('playout_started');\n firstFrame = false;\n }\n\n handle.synchronizer.pushAudio(frame);\n\n for (const f of bstream.write(frame.data.buffer)) {\n handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;\n await this.#audioSource.captureFrame(f);\n }\n }\n\n if (!cancelledCapture && !cancelled) {\n for (const f of bstream.flush()) {\n handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;\n await this.#audioSource.captureFrame(f);\n }\n\n handle.synchronizer.markAudioSegmentEnd();\n\n await this.#audioSource.waitForPlayout();\n }\n\n resolveCapture();\n } catch (error) {\n rejectCapture(error);\n }\n })();\n });\n\n const readTextTask = readText();\n const captureTask = capture();\n\n try {\n await Promise.race([captureTask, handle.intFut.await]);\n } finally {\n if (!captureTask.isCancelled) {\n await gracefullyCancel(captureTask);\n }\n\n if (!readTextTask.isCancelled) {\n await gracefullyCancel(readTextTask);\n }\n\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit('playout_stopped', handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n const isInterrupted = handle.interrupted || !!captureTask.error;\n await handle.synchronizer.close(isInterrupted);\n }\n\n resolve();\n } catch (error) {\n reject(error);\n }\n })();\n });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,sBAAiC;AACjC,yBAA6B;AAC7B,mBAAgC;AAEhC,mBAAsF;AAE/E,MAAM,QAAQ,CAAC;AAEf,MAAM,sBAAsB,gCAAa;AAAA,EAC9C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA,YACE,aACA,YACA,QACA,cACA,cACA;AACA,UAAM;AACN,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,SAAK,UAAU;AACf,SAAK,gBAAgB;AACrB,SAAK,eAAe;AACpB,SAAK,UAAU,IAAI,oBAAO;AAC1B,SAAK,SAAS,IAAI,oBAAO;AACzB,SAAK,eAAe;AACpB,SAAK,iBAAiB;AACtB,SAAK,kBAAkB;AAAA,EACzB;AAAA,EAEA,IAAI,SAAiB;AACnB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,eAAuB;AACzB,QAAI,KAAK,oBAAoB,QAAW;AACtC,aAAO,KAAK,MAAM,KAAK,kBAAkB,KAAK,WAAW;AAAA,IAC3D;AAEA,WAAO,KAAK;AAAA,MACV;AAAA,MACA,KAAK;AAAA,SACF,KAAK,iBAAiB,KAAK,aAAa,mBAAmB,KAAK,cAAc;AAAA,MACjF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,YAAoB;AACtB,WAAO,KAAK,aAAa,WAAW;AAAA,EACtC;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,QAAQ,KAAM;AACvB,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AACF;AAEO,MAAM,qBAAqB,gCAAa;AAAA,EAC7C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YACE,aACA,YACA,aACA,aACA,cACA;AACA,UAAM;AACN,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,QACA,cACA,cACA,YACA,aACe;AACf,UAAM,SAAS,IAAI;AAAA,MACjB,KAAK;AAAA,MACL,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,SAAK,eAAe,KAAK,iBAAiB,KAAK,cAAc,QAAQ,YAAY,WAAW;AAC5F,WAAO;AAAA,EACT;AAAA,EAEA,iBACE,SACA,QACA,YACA,aAC0B;AAC1B,WAAO,IAAI,gCAAyB,CAAC,SAAS,QAAQ,aAAa;AACjE,UAAI,YAAY;AAChB,eAAS,MAAM;AACb,oBAAY;AAAA,MACd,CAAC;AAED,OAAC,YAAY;AACX,YAAI;AACF,cAAI,SAAS;AACX,sBAAM,+BAAiB,OAAO;AAAA,UAChC;AAEA,cAAI,aAAa;AAEjB,gBAAM,WAAW,MACf,IAAI,gCAAyB,CAAC,aAAa,YAAY,iBAAiB;AACtE,gBAAI,gBAAgB;AACpB,yBAAa,MAAM;AACjB,8BAAgB;AAAA,YAClB,CAAC;AAED,aAAC,YAAY;AACX,kBAAI;AACF,iCAAiB,QAAQ,YAAY;AACnC,sBAAI,iBAAiB,WAAW;AAC9B;AAAA,kBACF;AACA,yBAAO,aAAa,SAAS,IAAI;AAAA,gBACnC;AACA,oBAAI,CAAC,WAAW;AACd,yBAAO,aAAa,mBAAmB;AAAA,gBACzC;AACA,4BAAY;AAAA,cACd,SAAS,OAAO;AACd,2BAAW,KAAK;AAAA,cAClB;AAAA,YACF,GAAG;AAAA,UACL,CAAC;AAEH,gBAAM,UAAU,MACd,IAAI,gCAAyB,CAAC,gBAAgB,eAAe,oBAAoB;AAC/E,gBAAI,mBAAmB;AACvB,4BAAgB,MAAM;AACpB,iCAAmB;AAAA,YACrB,CAAC;AAED,aAAC,YAAY;AACX,kBAAI;AACF,sBAAM,oBAAoB,KAAK;AAC/B,sBAAM,UAAU,IAAI;AAAA,kBAClB,KAAK;AAAA,kBACL,KAAK;AAAA,kBACL;AAAA,gBACF;AAEA,iCAAiB,SAAS,aAAa;AACrC,sBAAI,oBAAoB,WAAW;AACjC;AAAA,kBACF;AACA,sBAAI,YAAY;AACd,2BAAO,aAAa,sBAAsB;AAC1C,yBAAK,KAAK,iBAAiB;AAC3B,iCAAa;AAAA,kBACf;AAEA,yBAAO,aAAa,UAAU,KAAK;AAEnC,6BAAW,KAAK,QAAQ,MAAM,MAAM,KAAK,MAAM,GAAG;AAChD,2BAAO,kBAAmB,EAAE,oBAAoB,EAAE,aAAc;AAChE,0BAAM,KAAK,aAAa,aAAa,CAAC;AAAA,kBACxC;AAAA,gBACF;AAEA,oBAAI,CAAC,oBAAoB,CAAC,WAAW;AACnC,6BAAW,KAAK,QAAQ,MAAM,GAAG;AAC/B,2BAAO,kBAAmB,EAAE,oBAAoB,EAAE,aAAc;AAChE,0BAAM,KAAK,aAAa,aAAa,CAAC;AAAA,kBACxC;AAEA,yBAAO,aAAa,oBAAoB;AAExC,wBAAM,KAAK,aAAa,eAAe;AAAA,gBACzC;AAEA,+BAAe;AAAA,cACjB,SAAS,OAAO;AACd,8BAAc,KAAK;AAAA,cACrB;AAAA,YACF,GAAG;AAAA,UACL,CAAC;AAEH,gBAAM,eAAe,SAAS;AAC9B,gBAAM,cAAc,QAAQ;AAE5B,cAAI;AACF,kBAAM,QAAQ,KAAK,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,UACvD,UAAE;AACA,gBAAI,CAAC,YAAY,aAAa;AAC5B,wBAAM,+BAAiB,WAAW;AAAA,YACpC;AAEA,gBAAI,CAAC,aAAa,aAAa;AAC7B,wBAAM,+BAAiB,YAAY;AAAA,YACrC;AAEA,mBAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,gBAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,mBAAK,aAAa,WAAW;AAAA,YAC/B;AAEA,gBAAI,CAAC,YAAY;AACf,mBAAK,KAAK,mBAAmB,OAAO,WAAW;AAAA,YACjD;AAEA,mBAAO,QAAQ,QAAQ;AAEvB,kBAAM,gBAAgB,OAAO,eAAe,CAAC,CAAC,YAAY;AAC1D,kBAAM,OAAO,aAAa,MAAM,aAAa;AAAA,UAC/C;AAEA,kBAAQ;AAAA,QACV,SAAS,OAAO;AACd,iBAAO,KAAK;AAAA,QACd;AAAA,MACF,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AACF;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/multimodal/agent_playout.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAE,KAAK,kBAAkB,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAEpG,eAAO,MAAM,KAAK,IAAK,CAAC;AAExB,qBAAa,aAAc,SAAQ,YAAY;;IAK7C,gBAAgB;IAChB,YAAY,EAAE,qBAAqB,CAAC;IACpC,gBAAgB;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB;IAChB,MAAM,EAAE,MAAM,CAAC;IAGf,gBAAgB;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB;IAChB,eAAe,EAAE,MAAM,GAAG,SAAS,CAAC;gBAGlC,WAAW,EAAE,WAAW,EACxB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,qBAAqB;IAerC,IAAI,MAAM,IAAI,MAAM,CAEnB;IAED,IAAI,YAAY,IAAI,MAAM,CAWzB;IAED,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;CAKV;AAED,qBAAa,YAAa,SAAQ,YAAY;;gBAQ1C,WAAW,EAAE,WAAW,EACxB,UAAU,EAAE,MAAM,EAClB,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,YAAY,EAAE,MAAM;IAWtB,IAAI,CACF,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,qBAAqB,EACnC,UAAU,EAAE,kBAAkB,CAAC,MAAM,CAAC,EACtC,WAAW,EAAE,kBAAkB,CAAC,UAAU,CAAC,GAC1C,aAAa;
|
|
1
|
+
{"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/multimodal/agent_playout.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAE,KAAK,kBAAkB,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAEpG,eAAO,MAAM,KAAK,IAAK,CAAC;AAExB,qBAAa,aAAc,SAAQ,YAAY;;IAK7C,gBAAgB;IAChB,YAAY,EAAE,qBAAqB,CAAC;IACpC,gBAAgB;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,gBAAgB;IAChB,MAAM,EAAE,MAAM,CAAC;IAGf,gBAAgB;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB;IAChB,eAAe,EAAE,MAAM,GAAG,SAAS,CAAC;gBAGlC,WAAW,EAAE,WAAW,EACxB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,qBAAqB;IAerC,IAAI,MAAM,IAAI,MAAM,CAEnB;IAED,IAAI,YAAY,IAAI,MAAM,CAWzB;IAED,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;CAKV;AAED,qBAAa,YAAa,SAAQ,YAAY;;gBAQ1C,WAAW,EAAE,WAAW,EACxB,UAAU,EAAE,MAAM,EAClB,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,EACnB,YAAY,EAAE,MAAM;IAWtB,IAAI,CACF,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,qBAAqB,EACnC,UAAU,EAAE,kBAAkB,CAAC,MAAM,CAAC,EACtC,WAAW,EAAE,kBAAkB,CAAC,UAAU,CAAC,GAC1C,aAAa;CAkJjB"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import {} from "@livekit/rtc-node";
|
|
1
2
|
import { EventEmitter } from "node:events";
|
|
2
3
|
import { AudioByteStream } from "../audio.js";
|
|
3
4
|
import { CancellablePromise, Future, gracefullyCancel } from "../utils.js";
|
|
@@ -117,7 +118,9 @@ class AgentPlayout extends EventEmitter {
|
|
|
117
118
|
}
|
|
118
119
|
handle.synchronizer.pushText(text);
|
|
119
120
|
}
|
|
120
|
-
|
|
121
|
+
if (!cancelled) {
|
|
122
|
+
handle.synchronizer.markTextSegmentEnd();
|
|
123
|
+
}
|
|
121
124
|
resolveText();
|
|
122
125
|
} catch (error) {
|
|
123
126
|
rejectText(error);
|
|
@@ -174,19 +177,19 @@ class AgentPlayout extends EventEmitter {
|
|
|
174
177
|
if (!captureTask.isCancelled) {
|
|
175
178
|
await gracefullyCancel(captureTask);
|
|
176
179
|
}
|
|
180
|
+
if (!readTextTask.isCancelled) {
|
|
181
|
+
await gracefullyCancel(readTextTask);
|
|
182
|
+
}
|
|
177
183
|
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
|
|
178
184
|
if (handle.interrupted || captureTask.error) {
|
|
179
|
-
await handle.synchronizer.close(true);
|
|
180
185
|
this.#audioSource.clearQueue();
|
|
181
186
|
}
|
|
182
|
-
if (!readTextTask.isCancelled) {
|
|
183
|
-
await gracefullyCancel(readTextTask);
|
|
184
|
-
}
|
|
185
187
|
if (!firstFrame) {
|
|
186
188
|
this.emit("playout_stopped", handle.interrupted);
|
|
187
189
|
}
|
|
188
190
|
handle.doneFut.resolve();
|
|
189
|
-
|
|
191
|
+
const isInterrupted = handle.interrupted || !!captureTask.error;
|
|
192
|
+
await handle.synchronizer.close(isInterrupted);
|
|
190
193
|
}
|
|
191
194
|
resolve();
|
|
192
195
|
} catch (error) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/multimodal/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type AudioSource } from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\n\nexport const proto = {};\n\nexport class PlayoutHandle extends EventEmitter {\n #audioSource: AudioSource;\n #sampleRate: number;\n #itemId: string;\n #contentIndex: number;\n /** @internal */\n synchronizer: TextAudioSynchronizer;\n /** @internal */\n doneFut: Future;\n /** @internal */\n intFut: Future;\n /** @internal */\n #interrupted: boolean;\n /** @internal */\n pushedDuration: number;\n /** @internal */\n totalPlayedTime: number | undefined; // Set when playout is done\n\n constructor(\n audioSource: AudioSource,\n sampleRate: number,\n itemId: string,\n contentIndex: number,\n synchronizer: TextAudioSynchronizer,\n ) {\n super();\n this.#audioSource = audioSource;\n this.#sampleRate = sampleRate;\n this.#itemId = itemId;\n this.#contentIndex = contentIndex;\n this.synchronizer = synchronizer;\n this.doneFut = new Future();\n this.intFut = new Future();\n this.#interrupted = false;\n this.pushedDuration = 0;\n this.totalPlayedTime = undefined;\n }\n\n get itemId(): string {\n return this.#itemId;\n }\n\n get audioSamples(): number {\n if (this.totalPlayedTime !== undefined) {\n return Math.floor(this.totalPlayedTime * this.#sampleRate);\n }\n\n return Math.max(\n 0,\n Math.floor(\n (this.pushedDuration - this.#audioSource.queuedDuration) * (this.#sampleRate / 1000),\n ),\n );\n }\n\n get textChars(): number {\n return this.synchronizer.playedText.length;\n }\n\n get contentIndex(): number {\n return this.#contentIndex;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.doneFut.done) return;\n this.intFut.resolve();\n this.#interrupted = true;\n }\n}\n\nexport class AgentPlayout extends EventEmitter {\n #audioSource: AudioSource;\n #playoutTask: CancellablePromise<void> | null;\n #sampleRate: number;\n #numChannels: number;\n #inFrameSize: number;\n #outFrameSize: number;\n constructor(\n audioSource: AudioSource,\n sampleRate: number,\n numChannels: number,\n inFrameSize: number,\n outFrameSize: number,\n ) {\n super();\n this.#audioSource = audioSource;\n this.#playoutTask = null;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n this.#inFrameSize = inFrameSize;\n this.#outFrameSize = outFrameSize;\n }\n\n play(\n itemId: string,\n contentIndex: number,\n synchronizer: TextAudioSynchronizer,\n textStream: AsyncIterableQueue<string>,\n audioStream: AsyncIterableQueue<AudioFrame>,\n ): PlayoutHandle {\n const handle = new PlayoutHandle(\n this.#audioSource,\n this.#sampleRate,\n itemId,\n contentIndex,\n synchronizer,\n );\n this.#playoutTask = this.#makePlayoutTask(this.#playoutTask, handle, textStream, audioStream);\n return handle;\n }\n\n #makePlayoutTask(\n oldTask: CancellablePromise<void> | null,\n handle: PlayoutHandle,\n textStream: AsyncIterableQueue<string>,\n audioStream: AsyncIterableQueue<AudioFrame>,\n ): CancellablePromise<void> {\n return new CancellablePromise<void>((resolve, reject, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n (async () => {\n try {\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n let firstFrame = true;\n\n const readText = () =>\n new CancellablePromise<void>((resolveText, rejectText, onCancelText) => {\n let cancelledText = false;\n onCancelText(() => {\n cancelledText = true;\n });\n\n (async () => {\n try {\n for await (const text of textStream) {\n if (cancelledText || cancelled) {\n break;\n }\n handle.synchronizer.pushText(text);\n }\n handle.synchronizer.markTextSegmentEnd();\n resolveText();\n } catch (error) {\n rejectText(error);\n }\n })();\n });\n\n const capture = () =>\n new CancellablePromise<void>((resolveCapture, rejectCapture, onCancelCapture) => {\n let cancelledCapture = false;\n onCancelCapture(() => {\n cancelledCapture = true;\n });\n\n (async () => {\n try {\n const samplesPerChannel = this.#outFrameSize;\n const bstream = new AudioByteStream(\n this.#sampleRate,\n this.#numChannels,\n samplesPerChannel,\n );\n\n for await (const frame of audioStream) {\n if (cancelledCapture || cancelled) {\n break;\n }\n if (firstFrame) {\n handle.synchronizer.segmentPlayoutStarted();\n this.emit('playout_started');\n firstFrame = false;\n }\n\n handle.synchronizer.pushAudio(frame);\n\n for (const f of bstream.write(frame.data.buffer)) {\n handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;\n await this.#audioSource.captureFrame(f);\n }\n }\n\n if (!cancelledCapture && !cancelled) {\n for (const f of bstream.flush()) {\n handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;\n await this.#audioSource.captureFrame(f);\n }\n\n handle.synchronizer.markAudioSegmentEnd();\n\n await this.#audioSource.waitForPlayout();\n }\n\n resolveCapture();\n } catch (error) {\n rejectCapture(error);\n }\n })();\n });\n\n const readTextTask = readText();\n const captureTask = capture();\n\n try {\n await Promise.race([captureTask, handle.intFut.await]);\n } finally {\n if (!captureTask.isCancelled) {\n await gracefullyCancel(captureTask);\n }\n\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n await handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!readTextTask.isCancelled) {\n await gracefullyCancel(readTextTask);\n }\n\n if (!firstFrame) {\n this.emit('playout_stopped', handle.interrupted);\n }\n\n handle.doneFut.resolve();\n await handle.synchronizer.close(false);\n }\n\n resolve();\n } catch (error) {\n reject(error);\n }\n })();\n });\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAEhC,SAAkC,oBAAoB,QAAQ,wBAAwB;AAE/E,MAAM,QAAQ,CAAC;AAEf,MAAM,sBAAsB,aAAa;AAAA,EAC9C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA,YACE,aACA,YACA,QACA,cACA,cACA;AACA,UAAM;AACN,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,SAAK,UAAU;AACf,SAAK,gBAAgB;AACrB,SAAK,eAAe;AACpB,SAAK,UAAU,IAAI,OAAO;AAC1B,SAAK,SAAS,IAAI,OAAO;AACzB,SAAK,eAAe;AACpB,SAAK,iBAAiB;AACtB,SAAK,kBAAkB;AAAA,EACzB;AAAA,EAEA,IAAI,SAAiB;AACnB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,eAAuB;AACzB,QAAI,KAAK,oBAAoB,QAAW;AACtC,aAAO,KAAK,MAAM,KAAK,kBAAkB,KAAK,WAAW;AAAA,IAC3D;AAEA,WAAO,KAAK;AAAA,MACV;AAAA,MACA,KAAK;AAAA,SACF,KAAK,iBAAiB,KAAK,aAAa,mBAAmB,KAAK,cAAc;AAAA,MACjF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,YAAoB;AACtB,WAAO,KAAK,aAAa,WAAW;AAAA,EACtC;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,QAAQ,KAAM;AACvB,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AACF;AAEO,MAAM,qBAAqB,aAAa;AAAA,EAC7C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YACE,aACA,YACA,aACA,aACA,cACA;AACA,UAAM;AACN,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,QACA,cACA,cACA,YACA,aACe;AACf,UAAM,SAAS,IAAI;AAAA,MACjB,KAAK;AAAA,MACL,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,SAAK,eAAe,KAAK,iBAAiB,KAAK,cAAc,QAAQ,YAAY,WAAW;AAC5F,WAAO;AAAA,EACT;AAAA,EAEA,iBACE,SACA,QACA,YACA,aAC0B;AAC1B,WAAO,IAAI,mBAAyB,CAAC,SAAS,QAAQ,aAAa;AACjE,UAAI,YAAY;AAChB,eAAS,MAAM;AACb,oBAAY;AAAA,MACd,CAAC;AAED,OAAC,YAAY;AACX,YAAI;AACF,cAAI,SAAS;AACX,kBAAM,iBAAiB,OAAO;AAAA,UAChC;AAEA,cAAI,aAAa;AAEjB,gBAAM,WAAW,MACf,IAAI,mBAAyB,CAAC,aAAa,YAAY,iBAAiB;AACtE,gBAAI,gBAAgB;AACpB,yBAAa,MAAM;AACjB,8BAAgB;AAAA,YAClB,CAAC;AAED,aAAC,YAAY;AACX,kBAAI;AACF,iCAAiB,QAAQ,YAAY;AACnC,sBAAI,iBAAiB,WAAW;AAC9B;AAAA,kBACF;AACA,yBAAO,aAAa,SAAS,IAAI;AAAA,gBACnC;AACA,uBAAO,aAAa,mBAAmB;AACvC,4BAAY;AAAA,cACd,SAAS,OAAO;AACd,2BAAW,KAAK;AAAA,cAClB;AAAA,YACF,GAAG;AAAA,UACL,CAAC;AAEH,gBAAM,UAAU,MACd,IAAI,mBAAyB,CAAC,gBAAgB,eAAe,oBAAoB;AAC/E,gBAAI,mBAAmB;AACvB,4BAAgB,MAAM;AACpB,iCAAmB;AAAA,YACrB,CAAC;AAED,aAAC,YAAY;AACX,kBAAI;AACF,sBAAM,oBAAoB,KAAK;AAC/B,sBAAM,UAAU,IAAI;AAAA,kBAClB,KAAK;AAAA,kBACL,KAAK;AAAA,kBACL;AAAA,gBACF;AAEA,iCAAiB,SAAS,aAAa;AACrC,sBAAI,oBAAoB,WAAW;AACjC;AAAA,kBACF;AACA,sBAAI,YAAY;AACd,2BAAO,aAAa,sBAAsB;AAC1C,yBAAK,KAAK,iBAAiB;AAC3B,iCAAa;AAAA,kBACf;AAEA,yBAAO,aAAa,UAAU,KAAK;AAEnC,6BAAW,KAAK,QAAQ,MAAM,MAAM,KAAK,MAAM,GAAG;AAChD,2BAAO,kBAAmB,EAAE,oBAAoB,EAAE,aAAc;AAChE,0BAAM,KAAK,aAAa,aAAa,CAAC;AAAA,kBACxC;AAAA,gBACF;AAEA,oBAAI,CAAC,oBAAoB,CAAC,WAAW;AACnC,6BAAW,KAAK,QAAQ,MAAM,GAAG;AAC/B,2BAAO,kBAAmB,EAAE,oBAAoB,EAAE,aAAc;AAChE,0BAAM,KAAK,aAAa,aAAa,CAAC;AAAA,kBACxC;AAEA,yBAAO,aAAa,oBAAoB;AAExC,wBAAM,KAAK,aAAa,eAAe;AAAA,gBACzC;AAEA,+BAAe;AAAA,cACjB,SAAS,OAAO;AACd,8BAAc,KAAK;AAAA,cACrB;AAAA,YACF,GAAG;AAAA,UACL,CAAC;AAEH,gBAAM,eAAe,SAAS;AAC9B,gBAAM,cAAc,QAAQ;AAE5B,cAAI;AACF,kBAAM,QAAQ,KAAK,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,UACvD,UAAE;AACA,gBAAI,CAAC,YAAY,aAAa;AAC5B,oBAAM,iBAAiB,WAAW;AAAA,YACpC;AAEA,mBAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,gBAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,oBAAM,OAAO,aAAa,MAAM,IAAI;AACpC,mBAAK,aAAa,WAAW;AAAA,YAC/B;AAEA,gBAAI,CAAC,aAAa,aAAa;AAC7B,oBAAM,iBAAiB,YAAY;AAAA,YACrC;AAEA,gBAAI,CAAC,YAAY;AACf,mBAAK,KAAK,mBAAmB,OAAO,WAAW;AAAA,YACjD;AAEA,mBAAO,QAAQ,QAAQ;AACvB,kBAAM,OAAO,aAAa,MAAM,KAAK;AAAA,UACvC;AAEA,kBAAQ;AAAA,QACV,SAAS,OAAO;AACd,iBAAO,KAAK;AAAA,QACd;AAAA,MACF,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/multimodal/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type AudioSource } from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\n\nexport const proto = {};\n\nexport class PlayoutHandle extends EventEmitter {\n #audioSource: AudioSource;\n #sampleRate: number;\n #itemId: string;\n #contentIndex: number;\n /** @internal */\n synchronizer: TextAudioSynchronizer;\n /** @internal */\n doneFut: Future;\n /** @internal */\n intFut: Future;\n /** @internal */\n #interrupted: boolean;\n /** @internal */\n pushedDuration: number;\n /** @internal */\n totalPlayedTime: number | undefined; // Set when playout is done\n\n constructor(\n audioSource: AudioSource,\n sampleRate: number,\n itemId: string,\n contentIndex: number,\n synchronizer: TextAudioSynchronizer,\n ) {\n super();\n this.#audioSource = audioSource;\n this.#sampleRate = sampleRate;\n this.#itemId = itemId;\n this.#contentIndex = contentIndex;\n this.synchronizer = synchronizer;\n this.doneFut = new Future();\n this.intFut = new Future();\n this.#interrupted = false;\n this.pushedDuration = 0;\n this.totalPlayedTime = undefined;\n }\n\n get itemId(): string {\n return this.#itemId;\n }\n\n get audioSamples(): number {\n if (this.totalPlayedTime !== undefined) {\n return Math.floor(this.totalPlayedTime * this.#sampleRate);\n }\n\n return Math.max(\n 0,\n Math.floor(\n (this.pushedDuration - this.#audioSource.queuedDuration) * (this.#sampleRate / 1000),\n ),\n );\n }\n\n get textChars(): number {\n return this.synchronizer.playedText.length;\n }\n\n get contentIndex(): number {\n return this.#contentIndex;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.doneFut.done) return;\n this.intFut.resolve();\n this.#interrupted = true;\n }\n}\n\nexport class AgentPlayout extends EventEmitter {\n #audioSource: AudioSource;\n #playoutTask: CancellablePromise<void> | null;\n #sampleRate: number;\n #numChannels: number;\n #inFrameSize: number;\n #outFrameSize: number;\n constructor(\n audioSource: AudioSource,\n sampleRate: number,\n numChannels: number,\n inFrameSize: number,\n outFrameSize: number,\n ) {\n super();\n this.#audioSource = audioSource;\n this.#playoutTask = null;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n this.#inFrameSize = inFrameSize;\n this.#outFrameSize = outFrameSize;\n }\n\n play(\n itemId: string,\n contentIndex: number,\n synchronizer: TextAudioSynchronizer,\n textStream: AsyncIterableQueue<string>,\n audioStream: AsyncIterableQueue<AudioFrame>,\n ): PlayoutHandle {\n const handle = new PlayoutHandle(\n this.#audioSource,\n this.#sampleRate,\n itemId,\n contentIndex,\n synchronizer,\n );\n this.#playoutTask = this.#makePlayoutTask(this.#playoutTask, handle, textStream, audioStream);\n return handle;\n }\n\n #makePlayoutTask(\n oldTask: CancellablePromise<void> | null,\n handle: PlayoutHandle,\n textStream: AsyncIterableQueue<string>,\n audioStream: AsyncIterableQueue<AudioFrame>,\n ): CancellablePromise<void> {\n return new CancellablePromise<void>((resolve, reject, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n (async () => {\n try {\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n let firstFrame = true;\n\n const readText = () =>\n new CancellablePromise<void>((resolveText, rejectText, onCancelText) => {\n let cancelledText = false;\n onCancelText(() => {\n cancelledText = true;\n });\n\n (async () => {\n try {\n for await (const text of textStream) {\n if (cancelledText || cancelled) {\n break;\n }\n handle.synchronizer.pushText(text);\n }\n if (!cancelled) {\n handle.synchronizer.markTextSegmentEnd();\n }\n resolveText();\n } catch (error) {\n rejectText(error);\n }\n })();\n });\n\n const capture = () =>\n new CancellablePromise<void>((resolveCapture, rejectCapture, onCancelCapture) => {\n let cancelledCapture = false;\n onCancelCapture(() => {\n cancelledCapture = true;\n });\n\n (async () => {\n try {\n const samplesPerChannel = this.#outFrameSize;\n const bstream = new AudioByteStream(\n this.#sampleRate,\n this.#numChannels,\n samplesPerChannel,\n );\n\n for await (const frame of audioStream) {\n if (cancelledCapture || cancelled) {\n break;\n }\n if (firstFrame) {\n handle.synchronizer.segmentPlayoutStarted();\n this.emit('playout_started');\n firstFrame = false;\n }\n\n handle.synchronizer.pushAudio(frame);\n\n for (const f of bstream.write(frame.data.buffer)) {\n handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;\n await this.#audioSource.captureFrame(f);\n }\n }\n\n if (!cancelledCapture && !cancelled) {\n for (const f of bstream.flush()) {\n handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;\n await this.#audioSource.captureFrame(f);\n }\n\n handle.synchronizer.markAudioSegmentEnd();\n\n await this.#audioSource.waitForPlayout();\n }\n\n resolveCapture();\n } catch (error) {\n rejectCapture(error);\n }\n })();\n });\n\n const readTextTask = readText();\n const captureTask = capture();\n\n try {\n await Promise.race([captureTask, handle.intFut.await]);\n } finally {\n if (!captureTask.isCancelled) {\n await gracefullyCancel(captureTask);\n }\n\n if (!readTextTask.isCancelled) {\n await gracefullyCancel(readTextTask);\n }\n\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit('playout_stopped', handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n const isInterrupted = handle.interrupted || !!captureTask.error;\n await handle.synchronizer.close(isInterrupted);\n }\n\n resolve();\n } catch (error) {\n reject(error);\n }\n })();\n });\n }\n}\n"],"mappings":"AAIA,eAAiC;AACjC,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAEhC,SAAkC,oBAAoB,QAAQ,wBAAwB;AAE/E,MAAM,QAAQ,CAAC;AAEf,MAAM,sBAAsB,aAAa;AAAA,EAC9C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA;AAAA;AAAA,EAEA,YACE,aACA,YACA,QACA,cACA,cACA;AACA,UAAM;AACN,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,SAAK,UAAU;AACf,SAAK,gBAAgB;AACrB,SAAK,eAAe;AACpB,SAAK,UAAU,IAAI,OAAO;AAC1B,SAAK,SAAS,IAAI,OAAO;AACzB,SAAK,eAAe;AACpB,SAAK,iBAAiB;AACtB,SAAK,kBAAkB;AAAA,EACzB;AAAA,EAEA,IAAI,SAAiB;AACnB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,eAAuB;AACzB,QAAI,KAAK,oBAAoB,QAAW;AACtC,aAAO,KAAK,MAAM,KAAK,kBAAkB,KAAK,WAAW;AAAA,IAC3D;AAEA,WAAO,KAAK;AAAA,MACV;AAAA,MACA,KAAK;AAAA,SACF,KAAK,iBAAiB,KAAK,aAAa,mBAAmB,KAAK,cAAc;AAAA,MACjF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,YAAoB;AACtB,WAAO,KAAK,aAAa,WAAW;AAAA,EACtC;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,QAAQ,KAAM;AACvB,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AACF;AAEO,MAAM,qBAAqB,aAAa;AAAA,EAC7C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YACE,aACA,YACA,aACA,aACA,cACA;AACA,UAAM;AACN,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,QACA,cACA,cACA,YACA,aACe;AACf,UAAM,SAAS,IAAI;AAAA,MACjB,KAAK;AAAA,MACL,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,IACF;AACA,SAAK,eAAe,KAAK,iBAAiB,KAAK,cAAc,QAAQ,YAAY,WAAW;AAC5F,WAAO;AAAA,EACT;AAAA,EAEA,iBACE,SACA,QACA,YACA,aAC0B;AAC1B,WAAO,IAAI,mBAAyB,CAAC,SAAS,QAAQ,aAAa;AACjE,UAAI,YAAY;AAChB,eAAS,MAAM;AACb,oBAAY;AAAA,MACd,CAAC;AAED,OAAC,YAAY;AACX,YAAI;AACF,cAAI,SAAS;AACX,kBAAM,iBAAiB,OAAO;AAAA,UAChC;AAEA,cAAI,aAAa;AAEjB,gBAAM,WAAW,MACf,IAAI,mBAAyB,CAAC,aAAa,YAAY,iBAAiB;AACtE,gBAAI,gBAAgB;AACpB,yBAAa,MAAM;AACjB,8BAAgB;AAAA,YAClB,CAAC;AAED,aAAC,YAAY;AACX,kBAAI;AACF,iCAAiB,QAAQ,YAAY;AACnC,sBAAI,iBAAiB,WAAW;AAC9B;AAAA,kBACF;AACA,yBAAO,aAAa,SAAS,IAAI;AAAA,gBACnC;AACA,oBAAI,CAAC,WAAW;AACd,yBAAO,aAAa,mBAAmB;AAAA,gBACzC;AACA,4BAAY;AAAA,cACd,SAAS,OAAO;AACd,2BAAW,KAAK;AAAA,cAClB;AAAA,YACF,GAAG;AAAA,UACL,CAAC;AAEH,gBAAM,UAAU,MACd,IAAI,mBAAyB,CAAC,gBAAgB,eAAe,oBAAoB;AAC/E,gBAAI,mBAAmB;AACvB,4BAAgB,MAAM;AACpB,iCAAmB;AAAA,YACrB,CAAC;AAED,aAAC,YAAY;AACX,kBAAI;AACF,sBAAM,oBAAoB,KAAK;AAC/B,sBAAM,UAAU,IAAI;AAAA,kBAClB,KAAK;AAAA,kBACL,KAAK;AAAA,kBACL;AAAA,gBACF;AAEA,iCAAiB,SAAS,aAAa;AACrC,sBAAI,oBAAoB,WAAW;AACjC;AAAA,kBACF;AACA,sBAAI,YAAY;AACd,2BAAO,aAAa,sBAAsB;AAC1C,yBAAK,KAAK,iBAAiB;AAC3B,iCAAa;AAAA,kBACf;AAEA,yBAAO,aAAa,UAAU,KAAK;AAEnC,6BAAW,KAAK,QAAQ,MAAM,MAAM,KAAK,MAAM,GAAG;AAChD,2BAAO,kBAAmB,EAAE,oBAAoB,EAAE,aAAc;AAChE,0BAAM,KAAK,aAAa,aAAa,CAAC;AAAA,kBACxC;AAAA,gBACF;AAEA,oBAAI,CAAC,oBAAoB,CAAC,WAAW;AACnC,6BAAW,KAAK,QAAQ,MAAM,GAAG;AAC/B,2BAAO,kBAAmB,EAAE,oBAAoB,EAAE,aAAc;AAChE,0BAAM,KAAK,aAAa,aAAa,CAAC;AAAA,kBACxC;AAEA,yBAAO,aAAa,oBAAoB;AAExC,wBAAM,KAAK,aAAa,eAAe;AAAA,gBACzC;AAEA,+BAAe;AAAA,cACjB,SAAS,OAAO;AACd,8BAAc,KAAK;AAAA,cACrB;AAAA,YACF,GAAG;AAAA,UACL,CAAC;AAEH,gBAAM,eAAe,SAAS;AAC9B,gBAAM,cAAc,QAAQ;AAE5B,cAAI;AACF,kBAAM,QAAQ,KAAK,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,UACvD,UAAE;AACA,gBAAI,CAAC,YAAY,aAAa;AAC5B,oBAAM,iBAAiB,WAAW;AAAA,YACpC;AAEA,gBAAI,CAAC,aAAa,aAAa;AAC7B,oBAAM,iBAAiB,YAAY;AAAA,YACrC;AAEA,mBAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,gBAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,mBAAK,aAAa,WAAW;AAAA,YAC/B;AAEA,gBAAI,CAAC,YAAY;AACf,mBAAK,KAAK,mBAAmB,OAAO,WAAW;AAAA,YACjD;AAEA,mBAAO,QAAQ,QAAQ;AAEvB,kBAAM,gBAAgB,OAAO,eAAe,CAAC,CAAC,YAAY;AAC1D,kBAAM,OAAO,aAAa,MAAM,aAAa;AAAA,UAC/C;AAEA,kBAAQ;AAAA,QACV,SAAS,OAAO;AACd,iBAAO,KAAK;AAAA,QACd;AAAA,MACF,GAAG;AAAA,IACL,CAAC;AAAA,EACH;AACF;","names":[]}
|
|
@@ -59,13 +59,15 @@ class MultimodalAgent extends import_node_events.EventEmitter {
|
|
|
59
59
|
model,
|
|
60
60
|
chatCtx,
|
|
61
61
|
fncCtx,
|
|
62
|
-
maxTextResponseRetries = 5
|
|
62
|
+
maxTextResponseRetries = 5,
|
|
63
|
+
noiseCancellation
|
|
63
64
|
}) {
|
|
64
65
|
super();
|
|
65
66
|
this.model = model;
|
|
66
67
|
this.#chatCtx = chatCtx;
|
|
67
68
|
this.#fncCtx = fncCtx;
|
|
68
69
|
this.#maxTextResponseRetries = maxTextResponseRetries;
|
|
70
|
+
this.#noiseCancellation = noiseCancellation;
|
|
69
71
|
}
|
|
70
72
|
#participant = null;
|
|
71
73
|
#agentPublication = null;
|
|
@@ -77,6 +79,7 @@ class MultimodalAgent extends import_node_events.EventEmitter {
|
|
|
77
79
|
#session = null;
|
|
78
80
|
#fncCtx = void 0;
|
|
79
81
|
#chatCtx = void 0;
|
|
82
|
+
#noiseCancellation = void 0;
|
|
80
83
|
#_started = false;
|
|
81
84
|
#_pendingFunctionCalls = /* @__PURE__ */ new Set();
|
|
82
85
|
#_speaking = false;
|
|
@@ -359,7 +362,12 @@ class MultimodalAgent extends import_node_events.EventEmitter {
|
|
|
359
362
|
};
|
|
360
363
|
this.subscribedTrack = track;
|
|
361
364
|
this.readMicroTask = new Promise((resolve, reject) => {
|
|
362
|
-
|
|
365
|
+
const audioStreamOptions = {
|
|
366
|
+
sampleRate: this.model.sampleRate,
|
|
367
|
+
numChannels: this.model.numChannels,
|
|
368
|
+
...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
|
|
369
|
+
};
|
|
370
|
+
readAudioStreamTask(new import_rtc_node.AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);
|
|
363
371
|
});
|
|
364
372
|
}
|
|
365
373
|
#getLocalTrackSid() {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', (text) => {\n this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))\n .then(resolve)\n .catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAWA,sBAOO;AACP,yBAA6B;AAC7B,mBAAgC;AAChC,UAAqB;AACrB,iBAAoB;AAEpB,2BAA8D;AAC9D,mBAAiC;AACjC,2BAAiD;AAM1C,MAAe,wBAAwB,gCAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,gCAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,EAC3B,GAKG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAAA,EACjC;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,cAAU,gBAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EAExC,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAhJlD;AAiJM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,0BAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,0BAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,4BAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,0BAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,4BAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gCAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oCAAoB;AACxC,cAAQ,SAAS,4BAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AAnPnE,YAAAC;AAqPQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,2CAAsB,2CAAsB;AACrE,qBAAa,GAAG,eAAe,CAAC,SAAS;AACvC,eAAK;AAAA,YACH,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AA3S9D,YAAAA,KAAA;AA6SQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AAvT5E,YAAAA,KAAA;AAyTQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AAzU5D,YAAAA,KAAA;AA0UQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,4BAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,4BAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AAhbJ;AAibI,QACE,YAAY,WAAW,4BAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,0BAAoB,IAAI,4BAAY,OAAO,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW,CAAC,EACtF,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,IACjB,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,qBAAiB,+BAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AA3dV;AA4dI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAjgB/B;AAkgBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n NoiseCancellationOptions,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n noiseCancellation,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n this.#noiseCancellation = noiseCancellation;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n #noiseCancellation: NoiseCancellationOptions | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', (text) => {\n this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n const audioStreamOptions = {\n sampleRate: this.model.sampleRate,\n numChannels: this.model.numChannels,\n ...(this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}),\n };\n readAudioStreamTask(new AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAYA,sBAOO;AACP,yBAA6B;AAC7B,mBAAgC;AAChC,UAAqB;AACrB,iBAAoB;AAEpB,2BAA8D;AAC9D,mBAAiC;AACjC,2BAAiD;AAM1C,MAAe,wBAAwB,gCAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,gCAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,IACzB;AAAA,EACF,GAMG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAC/B,SAAK,qBAAqB;AAAA,EAC5B;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,cAAU,gBAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EACxC,qBAA2D;AAAA,EAE3D,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AArJlD;AAsJM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,0BAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,0BAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,4BAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,0BAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,4BAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gCAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oCAAoB;AACxC,cAAQ,SAAS,4BAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AAxPnE,YAAAC;AA0PQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,2CAAsB,2CAAsB;AACrE,qBAAa,GAAG,eAAe,CAAC,SAAS;AACvC,eAAK;AAAA,YACH,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AAhT9D,YAAAA,KAAA;AAkTQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AA5T5E,YAAAA,KAAA;AA8TQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AA9U5D,YAAAA,KAAA;AA+UQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,4BAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,4BAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AArbJ;AAsbI,QACE,YAAY,WAAW,4BAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,YAAM,qBAAqB;AAAA,QACzB,YAAY,KAAK,MAAM;AAAA,QACvB,aAAa,KAAK,MAAM;AAAA,QACxB,GAAI,KAAK,qBAAqB,EAAE,mBAAmB,KAAK,mBAAmB,IAAI,CAAC;AAAA,MAClF;AACA,0BAAoB,IAAI,4BAAY,OAAO,kBAAkB,CAAC,EAAE,KAAK,OAAO,EAAE,MAAM,MAAM;AAAA,IAC5F,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,qBAAiB,+BAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AAneV;AAoeI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAzgB/B;AA0gBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
-
import type { RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
|
|
2
|
+
import type { NoiseCancellationOptions, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
|
|
3
3
|
import { EventEmitter } from 'node:events';
|
|
4
4
|
import * as llm from '../llm/index.js';
|
|
5
5
|
/**
|
|
@@ -34,11 +34,12 @@ export declare class MultimodalAgent extends EventEmitter {
|
|
|
34
34
|
linkedParticipant: RemoteParticipant | null;
|
|
35
35
|
subscribedTrack: RemoteAudioTrack | null;
|
|
36
36
|
readMicroTask: Promise<void> | null;
|
|
37
|
-
constructor({ model, chatCtx, fncCtx, maxTextResponseRetries, }: {
|
|
37
|
+
constructor({ model, chatCtx, fncCtx, maxTextResponseRetries, noiseCancellation, }: {
|
|
38
38
|
model: RealtimeModel;
|
|
39
39
|
chatCtx?: llm.ChatContext;
|
|
40
40
|
fncCtx?: llm.FunctionContext;
|
|
41
41
|
maxTextResponseRetries?: number;
|
|
42
|
+
noiseCancellation?: NoiseCancellationOptions;
|
|
42
43
|
});
|
|
43
44
|
get fncCtx(): llm.FunctionContext | undefined;
|
|
44
45
|
set fncCtx(ctx: llm.FunctionContext | undefined);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"multimodal_agent.d.ts","sourceRoot":"","sources":["../../src/multimodal/multimodal_agent.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAEV,gBAAgB,EAChB,iBAAiB,EAGjB,IAAI,EACL,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,GAAG,MAAM,iBAAiB,CAAC;AAOvC;;;GAGG;AACH,8BAAsB,eAAgB,SAAQ,YAAY;IAExD,QAAQ,CAAC,YAAY,EAAE,GAAG,CAAC;IAE3B,QAAQ,CAAC,gBAAgB,EAAE,GAAG,CAAC;IAC/B,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,eAAe,GAAG,SAAS,CAAC;IACjD,QAAQ,CAAC,uBAAuB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;CACvD;AAED;;;GAGG;AACH,8BAAsB,aAAa;IAEjC,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,GAAG,eAAe;IAC/C,QAAQ,CAAC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CAC/B;AAED,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;AAChF,eAAO,MAAM,qBAAqB,mBAAmB,CAAC;AAEtD,YAAY;AACZ,qBAAa,eAAgB,SAAQ,YAAY;;IAC/C,KAAK,EAAE,aAAa,CAAC;IACrB,IAAI,EAAE,IAAI,GAAG,IAAI,CAAQ;IACzB,iBAAiB,EAAE,iBAAiB,GAAG,IAAI,CAAQ;IACnD,eAAe,EAAE,gBAAgB,GAAG,IAAI,CAAQ;IAChD,aAAa,EAAE,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAQ;gBAK/B,EACV,KAAK,EACL,OAAO,EACP,MAAM,EACN,sBAA0B,
|
|
1
|
+
{"version":3,"file":"multimodal_agent.d.ts","sourceRoot":"","sources":["../../src/multimodal/multimodal_agent.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAEV,wBAAwB,EACxB,gBAAgB,EAChB,iBAAiB,EAGjB,IAAI,EACL,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,GAAG,MAAM,iBAAiB,CAAC;AAOvC;;;GAGG;AACH,8BAAsB,eAAgB,SAAQ,YAAY;IAExD,QAAQ,CAAC,YAAY,EAAE,GAAG,CAAC;IAE3B,QAAQ,CAAC,gBAAgB,EAAE,GAAG,CAAC;IAC/B,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,eAAe,GAAG,SAAS,CAAC;IACjD,QAAQ,CAAC,uBAAuB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;CACvD;AAED;;;GAGG;AACH,8BAAsB,aAAa;IAEjC,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,GAAG,eAAe;IAC/C,QAAQ,CAAC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CAC/B;AAED,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;AAChF,eAAO,MAAM,qBAAqB,mBAAmB,CAAC;AAEtD,YAAY;AACZ,qBAAa,eAAgB,SAAQ,YAAY;;IAC/C,KAAK,EAAE,aAAa,CAAC;IACrB,IAAI,EAAE,IAAI,GAAG,IAAI,CAAQ;IACzB,iBAAiB,EAAE,iBAAiB,GAAG,IAAI,CAAQ;IACnD,eAAe,EAAE,gBAAgB,GAAG,IAAI,CAAQ;IAChD,aAAa,EAAE,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAQ;gBAK/B,EACV,KAAK,EACL,OAAO,EACP,MAAM,EACN,sBAA0B,EAC1B,iBAAiB,GAClB,EAAE;QACD,KAAK,EAAE,aAAa,CAAC;QACrB,OAAO,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC;QAC1B,MAAM,CAAC,EAAE,GAAG,CAAC,eAAe,CAAC;QAC7B,sBAAsB,CAAC,EAAE,MAAM,CAAC;QAChC,iBAAiB,CAAC,EAAE,wBAAwB,CAAC;KAC9C;IAyBD,IAAI,MAAM,IAAI,GAAG,CAAC,eAAe,GAAG,SAAS,CAE5C;IAED,IAAI,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,eAAe,GAAG,SAAS,EAK9C;IA6BD,KAAK,CACH,IAAI,EAAE,IAAI,EACV,WAAW,GAAE,iBAAiB,GAAG,MAAM,GAAG,IAAW,GACpD,OAAO,CAAC,eAAe,CAAC;CAgY5B"}
|
|
@@ -30,13 +30,15 @@ class MultimodalAgent extends EventEmitter {
|
|
|
30
30
|
model,
|
|
31
31
|
chatCtx,
|
|
32
32
|
fncCtx,
|
|
33
|
-
maxTextResponseRetries = 5
|
|
33
|
+
maxTextResponseRetries = 5,
|
|
34
|
+
noiseCancellation
|
|
34
35
|
}) {
|
|
35
36
|
super();
|
|
36
37
|
this.model = model;
|
|
37
38
|
this.#chatCtx = chatCtx;
|
|
38
39
|
this.#fncCtx = fncCtx;
|
|
39
40
|
this.#maxTextResponseRetries = maxTextResponseRetries;
|
|
41
|
+
this.#noiseCancellation = noiseCancellation;
|
|
40
42
|
}
|
|
41
43
|
#participant = null;
|
|
42
44
|
#agentPublication = null;
|
|
@@ -48,6 +50,7 @@ class MultimodalAgent extends EventEmitter {
|
|
|
48
50
|
#session = null;
|
|
49
51
|
#fncCtx = void 0;
|
|
50
52
|
#chatCtx = void 0;
|
|
53
|
+
#noiseCancellation = void 0;
|
|
51
54
|
#_started = false;
|
|
52
55
|
#_pendingFunctionCalls = /* @__PURE__ */ new Set();
|
|
53
56
|
#_speaking = false;
|
|
@@ -330,7 +333,12 @@ class MultimodalAgent extends EventEmitter {
|
|
|
330
333
|
};
|
|
331
334
|
this.subscribedTrack = track;
|
|
332
335
|
this.readMicroTask = new Promise((resolve, reject) => {
|
|
333
|
-
|
|
336
|
+
const audioStreamOptions = {
|
|
337
|
+
sampleRate: this.model.sampleRate,
|
|
338
|
+
numChannels: this.model.numChannels,
|
|
339
|
+
...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
|
|
340
|
+
};
|
|
341
|
+
readAudioStreamTask(new AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);
|
|
334
342
|
});
|
|
335
343
|
}
|
|
336
344
|
#getLocalTrackSid() {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', (text) => {\n this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))\n .then(resolve)\n .catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAWA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,YAAY,SAAS;AACrB,SAAS,WAAW;AAEpB,SAAS,uBAAuB,8BAA8B;AAC9D,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,EAC3B,GAKG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAAA,EACjC;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EAExC,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAhJlD;AAiJM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AAnPnE,YAAAC;AAqPQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,sBAAsB,sBAAsB;AACrE,qBAAa,GAAG,eAAe,CAAC,SAAS;AACvC,eAAK;AAAA,YACH,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AA3S9D,YAAAA,KAAA;AA6SQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AAvT5E,YAAAA,KAAA;AAyTQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AAzU5D,YAAAA,KAAA;AA0UQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AAhbJ;AAibI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,0BAAoB,IAAI,YAAY,OAAO,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW,CAAC,EACtF,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,IACjB,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AA3dV;AA4dI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAjgB/B;AAkgBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n NoiseCancellationOptions,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n noiseCancellation,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n this.#noiseCancellation = noiseCancellation;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n #noiseCancellation: NoiseCancellationOptions | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', (text) => {\n this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n const audioStreamOptions = {\n sampleRate: this.model.sampleRate,\n numChannels: this.model.numChannels,\n ...(this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}),\n };\n readAudioStreamTask(new AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAYA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,YAAY,SAAS;AACrB,SAAS,WAAW;AAEpB,SAAS,uBAAuB,8BAA8B;AAC9D,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,IACzB;AAAA,EACF,GAMG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAC/B,SAAK,qBAAqB;AAAA,EAC5B;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EACxC,qBAA2D;AAAA,EAE3D,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AArJlD;AAsJM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AAxPnE,YAAAC;AA0PQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,sBAAsB,sBAAsB;AACrE,qBAAa,GAAG,eAAe,CAAC,SAAS;AACvC,eAAK;AAAA,YACH,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AAhT9D,YAAAA,KAAA;AAkTQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AA5T5E,YAAAA,KAAA;AA8TQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AA9U5D,YAAAA,KAAA;AA+UQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AArbJ;AAsbI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,YAAM,qBAAqB;AAAA,QACzB,YAAY,KAAK,MAAM;AAAA,QACvB,aAAa,KAAK,MAAM;AAAA,QACxB,GAAI,KAAK,qBAAqB,EAAE,mBAAmB,KAAK,mBAAmB,IAAI,CAAC;AAAA,MAClF;AACA,0BAAoB,IAAI,YAAY,OAAO,kBAAkB,CAAC,EAAE,KAAK,OAAO,EAAE,MAAM,MAAM;AAAA,IAC5F,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AAneV;AAoeI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAzgB/B;AA0gBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
|
|
@@ -47,12 +47,14 @@ class HumanInput extends import_node_events.EventEmitter {
|
|
|
47
47
|
#speaking = false;
|
|
48
48
|
#speechProbability = 0;
|
|
49
49
|
#logger = (0, import_log.log)();
|
|
50
|
-
|
|
50
|
+
#noiseCancellation;
|
|
51
|
+
constructor(room, vad, stt, participant, noiseCancellation) {
|
|
51
52
|
super();
|
|
52
53
|
this.#room = room;
|
|
53
54
|
this.#vad = vad;
|
|
54
55
|
this.#stt = stt;
|
|
55
56
|
this.#participant = participant;
|
|
57
|
+
this.#noiseCancellation = noiseCancellation;
|
|
56
58
|
this.#room.on(import_rtc_node.RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));
|
|
57
59
|
this.#room.on(import_rtc_node.RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
|
|
58
60
|
this.#subscribeToMicrophone();
|
|
@@ -87,7 +89,12 @@ class HumanInput extends import_node_events.EventEmitter {
|
|
|
87
89
|
if (this.#recognizeTask) {
|
|
88
90
|
this.#recognizeTask.cancel();
|
|
89
91
|
}
|
|
90
|
-
const
|
|
92
|
+
const audioStreamOptions = {
|
|
93
|
+
sampleRate: 16e3,
|
|
94
|
+
numChannels: 1,
|
|
95
|
+
...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
|
|
96
|
+
};
|
|
97
|
+
const audioStream = new import_rtc_node.AudioStream(track, audioStreamOptions);
|
|
91
98
|
this.#recognizeTask = new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
|
|
92
99
|
let cancelled = false;
|
|
93
100
|
onCancel(() => {
|