@livekit/agents 0.7.2 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/dist/audio.cjs +1 -1
  2. package/dist/audio.cjs.map +1 -1
  3. package/dist/audio.js +1 -1
  4. package/dist/audio.js.map +1 -1
  5. package/dist/cli.cjs.map +1 -1
  6. package/dist/constants.cjs +38 -0
  7. package/dist/constants.cjs.map +1 -0
  8. package/dist/constants.d.ts +5 -0
  9. package/dist/constants.d.ts.map +1 -0
  10. package/dist/constants.js +11 -0
  11. package/dist/constants.js.map +1 -0
  12. package/dist/index.cjs +14 -14
  13. package/dist/index.cjs.map +1 -1
  14. package/dist/ipc/inference_proc_lazy_main.cjs +14 -27
  15. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
  16. package/dist/ipc/inference_proc_lazy_main.js +14 -5
  17. package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
  18. package/dist/ipc/job_proc_lazy_main.cjs +23 -10
  19. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  20. package/dist/ipc/job_proc_lazy_main.js +23 -10
  21. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  22. package/dist/ipc/supervised_proc.cjs +4 -5
  23. package/dist/ipc/supervised_proc.cjs.map +1 -1
  24. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  25. package/dist/ipc/supervised_proc.js +4 -5
  26. package/dist/ipc/supervised_proc.js.map +1 -1
  27. package/dist/multimodal/agent_playout.cjs +1 -0
  28. package/dist/multimodal/agent_playout.cjs.map +1 -1
  29. package/dist/multimodal/agent_playout.js +1 -0
  30. package/dist/multimodal/agent_playout.js.map +1 -1
  31. package/dist/multimodal/multimodal_agent.cjs +36 -11
  32. package/dist/multimodal/multimodal_agent.cjs.map +1 -1
  33. package/dist/multimodal/multimodal_agent.d.ts +3 -2
  34. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  35. package/dist/multimodal/multimodal_agent.js +40 -11
  36. package/dist/multimodal/multimodal_agent.js.map +1 -1
  37. package/dist/pipeline/agent_playout.cjs +1 -1
  38. package/dist/pipeline/agent_playout.cjs.map +1 -1
  39. package/dist/pipeline/agent_playout.d.ts.map +1 -1
  40. package/dist/pipeline/agent_playout.js +1 -1
  41. package/dist/pipeline/agent_playout.js.map +1 -1
  42. package/dist/pipeline/human_input.cjs +9 -2
  43. package/dist/pipeline/human_input.cjs.map +1 -1
  44. package/dist/pipeline/human_input.d.ts +2 -2
  45. package/dist/pipeline/human_input.d.ts.map +1 -1
  46. package/dist/pipeline/human_input.js +9 -2
  47. package/dist/pipeline/human_input.js.map +1 -1
  48. package/dist/pipeline/pipeline_agent.cjs +59 -37
  49. package/dist/pipeline/pipeline_agent.cjs.map +1 -1
  50. package/dist/pipeline/pipeline_agent.d.ts +3 -1
  51. package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
  52. package/dist/pipeline/pipeline_agent.js +63 -37
  53. package/dist/pipeline/pipeline_agent.js.map +1 -1
  54. package/dist/worker.cjs +1 -1
  55. package/dist/worker.cjs.map +1 -1
  56. package/dist/worker.d.ts.map +1 -1
  57. package/dist/worker.js +1 -1
  58. package/dist/worker.js.map +1 -1
  59. package/package.json +4 -4
  60. package/src/audio.ts +1 -1
  61. package/src/constants.ts +7 -0
  62. package/src/ipc/inference_proc_lazy_main.ts +21 -6
  63. package/src/ipc/job_proc_lazy_main.ts +27 -9
  64. package/src/ipc/supervised_proc.ts +5 -6
  65. package/src/multimodal/multimodal_agent.ts +43 -13
  66. package/src/pipeline/agent_playout.ts +1 -7
  67. package/src/pipeline/human_input.ts +17 -3
  68. package/src/pipeline/pipeline_agent.ts +79 -38
  69. package/src/worker.ts +1 -1
  70. package/dist/llm/function_context.test.d.ts +0 -2
  71. package/dist/llm/function_context.test.d.ts.map +0 -1
  72. package/dist/tokenize/tokenizer.test.d.ts +0 -2
  73. package/dist/tokenize/tokenizer.test.d.ts.map +0 -1
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', (text) => {\n this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))\n .then(resolve)\n .catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAWA,sBAOO;AACP,yBAA6B;AAC7B,mBAAgC;AAChC,UAAqB;AACrB,iBAAoB;AAEpB,2BAA8D;AAC9D,mBAAiC;AACjC,2BAAiD;AAM1C,MAAe,wBAAwB,gCAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,gCAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,EAC3B,GAKG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAAA,EACjC;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,cAAU,gBAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EAExC,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAhJlD;AAiJM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,0BAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,0BAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,4BAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,0BAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,4BAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gCAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oCAAoB;AACxC,cAAQ,SAAS,4BAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AAnPnE,YAAAC;AAqPQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,2CAAsB,2CAAsB;AACrE,qBAAa,GAAG,eAAe,CAAC,SAAS;AACvC,eAAK;AAAA,YACH,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AA3S9D,YAAAA,KAAA;AA6SQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AAvT5E,YAAAA,KAAA;AAyTQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AAzU5D,YAAAA,KAAA;AA0UQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,4BAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,4BAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AAhbJ;AAibI,QACE,YAAY,WAAW,4BAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,0BAAoB,IAAI,4BAAY,OAAO,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW,CAAC,EACtF,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,IACjB,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,qBAAiB,+BAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AA3dV;AA4dI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAjgB/B;AAkgBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
1
+ {"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n NoiseCancellationOptions,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport {\n ATTRIBUTE_TRANSCRIPTION_FINAL,\n ATTRIBUTE_TRANSCRIPTION_TRACK_ID,\n TOPIC_TRANSCRIPTION,\n} from '../constants.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n noiseCancellation,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n this.#noiseCancellation = noiseCancellation;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n #noiseCancellation: NoiseCancellationOptions | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', async (text) => {\n await this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', async (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', async (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(\n participantIdentity,\n trackSid,\n transcription,\n true,\n ev.itemId,\n );\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', async (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n const audioStreamOptions = {\n sampleRate: this.model.sampleRate,\n numChannels: this.model.numChannels,\n ...(this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}),\n };\n readAudioStreamTask(new AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n async #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): Promise<void> {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n\n const stream = await this.room.localParticipant.streamText({\n topic: TOPIC_TRANSCRIPTION,\n senderIdentity: participantIdentity,\n attributes: {\n [ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,\n [ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString(),\n },\n });\n await stream.write(text);\n await stream.close();\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAYA,sBAOO;AACP,yBAA6B;AAC7B,mBAAgC;AAChC,uBAIO;AACP,UAAqB;AACrB,iBAAoB;AAEpB,2BAA8D;AAC9D,mBAAiC;AACjC,2BAAiD;AAM1C,MAAe,wBAAwB,gCAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,gCAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,IACzB;AAAA,EACF,GAMG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAC/B,SAAK,qBAAqB;AAAA,EAC5B;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,cAAU,gBAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EACxC,qBAA2D;AAAA,EAE3D,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AA1JlD;AA2JM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,0BAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,0BAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,4BAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,0BAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,4BAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gCAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oCAAoB;AACxC,cAAQ,SAAS,4BAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AA7PnE,YAAAC;AA+PQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,2CAAsB,2CAAsB;AACrE,qBAAa,GAAG,eAAe,OAAO,SAAS;AAC7C,gBAAM,KAAK;AAAA,YACT,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,OAAO,OAAY;AArTpE,YAAAA,KAAA;AAuTQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACvF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,OAAO,OAAY;AAjUlF,YAAAA,KAAA;AAmUQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK;AAAA,YACT;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA,GAAG;AAAA,UACL;AAAA,QACF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,OAAO,OAAY;AAzVlE,YAAAA,KAAA;AA0VQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACvF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,4BAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,4BAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AAhcJ;AAicI,QACE,YAAY,WAAW,4BAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,YAAM,qBAAqB;AAAA,QACzB,YAAY,KAAK,MAAM;AAAA,QACvB,aAAa,KAAK,MAAM;AAAA,QACxB,GAAI,KAAK,qBAAqB,EAAE,mBAAmB,KAAK,mBAAmB,IAAI,CAAC;AAAA,MAClF;AACA,0BAAoB,IAAI,4BAAY,OAAO,kBAAkB,CAAC,EAAE,KAAK,OAAO,EAAE,MAAM,MAAM;AAAA,IAC5F,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,qBAAiB,+BAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,sBACJ,qBACA,UACA,MACA,SACA,IACe;AA9enB;AA+eI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAED,UAAM,SAAS,MAAM,KAAK,KAAK,iBAAiB,WAAW;AAAA,MACzD,OAAO;AAAA,MACP,gBAAgB;AAAA,MAChB,YAAY;AAAA,QACV,CAAC,iDAAgC,GAAG;AAAA,QACpC,CAAC,8CAA6B,GAAG,QAAQ,SAAS;AAAA,MACpD;AAAA,IACF,CAAC;AACD,UAAM,OAAO,MAAM,IAAI;AACvB,UAAM,OAAO,MAAM;AAAA,EACrB;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AA/hB/B;AAgiBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
@@ -1,5 +1,5 @@
1
1
  /// <reference types="node" resolution-mode="require"/>
2
- import type { RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
2
+ import type { NoiseCancellationOptions, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
3
3
  import { EventEmitter } from 'node:events';
4
4
  import * as llm from '../llm/index.js';
5
5
  /**
@@ -34,11 +34,12 @@ export declare class MultimodalAgent extends EventEmitter {
34
34
  linkedParticipant: RemoteParticipant | null;
35
35
  subscribedTrack: RemoteAudioTrack | null;
36
36
  readMicroTask: Promise<void> | null;
37
- constructor({ model, chatCtx, fncCtx, maxTextResponseRetries, }: {
37
+ constructor({ model, chatCtx, fncCtx, maxTextResponseRetries, noiseCancellation, }: {
38
38
  model: RealtimeModel;
39
39
  chatCtx?: llm.ChatContext;
40
40
  fncCtx?: llm.FunctionContext;
41
41
  maxTextResponseRetries?: number;
42
+ noiseCancellation?: NoiseCancellationOptions;
42
43
  });
43
44
  get fncCtx(): llm.FunctionContext | undefined;
44
45
  set fncCtx(ctx: llm.FunctionContext | undefined);
@@ -1 +1 @@
1
- {"version":3,"file":"multimodal_agent.d.ts","sourceRoot":"","sources":["../../src/multimodal/multimodal_agent.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAEV,gBAAgB,EAChB,iBAAiB,EAGjB,IAAI,EACL,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,GAAG,MAAM,iBAAiB,CAAC;AAOvC;;;GAGG;AACH,8BAAsB,eAAgB,SAAQ,YAAY;IAExD,QAAQ,CAAC,YAAY,EAAE,GAAG,CAAC;IAE3B,QAAQ,CAAC,gBAAgB,EAAE,GAAG,CAAC;IAC/B,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,eAAe,GAAG,SAAS,CAAC;IACjD,QAAQ,CAAC,uBAAuB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;CACvD;AAED;;;GAGG;AACH,8BAAsB,aAAa;IAEjC,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,GAAG,eAAe;IAC/C,QAAQ,CAAC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CAC/B;AAED,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;AAChF,eAAO,MAAM,qBAAqB,mBAAmB,CAAC;AAEtD,YAAY;AACZ,qBAAa,eAAgB,SAAQ,YAAY;;IAC/C,KAAK,EAAE,aAAa,CAAC;IACrB,IAAI,EAAE,IAAI,GAAG,IAAI,CAAQ;IACzB,iBAAiB,EAAE,iBAAiB,GAAG,IAAI,CAAQ;IACnD,eAAe,EAAE,gBAAgB,GAAG,IAAI,CAAQ;IAChD,aAAa,EAAE,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAQ;gBAK/B,EACV,KAAK,EACL,OAAO,EACP,MAAM,EACN,sBAA0B,GAC3B,EAAE;QACD,KAAK,EAAE,aAAa,CAAC;QACrB,OAAO,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC;QAC1B,MAAM,CAAC,EAAE,GAAG,CAAC,eAAe,CAAC;QAC7B,sBAAsB,CAAC,EAAE,MAAM,CAAC;KACjC;IAuBD,IAAI,MAAM,IAAI,GAAG,CAAC,eAAe,GAAG,SAAS,CAE5C;IAED,IAAI,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,eAAe,GAAG,SAAS,EAK9C;IA6BD,KAAK,CACH,IAAI,EAAE,IAAI,EACV,WAAW,GAAE,iBAAiB,GAAG,MAAM,GAAG,IAAW,GACpD,OAAO,CAAC,eAAe,CAAC;CA6X5B"}
1
+ {"version":3,"file":"multimodal_agent.d.ts","sourceRoot":"","sources":["../../src/multimodal/multimodal_agent.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAEV,wBAAwB,EACxB,gBAAgB,EAChB,iBAAiB,EAGjB,IAAI,EACL,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAO3C,OAAO,KAAK,GAAG,MAAM,iBAAiB,CAAC;AAOvC;;;GAGG;AACH,8BAAsB,eAAgB,SAAQ,YAAY;IAExD,QAAQ,CAAC,YAAY,EAAE,GAAG,CAAC;IAE3B,QAAQ,CAAC,gBAAgB,EAAE,GAAG,CAAC;IAC/B,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,eAAe,GAAG,SAAS,CAAC;IACjD,QAAQ,CAAC,uBAAuB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI;CACvD;AAED;;;GAGG;AACH,8BAAsB,aAAa;IAEjC,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,GAAG,eAAe;IAC/C,QAAQ,CAAC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAC/B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CAC/B;AAED,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;AAChF,eAAO,MAAM,qBAAqB,mBAAmB,CAAC;AAEtD,YAAY;AACZ,qBAAa,eAAgB,SAAQ,YAAY;;IAC/C,KAAK,EAAE,aAAa,CAAC;IACrB,IAAI,EAAE,IAAI,GAAG,IAAI,CAAQ;IACzB,iBAAiB,EAAE,iBAAiB,GAAG,IAAI,CAAQ;IACnD,eAAe,EAAE,gBAAgB,GAAG,IAAI,CAAQ;IAChD,aAAa,EAAE,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAQ;gBAK/B,EACV,KAAK,EACL,OAAO,EACP,MAAM,EACN,sBAA0B,EAC1B,iBAAiB,GAClB,EAAE;QACD,KAAK,EAAE,aAAa,CAAC;QACrB,OAAO,CAAC,EAAE,GAAG,CAAC,WAAW,CAAC;QAC1B,MAAM,CAAC,EAAE,GAAG,CAAC,eAAe,CAAC;QAC7B,sBAAsB,CAAC,EAAE,MAAM,CAAC;QAChC,iBAAiB,CAAC,EAAE,wBAAwB,CAAC;KAC9C;IAyBD,IAAI,MAAM,IAAI,GAAG,CAAC,eAAe,GAAG,SAAS,CAE5C;IAED,IAAI,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,eAAe,GAAG,SAAS,EAK9C;IA6BD,KAAK,CACH,IAAI,EAAE,IAAI,EACV,WAAW,GAAE,iBAAiB,GAAG,MAAM,GAAG,IAAW,GACpD,OAAO,CAAC,eAAe,CAAC;CAiZ5B"}
@@ -8,6 +8,11 @@ import {
8
8
  } from "@livekit/rtc-node";
9
9
  import { EventEmitter } from "node:events";
10
10
  import { AudioByteStream } from "../audio.js";
11
+ import {
12
+ ATTRIBUTE_TRANSCRIPTION_FINAL,
13
+ ATTRIBUTE_TRANSCRIPTION_TRACK_ID,
14
+ TOPIC_TRANSCRIPTION
15
+ } from "../constants.js";
11
16
  import * as llm from "../llm/index.js";
12
17
  import { log } from "../log.js";
13
18
  import { TextAudioSynchronizer, defaultTextSyncOptions } from "../transcription.js";
@@ -30,13 +35,15 @@ class MultimodalAgent extends EventEmitter {
30
35
  model,
31
36
  chatCtx,
32
37
  fncCtx,
33
- maxTextResponseRetries = 5
38
+ maxTextResponseRetries = 5,
39
+ noiseCancellation
34
40
  }) {
35
41
  super();
36
42
  this.model = model;
37
43
  this.#chatCtx = chatCtx;
38
44
  this.#fncCtx = fncCtx;
39
45
  this.#maxTextResponseRetries = maxTextResponseRetries;
46
+ this.#noiseCancellation = noiseCancellation;
40
47
  }
41
48
  #participant = null;
42
49
  #agentPublication = null;
@@ -48,6 +55,7 @@ class MultimodalAgent extends EventEmitter {
48
55
  #session = null;
49
56
  #fncCtx = void 0;
50
57
  #chatCtx = void 0;
58
+ #noiseCancellation = void 0;
51
59
  #_started = false;
52
60
  #_pendingFunctionCalls = /* @__PURE__ */ new Set();
53
61
  #_speaking = false;
@@ -167,8 +175,8 @@ class MultimodalAgent extends EventEmitter {
167
175
  var _a2;
168
176
  if (message.contentType === "text") return;
169
177
  const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);
170
- synchronizer.on("textUpdated", (text) => {
171
- this.#publishTranscription(
178
+ synchronizer.on("textUpdated", async (text) => {
179
+ await this.#publishTranscription(
172
180
  this.room.localParticipant.identity,
173
181
  this.#getLocalTrackSid(),
174
182
  text.text,
@@ -205,23 +213,29 @@ class MultimodalAgent extends EventEmitter {
205
213
  this.#textResponseRetries = 0;
206
214
  }
207
215
  });
208
- this.#session.on("input_speech_committed", (ev) => {
216
+ this.#session.on("input_speech_committed", async (ev) => {
209
217
  var _a2, _b;
210
218
  const participantIdentity = (_a2 = this.linkedParticipant) == null ? void 0 : _a2.identity;
211
219
  const trackSid = (_b = this.subscribedTrack) == null ? void 0 : _b.sid;
212
220
  if (participantIdentity && trackSid) {
213
- this.#publishTranscription(participantIdentity, trackSid, "\u2026", false, ev.itemId);
221
+ await this.#publishTranscription(participantIdentity, trackSid, "\u2026", false, ev.itemId);
214
222
  } else {
215
223
  this.#logger.error("Participant or track not set");
216
224
  }
217
225
  });
218
- this.#session.on("input_speech_transcription_completed", (ev) => {
226
+ this.#session.on("input_speech_transcription_completed", async (ev) => {
219
227
  var _a2, _b;
220
228
  const transcription = ev.transcript;
221
229
  const participantIdentity = (_a2 = this.linkedParticipant) == null ? void 0 : _a2.identity;
222
230
  const trackSid = (_b = this.subscribedTrack) == null ? void 0 : _b.sid;
223
231
  if (participantIdentity && trackSid) {
224
- this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);
232
+ await this.#publishTranscription(
233
+ participantIdentity,
234
+ trackSid,
235
+ transcription,
236
+ true,
237
+ ev.itemId
238
+ );
225
239
  } else {
226
240
  this.#logger.error("Participant or track not set");
227
241
  }
@@ -232,7 +246,7 @@ class MultimodalAgent extends EventEmitter {
232
246
  this.emit("user_speech_committed", userMsg);
233
247
  this.#logger.child({ transcription }).debug("committed user speech");
234
248
  });
235
- this.#session.on("input_speech_started", (ev) => {
249
+ this.#session.on("input_speech_started", async (ev) => {
236
250
  var _a2, _b;
237
251
  this.emit("user_started_speaking");
238
252
  if (this.#playingHandle && !this.#playingHandle.done) {
@@ -247,7 +261,7 @@ class MultimodalAgent extends EventEmitter {
247
261
  const participantIdentity = (_a2 = this.linkedParticipant) == null ? void 0 : _a2.identity;
248
262
  const trackSid = (_b = this.subscribedTrack) == null ? void 0 : _b.sid;
249
263
  if (participantIdentity && trackSid) {
250
- this.#publishTranscription(participantIdentity, trackSid, "\u2026", false, ev.itemId);
264
+ await this.#publishTranscription(participantIdentity, trackSid, "\u2026", false, ev.itemId);
251
265
  }
252
266
  });
253
267
  this.#session.on("input_speech_stopped", (ev) => {
@@ -330,7 +344,12 @@ class MultimodalAgent extends EventEmitter {
330
344
  };
331
345
  this.subscribedTrack = track;
332
346
  this.readMicroTask = new Promise((resolve, reject) => {
333
- readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels)).then(resolve).catch(reject);
347
+ const audioStreamOptions = {
348
+ sampleRate: this.model.sampleRate,
349
+ numChannels: this.model.numChannels,
350
+ ...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
351
+ };
352
+ readAudioStreamTask(new AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);
334
353
  });
335
354
  }
336
355
  #getLocalTrackSid() {
@@ -339,7 +358,7 @@ class MultimodalAgent extends EventEmitter {
339
358
  }
340
359
  return this.#localTrackSid;
341
360
  }
342
- #publishTranscription(participantIdentity, trackSid, text, isFinal, id) {
361
+ async #publishTranscription(participantIdentity, trackSid, text, isFinal, id) {
343
362
  var _a;
344
363
  this.#logger.debug(
345
364
  `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`
@@ -362,6 +381,16 @@ class MultimodalAgent extends EventEmitter {
362
381
  }
363
382
  ]
364
383
  });
384
+ const stream = await this.room.localParticipant.streamText({
385
+ topic: TOPIC_TRANSCRIPTION,
386
+ senderIdentity: participantIdentity,
387
+ attributes: {
388
+ [ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,
389
+ [ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString()
390
+ }
391
+ });
392
+ await stream.write(text);
393
+ await stream.close();
365
394
  }
366
395
  #updateState() {
367
396
  let newState = "initializing";
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', (text) => {\n this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))\n .then(resolve)\n .catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAWA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,YAAY,SAAS;AACrB,SAAS,WAAW;AAEpB,SAAS,uBAAuB,8BAA8B;AAC9D,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,EAC3B,GAKG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAAA,EACjC;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EAExC,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAhJlD;AAiJM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AAnPnE,YAAAC;AAqPQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,sBAAsB,sBAAsB;AACrE,qBAAa,GAAG,eAAe,CAAC,SAAS;AACvC,eAAK;AAAA,YACH,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AA3S9D,YAAAA,KAAA;AA6SQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AAvT5E,YAAAA,KAAA;AAyTQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AAzU5D,YAAAA,KAAA;AA0UQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AAhbJ;AAibI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,0BAAoB,IAAI,YAAY,OAAO,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW,CAAC,EACtF,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,IACjB,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AA3dV;AA4dI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAjgB/B;AAkgBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
1
+ {"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n NoiseCancellationOptions,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport {\n ATTRIBUTE_TRANSCRIPTION_FINAL,\n ATTRIBUTE_TRANSCRIPTION_TRACK_ID,\n TOPIC_TRANSCRIPTION,\n} from '../constants.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n noiseCancellation,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n this.#noiseCancellation = noiseCancellation;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n #noiseCancellation: NoiseCancellationOptions | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', async (text) => {\n await this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', async (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', async (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(\n participantIdentity,\n trackSid,\n transcription,\n true,\n ev.itemId,\n );\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', async (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n const audioStreamOptions = {\n sampleRate: this.model.sampleRate,\n numChannels: this.model.numChannels,\n ...(this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}),\n };\n readAudioStreamTask(new AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n async #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): Promise<void> {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n\n const stream = await this.room.localParticipant.streamText({\n topic: TOPIC_TRANSCRIPTION,\n senderIdentity: participantIdentity,\n attributes: {\n [ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,\n [ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString(),\n },\n });\n await stream.write(text);\n await stream.close();\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAYA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,YAAY,SAAS;AACrB,SAAS,WAAW;AAEpB,SAAS,uBAAuB,8BAA8B;AAC9D,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,IACzB;AAAA,EACF,GAMG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAC/B,SAAK,qBAAqB;AAAA,EAC5B;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EACxC,qBAA2D;AAAA,EAE3D,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AA1JlD;AA2JM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AA7PnE,YAAAC;AA+PQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,sBAAsB,sBAAsB;AACrE,qBAAa,GAAG,eAAe,OAAO,SAAS;AAC7C,gBAAM,KAAK;AAAA,YACT,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,OAAO,OAAY;AArTpE,YAAAA,KAAA;AAuTQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACvF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,OAAO,OAAY;AAjUlF,YAAAA,KAAA;AAmUQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK;AAAA,YACT;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA,GAAG;AAAA,UACL;AAAA,QACF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,OAAO,OAAY;AAzVlE,YAAAA,KAAA;AA0VQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACvF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AAhcJ;AAicI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,YAAM,qBAAqB;AAAA,QACzB,YAAY,KAAK,MAAM;AAAA,QACvB,aAAa,KAAK,MAAM;AAAA,QACxB,GAAI,KAAK,qBAAqB,EAAE,mBAAmB,KAAK,mBAAmB,IAAI,CAAC;AAAA,MAClF;AACA,0BAAoB,IAAI,YAAY,OAAO,kBAAkB,CAAC,EAAE,KAAK,OAAO,EAAE,MAAM,MAAM;AAAA,IAC5F,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,sBACJ,qBACA,UACA,MACA,SACA,IACe;AA9enB;AA+eI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAED,UAAM,SAAS,MAAM,KAAK,KAAK,iBAAiB,WAAW;AAAA,MACzD,OAAO;AAAA,MACP,gBAAgB;AAAA,MAChB,YAAY;AAAA,QACV,CAAC,gCAAgC,GAAG;AAAA,QACpC,CAAC,6BAA6B,GAAG,QAAQ,SAAS;AAAA,MACpD;AAAA,IACF,CAAC;AACD,UAAM,OAAO,MAAM,IAAI;AACvB,UAAM,OAAO,MAAM;AAAA,EACrB;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AA/hB/B;AAgiBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
@@ -148,8 +148,8 @@ class AgentPlayout extends import_node_events.default {
148
148
  handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
149
149
  handle.synchronizer.pushAudio(frame);
150
150
  await this.#audioSource.captureFrame(frame);
151
- await this.#audioSource.waitForPlayout();
152
151
  }
152
+ await this.#audioSource.waitForPlayout();
153
153
  handle.synchronizer.close(false);
154
154
  resolve2();
155
155
  });
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷‍♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAAyB;AACzB,iBAAoB;AAEpB,mBAA6D;AAC7D,0BAAgC;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,oBAAO;AAAA,EACpB,UAAU,IAAI,oBAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,mBAAAC,QAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,gCAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,oCAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","EventEmitter","resolve","_","onCancel"]}
1
+ {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n }\n\n await this.#audioSource.waitForPlayout();\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAAyB;AACzB,iBAAoB;AAEpB,mBAA6D;AAC7D,0BAAgC;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,oBAAO;AAAA,EACpB,UAAU,IAAI,oBAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,mBAAAC,QAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,gCAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,oCAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAAA,QAC5C;AAEA,cAAM,KAAK,aAAa,eAAe;AAEvC,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","EventEmitter","resolve","_","onCancel"]}
@@ -1 +1 @@
1
- {"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,oBAAY,iBAAiB;IAC3B,eAAe,IAAA;IACf,eAAe,IAAA;CAChB;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAChD,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE,CAAC;AAEF,qBAAa,aAAa;;IAGxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC;IACjF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,qBAAqB,CAAC;IAEpC,cAAc,SAAK;IACnB,MAAM,SAAgB;IACtB,OAAO,SAAgB;gBAGrB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB;IAQrC,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;IAST,IAAI,IAAI,MAAM;CAGf;2CAE4D,aAAa,qBAAqB,CAAC;AAAhG,qBAAa,YAAa,SAAQ,iBAA+D;;gBAOnF,WAAW,EAAE,WAAW;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,YAAY,CAAC,GAAG,EAAE,MAAM,EAE3B;IAED,IAAI,CACF,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB,GAClC,aAAa;IA+FV,KAAK;CAIZ"}
1
+ {"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,oBAAY,iBAAiB;IAC3B,eAAe,IAAA;IACf,eAAe,IAAA;CAChB;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAChD,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE,CAAC;AAEF,qBAAa,aAAa;;IAGxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC;IACjF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,qBAAqB,CAAC;IAEpC,cAAc,SAAK;IACnB,MAAM,SAAgB;IACtB,OAAO,SAAgB;gBAGrB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB;IAQrC,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;IAST,IAAI,IAAI,MAAM;CAGf;2CAE4D,aAAa,qBAAqB,CAAC;AAAhG,qBAAa,YAAa,SAAQ,iBAA+D;;gBAOnF,WAAW,EAAE,WAAW;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,YAAY,CAAC,GAAG,EAAE,MAAM,EAE3B;IAED,IAAI,CACF,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB,GAClC,aAAa;IAyFV,KAAK;CAIZ"}
@@ -113,8 +113,8 @@ class AgentPlayout extends EventEmitter {
113
113
  handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
114
114
  handle.synchronizer.pushAudio(frame);
115
115
  await this.#audioSource.captureFrame(frame);
116
- await this.#audioSource.waitForPlayout();
117
116
  }
117
+ await this.#audioSource.waitForPlayout();
118
118
  handle.synchronizer.close(false);
119
119
  resolve2();
120
120
  });
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷‍♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AAEpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
1
+ {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n }\n\n await this.#audioSource.waitForPlayout();\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AAEpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAAA,QAC5C;AAEA,cAAM,KAAK,aAAa,eAAe;AAEvC,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
@@ -47,12 +47,14 @@ class HumanInput extends import_node_events.EventEmitter {
47
47
  #speaking = false;
48
48
  #speechProbability = 0;
49
49
  #logger = (0, import_log.log)();
50
- constructor(room, vad, stt, participant) {
50
+ #noiseCancellation;
51
+ constructor(room, vad, stt, participant, noiseCancellation) {
51
52
  super();
52
53
  this.#room = room;
53
54
  this.#vad = vad;
54
55
  this.#stt = stt;
55
56
  this.#participant = participant;
57
+ this.#noiseCancellation = noiseCancellation;
56
58
  this.#room.on(import_rtc_node.RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));
57
59
  this.#room.on(import_rtc_node.RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
58
60
  this.#subscribeToMicrophone();
@@ -87,7 +89,12 @@ class HumanInput extends import_node_events.EventEmitter {
87
89
  if (this.#recognizeTask) {
88
90
  this.#recognizeTask.cancel();
89
91
  }
90
- const audioStream = new import_rtc_node.AudioStream(track, 16e3);
92
+ const audioStreamOptions = {
93
+ sampleRate: 16e3,
94
+ numChannels: 1,
95
+ ...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
96
+ };
97
+ const audioStream = new import_rtc_node.AudioStream(track, audioStreamOptions);
91
98
  this.#recognizeTask = new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
92
99
  let cancelled = false;
93
100
  onCancel(() => {