@livekit/agents 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.ts +2 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +2 -0
  6. package/dist/index.js.map +1 -1
  7. package/dist/llm/index.cjs +2 -0
  8. package/dist/llm/index.cjs.map +1 -1
  9. package/dist/llm/index.d.ts +1 -1
  10. package/dist/llm/index.d.ts.map +1 -1
  11. package/dist/llm/index.js +2 -0
  12. package/dist/llm/index.js.map +1 -1
  13. package/dist/llm/llm.cjs +47 -3
  14. package/dist/llm/llm.cjs.map +1 -1
  15. package/dist/llm/llm.d.ts +15 -2
  16. package/dist/llm/llm.d.ts.map +1 -1
  17. package/dist/llm/llm.js +46 -3
  18. package/dist/llm/llm.js.map +1 -1
  19. package/dist/metrics/base.cjs +44 -0
  20. package/dist/metrics/base.cjs.map +1 -0
  21. package/dist/metrics/base.d.ts +96 -0
  22. package/dist/metrics/base.d.ts.map +1 -0
  23. package/dist/metrics/base.js +20 -0
  24. package/dist/metrics/base.js.map +1 -0
  25. package/dist/metrics/index.cjs +35 -0
  26. package/dist/metrics/index.cjs.map +1 -0
  27. package/dist/metrics/index.d.ts +5 -0
  28. package/dist/metrics/index.d.ts.map +1 -0
  29. package/dist/metrics/index.js +9 -0
  30. package/dist/metrics/index.js.map +1 -0
  31. package/dist/metrics/usage_collector.cjs +53 -0
  32. package/dist/metrics/usage_collector.cjs.map +1 -0
  33. package/dist/metrics/usage_collector.d.ts +14 -0
  34. package/dist/metrics/usage_collector.d.ts.map +1 -0
  35. package/dist/metrics/usage_collector.js +29 -0
  36. package/dist/metrics/usage_collector.js.map +1 -0
  37. package/dist/metrics/utils.cjs +104 -0
  38. package/dist/metrics/utils.cjs.map +1 -0
  39. package/dist/metrics/utils.d.ts +10 -0
  40. package/dist/metrics/utils.d.ts.map +1 -0
  41. package/dist/metrics/utils.js +73 -0
  42. package/dist/metrics/utils.js.map +1 -0
  43. package/dist/multimodal/multimodal_agent.cjs +7 -13
  44. package/dist/multimodal/multimodal_agent.cjs.map +1 -1
  45. package/dist/multimodal/multimodal_agent.d.ts +1 -4
  46. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  47. package/dist/multimodal/multimodal_agent.js +7 -13
  48. package/dist/multimodal/multimodal_agent.js.map +1 -1
  49. package/dist/pipeline/index.cjs +2 -0
  50. package/dist/pipeline/index.cjs.map +1 -1
  51. package/dist/pipeline/index.d.ts +1 -1
  52. package/dist/pipeline/index.d.ts.map +1 -1
  53. package/dist/pipeline/index.js +3 -1
  54. package/dist/pipeline/index.js.map +1 -1
  55. package/dist/pipeline/pipeline_agent.cjs +166 -66
  56. package/dist/pipeline/pipeline_agent.cjs.map +1 -1
  57. package/dist/pipeline/pipeline_agent.d.ts +10 -4
  58. package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
  59. package/dist/pipeline/pipeline_agent.js +169 -69
  60. package/dist/pipeline/pipeline_agent.js.map +1 -1
  61. package/dist/pipeline/speech_handle.cjs +49 -1
  62. package/dist/pipeline/speech_handle.cjs.map +1 -1
  63. package/dist/pipeline/speech_handle.d.ts +12 -2
  64. package/dist/pipeline/speech_handle.d.ts.map +1 -1
  65. package/dist/pipeline/speech_handle.js +50 -2
  66. package/dist/pipeline/speech_handle.js.map +1 -1
  67. package/dist/stt/index.cjs.map +1 -1
  68. package/dist/stt/index.d.ts +1 -1
  69. package/dist/stt/index.d.ts.map +1 -1
  70. package/dist/stt/index.js.map +1 -1
  71. package/dist/stt/stream_adapter.cjs +15 -5
  72. package/dist/stt/stream_adapter.cjs.map +1 -1
  73. package/dist/stt/stream_adapter.d.ts +4 -1
  74. package/dist/stt/stream_adapter.d.ts.map +1 -1
  75. package/dist/stt/stream_adapter.js +15 -5
  76. package/dist/stt/stream_adapter.js.map +1 -1
  77. package/dist/stt/stt.cjs +46 -2
  78. package/dist/stt/stt.cjs.map +1 -1
  79. package/dist/stt/stt.d.ts +25 -3
  80. package/dist/stt/stt.d.ts.map +1 -1
  81. package/dist/stt/stt.js +46 -2
  82. package/dist/stt/stt.js.map +1 -1
  83. package/dist/tts/index.cjs +4 -2
  84. package/dist/tts/index.cjs.map +1 -1
  85. package/dist/tts/index.d.ts +1 -1
  86. package/dist/tts/index.d.ts.map +1 -1
  87. package/dist/tts/index.js +3 -1
  88. package/dist/tts/index.js.map +1 -1
  89. package/dist/tts/stream_adapter.cjs +14 -3
  90. package/dist/tts/stream_adapter.cjs.map +1 -1
  91. package/dist/tts/stream_adapter.d.ts +3 -0
  92. package/dist/tts/stream_adapter.d.ts.map +1 -1
  93. package/dist/tts/stream_adapter.js +15 -4
  94. package/dist/tts/stream_adapter.js.map +1 -1
  95. package/dist/tts/tts.cjs +109 -6
  96. package/dist/tts/tts.cjs.map +1 -1
  97. package/dist/tts/tts.d.ts +24 -1
  98. package/dist/tts/tts.d.ts.map +1 -1
  99. package/dist/tts/tts.js +107 -5
  100. package/dist/tts/tts.js.map +1 -1
  101. package/dist/vad.cjs +43 -2
  102. package/dist/vad.cjs.map +1 -1
  103. package/dist/vad.d.ts +21 -4
  104. package/dist/vad.d.ts.map +1 -1
  105. package/dist/vad.js +43 -2
  106. package/dist/vad.js.map +1 -1
  107. package/package.json +1 -1
  108. package/src/index.ts +2 -1
  109. package/src/llm/index.ts +2 -0
  110. package/src/llm/llm.ts +55 -3
  111. package/src/metrics/base.ts +127 -0
  112. package/src/metrics/index.ts +20 -0
  113. package/src/metrics/usage_collector.ts +40 -0
  114. package/src/metrics/utils.ts +100 -0
  115. package/src/multimodal/multimodal_agent.ts +12 -17
  116. package/src/pipeline/index.ts +1 -1
  117. package/src/pipeline/pipeline_agent.ts +206 -87
  118. package/src/pipeline/speech_handle.ts +67 -2
  119. package/src/stt/index.ts +2 -0
  120. package/src/stt/stream_adapter.ts +17 -5
  121. package/src/stt/stt.ts +67 -3
  122. package/src/tts/index.ts +2 -0
  123. package/src/tts/stream_adapter.ts +17 -4
  124. package/src/tts/tts.ts +127 -4
  125. package/src/vad.ts +61 -4
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport { BasicTranscriptionForwarder } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: { promise: Promise<void>; cancel: () => void } | null = null;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.transcriptionFwd.text;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n const trFwd = new BasicTranscriptionForwarder(\n this.room!,\n this.room!.localParticipant!.identity,\n this.#getLocalTrackSid()!,\n message.responseId,\n );\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n trFwd,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n if (this.readMicroTask) {\n this.readMicroTask.cancel();\n }\n\n let cancel: () => void;\n this.readMicroTask = {\n promise: new Promise<void>((resolve, reject) => {\n cancel = () => {\n reject(new Error('Task cancelled'));\n };\n readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))\n .then(resolve)\n .catch(reject);\n }),\n cancel: () => cancel(),\n };\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant?.identity);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes[AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAWA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,YAAY,SAAS;AACrB,SAAS,WAAW;AACpB,SAAS,mCAAmC;AAC5C,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAM3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAuE;AAAA,EAEvE,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAIG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EAExC,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAxIlD;AAyIM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAQ;AAAA,MAC5C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,iBAAiB;AAChD,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAQ;AAAA,QAC5C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAQ;AAC1C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AA3OnE,YAAAC;AA6OQ,cAAM,QAAQ,IAAI;AAAA,UAChB,KAAK;AAAA,UACL,KAAK,KAAM,iBAAkB;AAAA,UAC7B,KAAK,kBAAkB;AAAA,UACvB,QAAQ;AAAA,QACV;AAEA,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AA/P9D,YAAAA,KAAA;AAiQQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AA3Q5E,YAAAA,KAAA;AA6QQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AA7R5D,YAAAA,KAAA;AA8RQ,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AA/XJ;AAgYI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,QAAI,KAAK,eAAe;AACtB,WAAK,cAAc,OAAO;AAAA,IAC5B;AAEA,QAAI;AACJ,SAAK,gBAAgB;AAAA,MACnB,SAAS,IAAI,QAAc,CAAC,SAAS,WAAW;AAC9C,iBAAS,MAAM;AACb,iBAAO,IAAI,MAAM,gBAAgB,CAAC;AAAA,QACpC;AACA,4BAAoB,IAAI,YAAY,OAAO,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW,CAAC,EACtF,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,MACjB,CAAC;AAAA,MACD,QAAQ,MAAM,OAAO;AAAA,IACvB;AAAA,EACF;AAAA,EAEA,oBAAmC;AAxarC;AAyaI,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,OAAM,UAAK,KAAK,qBAAV,mBAA4B,QAAQ;AAAA,IACxF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AArbV;AAsbI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AA3d/B;AA4dI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAW,qBAAqB;AAChF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
1
+ {"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { BasicTranscriptionForwarder } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.transcriptionFwd.text;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n const trFwd = new BasicTranscriptionForwarder(\n this.room!,\n this.room!.localParticipant!.identity,\n this.#getLocalTrackSid()!,\n message.responseId,\n );\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n trFwd,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))\n .then(resolve)\n .catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant?.identity);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes[AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAWA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,YAAY,SAAS;AACrB,SAAS,WAAW;AAEpB,SAAS,mCAAmC;AAC5C,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAM3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAIG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EAExC,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAzIlD;AA0IM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAQ;AAAA,MAC5C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,iBAAiB;AAChD,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAQ;AAAA,QAC5C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAQ;AAC1C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AA5OnE,YAAAC;AA8OQ,cAAM,QAAQ,IAAI;AAAA,UAChB,KAAK;AAAA,UACL,KAAK,KAAM,iBAAkB;AAAA,UAC7B,KAAK,kBAAkB;AAAA,UACvB,QAAQ;AAAA,QACV;AAEA,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AAhQ9D,YAAAA,KAAA;AAkQQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AA5Q5E,YAAAA,KAAA;AA8QQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AA9R5D,YAAAA,KAAA;AA+RQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AArYJ;AAsYI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,0BAAoB,IAAI,YAAY,OAAO,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW,CAAC,EACtF,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,IACjB,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AAnarC;AAoaI,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,OAAM,UAAK,KAAK,qBAAV,mBAA4B,QAAQ;AAAA,IACxF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AAhbV;AAibI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAtd/B;AAudI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAW,qBAAqB;AAChF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
@@ -18,6 +18,7 @@ var __copyProps = (to, from, except, desc) => {
18
18
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
19
  var pipeline_exports = {};
20
20
  __export(pipeline_exports, {
21
+ AgentCallContext: () => import_pipeline_agent.AgentCallContext,
21
22
  VPAEvent: () => import_pipeline_agent.VPAEvent,
22
23
  VoicePipelineAgent: () => import_pipeline_agent.VoicePipelineAgent
23
24
  });
@@ -25,6 +26,7 @@ module.exports = __toCommonJS(pipeline_exports);
25
26
  var import_pipeline_agent = require("./pipeline_agent.cjs");
26
27
  // Annotate the CommonJS export names for ESM import in node:
27
28
  0 && (module.exports = {
29
+ AgentCallContext,
28
30
  VPAEvent,
29
31
  VoicePipelineAgent
30
32
  });
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type AgentState,\n type BeforeTTSCallback,\n type BeforeLLMCallback,\n type VPACallbacks,\n type AgentCallContext,\n type AgentTranscriptionOptions,\n type VPAOptions,\n VPAEvent,\n VoicePipelineAgent,\n} from './pipeline_agent.js';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,4BAUO;","names":[]}
1
+ {"version":3,"sources":["../../src/pipeline/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type AgentState,\n type BeforeTTSCallback,\n type BeforeLLMCallback,\n type VPACallbacks,\n type AgentTranscriptionOptions,\n type VPAOptions,\n VPAEvent,\n VoicePipelineAgent,\n AgentCallContext,\n} from './pipeline_agent.js';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,4BAUO;","names":[]}
@@ -1,2 +1,2 @@
1
- export { type AgentState, type BeforeTTSCallback, type BeforeLLMCallback, type VPACallbacks, type AgentCallContext, type AgentTranscriptionOptions, type VPAOptions, VPAEvent, VoicePipelineAgent, } from './pipeline_agent.js';
1
+ export { type AgentState, type BeforeTTSCallback, type BeforeLLMCallback, type VPACallbacks, type AgentTranscriptionOptions, type VPAOptions, VPAEvent, VoicePipelineAgent, AgentCallContext, } from './pipeline_agent.js';
2
2
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,KAAK,UAAU,EACf,KAAK,iBAAiB,EACtB,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,yBAAyB,EAC9B,KAAK,UAAU,EACf,QAAQ,EACR,kBAAkB,GACnB,MAAM,qBAAqB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,KAAK,UAAU,EACf,KAAK,iBAAiB,EACtB,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,yBAAyB,EAC9B,KAAK,UAAU,EACf,QAAQ,EACR,kBAAkB,EAClB,gBAAgB,GACjB,MAAM,qBAAqB,CAAC"}
@@ -1,8 +1,10 @@
1
1
  import {
2
2
  VPAEvent,
3
- VoicePipelineAgent
3
+ VoicePipelineAgent,
4
+ AgentCallContext
4
5
  } from "./pipeline_agent.js";
5
6
  export {
7
+ AgentCallContext,
6
8
  VPAEvent,
7
9
  VoicePipelineAgent
8
10
  };
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type AgentState,\n type BeforeTTSCallback,\n type BeforeLLMCallback,\n type VPACallbacks,\n type AgentCallContext,\n type AgentTranscriptionOptions,\n type VPAOptions,\n VPAEvent,\n VoicePipelineAgent,\n} from './pipeline_agent.js';\n"],"mappings":"AAIA;AAAA,EAQE;AAAA,EACA;AAAA,OACK;","names":[]}
1
+ {"version":3,"sources":["../../src/pipeline/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type AgentState,\n type BeforeTTSCallback,\n type BeforeLLMCallback,\n type VPACallbacks,\n type AgentTranscriptionOptions,\n type VPAOptions,\n VPAEvent,\n VoicePipelineAgent,\n AgentCallContext,\n} from './pipeline_agent.js';\n"],"mappings":"AAIA;AAAA,EAOE;AAAA,EACA;AAAA,EACA;AAAA,OACK;","names":[]}
@@ -43,11 +43,13 @@ var import_stt = require("../stt/index.cjs");
43
43
  var import_basic = require("../tokenize/basic/index.cjs");
44
44
  var import_tts = require("../tts/index.cjs");
45
45
  var import_utils = require("../utils.cjs");
46
+ var import_vad = require("../vad.cjs");
46
47
  var import_agent_output = require("./agent_output.cjs");
47
48
  var import_agent_playout = require("./agent_playout.cjs");
48
49
  var import_human_input = require("./human_input.cjs");
49
50
  var import_speech_handle = require("./speech_handle.cjs");
50
51
  const AGENT_STATE_ATTRIBUTE = "lk.agent.state";
52
+ let speechData;
51
53
  var VPAEvent = /* @__PURE__ */ ((VPAEvent2) => {
52
54
  VPAEvent2[VPAEvent2["USER_STARTED_SPEAKING"] = 0] = "USER_STARTED_SPEAKING";
53
55
  VPAEvent2[VPAEvent2["USER_STOPPED_SPEAKING"] = 1] = "USER_STOPPED_SPEAKING";
@@ -58,12 +60,14 @@ var VPAEvent = /* @__PURE__ */ ((VPAEvent2) => {
58
60
  VPAEvent2[VPAEvent2["AGENT_SPEECH_INTERRUPTED"] = 6] = "AGENT_SPEECH_INTERRUPTED";
59
61
  VPAEvent2[VPAEvent2["FUNCTION_CALLS_COLLECTED"] = 7] = "FUNCTION_CALLS_COLLECTED";
60
62
  VPAEvent2[VPAEvent2["FUNCTION_CALLS_FINISHED"] = 8] = "FUNCTION_CALLS_FINISHED";
63
+ VPAEvent2[VPAEvent2["METRICS_COLLECTED"] = 9] = "METRICS_COLLECTED";
61
64
  return VPAEvent2;
62
65
  })(VPAEvent || {});
63
66
  class AgentCallContext {
64
67
  #agent;
65
68
  #llmStream;
66
69
  #metadata = /* @__PURE__ */ new Map();
70
+ #extraChatMessages = [];
67
71
  static #current;
68
72
  constructor(agent, llmStream) {
69
73
  this.#agent = agent;
@@ -85,6 +89,12 @@ class AgentCallContext {
85
89
  get llmStream() {
86
90
  return this.#llmStream;
87
91
  }
92
+ get extraChatMessages() {
93
+ return this.#extraChatMessages;
94
+ }
95
+ addExtraChatMessage(message) {
96
+ this.#extraChatMessages.push(message);
97
+ }
88
98
  }
89
99
  const defaultBeforeLLMCallback = (agent, chatCtx) => {
90
100
  return agent.llm.chat({ chatCtx, fncCtx: agent.fncCtx });
@@ -106,7 +116,7 @@ const defaultVPAOptions = {
106
116
  interruptSpeechDuration: 50,
107
117
  interruptMinWords: 0,
108
118
  minEndpointingDelay: 500,
109
- maxRecursiveFncCalls: 1,
119
+ maxNestedFncCalls: 1,
110
120
  preemptiveSynthesis: false,
111
121
  beforeLLMCallback: defaultBeforeLLMCallback,
112
122
  beforeTTSCallback: defaultBeforeTTSCallback,
@@ -131,7 +141,6 @@ class VoicePipelineAgent extends import_node_events.default {
131
141
  #transcribedInterimText = "";
132
142
  #speechQueueOpen = new import_utils.Future();
133
143
  #speechQueue = new import_utils.AsyncIterableQueue();
134
- #lastEndOfSpeechTime;
135
144
  #updateStateTask;
136
145
  #started = false;
137
146
  #room;
@@ -139,6 +148,8 @@ class VoicePipelineAgent extends import_node_events.default {
139
148
  #deferredValidation;
140
149
  #logger = (0, import_log.log)();
141
150
  #agentPublication;
151
+ #lastFinalTranscriptTime;
152
+ #lastSpeechTime;
142
153
  constructor(vad, stt, llm, tts, opts = defaultVPAOptions) {
143
154
  super();
144
155
  this.#opts = { ...defaultVPAOptions, ...opts };
@@ -183,6 +194,20 @@ class VoicePipelineAgent extends import_node_events.default {
183
194
  if (this.#started) {
184
195
  throw new Error("voice assistant already started");
185
196
  }
197
+ this.#stt.on(import_stt.SpeechEventType.METRICS_COLLECTED, (metrics) => {
198
+ this.emit(9 /* METRICS_COLLECTED */, metrics);
199
+ });
200
+ this.#tts.on(import_tts.TTSEvent.METRICS_COLLECTED, (metrics) => {
201
+ if (!speechData) return;
202
+ this.emit(9 /* METRICS_COLLECTED */, { ...metrics, sequenceId: speechData.sequenceId });
203
+ });
204
+ this.#llm.on(import_llm.LLMEvent.METRICS_COLLECTED, (metrics) => {
205
+ if (!speechData) return;
206
+ this.emit(9 /* METRICS_COLLECTED */, { ...metrics, sequenceId: speechData.sequenceId });
207
+ });
208
+ this.#vad.on(import_vad.VADEventType.METRICS_COLLECTED, (metrics) => {
209
+ this.emit(9 /* METRICS_COLLECTED */, metrics);
210
+ });
186
211
  room.on(import_rtc_node.RoomEvent.ParticipantConnected, (participant2) => {
187
212
  if (this.#participant) {
188
213
  return;
@@ -203,10 +228,43 @@ class VoicePipelineAgent extends import_node_events.default {
203
228
  /** Play a speech source through the voice assistant. */
204
229
  async say(source, allowInterruptions = true, addToChatCtx = true) {
205
230
  await this.#trackPublishedFut.await;
231
+ let callContext;
232
+ let fncSource;
233
+ if (addToChatCtx) {
234
+ callContext = AgentCallContext.getCurrent();
235
+ if (source instanceof import_llm.LLMStream) {
236
+ this.#logger.warn("LLMStream will be ignored for function call chat context");
237
+ } else if (typeof source === "string") {
238
+ fncSource = source;
239
+ } else {
240
+ fncSource = source;
241
+ source = new import_utils.AsyncIterableQueue();
242
+ }
243
+ }
206
244
  const newHandle = import_speech_handle.SpeechHandle.createAssistantSpeech(allowInterruptions, addToChatCtx);
207
245
  const synthesisHandle = this.#synthesizeAgentSpeech(newHandle.id, source);
208
246
  newHandle.initialize(source, synthesisHandle);
209
- this.#addSpeechForPlayout(newHandle);
247
+ if (this.#playingSpeech && !this.#playingSpeech.nestedSpeechFinished) {
248
+ this.#playingSpeech.addNestedSpeech(newHandle);
249
+ } else {
250
+ this.#addSpeechForPlayout(newHandle);
251
+ }
252
+ if (callContext && fncSource) {
253
+ let text;
254
+ if (typeof source === "string") {
255
+ text = fncSource;
256
+ } else {
257
+ text = "";
258
+ for await (const chunk of fncSource) {
259
+ source.put(chunk);
260
+ text += chunk;
261
+ }
262
+ source.close();
263
+ }
264
+ callContext.addExtraChatMessage(import_llm2.ChatMessage.create({ text, role: import_llm2.ChatRole.ASSISTANT }));
265
+ this.#logger.child({ text }).debug("added speech to function call chat context");
266
+ }
267
+ return newHandle;
210
268
  }
211
269
  #updateState(state, delay = 0) {
212
270
  const runTask = (delay2) => {
@@ -260,11 +318,13 @@ class VoicePipelineAgent extends import_node_events.default {
260
318
  if (event.speechDuration >= this.#opts.interruptSpeechDuration) {
261
319
  this.#interruptIfPossible();
262
320
  }
321
+ if (event.rawAccumulatedSpeech > 0) {
322
+ this.#lastSpeechTime = Date.now() - event.rawAccumulatedSilence;
323
+ }
263
324
  });
264
325
  this.#humanInput.on(import_human_input.HumanInputEvent.END_OF_SPEECH, (event) => {
265
326
  this.emit(0 /* USER_STARTED_SPEAKING */);
266
327
  this.#deferredValidation.onHumanEndOfSpeech(event);
267
- this.#lastEndOfSpeechTime = Date.now();
268
328
  });
269
329
  this.#humanInput.on(import_human_input.HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {
270
330
  this.#transcribedInterimText = event.alternatives[0].text;
@@ -272,7 +332,7 @@ class VoicePipelineAgent extends import_node_events.default {
272
332
  this.#humanInput.on(import_human_input.HumanInputEvent.FINAL_TRANSCRIPT, (event) => {
273
333
  const newTranscript = event.alternatives[0].text;
274
334
  if (!newTranscript) return;
275
- this.#logger.child({ userTranscript: newTranscript }).debug("received user transcript");
335
+ this.#lastFinalTranscriptTime = Date.now();
276
336
  this.#transcribedText += (this.#transcribedText ? " " : "") + newTranscript;
277
337
  if (this.#opts.preemptiveSynthesis && (!this.#playingSpeech || this.#playingSpeech.allowInterruptions)) {
278
338
  this.#synthesizeAgentReply();
@@ -356,23 +416,26 @@ class VoicePipelineAgent extends import_node_events.default {
356
416
  role: import_llm2.ChatRole.USER
357
417
  })
358
418
  );
359
- if (cancelled) resolve();
360
- let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);
361
- if (llmStream === false) {
362
- handle == null ? void 0 : handle.cancel();
363
- return;
364
- }
365
- if (cancelled) resolve();
366
- if (!(llmStream instanceof import_llm.LLMStream)) {
367
- llmStream = await defaultBeforeLLMCallback(this, copiedCtx);
368
- }
369
- if (handle.interrupted) {
370
- return;
419
+ speechData = { sequenceId: handle.id };
420
+ try {
421
+ if (cancelled) resolve();
422
+ let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);
423
+ if (llmStream === false) {
424
+ handle == null ? void 0 : handle.cancel();
425
+ return;
426
+ }
427
+ if (cancelled) resolve();
428
+ if (!(llmStream instanceof import_llm.LLMStream)) {
429
+ llmStream = await defaultBeforeLLMCallback(this, copiedCtx);
430
+ }
431
+ if (handle.interrupted) {
432
+ return;
433
+ }
434
+ const synthesisHandle = this.#synthesizeAgentSpeech(handle.id, llmStream);
435
+ handle.initialize(llmStream, synthesisHandle);
436
+ } finally {
437
+ speechData = void 0;
371
438
  }
372
- const synthesisHandle = this.#synthesizeAgentSpeech(handle.id, llmStream);
373
- handle.initialize(llmStream, synthesisHandle);
374
- const elapsed = !!this.#lastEndOfSpeechTime ? Math.round((Date.now() - this.#lastEndOfSpeechTime) * 1e3) / 1e3 : -1;
375
- this.#logger.child({ speechId: handle.id, elapsed }).debug("synthesizing agent reply");
376
439
  resolve();
377
440
  });
378
441
  }
@@ -414,59 +477,81 @@ class VoicePipelineAgent extends import_node_events.default {
414
477
  commitUserQuestionIfNeeded();
415
478
  const collectedText = handle.synthesisHandle.text;
416
479
  const isUsingTools = handle.source instanceof import_llm.LLMStream && !!handle.source.functionCalls.length;
417
- const extraToolsMessages = [];
418
- let interrupted = handle.interrupted;
419
- if (isUsingTools && !interrupted) {
480
+ const interrupted = handle.interrupted;
481
+ const executeFunctionCalls = async () => {
482
+ if (!isUsingTools || interrupted) return;
483
+ if (handle.fncNestedDepth >= this.#opts.maxNestedFncCalls) {
484
+ this.#logger.child({ speechId: handle.id, fncNestedDepth: handle.fncNestedDepth }).warn("max function calls nested depth reached");
485
+ return;
486
+ }
420
487
  if (!userQuestion || !handle.userCommitted) {
421
488
  throw new Error("user speech should have been committed before using tools");
422
489
  }
423
490
  const llmStream = handle.source;
424
- let newFunctionCalls = llmStream.functionCalls;
425
- for (let i = 0; i < this.#opts.maxRecursiveFncCalls; i++) {
426
- this.emit(7 /* FUNCTION_CALLS_COLLECTED */, newFunctionCalls);
427
- const calledFuncs = [];
428
- for (const func of newFunctionCalls) {
429
- const task = func.func.execute(func.params).then(
430
- (result) => ({ name: func.name, toolCallId: func.toolCallId, result }),
431
- (error) => ({ name: func.name, toolCallId: func.toolCallId, error })
432
- );
433
- calledFuncs.push({ ...func, task });
434
- this.#logger.child({ function: func.name, speechId: handle.id }).debug("executing AI function");
435
- try {
436
- await task;
437
- } catch {
438
- this.#logger.child({ function: func.name, speechId: handle.id }).error("error executing AI function");
439
- }
440
- }
441
- const toolCallsInfo = [];
442
- const toolCallsResults = [];
443
- for (const fnc of calledFuncs) {
444
- const task = await fnc.task;
445
- if (!task || task.result === void 0) continue;
446
- toolCallsInfo.push(fnc);
447
- toolCallsResults.push(import_llm2.ChatMessage.createToolFromFunctionResult(task));
491
+ const newFunctionCalls = llmStream.functionCalls;
492
+ new AgentCallContext(this, llmStream);
493
+ this.emit(7 /* FUNCTION_CALLS_COLLECTED */, newFunctionCalls);
494
+ const calledFuncs = [];
495
+ for (const func of newFunctionCalls) {
496
+ const task2 = func.func.execute(func.params).then(
497
+ (result) => ({ name: func.name, toolCallId: func.toolCallId, result }),
498
+ (error) => ({ name: func.name, toolCallId: func.toolCallId, error })
499
+ );
500
+ calledFuncs.push({ ...func, task: task2 });
501
+ this.#logger.child({ function: func.name, speechId: handle.id }).debug("executing AI function");
502
+ try {
503
+ await task2;
504
+ } catch {
505
+ this.#logger.child({ function: func.name, speechId: handle.id }).error("error executing AI function");
448
506
  }
449
- if (!toolCallsInfo.length) break;
450
- extraToolsMessages.push(import_llm2.ChatMessage.createToolCalls(toolCallsInfo, collectedText));
451
- extraToolsMessages.push(...toolCallsResults);
452
- const chatCtx = handle.source.chatCtx.copy();
453
- chatCtx.messages.push(...extraToolsMessages);
454
- const answerLLMStream = this.llm.chat({
455
- chatCtx,
456
- fncCtx: this.fncCtx
457
- });
458
- const answerSynthesis = this.#synthesizeAgentSpeech(handle.id, answerLLMStream);
459
- handle.synthesisHandle = answerSynthesis;
460
- const playHandle2 = answerSynthesis.play();
461
- await playHandle2.join().await;
462
- interrupted = answerSynthesis.interrupted;
463
- newFunctionCalls = answerLLMStream.functionCalls;
464
- this.emit(8 /* FUNCTION_CALLS_FINISHED */, calledFuncs);
465
- if (!newFunctionCalls) break;
507
+ }
508
+ const toolCallsInfo = [];
509
+ const toolCallsResults = [];
510
+ for (const fnc of calledFuncs) {
511
+ const task2 = await fnc.task;
512
+ if (!task2 || task2.result === void 0) continue;
513
+ toolCallsInfo.push(fnc);
514
+ toolCallsResults.push(import_llm2.ChatMessage.createToolFromFunctionResult(task2));
515
+ }
516
+ if (!toolCallsInfo.length) return;
517
+ const extraToolsMessages = [import_llm2.ChatMessage.createToolCalls(toolCallsInfo, collectedText)];
518
+ extraToolsMessages.push(...toolCallsResults);
519
+ const newSpeechHandle = import_speech_handle.SpeechHandle.createToolSpeech(
520
+ handle.allowInterruptions,
521
+ handle.addToChatCtx,
522
+ handle.fncNestedDepth + 1,
523
+ extraToolsMessages
524
+ );
525
+ const chatCtx = handle.source.chatCtx.copy();
526
+ chatCtx.messages.push(...extraToolsMessages);
527
+ chatCtx.messages.push(...AgentCallContext.getCurrent().extraChatMessages);
528
+ const answerLLMStream = this.llm.chat({
529
+ chatCtx,
530
+ fncCtx: this.fncCtx
531
+ });
532
+ const answerSynthesis = this.#synthesizeAgentSpeech(newSpeechHandle.id, answerLLMStream);
533
+ newSpeechHandle.initialize(answerLLMStream, answerSynthesis);
534
+ handle.addNestedSpeech(newSpeechHandle);
535
+ this.emit(8 /* FUNCTION_CALLS_FINISHED */, calledFuncs);
536
+ };
537
+ const task = executeFunctionCalls().then(() => {
538
+ handle.markNestedSpeechFinished();
539
+ });
540
+ while (!handle.nestedSpeechFinished) {
541
+ const changed = handle.nestedSpeechChanged();
542
+ await Promise.race([changed, task]);
543
+ while (handle.nestedSpeechHandles.length) {
544
+ const speech = handle.nestedSpeechHandles[0];
545
+ this.#playingSpeech = speech;
546
+ await this.#playSpeech(speech);
547
+ handle.nestedSpeechHandles.shift();
548
+ this.#playingSpeech = handle;
466
549
  }
467
550
  }
468
551
  if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) {
469
- this.chatCtx.messages.push(...extraToolsMessages);
552
+ if (handle.extraToolsMessages) {
553
+ this.chatCtx.messages.push(...handle.extraToolsMessages);
554
+ }
470
555
  if (interrupted) {
471
556
  collectedText + "\u2026";
472
557
  }
@@ -483,6 +568,7 @@ class VoicePipelineAgent extends import_node_events.default {
483
568
  interrupted,
484
569
  speechId: handle.id
485
570
  }).debug("committed agent speech");
571
+ handle.setDone();
486
572
  }
487
573
  }
488
574
  #synthesizeAgentSpeech(speechId, source) {
@@ -523,6 +609,20 @@ class VoicePipelineAgent extends import_node_events.default {
523
609
  }
524
610
  }
525
611
  this.#logger.child({ speechId: this.#pendingAgentReply.id }).debug("validated agent reply");
612
+ if (this.#lastSpeechTime) {
613
+ const timeSinceLastSpeech = Date.now() - this.#lastSpeechTime;
614
+ const transcriptionDelay = Math.max(
615
+ (this.#lastFinalTranscriptTime || 0) - this.#lastSpeechTime,
616
+ 0
617
+ );
618
+ const metrics = {
619
+ timestamp: Date.now(),
620
+ sequenceId: this.#pendingAgentReply.id,
621
+ endOfUtteranceDelay: timeSinceLastSpeech,
622
+ transcriptionDelay
623
+ };
624
+ this.emit(9 /* METRICS_COLLECTED */, metrics);
625
+ }
526
626
  this.#addSpeechForPlayout(this.#pendingAgentReply);
527
627
  this.#pendingAgentReply = void 0;
528
628
  this.#transcribedInterimText = "";