@livekit/agents 0.7.3 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio.cjs +1 -1
- package/dist/audio.cjs.map +1 -1
- package/dist/audio.js +1 -1
- package/dist/audio.js.map +1 -1
- package/dist/constants.cjs +38 -0
- package/dist/constants.cjs.map +1 -0
- package/dist/constants.d.ts +5 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +11 -0
- package/dist/constants.js.map +1 -0
- package/dist/ipc/inference_proc_lazy_main.cjs +14 -27
- package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.js +14 -5
- package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +23 -10
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +23 -10
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.cjs +4 -5
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/ipc/supervised_proc.js +4 -5
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/multimodal/multimodal_agent.cjs +26 -9
- package/dist/multimodal/multimodal_agent.cjs.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +30 -9
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/agent_playout.cjs +1 -1
- package/dist/pipeline/agent_playout.cjs.map +1 -1
- package/dist/pipeline/agent_playout.d.ts.map +1 -1
- package/dist/pipeline/agent_playout.js +1 -1
- package/dist/pipeline/agent_playout.js.map +1 -1
- package/dist/pipeline/pipeline_agent.cjs +52 -36
- package/dist/pipeline/pipeline_agent.cjs.map +1 -1
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
- package/dist/pipeline/pipeline_agent.js +56 -36
- package/dist/pipeline/pipeline_agent.js.map +1 -1
- package/package.json +1 -1
- package/src/audio.ts +1 -1
- package/src/constants.ts +7 -0
- package/src/ipc/inference_proc_lazy_main.ts +21 -6
- package/src/ipc/job_proc_lazy_main.ts +27 -9
- package/src/ipc/supervised_proc.ts +5 -6
- package/src/multimodal/multimodal_agent.ts +32 -10
- package/src/pipeline/agent_playout.ts +1 -7
- package/src/pipeline/pipeline_agent.ts +64 -36
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n NoiseCancellationOptions,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n noiseCancellation,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n this.#noiseCancellation = noiseCancellation;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n #noiseCancellation: NoiseCancellationOptions | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', (text) => {\n this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n const audioStreamOptions = {\n sampleRate: this.model.sampleRate,\n numChannels: this.model.numChannels,\n ...(this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}),\n };\n readAudioStreamTask(new AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAYA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,YAAY,SAAS;AACrB,SAAS,WAAW;AAEpB,SAAS,uBAAuB,8BAA8B;AAC9D,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,IACzB;AAAA,EACF,GAMG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAC/B,SAAK,qBAAqB;AAAA,EAC5B;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EACxC,qBAA2D;AAAA,EAE3D,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AArJlD;AAsJM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AAxPnE,YAAAC;AA0PQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,sBAAsB,sBAAsB;AACrE,qBAAa,GAAG,eAAe,CAAC,SAAS;AACvC,eAAK;AAAA,YACH,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AAhT9D,YAAAA,KAAA;AAkTQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AA5T5E,YAAAA,KAAA;AA8TQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AA9U5D,YAAAA,KAAA;AA+UQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AArbJ;AAsbI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,YAAM,qBAAqB;AAAA,QACzB,YAAY,KAAK,MAAM;AAAA,QACvB,aAAa,KAAK,MAAM;AAAA,QACxB,GAAI,KAAK,qBAAqB,EAAE,mBAAmB,KAAK,mBAAmB,IAAI,CAAC;AAAA,MAClF;AACA,0BAAoB,IAAI,YAAY,OAAO,kBAAkB,CAAC,EAAE,KAAK,OAAO,EAAE,MAAM,MAAM;AAAA,IAC5F,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AAneV;AAoeI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAzgB/B;AA0gBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n NoiseCancellationOptions,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport {\n ATTRIBUTE_TRANSCRIPTION_FINAL,\n ATTRIBUTE_TRANSCRIPTION_TRACK_ID,\n TOPIC_TRANSCRIPTION,\n} from '../constants.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n abstract recoverFromTextResponse(itemId: string): void;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n #textResponseRetries = 0;\n #maxTextResponseRetries: number;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n maxTextResponseRetries = 5,\n noiseCancellation,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n maxTextResponseRetries?: number;\n noiseCancellation?: NoiseCancellationOptions;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n this.#maxTextResponseRetries = maxTextResponseRetries;\n this.#noiseCancellation = noiseCancellation;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n #noiseCancellation: NoiseCancellationOptions | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity!);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.synchronizer.playedText;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity!);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity!);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') return;\n\n const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);\n synchronizer.on('textUpdated', async (text) => {\n await this.#publishTranscription(\n this.room!.localParticipant!.identity!,\n this.#getLocalTrackSid()!,\n text.text,\n text.final,\n text.id,\n );\n });\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n synchronizer,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_done', (message: any) => {\n // openai.realtime.RealtimeContent\n if (message.contentType === 'text') {\n if (this.#textResponseRetries >= this.#maxTextResponseRetries) {\n throw new Error(\n 'The OpenAI Realtime API returned a text response ' +\n `after ${this.#maxTextResponseRetries} retries. ` +\n 'Please try to reduce the number of text system or ' +\n 'assistant messages in the chat context.',\n );\n }\n\n this.#textResponseRetries++;\n this.#logger\n .child({\n itemId: message.itemId,\n text: message.text,\n retries: this.#textResponseRetries,\n })\n .warn(\n 'The OpenAI Realtime API returned a text response instead of audio. ' +\n 'Attempting to recover to audio mode...',\n );\n this.#session!.recoverFromTextResponse(message.itemId);\n } else {\n this.#textResponseRetries = 0;\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', async (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', async (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(\n participantIdentity,\n trackSid,\n transcription,\n true,\n ev.itemId,\n );\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', async (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n await this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n const audioStreamOptions = {\n sampleRate: this.model.sampleRate,\n numChannels: this.model.numChannels,\n ...(this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}),\n };\n readAudioStreamTask(new AudioStream(track, audioStreamOptions)).then(resolve).catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant!.identity!);\n }\n return this.#localTrackSid;\n }\n\n async #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): Promise<void> {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n\n const stream = await this.room.localParticipant.streamText({\n topic: TOPIC_TRANSCRIPTION,\n senderIdentity: participantIdentity,\n attributes: {\n [ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,\n [ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString(),\n },\n });\n await stream.write(text);\n await stream.close();\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes![AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAYA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,YAAY,SAAS;AACrB,SAAS,WAAW;AAEpB,SAAS,uBAAuB,8BAA8B;AAC9D,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAO3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,uBAAuB;AAAA,EACvB;AAAA,EAEA,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA,yBAAyB;AAAA,IACzB;AAAA,EACF,GAMG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AACf,SAAK,0BAA0B;AAC/B,SAAK,qBAAqB;AAAA,EAC5B;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EACxC,qBAA2D;AAAA,EAE3D,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AA1JlD;AA2JM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAS;AAAA,MAC7C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,aAAa;AAC5C,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAS;AAAA,QAC7C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAS;AAC3C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AA7PnE,YAAAC;AA+PQ,YAAI,QAAQ,gBAAgB,OAAQ;AAEpC,cAAM,eAAe,IAAI,sBAAsB,sBAAsB;AACrE,qBAAa,GAAG,eAAe,OAAO,SAAS;AAC7C,gBAAM,KAAK;AAAA,YACT,KAAK,KAAM,iBAAkB;AAAA,YAC7B,KAAK,kBAAkB;AAAA,YACvB,KAAK;AAAA,YACL,KAAK;AAAA,YACL,KAAK;AAAA,UACP;AAAA,QACF,CAAC;AAED,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,YAAiB;AAE1D,YAAI,QAAQ,gBAAgB,QAAQ;AAClC,cAAI,KAAK,wBAAwB,KAAK,yBAAyB;AAC7D,kBAAM,IAAI;AAAA,cACR,0DACW,KAAK,uBAAuB;AAAA,YAGzC;AAAA,UACF;AAEA,eAAK;AACL,eAAK,QACF,MAAM;AAAA,YACL,QAAQ,QAAQ;AAAA,YAChB,MAAM,QAAQ;AAAA,YACd,SAAS,KAAK;AAAA,UAChB,CAAC,EACA;AAAA,YACC;AAAA,UAEF;AACF,eAAK,SAAU,wBAAwB,QAAQ,MAAM;AAAA,QACvD,OAAO;AACL,eAAK,uBAAuB;AAAA,QAC9B;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,OAAO,OAAY;AArTpE,YAAAA,KAAA;AAuTQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACvF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,OAAO,OAAY;AAjUlF,YAAAA,KAAA;AAmUQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK;AAAA,YACT;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA,GAAG;AAAA,UACL;AAAA,QACF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,OAAO,OAAY;AAzVlE,YAAAA,KAAA;AA0VQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,gBAAM,KAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACvF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AAhcJ;AAicI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,YAAM,qBAAqB;AAAA,QACzB,YAAY,KAAK,MAAM;AAAA,QACvB,aAAa,KAAK,MAAM;AAAA,QACxB,GAAI,KAAK,qBAAqB,EAAE,mBAAmB,KAAK,mBAAmB,IAAI,CAAC;AAAA,MAClF;AACA,0BAAoB,IAAI,YAAY,OAAO,kBAAkB,CAAC,EAAE,KAAK,OAAO,EAAE,MAAM,MAAM;AAAA,IAC5F,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AACjC,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,MAAM,KAAK,KAAK,iBAAkB,QAAS;AAAA,IACzF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,sBACJ,qBACA,UACA,MACA,SACA,IACe;AA9enB;AA+eI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAED,UAAM,SAAS,MAAM,KAAK,KAAK,iBAAiB,WAAW;AAAA,MACzD,OAAO;AAAA,MACP,gBAAgB;AAAA,MAChB,YAAY;AAAA,QACV,CAAC,gCAAgC,GAAG;AAAA,QACpC,CAAC,6BAA6B,GAAG,QAAQ,SAAS;AAAA,MACpD;AAAA,IACF,CAAC;AACD,UAAM,OAAO,MAAM,IAAI;AACvB,UAAM,OAAO,MAAM;AAAA,EACrB;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AA/hB/B;AAgiBI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAY,qBAAqB;AACjF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
|
|
@@ -148,8 +148,8 @@ class AgentPlayout extends import_node_events.default {
|
|
|
148
148
|
handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
|
|
149
149
|
handle.synchronizer.pushAudio(frame);
|
|
150
150
|
await this.#audioSource.captureFrame(frame);
|
|
151
|
-
await this.#audioSource.waitForPlayout();
|
|
152
151
|
}
|
|
152
|
+
await this.#audioSource.waitForPlayout();
|
|
153
153
|
handle.synchronizer.close(false);
|
|
154
154
|
resolve2();
|
|
155
155
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n
|
|
1
|
+
{"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n }\n\n await this.#audioSource.waitForPlayout();\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAAyB;AACzB,iBAAoB;AAEpB,mBAA6D;AAC7D,0BAAgC;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,oBAAO;AAAA,EACpB,UAAU,IAAI,oBAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,mBAAAC,QAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,gCAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,oCAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAAA,QAC5C;AAEA,cAAM,KAAK,aAAa,eAAe;AAEvC,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","EventEmitter","resolve","_","onCancel"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,oBAAY,iBAAiB;IAC3B,eAAe,IAAA;IACf,eAAe,IAAA;CAChB;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAChD,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE,CAAC;AAEF,qBAAa,aAAa;;IAGxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC;IACjF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,qBAAqB,CAAC;IAEpC,cAAc,SAAK;IACnB,MAAM,SAAgB;IACtB,OAAO,SAAgB;gBAGrB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB;IAQrC,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;IAST,IAAI,IAAI,MAAM;CAGf;2CAE4D,aAAa,qBAAqB,CAAC;AAAhG,qBAAa,YAAa,SAAQ,iBAA+D;;gBAOnF,WAAW,EAAE,WAAW;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,YAAY,CAAC,GAAG,EAAE,MAAM,EAE3B;IAED,IAAI,CACF,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB,GAClC,aAAa;
|
|
1
|
+
{"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,oBAAY,iBAAiB;IAC3B,eAAe,IAAA;IACf,eAAe,IAAA;CAChB;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAChD,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE,CAAC;AAEF,qBAAa,aAAa;;IAGxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC;IACjF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,qBAAqB,CAAC;IAEpC,cAAc,SAAK;IACnB,MAAM,SAAgB;IACtB,OAAO,SAAgB;gBAGrB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB;IAQrC,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;IAST,IAAI,IAAI,MAAM;CAGf;2CAE4D,aAAa,qBAAqB,CAAC;AAAhG,qBAAa,YAAa,SAAQ,iBAA+D;;gBAOnF,WAAW,EAAE,WAAW;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,YAAY,CAAC,GAAG,EAAE,MAAM,EAE3B;IAED,IAAI,CACF,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB,GAClC,aAAa;IAyFV,KAAK;CAIZ"}
|
|
@@ -113,8 +113,8 @@ class AgentPlayout extends EventEmitter {
|
|
|
113
113
|
handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
|
|
114
114
|
handle.synchronizer.pushAudio(frame);
|
|
115
115
|
await this.#audioSource.captureFrame(frame);
|
|
116
|
-
await this.#audioSource.waitForPlayout();
|
|
117
116
|
}
|
|
117
|
+
await this.#audioSource.waitForPlayout();
|
|
118
118
|
handle.synchronizer.close(false);
|
|
119
119
|
resolve2();
|
|
120
120
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n
|
|
1
|
+
{"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n }\n\n await this.#audioSource.waitForPlayout();\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AAEpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAAA,QAC5C;AAEA,cAAM,KAAK,aAAa,eAAe;AAEvC,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
|
|
@@ -37,6 +37,7 @@ module.exports = __toCommonJS(pipeline_agent_exports);
|
|
|
37
37
|
var import_rtc_node = require("@livekit/rtc-node");
|
|
38
38
|
var import_node_crypto = require("node:crypto");
|
|
39
39
|
var import_node_events = __toESM(require("node:events"), 1);
|
|
40
|
+
var import_constants = require("../constants.cjs");
|
|
40
41
|
var import_llm = require("../llm/index.cjs");
|
|
41
42
|
var import_llm2 = require("../llm/index.cjs");
|
|
42
43
|
var import_log = require("../log.cjs");
|
|
@@ -338,27 +339,20 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
338
339
|
this.emit(1 /* USER_STOPPED_SPEAKING */);
|
|
339
340
|
this.#deferredValidation.onHumanEndOfSpeech(event);
|
|
340
341
|
});
|
|
341
|
-
this.#humanInput.on(import_human_input.HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {
|
|
342
|
+
this.#humanInput.on(import_human_input.HumanInputEvent.INTERIM_TRANSCRIPT, async (event) => {
|
|
342
343
|
if (!this.#transcriptionId) {
|
|
343
344
|
this.#transcriptionId = (0, import_node_crypto.randomUUID)();
|
|
344
345
|
}
|
|
345
346
|
this.#transcribedInterimText = event.alternatives[0].text;
|
|
346
|
-
this.#
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
final: true,
|
|
354
|
-
startTime: BigInt(0),
|
|
355
|
-
endTime: BigInt(0),
|
|
356
|
-
language: ""
|
|
357
|
-
}
|
|
358
|
-
]
|
|
359
|
-
});
|
|
347
|
+
await this.#publishTranscription(
|
|
348
|
+
this.#humanInput.participant.identity,
|
|
349
|
+
this.#humanInput.subscribedTrack.sid,
|
|
350
|
+
this.#transcribedInterimText,
|
|
351
|
+
false,
|
|
352
|
+
this.#transcriptionId
|
|
353
|
+
);
|
|
360
354
|
});
|
|
361
|
-
this.#humanInput.on(import_human_input.HumanInputEvent.FINAL_TRANSCRIPT, (event) => {
|
|
355
|
+
this.#humanInput.on(import_human_input.HumanInputEvent.FINAL_TRANSCRIPT, async (event) => {
|
|
362
356
|
const newTranscript = event.alternatives[0].text;
|
|
363
357
|
if (!newTranscript) return;
|
|
364
358
|
if (!this.#transcriptionId) {
|
|
@@ -366,20 +360,13 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
366
360
|
}
|
|
367
361
|
this.#lastFinalTranscriptTime = Date.now();
|
|
368
362
|
this.transcribedText += (this.transcribedText ? " " : "") + newTranscript;
|
|
369
|
-
this.#
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
final: true,
|
|
377
|
-
startTime: BigInt(0),
|
|
378
|
-
endTime: BigInt(0),
|
|
379
|
-
language: ""
|
|
380
|
-
}
|
|
381
|
-
]
|
|
382
|
-
});
|
|
363
|
+
await this.#publishTranscription(
|
|
364
|
+
this.#humanInput.participant.identity,
|
|
365
|
+
this.#humanInput.subscribedTrack.sid,
|
|
366
|
+
this.transcribedText,
|
|
367
|
+
true,
|
|
368
|
+
this.#transcriptionId
|
|
369
|
+
);
|
|
383
370
|
this.#transcriptionId = void 0;
|
|
384
371
|
if (this.#opts.preemptiveSynthesis && (!this.#playingSpeech || this.#playingSpeech.allowInterruptions)) {
|
|
385
372
|
this.#synthesizeAgentReply();
|
|
@@ -624,15 +611,44 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
624
611
|
}
|
|
625
612
|
handle.setDone();
|
|
626
613
|
}
|
|
614
|
+
async #publishTranscription(participantIdentity, trackSid, text, isFinal, id) {
|
|
615
|
+
this.#room.localParticipant.publishTranscription({
|
|
616
|
+
participantIdentity,
|
|
617
|
+
trackSid,
|
|
618
|
+
segments: [
|
|
619
|
+
{
|
|
620
|
+
text,
|
|
621
|
+
final: isFinal,
|
|
622
|
+
id,
|
|
623
|
+
startTime: BigInt(0),
|
|
624
|
+
endTime: BigInt(0),
|
|
625
|
+
language: ""
|
|
626
|
+
}
|
|
627
|
+
]
|
|
628
|
+
});
|
|
629
|
+
const stream = await this.#room.localParticipant.streamText({
|
|
630
|
+
senderIdentity: participantIdentity,
|
|
631
|
+
topic: import_constants.TOPIC_TRANSCRIPTION,
|
|
632
|
+
attributes: {
|
|
633
|
+
[import_constants.ATTRIBUTE_TRANSCRIPTION_TRACK_ID]: trackSid,
|
|
634
|
+
[import_constants.ATTRIBUTE_TRANSCRIPTION_FINAL]: isFinal.toString()
|
|
635
|
+
}
|
|
636
|
+
});
|
|
637
|
+
await stream.write(text);
|
|
638
|
+
await stream.close();
|
|
639
|
+
}
|
|
627
640
|
#synthesizeAgentSpeech(speechId, source) {
|
|
628
641
|
const synchronizer = new import_transcription.TextAudioSynchronizer(import_transcription.defaultTextSyncOptions);
|
|
629
|
-
synchronizer.on("textUpdated", (text) => {
|
|
642
|
+
synchronizer.on("textUpdated", async (text) => {
|
|
643
|
+
var _a;
|
|
630
644
|
this.#agentTranscribedText = text.text;
|
|
631
|
-
this.#
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
645
|
+
await this.#publishTranscription(
|
|
646
|
+
this.#room.localParticipant.identity,
|
|
647
|
+
((_a = this.#agentPublication) == null ? void 0 : _a.sid) ?? "",
|
|
648
|
+
text.text,
|
|
649
|
+
text.final,
|
|
650
|
+
text.id
|
|
651
|
+
);
|
|
636
652
|
});
|
|
637
653
|
if (!this.#agentOutput) {
|
|
638
654
|
throw new Error("agent output should be initialized when ready");
|