@livekit/agents 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +47 -3
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.ts +15 -2
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +46 -3
- package/dist/llm/llm.js.map +1 -1
- package/dist/metrics/base.cjs +44 -0
- package/dist/metrics/base.cjs.map +1 -0
- package/dist/metrics/base.d.ts +96 -0
- package/dist/metrics/base.d.ts.map +1 -0
- package/dist/metrics/base.js +20 -0
- package/dist/metrics/base.js.map +1 -0
- package/dist/metrics/index.cjs +35 -0
- package/dist/metrics/index.cjs.map +1 -0
- package/dist/metrics/index.d.ts +5 -0
- package/dist/metrics/index.d.ts.map +1 -0
- package/dist/metrics/index.js +9 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +53 -0
- package/dist/metrics/usage_collector.cjs.map +1 -0
- package/dist/metrics/usage_collector.d.ts +14 -0
- package/dist/metrics/usage_collector.d.ts.map +1 -0
- package/dist/metrics/usage_collector.js +29 -0
- package/dist/metrics/usage_collector.js.map +1 -0
- package/dist/metrics/utils.cjs +104 -0
- package/dist/metrics/utils.cjs.map +1 -0
- package/dist/metrics/utils.d.ts +10 -0
- package/dist/metrics/utils.d.ts.map +1 -0
- package/dist/metrics/utils.js +73 -0
- package/dist/metrics/utils.js.map +1 -0
- package/dist/multimodal/multimodal_agent.cjs +7 -13
- package/dist/multimodal/multimodal_agent.cjs.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts +1 -4
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +7 -13
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/index.cjs +2 -0
- package/dist/pipeline/index.cjs.map +1 -1
- package/dist/pipeline/index.d.ts +1 -1
- package/dist/pipeline/index.d.ts.map +1 -1
- package/dist/pipeline/index.js +3 -1
- package/dist/pipeline/index.js.map +1 -1
- package/dist/pipeline/pipeline_agent.cjs +166 -66
- package/dist/pipeline/pipeline_agent.cjs.map +1 -1
- package/dist/pipeline/pipeline_agent.d.ts +10 -4
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
- package/dist/pipeline/pipeline_agent.js +169 -69
- package/dist/pipeline/pipeline_agent.js.map +1 -1
- package/dist/pipeline/speech_handle.cjs +49 -1
- package/dist/pipeline/speech_handle.cjs.map +1 -1
- package/dist/pipeline/speech_handle.d.ts +12 -2
- package/dist/pipeline/speech_handle.d.ts.map +1 -1
- package/dist/pipeline/speech_handle.js +50 -2
- package/dist/pipeline/speech_handle.js.map +1 -1
- package/dist/stt/index.cjs.map +1 -1
- package/dist/stt/index.d.ts +1 -1
- package/dist/stt/index.d.ts.map +1 -1
- package/dist/stt/index.js.map +1 -1
- package/dist/stt/stream_adapter.cjs +15 -5
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.ts +4 -1
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +15 -5
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/stt/stt.cjs +46 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.ts +25 -3
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +46 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/index.cjs +4 -2
- package/dist/tts/index.cjs.map +1 -1
- package/dist/tts/index.d.ts +1 -1
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +3 -1
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +14 -3
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.ts +3 -0
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +15 -4
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +109 -6
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.ts +24 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +107 -5
- package/dist/tts/tts.js.map +1 -1
- package/dist/vad.cjs +43 -2
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.ts +21 -4
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +43 -2
- package/dist/vad.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +2 -1
- package/src/llm/index.ts +2 -0
- package/src/llm/llm.ts +55 -3
- package/src/metrics/base.ts +127 -0
- package/src/metrics/index.ts +20 -0
- package/src/metrics/usage_collector.ts +40 -0
- package/src/metrics/utils.ts +100 -0
- package/src/multimodal/multimodal_agent.ts +12 -17
- package/src/pipeline/index.ts +1 -1
- package/src/pipeline/pipeline_agent.ts +206 -87
- package/src/pipeline/speech_handle.ts +67 -2
- package/src/stt/index.ts +2 -0
- package/src/stt/stream_adapter.ts +17 -5
- package/src/stt/stt.ts +67 -3
- package/src/tts/index.ts +2 -0
- package/src/tts/stream_adapter.ts +17 -4
- package/src/tts/tts.ts +127 -4
- package/src/vad.ts +61 -4
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport { BasicTranscriptionForwarder } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: { promise: Promise<void>; cancel: () => void } | null = null;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.transcriptionFwd.text;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n const trFwd = new BasicTranscriptionForwarder(\n this.room!,\n this.room!.localParticipant!.identity,\n this.#getLocalTrackSid()!,\n message.responseId,\n );\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n trFwd,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n if (this.readMicroTask) {\n this.readMicroTask.cancel();\n }\n\n let cancel: () => void;\n this.readMicroTask = {\n promise: new Promise<void>((resolve, reject) => {\n cancel = () => {\n reject(new Error('Task cancelled'));\n };\n readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))\n .then(resolve)\n .catch(reject);\n }),\n cancel: () => cancel(),\n };\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant?.identity);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes[AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAWA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,YAAY,SAAS;AACrB,SAAS,WAAW;AACpB,SAAS,mCAAmC;AAC5C,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAM3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAuE;AAAA,EAEvE,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAIG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EAExC,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAxIlD;AAyIM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAQ;AAAA,MAC5C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,iBAAiB;AAChD,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAQ;AAAA,QAC5C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAQ;AAC1C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AA3OnE,YAAAC;AA6OQ,cAAM,QAAQ,IAAI;AAAA,UAChB,KAAK;AAAA,UACL,KAAK,KAAM,iBAAkB;AAAA,UAC7B,KAAK,kBAAkB;AAAA,UACvB,QAAQ;AAAA,QACV;AAEA,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AA/P9D,YAAAA,KAAA;AAiQQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AA3Q5E,YAAAA,KAAA;AA6QQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AA7R5D,YAAAA,KAAA;AA8RQ,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AA/XJ;AAgYI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,QAAI,KAAK,eAAe;AACtB,WAAK,cAAc,OAAO;AAAA,IAC5B;AAEA,QAAI;AACJ,SAAK,gBAAgB;AAAA,MACnB,SAAS,IAAI,QAAc,CAAC,SAAS,WAAW;AAC9C,iBAAS,MAAM;AACb,iBAAO,IAAI,MAAM,gBAAgB,CAAC;AAAA,QACpC;AACA,4BAAoB,IAAI,YAAY,OAAO,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW,CAAC,EACtF,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,MACjB,CAAC;AAAA,MACD,QAAQ,MAAM,OAAO;AAAA,IACvB;AAAA,EACF;AAAA,EAEA,oBAAmC;AAxarC;AAyaI,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,OAAM,UAAK,KAAK,qBAAV,mBAA4B,QAAQ;AAAA,IACxF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AArbV;AAsbI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AA3d/B;AA4dI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAW,qBAAqB;AAChF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/multimodal/multimodal_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n LocalTrackPublication,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrack,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport {\n AudioSource,\n AudioStream,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport { AudioByteStream } from '../audio.js';\nimport * as llm from '../llm/index.js';\nimport { log } from '../log.js';\nimport type { MultimodalLLMMetrics } from '../metrics/base.js';\nimport { BasicTranscriptionForwarder } from '../transcription.js';\nimport { findMicroTrackId } from '../utils.js';\nimport { AgentPlayout, type PlayoutHandle } from './agent_playout.js';\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeSession extends EventEmitter {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract conversation: any; // openai.realtime.Conversation\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract inputAudioBuffer: any; // openai.realtime.InputAudioBuffer\n abstract fncCtx: llm.FunctionContext | undefined;\n}\n\n/**\n * @internal\n * @beta\n */\nexport abstract class RealtimeModel {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n abstract session(options: any): RealtimeSession; // openai.realtime.ModelOptions\n abstract close(): Promise<void>;\n abstract sampleRate: number;\n abstract numChannels: number;\n abstract inFrameSize: number;\n abstract outFrameSize: number;\n}\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\n/** @beta */\nexport class MultimodalAgent extends EventEmitter {\n model: RealtimeModel;\n room: Room | null = null;\n linkedParticipant: RemoteParticipant | null = null;\n subscribedTrack: RemoteAudioTrack | null = null;\n readMicroTask: Promise<void> | null = null;\n\n constructor({\n model,\n chatCtx,\n fncCtx,\n }: {\n model: RealtimeModel;\n chatCtx?: llm.ChatContext;\n fncCtx?: llm.FunctionContext;\n }) {\n super();\n this.model = model;\n this.#chatCtx = chatCtx;\n this.#fncCtx = fncCtx;\n }\n\n #participant: RemoteParticipant | string | null = null;\n #agentPublication: LocalTrackPublication | null = null;\n #localTrackSid: string | null = null;\n #localSource: AudioSource | null = null;\n #agentPlayout: AgentPlayout | null = null;\n #playingHandle: PlayoutHandle | undefined = undefined;\n #logger = log();\n #session: RealtimeSession | null = null;\n #fncCtx: llm.FunctionContext | undefined = undefined;\n #chatCtx: llm.ChatContext | undefined = undefined;\n\n #_started: boolean = false;\n #_pendingFunctionCalls: Set<string> = new Set();\n #_speaking: boolean = false;\n\n get fncCtx(): llm.FunctionContext | undefined {\n return this.#fncCtx;\n }\n\n set fncCtx(ctx: llm.FunctionContext | undefined) {\n this.#fncCtx = ctx;\n if (this.#session) {\n this.#session.fncCtx = ctx;\n }\n }\n\n get #pendingFunctionCalls(): Set<string> {\n return this.#_pendingFunctionCalls;\n }\n\n set #pendingFunctionCalls(calls: Set<string>) {\n this.#_pendingFunctionCalls = calls;\n this.#updateState();\n }\n\n get #speaking(): boolean {\n return this.#_speaking;\n }\n\n set #speaking(isSpeaking: boolean) {\n this.#_speaking = isSpeaking;\n this.#updateState();\n }\n\n get #started(): boolean {\n return this.#_started;\n }\n\n set #started(started: boolean) {\n this.#_started = started;\n this.#updateState();\n }\n\n start(\n room: Room,\n participant: RemoteParticipant | string | null = null,\n ): Promise<RealtimeSession> {\n return new Promise(async (resolve, reject) => {\n if (this.#started) {\n reject(new Error('MultimodalAgent already started'));\n }\n this.#updateState();\n\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.linkedParticipant) {\n return;\n }\n this.#linkParticipant(participant.identity);\n });\n room.on(\n RoomEvent.TrackPublished,\n (trackPublication: RemoteTrackPublication, participant: RemoteParticipant) => {\n if (\n this.linkedParticipant &&\n participant.identity === this.linkedParticipant.identity &&\n trackPublication.source === TrackSource.SOURCE_MICROPHONE &&\n !trackPublication.subscribed\n ) {\n trackPublication.setSubscribed(true);\n }\n },\n );\n room.on(RoomEvent.TrackSubscribed, this.#handleTrackSubscription.bind(this));\n\n this.room = room;\n this.#participant = participant;\n\n this.#localSource = new AudioSource(this.model.sampleRate, this.model.numChannels);\n this.#agentPlayout = new AgentPlayout(\n this.#localSource,\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n this.model.outFrameSize,\n );\n const onPlayoutStarted = () => {\n this.emit('agent_started_speaking');\n this.#speaking = true;\n };\n\n const onPlayoutStopped = (interrupted: boolean) => {\n this.emit('agent_stopped_speaking');\n this.#speaking = false;\n if (this.#playingHandle) {\n let text = this.#playingHandle.transcriptionFwd.text;\n if (interrupted) {\n text += '…';\n }\n const msg = llm.ChatMessage.create({\n role: llm.ChatRole.ASSISTANT,\n text,\n });\n\n if (interrupted) {\n this.emit('agent_speech_interrupted', msg);\n } else {\n this.emit('agent_speech_committed', msg);\n }\n this.#logger.child({ transcription: text, interrupted }).debug('committed agent speech');\n }\n };\n\n this.#agentPlayout.on('playout_started', onPlayoutStarted);\n this.#agentPlayout.on('playout_stopped', onPlayoutStopped);\n\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.#localSource);\n const options = new TrackPublishOptions();\n options.source = TrackSource.SOURCE_MICROPHONE;\n this.#agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;\n if (!this.#agentPublication) {\n this.#logger.error('Failed to publish track');\n reject(new Error('Failed to publish track'));\n return;\n }\n\n await this.#agentPublication.waitForSubscription();\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity);\n }\n } else {\n // No participant specified, try to find the first participant in the room\n for (const participant of room.remoteParticipants.values()) {\n this.#linkParticipant(participant.identity);\n break;\n }\n }\n\n this.#session = this.model.session({ fncCtx: this.#fncCtx, chatCtx: this.#chatCtx });\n this.#started = true;\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('response_content_added', (message: any) => {\n // openai.realtime.RealtimeContent\n const trFwd = new BasicTranscriptionForwarder(\n this.room!,\n this.room!.localParticipant!.identity,\n this.#getLocalTrackSid()!,\n message.responseId,\n );\n\n const handle = this.#agentPlayout?.play(\n message.itemId,\n message.contentIndex,\n trFwd,\n message.textStream,\n message.audioStream,\n );\n this.#playingHandle = handle;\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_committed', (ev: any) => {\n // openai.realtime.InputSpeechCommittedEvent\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('input_speech_transcription_completed', (ev: any) => {\n // openai.realtime.InputSpeechTranscriptionCompletedEvent\n const transcription = ev.transcript;\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, transcription, true, ev.itemId);\n } else {\n this.#logger.error('Participant or track not set');\n }\n const userMsg = llm.ChatMessage.create({\n role: llm.ChatRole.USER,\n text: transcription,\n });\n this.emit('user_speech_committed', userMsg);\n this.#logger.child({ transcription }).debug('committed user speech');\n });\n\n this.#session.on('input_speech_started', (ev: any) => {\n this.emit('user_started_speaking');\n if (this.#playingHandle && !this.#playingHandle.done) {\n this.#playingHandle.interrupt();\n\n this.#session!.conversation.item.truncate(\n this.#playingHandle.itemId,\n this.#playingHandle.contentIndex,\n Math.floor((this.#playingHandle.audioSamples / 24000) * 1000),\n );\n\n this.#playingHandle = undefined;\n }\n\n const participantIdentity = this.linkedParticipant?.identity;\n const trackSid = this.subscribedTrack?.sid;\n if (participantIdentity && trackSid) {\n this.#publishTranscription(participantIdentity, trackSid, '…', false, ev.itemId);\n }\n });\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#session.on('input_speech_stopped', (ev: any) => {\n this.emit('user_stopped_speaking');\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_started', (ev: any) => {\n this.#pendingFunctionCalls.add(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_completed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n this.#session.on('function_call_failed', (ev: any) => {\n this.#pendingFunctionCalls.delete(ev.callId);\n this.#updateState();\n });\n\n this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {\n this.emit('metrics_collected', metrics);\n });\n\n resolve(this.#session);\n });\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.linkedParticipant = this.room.remoteParticipants.get(participantIdentity) || null;\n if (!this.linkedParticipant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n if (this.linkedParticipant.trackPublications.size > 0) {\n this.#subscribeToMicrophone();\n }\n\n // also check if already subscribed\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE && publication.track) {\n this.#handleTrackSubscription(publication.track, publication, this.linkedParticipant);\n break;\n }\n }\n }\n\n #subscribeToMicrophone(): void {\n if (!this.linkedParticipant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.linkedParticipant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n }\n\n #handleTrackSubscription(\n track: RemoteTrack,\n publication: RemoteTrackPublication,\n participant: RemoteParticipant,\n ) {\n if (\n publication.source !== TrackSource.SOURCE_MICROPHONE ||\n participant.identity !== this.linkedParticipant?.identity\n ) {\n return;\n }\n const readAudioStreamTask = async (audioStream: AudioStream) => {\n const bstream = new AudioByteStream(\n this.model.sampleRate,\n this.model.numChannels,\n this.model.inFrameSize,\n );\n\n for await (const frame of audioStream) {\n const audioData = frame.data;\n for (const frame of bstream.write(audioData.buffer)) {\n this.#session!.inputAudioBuffer.append(frame);\n }\n }\n };\n this.subscribedTrack = track;\n\n this.readMicroTask = new Promise<void>((resolve, reject) => {\n readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))\n .then(resolve)\n .catch(reject);\n });\n }\n\n #getLocalTrackSid(): string | null {\n if (!this.#localTrackSid && this.room && this.room.localParticipant) {\n this.#localTrackSid = findMicroTrackId(this.room, this.room.localParticipant?.identity);\n }\n return this.#localTrackSid;\n }\n\n #publishTranscription(\n participantIdentity: string,\n trackSid: string,\n text: string,\n isFinal: boolean,\n id: string,\n ): void {\n this.#logger.debug(\n `Publishing transcription ${participantIdentity} ${trackSid} ${text} ${isFinal} ${id}`,\n );\n if (!this.room?.localParticipant) {\n this.#logger.error('Room or local participant not set');\n return;\n }\n\n this.room.localParticipant.publishTranscription({\n participantIdentity,\n trackSid,\n segments: [\n {\n text,\n final: isFinal,\n id,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n #updateState() {\n let newState: AgentState = 'initializing';\n if (this.#pendingFunctionCalls.size > 0) {\n newState = 'thinking';\n } else if (this.#speaking) {\n newState = 'speaking';\n } else if (this.#started) {\n newState = 'listening';\n }\n\n this.#setState(newState);\n }\n\n #setState(state: AgentState) {\n if (this.room?.isConnected && this.room.localParticipant) {\n const currentState = this.room.localParticipant.attributes[AGENT_STATE_ATTRIBUTE];\n if (currentState !== state) {\n this.room.localParticipant.setAttributes({\n [AGENT_STATE_ATTRIBUTE]: state,\n });\n this.#logger.debug(`${AGENT_STATE_ATTRIBUTE}: ${currentState} ->${state}`);\n }\n }\n }\n}\n"],"mappings":"AAWA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,YAAY,SAAS;AACrB,SAAS,WAAW;AAEpB,SAAS,mCAAmC;AAC5C,SAAS,wBAAwB;AACjC,SAAS,oBAAwC;AAM1C,MAAe,wBAAwB,aAAa;AAM3D;AAMO,MAAe,cAAc;AAQpC;AAGO,MAAM,wBAAwB;AAG9B,MAAM,wBAAwB,aAAa;AAAA,EAChD;AAAA,EACA,OAAoB;AAAA,EACpB,oBAA8C;AAAA,EAC9C,kBAA2C;AAAA,EAC3C,gBAAsC;AAAA,EAEtC,YAAY;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAIG;AACD,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,WAAW;AAChB,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,eAAkD;AAAA,EAClD,oBAAkD;AAAA,EAClD,iBAAgC;AAAA,EAChC,eAAmC;AAAA,EACnC,gBAAqC;AAAA,EACrC,iBAA4C;AAAA,EAC5C,UAAU,IAAI;AAAA,EACd,WAAmC;AAAA,EACnC,UAA2C;AAAA,EAC3C,WAAwC;AAAA,EAExC,YAAqB;AAAA,EACrB,yBAAsC,oBAAI,IAAI;AAAA,EAC9C,aAAsB;AAAA,EAEtB,IAAI,SAA0C;AAC5C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,OAAO,KAAsC;AAC/C,SAAK,UAAU;AACf,QAAI,KAAK,UAAU;AACjB,WAAK,SAAS,SAAS;AAAA,IACzB;AAAA,EACF;AAAA,EAEA,IAAI,wBAAqC;AACvC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAAsB,OAAoB;AAC5C,SAAK,yBAAyB;AAC9B,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAU,YAAqB;AACjC,SAAK,aAAa;AAClB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,SAAkB;AAC7B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,MACE,MACA,cAAiD,MACvB;AAC1B,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAzIlD;AA0IM,UAAI,KAAK,UAAU;AACjB,eAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,MACrD;AACA,WAAK,aAAa;AAElB,WAAK,GAAG,UAAU,sBAAsB,CAACA,iBAAmC;AAE1E,YAAI,KAAK,mBAAmB;AAC1B;AAAA,QACF;AACA,aAAK,iBAAiBA,aAAY,QAAQ;AAAA,MAC5C,CAAC;AACD,WAAK;AAAA,QACH,UAAU;AAAA,QACV,CAAC,kBAA0CA,iBAAmC;AAC5E,cACE,KAAK,qBACLA,aAAY,aAAa,KAAK,kBAAkB,YAChD,iBAAiB,WAAW,YAAY,qBACxC,CAAC,iBAAiB,YAClB;AACA,6BAAiB,cAAc,IAAI;AAAA,UACrC;AAAA,QACF;AAAA,MACF;AACA,WAAK,GAAG,UAAU,iBAAiB,KAAK,yBAAyB,KAAK,IAAI,CAAC;AAE3E,WAAK,OAAO;AACZ,WAAK,eAAe;AAEpB,WAAK,eAAe,IAAI,YAAY,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW;AACjF,WAAK,gBAAgB,IAAI;AAAA,QACvB,KAAK;AAAA,QACL,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AACA,YAAM,mBAAmB,MAAM;AAC7B,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AAAA,MACnB;AAEA,YAAM,mBAAmB,CAAC,gBAAyB;AACjD,aAAK,KAAK,wBAAwB;AAClC,aAAK,YAAY;AACjB,YAAI,KAAK,gBAAgB;AACvB,cAAI,OAAO,KAAK,eAAe,iBAAiB;AAChD,cAAI,aAAa;AACf,oBAAQ;AAAA,UACV;AACA,gBAAM,MAAM,IAAI,YAAY,OAAO;AAAA,YACjC,MAAM,IAAI,SAAS;AAAA,YACnB;AAAA,UACF,CAAC;AAED,cAAI,aAAa;AACf,iBAAK,KAAK,4BAA4B,GAAG;AAAA,UAC3C,OAAO;AACL,iBAAK,KAAK,0BAA0B,GAAG;AAAA,UACzC;AACA,eAAK,QAAQ,MAAM,EAAE,eAAe,MAAM,YAAY,CAAC,EAAE,MAAM,wBAAwB;AAAA,QACzF;AAAA,MACF;AAEA,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AACzD,WAAK,cAAc,GAAG,mBAAmB,gBAAgB;AAEzD,YAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,KAAK,YAAY;AACnF,YAAM,UAAU,IAAI,oBAAoB;AACxC,cAAQ,SAAS,YAAY;AAC7B,WAAK,oBAAqB,QAAM,UAAK,qBAAL,mBAAuB,aAAa,OAAO,aAAa;AACxF,UAAI,CAAC,KAAK,mBAAmB;AAC3B,aAAK,QAAQ,MAAM,yBAAyB;AAC5C,eAAO,IAAI,MAAM,yBAAyB,CAAC;AAC3C;AAAA,MACF;AAEA,YAAM,KAAK,kBAAkB,oBAAoB;AAEjD,UAAI,aAAa;AACf,YAAI,OAAO,gBAAgB,UAAU;AACnC,eAAK,iBAAiB,WAAW;AAAA,QACnC,OAAO;AACL,eAAK,iBAAiB,YAAY,QAAQ;AAAA,QAC5C;AAAA,MACF,OAAO;AAEL,mBAAWA,gBAAe,KAAK,mBAAmB,OAAO,GAAG;AAC1D,eAAK,iBAAiBA,aAAY,QAAQ;AAC1C;AAAA,QACF;AAAA,MACF;AAEA,WAAK,WAAW,KAAK,MAAM,QAAQ,EAAE,QAAQ,KAAK,SAAS,SAAS,KAAK,SAAS,CAAC;AACnF,WAAK,WAAW;AAGhB,WAAK,SAAS,GAAG,0BAA0B,CAAC,YAAiB;AA5OnE,YAAAC;AA8OQ,cAAM,QAAQ,IAAI;AAAA,UAChB,KAAK;AAAA,UACL,KAAK,KAAM,iBAAkB;AAAA,UAC7B,KAAK,kBAAkB;AAAA,UACvB,QAAQ;AAAA,QACV;AAEA,cAAM,UAASA,MAAA,KAAK,kBAAL,gBAAAA,IAAoB;AAAA,UACjC,QAAQ;AAAA,UACR,QAAQ;AAAA,UACR;AAAA,UACA,QAAQ;AAAA,UACR,QAAQ;AAAA;AAEV,aAAK,iBAAiB;AAAA,MACxB,CAAC;AAGD,WAAK,SAAS,GAAG,0BAA0B,CAAC,OAAY;AAhQ9D,YAAAA,KAAA;AAkQQ,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wCAAwC,CAAC,OAAY;AA5Q5E,YAAAA,KAAA;AA8QQ,cAAM,gBAAgB,GAAG;AACzB,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,eAAe,MAAM,GAAG,MAAM;AAAA,QAC1F,OAAO;AACL,eAAK,QAAQ,MAAM,8BAA8B;AAAA,QACnD;AACA,cAAM,UAAU,IAAI,YAAY,OAAO;AAAA,UACrC,MAAM,IAAI,SAAS;AAAA,UACnB,MAAM;AAAA,QACR,CAAC;AACD,aAAK,KAAK,yBAAyB,OAAO;AAC1C,aAAK,QAAQ,MAAM,EAAE,cAAc,CAAC,EAAE,MAAM,uBAAuB;AAAA,MACrE,CAAC;AAED,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AA9R5D,YAAAA,KAAA;AA+RQ,aAAK,KAAK,uBAAuB;AACjC,YAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,MAAM;AACpD,eAAK,eAAe,UAAU;AAE9B,eAAK,SAAU,aAAa,KAAK;AAAA,YAC/B,KAAK,eAAe;AAAA,YACpB,KAAK,eAAe;AAAA,YACpB,KAAK,MAAO,KAAK,eAAe,eAAe,OAAS,GAAI;AAAA,UAC9D;AAEA,eAAK,iBAAiB;AAAA,QACxB;AAEA,cAAM,uBAAsBA,MAAA,KAAK,sBAAL,gBAAAA,IAAwB;AACpD,cAAM,YAAW,UAAK,oBAAL,mBAAsB;AACvC,YAAI,uBAAuB,UAAU;AACnC,eAAK,sBAAsB,qBAAqB,UAAU,UAAK,OAAO,GAAG,MAAM;AAAA,QACjF;AAAA,MACF,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,KAAK,uBAAuB;AAAA,MACnC,CAAC;AAGD,WAAK,SAAS,GAAG,yBAAyB,CAAC,OAAY;AACrD,aAAK,sBAAsB,IAAI,GAAG,MAAM;AACxC,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,2BAA2B,CAAC,OAAY;AACvD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAGD,WAAK,SAAS,GAAG,wBAAwB,CAAC,OAAY;AACpD,aAAK,sBAAsB,OAAO,GAAG,MAAM;AAC3C,aAAK,aAAa;AAAA,MACpB,CAAC;AAED,WAAK,SAAS,GAAG,qBAAqB,CAAC,YAAkC;AACvE,aAAK,KAAK,qBAAqB,OAAO;AAAA,MACxC,CAAC;AAED,cAAQ,KAAK,QAAQ;AAAA,IACvB,CAAC;AAAA,EACH;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,MAAM;AACd,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,oBAAoB,KAAK,KAAK,mBAAmB,IAAI,mBAAmB,KAAK;AAClF,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AACrD,WAAK,uBAAuB;AAAA,IAC9B;AAGA,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,qBAAqB,YAAY,OAAO;AAC7E,aAAK,yBAAyB,YAAY,OAAO,aAAa,KAAK,iBAAiB;AACpF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,mBAAmB;AAC3B,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,kBAAkB,kBAAkB,OAAO,GAAG;AAC3E,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAAA,EACF;AAAA,EAEA,yBACE,OACA,aACA,aACA;AArYJ;AAsYI,QACE,YAAY,WAAW,YAAY,qBACnC,YAAY,eAAa,UAAK,sBAAL,mBAAwB,WACjD;AACA;AAAA,IACF;AACA,UAAM,sBAAsB,OAAO,gBAA6B;AAC9D,YAAM,UAAU,IAAI;AAAA,QAClB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,MACb;AAEA,uBAAiB,SAAS,aAAa;AACrC,cAAM,YAAY,MAAM;AACxB,mBAAWC,UAAS,QAAQ,MAAM,UAAU,MAAM,GAAG;AACnD,eAAK,SAAU,iBAAiB,OAAOA,MAAK;AAAA,QAC9C;AAAA,MACF;AAAA,IACF;AACA,SAAK,kBAAkB;AAEvB,SAAK,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC1D,0BAAoB,IAAI,YAAY,OAAO,KAAK,MAAM,YAAY,KAAK,MAAM,WAAW,CAAC,EACtF,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,IACjB,CAAC;AAAA,EACH;AAAA,EAEA,oBAAmC;AAnarC;AAoaI,QAAI,CAAC,KAAK,kBAAkB,KAAK,QAAQ,KAAK,KAAK,kBAAkB;AACnE,WAAK,iBAAiB,iBAAiB,KAAK,OAAM,UAAK,KAAK,qBAAV,mBAA4B,QAAQ;AAAA,IACxF;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,sBACE,qBACA,UACA,MACA,SACA,IACM;AAhbV;AAibI,SAAK,QAAQ;AAAA,MACX,4BAA4B,mBAAmB,IAAI,QAAQ,IAAI,IAAI,IAAI,OAAO,IAAI,EAAE;AAAA,IACtF;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,mBAAkB;AAChC,WAAK,QAAQ,MAAM,mCAAmC;AACtD;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB,qBAAqB;AAAA,MAC9C;AAAA,MACA;AAAA,MACA,UAAU;AAAA,QACR;AAAA,UACE;AAAA,UACA,OAAO;AAAA,UACP;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,eAAe;AACb,QAAI,WAAuB;AAC3B,QAAI,KAAK,sBAAsB,OAAO,GAAG;AACvC,iBAAW;AAAA,IACb,WAAW,KAAK,WAAW;AACzB,iBAAW;AAAA,IACb,WAAW,KAAK,UAAU;AACxB,iBAAW;AAAA,IACb;AAEA,SAAK,UAAU,QAAQ;AAAA,EACzB;AAAA,EAEA,UAAU,OAAmB;AAtd/B;AAudI,UAAI,UAAK,SAAL,mBAAW,gBAAe,KAAK,KAAK,kBAAkB;AACxD,YAAM,eAAe,KAAK,KAAK,iBAAiB,WAAW,qBAAqB;AAChF,UAAI,iBAAiB,OAAO;AAC1B,aAAK,KAAK,iBAAiB,cAAc;AAAA,UACvC,CAAC,qBAAqB,GAAG;AAAA,QAC3B,CAAC;AACD,aAAK,QAAQ,MAAM,GAAG,qBAAqB,KAAK,YAAY,MAAM,KAAK,EAAE;AAAA,MAC3E;AAAA,IACF;AAAA,EACF;AACF;","names":["participant","_a","frame"]}
|
package/dist/pipeline/index.cjs
CHANGED
|
@@ -18,6 +18,7 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
18
18
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
19
|
var pipeline_exports = {};
|
|
20
20
|
__export(pipeline_exports, {
|
|
21
|
+
AgentCallContext: () => import_pipeline_agent.AgentCallContext,
|
|
21
22
|
VPAEvent: () => import_pipeline_agent.VPAEvent,
|
|
22
23
|
VoicePipelineAgent: () => import_pipeline_agent.VoicePipelineAgent
|
|
23
24
|
});
|
|
@@ -25,6 +26,7 @@ module.exports = __toCommonJS(pipeline_exports);
|
|
|
25
26
|
var import_pipeline_agent = require("./pipeline_agent.cjs");
|
|
26
27
|
// Annotate the CommonJS export names for ESM import in node:
|
|
27
28
|
0 && (module.exports = {
|
|
29
|
+
AgentCallContext,
|
|
28
30
|
VPAEvent,
|
|
29
31
|
VoicePipelineAgent
|
|
30
32
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/pipeline/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type AgentState,\n type BeforeTTSCallback,\n type BeforeLLMCallback,\n type VPACallbacks,\n type
|
|
1
|
+
{"version":3,"sources":["../../src/pipeline/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type AgentState,\n type BeforeTTSCallback,\n type BeforeLLMCallback,\n type VPACallbacks,\n type AgentTranscriptionOptions,\n type VPAOptions,\n VPAEvent,\n VoicePipelineAgent,\n AgentCallContext,\n} from './pipeline_agent.js';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,4BAUO;","names":[]}
|
package/dist/pipeline/index.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { type AgentState, type BeforeTTSCallback, type BeforeLLMCallback, type VPACallbacks, type
|
|
1
|
+
export { type AgentState, type BeforeTTSCallback, type BeforeLLMCallback, type VPACallbacks, type AgentTranscriptionOptions, type VPAOptions, VPAEvent, VoicePipelineAgent, AgentCallContext, } from './pipeline_agent.js';
|
|
2
2
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,KAAK,UAAU,EACf,KAAK,iBAAiB,EACtB,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,KAAK,UAAU,EACf,KAAK,iBAAiB,EACtB,KAAK,iBAAiB,EACtB,KAAK,YAAY,EACjB,KAAK,yBAAyB,EAC9B,KAAK,UAAU,EACf,QAAQ,EACR,kBAAkB,EAClB,gBAAgB,GACjB,MAAM,qBAAqB,CAAC"}
|
package/dist/pipeline/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/pipeline/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type AgentState,\n type BeforeTTSCallback,\n type BeforeLLMCallback,\n type VPACallbacks,\n type
|
|
1
|
+
{"version":3,"sources":["../../src/pipeline/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\nexport {\n type AgentState,\n type BeforeTTSCallback,\n type BeforeLLMCallback,\n type VPACallbacks,\n type AgentTranscriptionOptions,\n type VPAOptions,\n VPAEvent,\n VoicePipelineAgent,\n AgentCallContext,\n} from './pipeline_agent.js';\n"],"mappings":"AAIA;AAAA,EAOE;AAAA,EACA;AAAA,EACA;AAAA,OACK;","names":[]}
|
|
@@ -43,11 +43,13 @@ var import_stt = require("../stt/index.cjs");
|
|
|
43
43
|
var import_basic = require("../tokenize/basic/index.cjs");
|
|
44
44
|
var import_tts = require("../tts/index.cjs");
|
|
45
45
|
var import_utils = require("../utils.cjs");
|
|
46
|
+
var import_vad = require("../vad.cjs");
|
|
46
47
|
var import_agent_output = require("./agent_output.cjs");
|
|
47
48
|
var import_agent_playout = require("./agent_playout.cjs");
|
|
48
49
|
var import_human_input = require("./human_input.cjs");
|
|
49
50
|
var import_speech_handle = require("./speech_handle.cjs");
|
|
50
51
|
const AGENT_STATE_ATTRIBUTE = "lk.agent.state";
|
|
52
|
+
let speechData;
|
|
51
53
|
var VPAEvent = /* @__PURE__ */ ((VPAEvent2) => {
|
|
52
54
|
VPAEvent2[VPAEvent2["USER_STARTED_SPEAKING"] = 0] = "USER_STARTED_SPEAKING";
|
|
53
55
|
VPAEvent2[VPAEvent2["USER_STOPPED_SPEAKING"] = 1] = "USER_STOPPED_SPEAKING";
|
|
@@ -58,12 +60,14 @@ var VPAEvent = /* @__PURE__ */ ((VPAEvent2) => {
|
|
|
58
60
|
VPAEvent2[VPAEvent2["AGENT_SPEECH_INTERRUPTED"] = 6] = "AGENT_SPEECH_INTERRUPTED";
|
|
59
61
|
VPAEvent2[VPAEvent2["FUNCTION_CALLS_COLLECTED"] = 7] = "FUNCTION_CALLS_COLLECTED";
|
|
60
62
|
VPAEvent2[VPAEvent2["FUNCTION_CALLS_FINISHED"] = 8] = "FUNCTION_CALLS_FINISHED";
|
|
63
|
+
VPAEvent2[VPAEvent2["METRICS_COLLECTED"] = 9] = "METRICS_COLLECTED";
|
|
61
64
|
return VPAEvent2;
|
|
62
65
|
})(VPAEvent || {});
|
|
63
66
|
class AgentCallContext {
|
|
64
67
|
#agent;
|
|
65
68
|
#llmStream;
|
|
66
69
|
#metadata = /* @__PURE__ */ new Map();
|
|
70
|
+
#extraChatMessages = [];
|
|
67
71
|
static #current;
|
|
68
72
|
constructor(agent, llmStream) {
|
|
69
73
|
this.#agent = agent;
|
|
@@ -85,6 +89,12 @@ class AgentCallContext {
|
|
|
85
89
|
get llmStream() {
|
|
86
90
|
return this.#llmStream;
|
|
87
91
|
}
|
|
92
|
+
get extraChatMessages() {
|
|
93
|
+
return this.#extraChatMessages;
|
|
94
|
+
}
|
|
95
|
+
addExtraChatMessage(message) {
|
|
96
|
+
this.#extraChatMessages.push(message);
|
|
97
|
+
}
|
|
88
98
|
}
|
|
89
99
|
const defaultBeforeLLMCallback = (agent, chatCtx) => {
|
|
90
100
|
return agent.llm.chat({ chatCtx, fncCtx: agent.fncCtx });
|
|
@@ -106,7 +116,7 @@ const defaultVPAOptions = {
|
|
|
106
116
|
interruptSpeechDuration: 50,
|
|
107
117
|
interruptMinWords: 0,
|
|
108
118
|
minEndpointingDelay: 500,
|
|
109
|
-
|
|
119
|
+
maxNestedFncCalls: 1,
|
|
110
120
|
preemptiveSynthesis: false,
|
|
111
121
|
beforeLLMCallback: defaultBeforeLLMCallback,
|
|
112
122
|
beforeTTSCallback: defaultBeforeTTSCallback,
|
|
@@ -131,7 +141,6 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
131
141
|
#transcribedInterimText = "";
|
|
132
142
|
#speechQueueOpen = new import_utils.Future();
|
|
133
143
|
#speechQueue = new import_utils.AsyncIterableQueue();
|
|
134
|
-
#lastEndOfSpeechTime;
|
|
135
144
|
#updateStateTask;
|
|
136
145
|
#started = false;
|
|
137
146
|
#room;
|
|
@@ -139,6 +148,8 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
139
148
|
#deferredValidation;
|
|
140
149
|
#logger = (0, import_log.log)();
|
|
141
150
|
#agentPublication;
|
|
151
|
+
#lastFinalTranscriptTime;
|
|
152
|
+
#lastSpeechTime;
|
|
142
153
|
constructor(vad, stt, llm, tts, opts = defaultVPAOptions) {
|
|
143
154
|
super();
|
|
144
155
|
this.#opts = { ...defaultVPAOptions, ...opts };
|
|
@@ -183,6 +194,20 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
183
194
|
if (this.#started) {
|
|
184
195
|
throw new Error("voice assistant already started");
|
|
185
196
|
}
|
|
197
|
+
this.#stt.on(import_stt.SpeechEventType.METRICS_COLLECTED, (metrics) => {
|
|
198
|
+
this.emit(9 /* METRICS_COLLECTED */, metrics);
|
|
199
|
+
});
|
|
200
|
+
this.#tts.on(import_tts.TTSEvent.METRICS_COLLECTED, (metrics) => {
|
|
201
|
+
if (!speechData) return;
|
|
202
|
+
this.emit(9 /* METRICS_COLLECTED */, { ...metrics, sequenceId: speechData.sequenceId });
|
|
203
|
+
});
|
|
204
|
+
this.#llm.on(import_llm.LLMEvent.METRICS_COLLECTED, (metrics) => {
|
|
205
|
+
if (!speechData) return;
|
|
206
|
+
this.emit(9 /* METRICS_COLLECTED */, { ...metrics, sequenceId: speechData.sequenceId });
|
|
207
|
+
});
|
|
208
|
+
this.#vad.on(import_vad.VADEventType.METRICS_COLLECTED, (metrics) => {
|
|
209
|
+
this.emit(9 /* METRICS_COLLECTED */, metrics);
|
|
210
|
+
});
|
|
186
211
|
room.on(import_rtc_node.RoomEvent.ParticipantConnected, (participant2) => {
|
|
187
212
|
if (this.#participant) {
|
|
188
213
|
return;
|
|
@@ -203,10 +228,43 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
203
228
|
/** Play a speech source through the voice assistant. */
|
|
204
229
|
async say(source, allowInterruptions = true, addToChatCtx = true) {
|
|
205
230
|
await this.#trackPublishedFut.await;
|
|
231
|
+
let callContext;
|
|
232
|
+
let fncSource;
|
|
233
|
+
if (addToChatCtx) {
|
|
234
|
+
callContext = AgentCallContext.getCurrent();
|
|
235
|
+
if (source instanceof import_llm.LLMStream) {
|
|
236
|
+
this.#logger.warn("LLMStream will be ignored for function call chat context");
|
|
237
|
+
} else if (typeof source === "string") {
|
|
238
|
+
fncSource = source;
|
|
239
|
+
} else {
|
|
240
|
+
fncSource = source;
|
|
241
|
+
source = new import_utils.AsyncIterableQueue();
|
|
242
|
+
}
|
|
243
|
+
}
|
|
206
244
|
const newHandle = import_speech_handle.SpeechHandle.createAssistantSpeech(allowInterruptions, addToChatCtx);
|
|
207
245
|
const synthesisHandle = this.#synthesizeAgentSpeech(newHandle.id, source);
|
|
208
246
|
newHandle.initialize(source, synthesisHandle);
|
|
209
|
-
this.#
|
|
247
|
+
if (this.#playingSpeech && !this.#playingSpeech.nestedSpeechFinished) {
|
|
248
|
+
this.#playingSpeech.addNestedSpeech(newHandle);
|
|
249
|
+
} else {
|
|
250
|
+
this.#addSpeechForPlayout(newHandle);
|
|
251
|
+
}
|
|
252
|
+
if (callContext && fncSource) {
|
|
253
|
+
let text;
|
|
254
|
+
if (typeof source === "string") {
|
|
255
|
+
text = fncSource;
|
|
256
|
+
} else {
|
|
257
|
+
text = "";
|
|
258
|
+
for await (const chunk of fncSource) {
|
|
259
|
+
source.put(chunk);
|
|
260
|
+
text += chunk;
|
|
261
|
+
}
|
|
262
|
+
source.close();
|
|
263
|
+
}
|
|
264
|
+
callContext.addExtraChatMessage(import_llm2.ChatMessage.create({ text, role: import_llm2.ChatRole.ASSISTANT }));
|
|
265
|
+
this.#logger.child({ text }).debug("added speech to function call chat context");
|
|
266
|
+
}
|
|
267
|
+
return newHandle;
|
|
210
268
|
}
|
|
211
269
|
#updateState(state, delay = 0) {
|
|
212
270
|
const runTask = (delay2) => {
|
|
@@ -260,11 +318,13 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
260
318
|
if (event.speechDuration >= this.#opts.interruptSpeechDuration) {
|
|
261
319
|
this.#interruptIfPossible();
|
|
262
320
|
}
|
|
321
|
+
if (event.rawAccumulatedSpeech > 0) {
|
|
322
|
+
this.#lastSpeechTime = Date.now() - event.rawAccumulatedSilence;
|
|
323
|
+
}
|
|
263
324
|
});
|
|
264
325
|
this.#humanInput.on(import_human_input.HumanInputEvent.END_OF_SPEECH, (event) => {
|
|
265
326
|
this.emit(0 /* USER_STARTED_SPEAKING */);
|
|
266
327
|
this.#deferredValidation.onHumanEndOfSpeech(event);
|
|
267
|
-
this.#lastEndOfSpeechTime = Date.now();
|
|
268
328
|
});
|
|
269
329
|
this.#humanInput.on(import_human_input.HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {
|
|
270
330
|
this.#transcribedInterimText = event.alternatives[0].text;
|
|
@@ -272,7 +332,7 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
272
332
|
this.#humanInput.on(import_human_input.HumanInputEvent.FINAL_TRANSCRIPT, (event) => {
|
|
273
333
|
const newTranscript = event.alternatives[0].text;
|
|
274
334
|
if (!newTranscript) return;
|
|
275
|
-
this.#
|
|
335
|
+
this.#lastFinalTranscriptTime = Date.now();
|
|
276
336
|
this.#transcribedText += (this.#transcribedText ? " " : "") + newTranscript;
|
|
277
337
|
if (this.#opts.preemptiveSynthesis && (!this.#playingSpeech || this.#playingSpeech.allowInterruptions)) {
|
|
278
338
|
this.#synthesizeAgentReply();
|
|
@@ -356,23 +416,26 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
356
416
|
role: import_llm2.ChatRole.USER
|
|
357
417
|
})
|
|
358
418
|
);
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
419
|
+
speechData = { sequenceId: handle.id };
|
|
420
|
+
try {
|
|
421
|
+
if (cancelled) resolve();
|
|
422
|
+
let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);
|
|
423
|
+
if (llmStream === false) {
|
|
424
|
+
handle == null ? void 0 : handle.cancel();
|
|
425
|
+
return;
|
|
426
|
+
}
|
|
427
|
+
if (cancelled) resolve();
|
|
428
|
+
if (!(llmStream instanceof import_llm.LLMStream)) {
|
|
429
|
+
llmStream = await defaultBeforeLLMCallback(this, copiedCtx);
|
|
430
|
+
}
|
|
431
|
+
if (handle.interrupted) {
|
|
432
|
+
return;
|
|
433
|
+
}
|
|
434
|
+
const synthesisHandle = this.#synthesizeAgentSpeech(handle.id, llmStream);
|
|
435
|
+
handle.initialize(llmStream, synthesisHandle);
|
|
436
|
+
} finally {
|
|
437
|
+
speechData = void 0;
|
|
371
438
|
}
|
|
372
|
-
const synthesisHandle = this.#synthesizeAgentSpeech(handle.id, llmStream);
|
|
373
|
-
handle.initialize(llmStream, synthesisHandle);
|
|
374
|
-
const elapsed = !!this.#lastEndOfSpeechTime ? Math.round((Date.now() - this.#lastEndOfSpeechTime) * 1e3) / 1e3 : -1;
|
|
375
|
-
this.#logger.child({ speechId: handle.id, elapsed }).debug("synthesizing agent reply");
|
|
376
439
|
resolve();
|
|
377
440
|
});
|
|
378
441
|
}
|
|
@@ -414,59 +477,81 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
414
477
|
commitUserQuestionIfNeeded();
|
|
415
478
|
const collectedText = handle.synthesisHandle.text;
|
|
416
479
|
const isUsingTools = handle.source instanceof import_llm.LLMStream && !!handle.source.functionCalls.length;
|
|
417
|
-
const
|
|
418
|
-
|
|
419
|
-
|
|
480
|
+
const interrupted = handle.interrupted;
|
|
481
|
+
const executeFunctionCalls = async () => {
|
|
482
|
+
if (!isUsingTools || interrupted) return;
|
|
483
|
+
if (handle.fncNestedDepth >= this.#opts.maxNestedFncCalls) {
|
|
484
|
+
this.#logger.child({ speechId: handle.id, fncNestedDepth: handle.fncNestedDepth }).warn("max function calls nested depth reached");
|
|
485
|
+
return;
|
|
486
|
+
}
|
|
420
487
|
if (!userQuestion || !handle.userCommitted) {
|
|
421
488
|
throw new Error("user speech should have been committed before using tools");
|
|
422
489
|
}
|
|
423
490
|
const llmStream = handle.source;
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
}
|
|
440
|
-
}
|
|
441
|
-
const toolCallsInfo = [];
|
|
442
|
-
const toolCallsResults = [];
|
|
443
|
-
for (const fnc of calledFuncs) {
|
|
444
|
-
const task = await fnc.task;
|
|
445
|
-
if (!task || task.result === void 0) continue;
|
|
446
|
-
toolCallsInfo.push(fnc);
|
|
447
|
-
toolCallsResults.push(import_llm2.ChatMessage.createToolFromFunctionResult(task));
|
|
491
|
+
const newFunctionCalls = llmStream.functionCalls;
|
|
492
|
+
new AgentCallContext(this, llmStream);
|
|
493
|
+
this.emit(7 /* FUNCTION_CALLS_COLLECTED */, newFunctionCalls);
|
|
494
|
+
const calledFuncs = [];
|
|
495
|
+
for (const func of newFunctionCalls) {
|
|
496
|
+
const task2 = func.func.execute(func.params).then(
|
|
497
|
+
(result) => ({ name: func.name, toolCallId: func.toolCallId, result }),
|
|
498
|
+
(error) => ({ name: func.name, toolCallId: func.toolCallId, error })
|
|
499
|
+
);
|
|
500
|
+
calledFuncs.push({ ...func, task: task2 });
|
|
501
|
+
this.#logger.child({ function: func.name, speechId: handle.id }).debug("executing AI function");
|
|
502
|
+
try {
|
|
503
|
+
await task2;
|
|
504
|
+
} catch {
|
|
505
|
+
this.#logger.child({ function: func.name, speechId: handle.id }).error("error executing AI function");
|
|
448
506
|
}
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
507
|
+
}
|
|
508
|
+
const toolCallsInfo = [];
|
|
509
|
+
const toolCallsResults = [];
|
|
510
|
+
for (const fnc of calledFuncs) {
|
|
511
|
+
const task2 = await fnc.task;
|
|
512
|
+
if (!task2 || task2.result === void 0) continue;
|
|
513
|
+
toolCallsInfo.push(fnc);
|
|
514
|
+
toolCallsResults.push(import_llm2.ChatMessage.createToolFromFunctionResult(task2));
|
|
515
|
+
}
|
|
516
|
+
if (!toolCallsInfo.length) return;
|
|
517
|
+
const extraToolsMessages = [import_llm2.ChatMessage.createToolCalls(toolCallsInfo, collectedText)];
|
|
518
|
+
extraToolsMessages.push(...toolCallsResults);
|
|
519
|
+
const newSpeechHandle = import_speech_handle.SpeechHandle.createToolSpeech(
|
|
520
|
+
handle.allowInterruptions,
|
|
521
|
+
handle.addToChatCtx,
|
|
522
|
+
handle.fncNestedDepth + 1,
|
|
523
|
+
extraToolsMessages
|
|
524
|
+
);
|
|
525
|
+
const chatCtx = handle.source.chatCtx.copy();
|
|
526
|
+
chatCtx.messages.push(...extraToolsMessages);
|
|
527
|
+
chatCtx.messages.push(...AgentCallContext.getCurrent().extraChatMessages);
|
|
528
|
+
const answerLLMStream = this.llm.chat({
|
|
529
|
+
chatCtx,
|
|
530
|
+
fncCtx: this.fncCtx
|
|
531
|
+
});
|
|
532
|
+
const answerSynthesis = this.#synthesizeAgentSpeech(newSpeechHandle.id, answerLLMStream);
|
|
533
|
+
newSpeechHandle.initialize(answerLLMStream, answerSynthesis);
|
|
534
|
+
handle.addNestedSpeech(newSpeechHandle);
|
|
535
|
+
this.emit(8 /* FUNCTION_CALLS_FINISHED */, calledFuncs);
|
|
536
|
+
};
|
|
537
|
+
const task = executeFunctionCalls().then(() => {
|
|
538
|
+
handle.markNestedSpeechFinished();
|
|
539
|
+
});
|
|
540
|
+
while (!handle.nestedSpeechFinished) {
|
|
541
|
+
const changed = handle.nestedSpeechChanged();
|
|
542
|
+
await Promise.race([changed, task]);
|
|
543
|
+
while (handle.nestedSpeechHandles.length) {
|
|
544
|
+
const speech = handle.nestedSpeechHandles[0];
|
|
545
|
+
this.#playingSpeech = speech;
|
|
546
|
+
await this.#playSpeech(speech);
|
|
547
|
+
handle.nestedSpeechHandles.shift();
|
|
548
|
+
this.#playingSpeech = handle;
|
|
466
549
|
}
|
|
467
550
|
}
|
|
468
551
|
if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) {
|
|
469
|
-
|
|
552
|
+
if (handle.extraToolsMessages) {
|
|
553
|
+
this.chatCtx.messages.push(...handle.extraToolsMessages);
|
|
554
|
+
}
|
|
470
555
|
if (interrupted) {
|
|
471
556
|
collectedText + "\u2026";
|
|
472
557
|
}
|
|
@@ -483,6 +568,7 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
483
568
|
interrupted,
|
|
484
569
|
speechId: handle.id
|
|
485
570
|
}).debug("committed agent speech");
|
|
571
|
+
handle.setDone();
|
|
486
572
|
}
|
|
487
573
|
}
|
|
488
574
|
#synthesizeAgentSpeech(speechId, source) {
|
|
@@ -523,6 +609,20 @@ class VoicePipelineAgent extends import_node_events.default {
|
|
|
523
609
|
}
|
|
524
610
|
}
|
|
525
611
|
this.#logger.child({ speechId: this.#pendingAgentReply.id }).debug("validated agent reply");
|
|
612
|
+
if (this.#lastSpeechTime) {
|
|
613
|
+
const timeSinceLastSpeech = Date.now() - this.#lastSpeechTime;
|
|
614
|
+
const transcriptionDelay = Math.max(
|
|
615
|
+
(this.#lastFinalTranscriptTime || 0) - this.#lastSpeechTime,
|
|
616
|
+
0
|
|
617
|
+
);
|
|
618
|
+
const metrics = {
|
|
619
|
+
timestamp: Date.now(),
|
|
620
|
+
sequenceId: this.#pendingAgentReply.id,
|
|
621
|
+
endOfUtteranceDelay: timeSinceLastSpeech,
|
|
622
|
+
transcriptionDelay
|
|
623
|
+
};
|
|
624
|
+
this.emit(9 /* METRICS_COLLECTED */, metrics);
|
|
625
|
+
}
|
|
526
626
|
this.#addSpeechForPlayout(this.#pendingAgentReply);
|
|
527
627
|
this.#pendingAgentReply = void 0;
|
|
528
628
|
this.#transcribedInterimText = "";
|