@livekit/agents 1.0.38 → 1.0.39
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in the public registry.
- package/dist/inference/llm.cjs +7 -3
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +5 -6
- package/dist/inference/llm.d.ts +5 -6
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +7 -3
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +5 -4
- package/dist/inference/stt.d.ts +5 -4
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +10 -7
- package/dist/inference/tts.d.ts +10 -7
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js.map +1 -1
- package/dist/stt/stream_adapter.cjs +9 -1
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +9 -1
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/utils.cjs +5 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +8 -0
- package/dist/utils.d.ts +8 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +4 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent.cjs +1 -2
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.js +1 -2
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +23 -14
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +1 -0
- package/dist/voice/agent_activity.d.ts +1 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +23 -14
- package/dist/voice/agent_activity.js.map +1 -1
- package/package.json +2 -2
- package/src/inference/llm.ts +20 -15
- package/src/inference/stt.ts +9 -7
- package/src/inference/tts.ts +36 -16
- package/src/stt/stream_adapter.ts +12 -1
- package/src/utils.ts +14 -0
- package/src/voice/agent.ts +2 -2
- package/src/voice/agent_activity.ts +36 -15
package/dist/voice/agent.js.map
CHANGED
@@ -1 +1 @@
The single-line source map was regenerated: the embedded `sourcesContent` and `mappings` for `src/voice/agent.ts` were updated. The only substantive change in the embedded source is in the default `llmNode`:

-      // TODO(brian): make parallelToolCalls configurable
       const { toolChoice } = modelSettings;
       const connOptions = activity.agentSession.connOptions.llmConnOptions;

+      // parallelToolCalls is not passed here - it will use the value from LLM's modelOptions
+      // This allows users to configure it via: new inference.LLM({ modelOptions: { parallel_tool_calls: false } })
       const stream = activity.llm.chat({
         chatCtx,
         toolCtx,
         toolChoice,
         connOptions,
-        parallelToolCalls: true,
       });
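The updated comment above points callers at the LLM's `modelOptions` for controlling parallel tool calls, replacing the previously hard-coded `parallelToolCalls: true` in `llmNode`. A minimal sketch of that configuration, assuming the `inference` and `voice` namespace exports; the model identifier and the agent wiring are illustrative:

```ts
import { inference, voice } from '@livekit/agents';

// Configure parallel tool calls on the LLM itself; the default llmNode no longer
// overrides it. The modelOptions key is taken from the comment in the updated
// source; the model string is a hypothetical placeholder.
const llm = new inference.LLM({
  model: 'openai/gpt-4o-mini',
  modelOptions: { parallel_tool_calls: false },
});

// Pass the configured LLM to the agent as usual.
const agent = new voice.Agent({
  instructions: 'You are a helpful voice assistant.',
  llm,
});
```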
@@ -122,9 +122,9 @@ class AgentActivity {
       );
       this.turnDetectionMode = void 0;
     }
-    if (!this.vad && this.stt && this.llm instanceof import_llm.LLM && this.allowInterruptions && this.turnDetectionMode === void 0) {
+    if (!this.vad && this.stt && !this.stt.capabilities.streaming && this.llm instanceof import_llm.LLM && this.allowInterruptions && this.turnDetectionMode === void 0) {
       this.logger.warn(
-        "VAD is not set. Enabling VAD is recommended when using LLM and STT for more responsive interruption handling."
+        "VAD is not set. Enabling VAD is recommended when using LLM and non-streaming STT for more responsive interruption handling."
       );
     }
   }
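The tightened guard above now warns only when the configured STT is non-streaming and no VAD is available to stream-adapt it (matching the requirement in the default `sttNode`). A minimal sketch of supplying a VAD in that case, assuming the Silero VAD plugin and the package's namespace re-exports; the batch STT instance is hypothetical:

```ts
import { stt, voice } from '@livekit/agents';
import * as silero from '@livekit/agents-plugin-silero';

// Hypothetical non-streaming STT (capabilities.streaming === false).
declare const myBatchStt: stt.STT;

// Load a VAD so the framework can wrap the batch STT in its StreamAdapter.
const vad = await silero.VAD.load();

const agent = new voice.Agent({
  instructions: 'You are a helpful voice assistant.',
  stt: myBatchStt,
  vad, // avoids the warning and keeps interruption handling responsive
});
```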
@@ -473,14 +473,16 @@ class AgentActivity {
     this.agentSession._updateUserState("listening", speechEndTime);
   }
   onVADInferenceDone(ev) {
-    var _a, _b;
     if (this.turnDetection === "manual" || this.turnDetection === "realtime_llm") {
       return;
     }
-    if (
-
+    if (ev.speechDuration >= this.agentSession.options.minInterruptionDuration) {
+      this.interruptByAudioActivity();
     }
-
+  }
+  interruptByAudioActivity() {
+    var _a, _b;
+    if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.turnDetection) {
       return;
     }
     if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
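With this refactor, VAD-triggered interruptions funnel through `interruptByAudioActivity()` and are gated on `minInterruptionDuration` (plus `minInterruptionWords` when an STT is attached). A sketch of tuning those thresholds; the option names appear in the compiled code above, but passing them to the `AgentSession` constructor, the values, and their units are assumptions:

```ts
import { voice } from '@livekit/agents';

const session = new voice.AgentSession({
  // Minimum detected speech before an interruption fires (illustrative value and unit).
  minInterruptionDuration: 0.5,
  // Minimum transcribed words before an STT-based interruption (illustrative value).
  minInterruptionWords: 2,
});
```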
@@ -493,7 +495,10 @@
     }
     (_a = this.realtimeSession) == null ? void 0 : _a.startUserActivity();
     if (this._currentSpeech && !this._currentSpeech.interrupted && this._currentSpeech.allowInterruptions) {
-      this.logger.info(
+      this.logger.info(
+        { "speech id": this._currentSpeech.id },
+        "speech interrupted by audio activity"
+      );
       (_b = this.realtimeSession) == null ? void 0 : _b.interrupt();
       this._currentSpeech.interrupt();
     }
@@ -511,6 +516,9 @@
         // TODO(AJS-106): add multi participant support
       })
     );
+    if (ev.alternatives[0].text) {
+      this.interruptByAudioActivity();
+    }
   }
   onFinalTranscript(ev) {
     if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.userTranscription) {
@@ -525,6 +533,9 @@
         // TODO(AJS-106): add multi participant support
       })
     );
+    if (this.audioRecognition && this.turnDetection !== "manual" && this.turnDetection !== "realtime_llm") {
+      this.interruptByAudioActivity();
+    }
   }
   onPreemptiveGeneration(info) {
     if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof import_llm.LLM)) {
@@ -1457,7 +1468,6 @@ ${instructions}` : instructions,
     await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
     if (audioOutput) {
       await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
-      this.agentSession._updateAgentState("listening");
     }
     if (speechHandle.interrupted) {
       this.logger.debug(
@@ -1525,14 +1535,13 @@ ${instructions}` : instructions,
       this.agentSession._conversationItemAdded(message);
     }
     speechHandle._markGenerationDone();
-    toolOutput.firstToolStartedFuture.await.finally(() => {
-      this.agentSession._updateAgentState("thinking");
-    });
     await executeToolsTask.result;
+    if (toolOutput.output.length > 0) {
+      this.agentSession._updateAgentState("thinking");
+    } else if (this.agentSession.agentState === "speaking") {
+      this.agentSession._updateAgentState("listening");
+    }
     if (toolOutput.output.length === 0) {
-      if (!speechHandle.interrupted) {
-        this.agentSession._updateAgentState("listening");
-      }
       return;
     }
     const { maxToolSteps } = this.agentSession.options;