@livekit/agents 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ipc/job_proc_lazy_main.cjs +1 -2
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +2 -3
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/proc_pool.cjs +14 -2
- package/dist/ipc/proc_pool.cjs.map +1 -1
- package/dist/ipc/proc_pool.d.ts.map +1 -1
- package/dist/ipc/proc_pool.js +14 -2
- package/dist/ipc/proc_pool.js.map +1 -1
- package/dist/ipc/supervised_proc.cjs +32 -10
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.d.cts +2 -0
- package/dist/ipc/supervised_proc.d.ts +2 -0
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/ipc/supervised_proc.js +22 -10
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/llm/llm.cjs +4 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +4 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/vad.cjs +3 -0
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +3 -0
- package/dist/vad.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +3 -0
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +3 -0
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/worker.cjs +25 -4
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +25 -4
- package/dist/worker.js.map +1 -1
- package/package.json +3 -1
- package/src/ipc/job_proc_lazy_main.ts +2 -3
- package/src/ipc/proc_pool.ts +14 -2
- package/src/ipc/supervised_proc.ts +23 -10
- package/src/llm/llm.ts +4 -2
- package/src/vad.ts +3 -0
- package/src/voice/audio_recognition.ts +5 -0
- package/src/worker.ts +25 -4
package/dist/llm/llm.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport { type ChatContext, type ChatRole, type FunctionCall } from './chat_context.js';\nimport type { ToolChoice, ToolContext } from './tool_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCall[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n promptCachedTokens: number;\n totalTokens: number;\n}\n\nexport interface ChatChunk {\n id: string;\n delta?: ChoiceDelta;\n usage?: CompletionUsage;\n}\n\nexport interface LLMError {\n type: 'llm_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type LLMCallbacks = {\n ['metrics_collected']: (metrics: LLMMetrics) => void;\n ['error']: (error: LLMError) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n constructor() {\n super();\n }\n\n abstract label(): string;\n\n /**\n * Get the model name/identifier for this LLM instance.\n *\n * @returns The model name if available, \"unknown\" otherwise.\n *\n * @remarks\n * Plugins should override this property to provide their model information.\n */\n get model(): string {\n return 'unknown';\n }\n\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n toolCtx,\n connOptions,\n parallelToolCalls,\n toolChoice,\n extraKwargs,\n }: {\n chatCtx: ChatContext;\n toolCtx?: ToolContext;\n connOptions?: APIConnectOptions;\n parallelToolCalls?: boolean;\n toolChoice?: ToolChoice;\n extraKwargs?: Record<string, any>;\n }): LLMStream;\n\n /**\n * Pre-warm connection to the LLM service\n */\n prewarm(): void {\n // Default implementation - subclasses can override\n }\n\n async aclose(): Promise<void> {\n // Default implementation - subclasses can override\n }\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected abortController = new AbortController();\n protected _connOptions: APIConnectOptions;\n protected logger = log();\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #toolCtx?: ToolContext;\n\n constructor(\n llm: LLM,\n {\n chatCtx,\n toolCtx,\n connOptions,\n }: {\n chatCtx: ChatContext;\n toolCtx?: ToolContext;\n connOptions: APIConnectOptions;\n },\n ) {\n this.#llm = llm;\n this.#chatCtx = chatCtx;\n this.#toolCtx = toolCtx;\n this._connOptions = connOptions;\n this.monitorMetrics();\n this.abortController.signal.addEventListener('abort', () => {\n // TODO (AJS-37) clean this up when we refactor with streams\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate LLM completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { llm: this.#llm.label(), attempt: i + 1, error },\n `failed to generate LLM completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#llm.emit('error', {\n type: 'llm_error',\n timestamp: Date.now(),\n label: this.#llm.label(),\n error,\n recoverable,\n });\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint = BigInt(-1);\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(ev);\n requestId = ev.id;\n if (ttft === BigInt(-1)) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n type: 'llm_metrics',\n timestamp: Date.now(),\n requestId,\n ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: this.abortController.signal.aborted,\n label: this.#llm.label(),\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n promptCachedTokens: usage?.promptCachedTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond:\n (usage?.completionTokens || 0) / Math.trunc(Number(duration / BigInt(1000000000))),\n };\n this.#llm.emit('metrics_collected', metrics);\n }\n\n protected abstract run(): Promise<void>;\n\n /** The function context of this stream. */\n get toolCtx(): ToolContext | undefined {\n return this.#toolCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** The connection options for this stream. */\n get connOptions(): APIConnectOptions {\n return this._connOptions;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,yBAA6B;AAC7B,wBAA6C;AAC7C,iBAAoB;AAGpB,mBAA8D;AAC9D,0BAAmE;AAmC5D,MAAe,YAAa,gCAAsD;AAAA,EACvF,cAAc;AACZ,UAAM;AAAA,EACR;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAwBA,UAAgB;AAAA,EAEhB;AAAA,EAEA,MAAM,SAAwB;AAAA,EAE9B;AACF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,gCAA8B;AAAA,EAC3C,QAAQ,IAAI,gCAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,kBAAkB,IAAI,gBAAgB;AAAA,EACtC;AAAA,EACA,aAAS,gBAAI;AAAA,EAEvB;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YACE,KACA;AAAA,IACE;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKA;AACA,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,WAAW;AAChB,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAE1D,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,MAAM,GAAG,SAAS,IAAI,GAAG,MAAM;AAAA,cAChD,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK,MAAM;AAAA,MACvB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW,KAAK,gBAAgB,OAAO;AAAA,MACvC,OAAO,KAAK,KAAK,MAAM;AAAA,MACvB,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,qBAAoB,+BAAO,uBAAsB;AAAA,MACjD,cAAa,+BAAO,gBAAe;AAAA,MACnC,mBACG,+BAAO,qBAAoB,KAAK,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AAAA,IACrF;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAKA,IAAI,UAAmC;AACrC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport { type ChatContext, type ChatRole, type FunctionCall } from './chat_context.js';\nimport type { ToolChoice, ToolContext } from './tool_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCall[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n promptCachedTokens: number;\n totalTokens: number;\n}\n\nexport interface ChatChunk {\n id: string;\n delta?: ChoiceDelta;\n usage?: CompletionUsage;\n}\n\nexport interface LLMError {\n type: 'llm_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type LLMCallbacks = {\n ['metrics_collected']: (metrics: LLMMetrics) => void;\n ['error']: (error: LLMError) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n constructor() {\n super();\n }\n\n abstract label(): string;\n\n /**\n * Get the model name/identifier for this LLM instance.\n *\n * @returns The model name if available, \"unknown\" otherwise.\n *\n * @remarks\n * Plugins should override this property to provide their model information.\n */\n get model(): string {\n return 'unknown';\n }\n\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n toolCtx,\n connOptions,\n parallelToolCalls,\n toolChoice,\n extraKwargs,\n }: {\n chatCtx: ChatContext;\n toolCtx?: ToolContext;\n connOptions?: APIConnectOptions;\n parallelToolCalls?: boolean;\n toolChoice?: ToolChoice;\n extraKwargs?: Record<string, any>;\n }): LLMStream;\n\n /**\n * Pre-warm connection to the LLM service\n */\n prewarm(): void {\n // Default implementation - subclasses can override\n }\n\n async aclose(): Promise<void> {\n // Default implementation - subclasses can override\n }\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected abortController = new AbortController();\n protected _connOptions: APIConnectOptions;\n protected logger = log();\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #toolCtx?: ToolContext;\n\n constructor(\n llm: LLM,\n {\n chatCtx,\n toolCtx,\n connOptions,\n }: {\n chatCtx: ChatContext;\n toolCtx?: ToolContext;\n connOptions: APIConnectOptions;\n },\n ) {\n this.#llm = llm;\n this.#chatCtx = chatCtx;\n this.#toolCtx = toolCtx;\n this._connOptions = connOptions;\n this.monitorMetrics();\n this.abortController.signal.addEventListener('abort', () => {\n // TODO (AJS-37) clean this up when we refactor with streams\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate LLM completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { llm: this.#llm.label(), attempt: i + 1, error },\n `failed to generate LLM completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#llm.emit('error', {\n type: 'llm_error',\n timestamp: Date.now(),\n label: this.#llm.label(),\n error,\n recoverable,\n });\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint = BigInt(-1);\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(ev);\n requestId = ev.id;\n if (ttft === BigInt(-1)) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n type: 'llm_metrics',\n timestamp: Date.now(),\n requestId,\n ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: this.abortController.signal.aborted,\n label: this.#llm.label(),\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n promptCachedTokens: usage?.promptCachedTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond: (() => {\n const durationSeconds = Math.trunc(Number(duration / BigInt(1000000000)));\n return durationSeconds > 0 ? (usage?.completionTokens || 0) / durationSeconds : 0;\n })(),\n };\n this.#llm.emit('metrics_collected', metrics);\n }\n\n protected abstract run(): Promise<void>;\n\n /** The function context of this stream. */\n get toolCtx(): ToolContext | undefined {\n return this.#toolCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** The connection options for this stream. */\n get connOptions(): APIConnectOptions {\n return this._connOptions;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,yBAA6B;AAC7B,wBAA6C;AAC7C,iBAAoB;AAGpB,mBAA8D;AAC9D,0BAAmE;AAmC5D,MAAe,YAAa,gCAAsD;AAAA,EACvF,cAAc;AACZ,UAAM;AAAA,EACR;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAwBA,UAAgB;AAAA,EAEhB;AAAA,EAEA,MAAM,SAAwB;AAAA,EAE9B;AACF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,gCAA8B;AAAA,EAC3C,QAAQ,IAAI,gCAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,kBAAkB,IAAI,gBAAgB;AAAA,EACtC;AAAA,EACA,aAAS,gBAAI;AAAA,EAEvB;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YACE,KACA;AAAA,IACE;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKA;AACA,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,WAAW;AAChB,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAE1D,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,MAAM,GAAG,SAAS,IAAI,GAAG,MAAM;AAAA,cAChD,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK,MAAM;AAAA,MACvB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW,KAAK,gBAAgB,OAAO;AAAA,MACvC,OAAO,KAAK,KAAK,MAAM;AAAA,MACvB,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,qBAAoB,+BAAO,uBAAsB;AAAA,MACjD,cAAa,+BAAO,gBAAe;AAAA,MACnC,kBAAkB,MAAM;AACtB,cAAM,kBAAkB,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AACxE,eAAO,kBAAkB,MAAK,+BAAO,qBAAoB,KAAK,kBAAkB;AAAA,MAClF,GAAG;AAAA,IACL;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAKA,IAAI,UAAmC;AACrC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":[]}
|
package/dist/llm/llm.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../src/llm/llm.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAIhF,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAA6B,MAAM,aAAa,CAAC;AAC5E,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,QAAQ,EAAE,KAAK,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACvF,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEjE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,YAAY,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,eAAe;IAC9B,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,KAAK,CAAC,EAAE,eAAe,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAE2D,aAAa,YAAY,CAAC;AAAvF,8BAAsB,GAAI,SAAQ,QAAsD;;IAKtF,QAAQ,CAAC,KAAK,IAAI,MAAM;IAExB;;;;;;;OAOG;IACH,IAAI,KAAK,IAAI,MAAM,CAElB;IAED;;OAEG;IACH,QAAQ,CAAC,IAAI,CAAC,EACZ,OAAO,EACP,OAAO,EACP,WAAW,EACX,iBAAiB,EACjB,UAAU,EACV,WAAW,GACZ,EAAE;QACD,OAAO,EAAE,WAAW,CAAC;QACrB,OAAO,CAAC,EAAE,WAAW,CAAC;QACtB,WAAW,CAAC,EAAE,iBAAiB,CAAC;QAChC,iBAAiB,CAAC,EAAE,OAAO,CAAC;QAC5B,UAAU,CAAC,EAAE,UAAU,CAAC;QACxB,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KACnC,GAAG,SAAS;IAEb;;OAEG;IACH,OAAO,IAAI,IAAI;IAIT,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;CAG9B;AAED,8BAAsB,SAAU,YAAW,qBAAqB,CAAC,SAAS,CAAC;;IACzE,SAAS,CAAC,MAAM,gCAAuC;IACvD,SAAS,CAAC,KAAK,gCAAuC;IACtD,SAAS,CAAC,MAAM,UAAS;IACzB,SAAS,CAAC,eAAe,kBAAyB;IAClD,SAAS,CAAC,YAAY,EAAE,iBAAiB,CAAC;IAC1C,SAAS,CAAC,MAAM,wBAAS;gBAOvB,GAAG,EAAE,GAAG,EACR,EACE,OAAO,EACP,OAAO,EACP,WAAW,GACZ,EAAE;QACD,OAAO,EAAE,WAAW,CAAC;QACrB,OAAO,CAAC,EAAE,WAAW,CAAC;QACtB,WAAW,EAAE,iBAAiB,CAAC;KAChC;YAoBW,QAAQ;IAoCtB,OAAO,CAAC,SAAS;cAUD,cAAc;
|
|
1
|
+
{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../src/llm/llm.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAIhF,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAA6B,MAAM,aAAa,CAAC;AAC5E,OAAO,EAAE,KAAK,WAAW,EAAE,KAAK,QAAQ,EAAE,KAAK,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACvF,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEjE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,QAAQ,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,YAAY,EAAE,CAAC;CAC5B;AAED,MAAM,WAAW,eAAe;IAC9B,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,KAAK,CAAC,EAAE,eAAe,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAE2D,aAAa,YAAY,CAAC;AAAvF,8BAAsB,GAAI,SAAQ,QAAsD;;IAKtF,QAAQ,CAAC,KAAK,IAAI,MAAM;IAExB;;;;;;;OAOG;IACH,IAAI,KAAK,IAAI,MAAM,CAElB;IAED;;OAEG;IACH,QAAQ,CAAC,IAAI,CAAC,EACZ,OAAO,EACP,OAAO,EACP,WAAW,EACX,iBAAiB,EACjB,UAAU,EACV,WAAW,GACZ,EAAE;QACD,OAAO,EAAE,WAAW,CAAC;QACrB,OAAO,CAAC,EAAE,WAAW,CAAC;QACtB,WAAW,CAAC,EAAE,iBAAiB,CAAC;QAChC,iBAAiB,CAAC,EAAE,OAAO,CAAC;QAC5B,UAAU,CAAC,EAAE,UAAU,CAAC;QACxB,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;KACnC,GAAG,SAAS;IAEb;;OAEG;IACH,OAAO,IAAI,IAAI;IAIT,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;CAG9B;AAED,8BAAsB,SAAU,YAAW,qBAAqB,CAAC,SAAS,CAAC;;IACzE,SAAS,CAAC,MAAM,gCAAuC;IACvD,SAAS,CAAC,KAAK,gCAAuC;IACtD,SAAS,CAAC,MAAM,UAAS;IACzB,SAAS,CAAC,eAAe,kBAAyB;IAClD,SAAS,CAAC,YAAY,EAAE,iBAAiB,CAAC;IAC1C,SAAS,CAAC,MAAM,wBAAS;gBAOvB,GAAG,EAAE,GAAG,EACR,EACE,OAAO,EACP,OAAO,EACP,WAAW,GACZ,EAAE;QACD,OAAO,EAAE,WAAW,CAAC;QACrB,OAAO,CAAC,EAAE,WAAW,CAAC;QACtB,WAAW,EAAE,iBAAiB,CAAC;KAChC;YAoBW,QAAQ;IAoCtB,OAAO,CAAC,SAAS;cAUD,cAAc;IA0C9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,2CAA2C;IAC3C,IAAI,OAAO,IAAI,WAAW,GAAG,SAAS,CAErC;IAED,+CAA+C;IAC/C,IAAI,OAAO,IAAI,WAAW,CAEzB;IAED,8CAA8C;IAC9C,IAAI,WAAW,IAAI,iBAAiB,CAEnC;IAED,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;IAI1C,KAAK;IAIL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,SAAS;CAGpC"}
|
package/dist/llm/llm.js
CHANGED
|
@@ -126,7 +126,10 @@ class LLMStream {
|
|
|
126
126
|
promptTokens: (usage == null ? void 0 : usage.promptTokens) || 0,
|
|
127
127
|
promptCachedTokens: (usage == null ? void 0 : usage.promptCachedTokens) || 0,
|
|
128
128
|
totalTokens: (usage == null ? void 0 : usage.totalTokens) || 0,
|
|
129
|
-
tokensPerSecond: ((
|
|
129
|
+
tokensPerSecond: (() => {
|
|
130
|
+
const durationSeconds = Math.trunc(Number(duration / BigInt(1e9)));
|
|
131
|
+
return durationSeconds > 0 ? ((usage == null ? void 0 : usage.completionTokens) || 0) / durationSeconds : 0;
|
|
132
|
+
})()
|
|
130
133
|
};
|
|
131
134
|
this.#llm.emit("metrics_collected", metrics);
|
|
132
135
|
}
|
package/dist/llm/llm.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport { type ChatContext, type ChatRole, type FunctionCall } from './chat_context.js';\nimport type { ToolChoice, ToolContext } from './tool_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCall[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n promptCachedTokens: number;\n totalTokens: number;\n}\n\nexport interface ChatChunk {\n id: string;\n delta?: ChoiceDelta;\n usage?: CompletionUsage;\n}\n\nexport interface LLMError {\n type: 'llm_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type LLMCallbacks = {\n ['metrics_collected']: (metrics: LLMMetrics) => void;\n ['error']: (error: LLMError) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n constructor() {\n super();\n }\n\n abstract label(): string;\n\n /**\n * Get the model name/identifier for this LLM instance.\n *\n * @returns The model name if available, \"unknown\" otherwise.\n *\n * @remarks\n * Plugins should override this property to provide their model information.\n */\n get model(): string {\n return 'unknown';\n }\n\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n toolCtx,\n connOptions,\n parallelToolCalls,\n toolChoice,\n extraKwargs,\n }: {\n chatCtx: ChatContext;\n toolCtx?: ToolContext;\n connOptions?: APIConnectOptions;\n parallelToolCalls?: boolean;\n toolChoice?: ToolChoice;\n extraKwargs?: Record<string, any>;\n }): LLMStream;\n\n /**\n * Pre-warm connection to the LLM service\n */\n prewarm(): void {\n // Default implementation - subclasses can override\n }\n\n async aclose(): Promise<void> {\n // Default implementation - subclasses can override\n }\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected abortController = new AbortController();\n protected _connOptions: APIConnectOptions;\n protected logger = log();\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #toolCtx?: ToolContext;\n\n constructor(\n llm: LLM,\n {\n chatCtx,\n toolCtx,\n connOptions,\n }: {\n chatCtx: ChatContext;\n toolCtx?: ToolContext;\n connOptions: APIConnectOptions;\n },\n ) {\n this.#llm = llm;\n this.#chatCtx = chatCtx;\n this.#toolCtx = toolCtx;\n this._connOptions = connOptions;\n this.monitorMetrics();\n this.abortController.signal.addEventListener('abort', () => {\n // TODO (AJS-37) clean this up when we refactor with streams\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate LLM completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { llm: this.#llm.label(), attempt: i + 1, error },\n `failed to generate LLM completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#llm.emit('error', {\n type: 'llm_error',\n timestamp: Date.now(),\n label: this.#llm.label(),\n error,\n recoverable,\n });\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint = BigInt(-1);\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(ev);\n requestId = ev.id;\n if (ttft === BigInt(-1)) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n type: 'llm_metrics',\n timestamp: Date.now(),\n requestId,\n ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: this.abortController.signal.aborted,\n label: this.#llm.label(),\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n promptCachedTokens: usage?.promptCachedTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond:\n (usage?.completionTokens || 0) / Math.trunc(Number(duration / BigInt(1000000000))),\n };\n this.#llm.emit('metrics_collected', metrics);\n }\n\n protected abstract run(): Promise<void>;\n\n /** The function context of this stream. */\n get toolCtx(): ToolContext | undefined {\n return this.#toolCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** The connection options for this stream. */\n get connOptions(): APIConnectOptions {\n return this._connOptions;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":"AAIA,SAAS,oBAAoB;AAC7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,WAAW;AAGpB,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAC9D,eAAmE;AAmC5D,MAAe,YAAa,aAAsD;AAAA,EACvF,cAAc;AACZ,UAAM;AAAA,EACR;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAwBA,UAAgB;AAAA,EAEhB;AAAA,EAEA,MAAM,SAAwB;AAAA,EAE9B;AACF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,mBAA8B;AAAA,EAC3C,QAAQ,IAAI,mBAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,kBAAkB,IAAI,gBAAgB;AAAA,EACtC;AAAA,EACA,SAAS,IAAI;AAAA,EAEvB;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YACE,KACA;AAAA,IACE;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKA;AACA,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,WAAW;AAChB,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAE1D,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,cAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,MAAM,GAAG,SAAS,IAAI,GAAG,MAAM;AAAA,cAChD,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK,MAAM;AAAA,MACvB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW,KAAK,gBAAgB,OAAO;AAAA,MACvC,OAAO,KAAK,KAAK,MAAM;AAAA,MACvB,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,qBAAoB,+BAAO,uBAAsB;AAAA,MACjD,cAAa,+BAAO,gBAAe;AAAA,MACnC,mBACG,+BAAO,qBAAoB,KAAK,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AAAA,IACrF;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAKA,IAAI,UAAmC;AACrC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/llm/llm.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { LLMMetrics } from '../metrics/base.js';\nimport type { APIConnectOptions } from '../types.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport { type ChatContext, type ChatRole, type FunctionCall } from './chat_context.js';\nimport type { ToolChoice, ToolContext } from './tool_context.js';\n\nexport interface ChoiceDelta {\n role: ChatRole;\n content?: string;\n toolCalls?: FunctionCall[];\n}\n\nexport interface CompletionUsage {\n completionTokens: number;\n promptTokens: number;\n promptCachedTokens: number;\n totalTokens: number;\n}\n\nexport interface ChatChunk {\n id: string;\n delta?: ChoiceDelta;\n usage?: CompletionUsage;\n}\n\nexport interface LLMError {\n type: 'llm_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type LLMCallbacks = {\n ['metrics_collected']: (metrics: LLMMetrics) => void;\n ['error']: (error: LLMError) => void;\n};\n\nexport abstract class LLM extends (EventEmitter as new () => TypedEmitter<LLMCallbacks>) {\n constructor() {\n super();\n }\n\n abstract label(): string;\n\n /**\n * Get the model name/identifier for this LLM instance.\n *\n * @returns The model name if available, \"unknown\" otherwise.\n *\n * @remarks\n * Plugins should override this property to provide their model information.\n */\n get model(): string {\n return 'unknown';\n }\n\n /**\n * Returns a {@link LLMStream} that can be used to push text and receive LLM responses.\n */\n abstract chat({\n chatCtx,\n toolCtx,\n connOptions,\n parallelToolCalls,\n toolChoice,\n extraKwargs,\n }: {\n chatCtx: ChatContext;\n toolCtx?: ToolContext;\n connOptions?: APIConnectOptions;\n parallelToolCalls?: boolean;\n toolChoice?: ToolChoice;\n extraKwargs?: Record<string, any>;\n }): LLMStream;\n\n /**\n * Pre-warm connection to the LLM service\n */\n prewarm(): void {\n // Default implementation - subclasses can override\n }\n\n async aclose(): Promise<void> {\n // Default implementation - subclasses can override\n }\n}\n\nexport abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {\n protected output = new AsyncIterableQueue<ChatChunk>();\n protected queue = new AsyncIterableQueue<ChatChunk>();\n protected closed = false;\n protected abortController = new AbortController();\n protected _connOptions: APIConnectOptions;\n protected logger = log();\n\n #llm: LLM;\n #chatCtx: ChatContext;\n #toolCtx?: ToolContext;\n\n constructor(\n llm: LLM,\n {\n chatCtx,\n toolCtx,\n connOptions,\n }: {\n chatCtx: ChatContext;\n toolCtx?: ToolContext;\n connOptions: APIConnectOptions;\n },\n ) {\n this.#llm = llm;\n this.#chatCtx = chatCtx;\n this.#toolCtx = toolCtx;\n this._connOptions = connOptions;\n this.monitorMetrics();\n this.abortController.signal.addEventListener('abort', () => {\n // TODO (AJS-37) clean this up when we refactor with streams\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate LLM completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { llm: this.#llm.label(), attempt: i + 1, error },\n `failed to generate LLM completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#llm.emit('error', {\n type: 'llm_error',\n timestamp: Date.now(),\n label: this.#llm.label(),\n error,\n recoverable,\n });\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let ttft: bigint = BigInt(-1);\n let requestId = '';\n let usage: CompletionUsage | undefined;\n\n for await (const ev of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(ev);\n requestId = ev.id;\n if (ttft === BigInt(-1)) {\n ttft = process.hrtime.bigint() - startTime;\n }\n if (ev.usage) {\n usage = ev.usage;\n }\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: LLMMetrics = {\n type: 'llm_metrics',\n timestamp: Date.now(),\n requestId,\n ttft: ttft === BigInt(-1) ? -1 : Math.trunc(Number(ttft / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n cancelled: this.abortController.signal.aborted,\n label: this.#llm.label(),\n completionTokens: usage?.completionTokens || 0,\n promptTokens: usage?.promptTokens || 0,\n promptCachedTokens: usage?.promptCachedTokens || 0,\n totalTokens: usage?.totalTokens || 0,\n tokensPerSecond: (() => {\n const durationSeconds = Math.trunc(Number(duration / BigInt(1000000000)));\n return durationSeconds > 0 ? (usage?.completionTokens || 0) / durationSeconds : 0;\n })(),\n };\n this.#llm.emit('metrics_collected', metrics);\n }\n\n protected abstract run(): Promise<void>;\n\n /** The function context of this stream. */\n get toolCtx(): ToolContext | undefined {\n return this.#toolCtx;\n }\n\n /** The initial chat context of this stream. */\n get chatCtx(): ChatContext {\n return this.#chatCtx;\n }\n\n /** The connection options for this stream. */\n get connOptions(): APIConnectOptions {\n return this._connOptions;\n }\n\n next(): Promise<IteratorResult<ChatChunk>> {\n return this.output.next();\n }\n\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): LLMStream {\n return this;\n }\n}\n"],"mappings":"AAIA,SAAS,oBAAoB;AAC7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,WAAW;AAGpB,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAC9D,eAAmE;AAmC5D,MAAe,YAAa,aAAsD;AAAA,EACvF,cAAc;AACZ,UAAM;AAAA,EACR;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAwBA,UAAgB;AAAA,EAEhB;AAAA,EAEA,MAAM,SAAwB;AAAA,EAE9B;AACF;AAEO,MAAe,UAAsD;AAAA,EAChE,SAAS,IAAI,mBAA8B;AAAA,EAC3C,QAAQ,IAAI,mBAA8B;AAAA,EAC1C,SAAS;AAAA,EACT,kBAAkB,IAAI,gBAAgB;AAAA,EACtC;AAAA,EACA,SAAS,IAAI;AAAA,EAEvB;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YACE,KACA;AAAA,IACE;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKA;AACA,SAAK,OAAO;AACZ,SAAK,WAAW;AAChB,SAAK,WAAW;AAChB,SAAK,eAAe;AACpB,SAAK,eAAe;AACpB,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAE1D,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,cAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,MAAM,GAAG,SAAS,IAAI,GAAG,MAAM;AAAA,cAChD,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK,MAAM;AAAA,MACvB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAChB,QAAI;AAEJ,qBAAiB,MAAM,KAAK,OAAO;AACjC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,EAAE;AAClB,kBAAY,GAAG;AACf,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,UAAI,GAAG,OAAO;AACZ,gBAAQ,GAAG;AAAA,MACb;AAAA,IACF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,WAAW,KAAK,gBAAgB,OAAO;AAAA,MACvC,OAAO,KAAK,KAAK,MAAM;AAAA,MACvB,mBAAkB,+BAAO,qBAAoB;AAAA,MAC7C,eAAc,+BAAO,iBAAgB;AAAA,MACrC,qBAAoB,+BAAO,uBAAsB;AAAA,MACjD,cAAa,+BAAO,gBAAe;AAAA,MACnC,kBAAkB,MAAM;AACtB,cAAM,kBAAkB,KAAK,MAAM,OAAO,WAAW,OAAO,GAAU,CAAC,CAAC;AACxE,eAAO,kBAAkB,MAAK,+BAAO,qBAAoB,KAAK,kBAAkB;AAAA,MAClF,GAAG;AAAA,IACL;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAKA,IAAI,UAAmC;AACrC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA,EAEA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":[]}
|
package/dist/vad.cjs
CHANGED
|
@@ -121,6 +121,9 @@ class VADStream {
|
|
|
121
121
|
}
|
|
122
122
|
break;
|
|
123
123
|
case 1 /* INFERENCE_DONE */:
|
|
124
|
+
inferenceDurationTotal += value.inferenceDuration;
|
|
125
|
+
this.#lastActivityTime = process.hrtime.bigint();
|
|
126
|
+
break;
|
|
124
127
|
case 2 /* END_OF_SPEECH */:
|
|
125
128
|
this.#lastActivityTime = process.hrtime.bigint();
|
|
126
129
|
break;
|
package/dist/vad.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotal = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTime: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotal,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotal = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAM7B,iBAAoB;AAEpB,6BAAuC;AACvC,gCAAkC;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,gCAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,4CAAgE;AAAA,EAC5E,SAAS,IAAI,4CAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,aAAS,gBAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,8CAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,yBAAyB;AAC7B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,UAAU,KAAK;AAAA,gBACb,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,qCAAyB;AAAA,UAC3B;AACA;AAAA,QACF,KAAK;AAAA,QACL,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
|
|
1
|
+
{"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotal = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTime: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotal,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotal = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n inferenceDurationTotal += value.inferenceDuration;\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAM7B,iBAAoB;AAEpB,6BAAuC;AACvC,gCAAkC;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,gCAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,4CAAgE;AAAA,EAC5E,SAAS,IAAI,4CAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,aAAS,gBAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,8CAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,yBAAyB;AAC7B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,UAAU,KAAK;AAAA,gBACb,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,qCAAyB;AAAA,UAC3B;AACA;AAAA,QACF,KAAK;AACH,oCAA0B,MAAM;AAChC,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,QACF,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
|
package/dist/vad.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vad.d.ts","sourceRoot":"","sources":["../src/vad.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EACV,cAAc,EACd,2BAA2B,EAC3B,2BAA2B,EAC5B,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAEnE,oBAAY,YAAY;IACtB,eAAe,IAAA;IACf,cAAc,IAAA;IACd,aAAa,IAAA;IACb,iBAAiB,IAAA;CAClB;AAED,MAAM,WAAW,QAAQ;IACvB,oFAAoF;IACpF,IAAI,EAAE,YAAY,CAAC;IACnB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,cAAc,EAAE,MAAM,CAAC;IACvB,uCAAuC;IACvC,eAAe,EAAE,MAAM,CAAC;IACxB;;;;;;;OAOG;IACH,MAAM,EAAE,UAAU,EAAE,CAAC;IACrB,6EAA6E;IAC7E,WAAW,EAAE,MAAM,CAAC;IACpB,0FAA0F;IAC1F,iBAAiB,EAAE,MAAM,CAAC;IAC1B,2DAA2D;IAC3D,QAAQ,EAAE,OAAO,CAAC;IAClB,wCAAwC;IACxC,qBAAqB,EAAE,MAAM,CAAC;IAC9B,uCAAuC;IACvC,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;CACtD,CAAC;kCAE2D,aAAa,YAAY,CAAC;AAAvF,8BAAsB,GAAI,SAAQ,QAAsD;;IAEtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,YAAY,EAAE,eAAe;IAKzC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED;;OAEG;IACH,QAAQ,CAAC,MAAM,IAAI,SAAS;CAC7B;AAED,8BAAsB,SAAU,YAAW,qBAAqB,CAAC,QAAQ,CAAC;;IACxE,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,SAAS,CAAC,KAAK,kEAAyE;IACxF,SAAS,CAAC,MAAM,8BAAqC;IACrD,SAAS,CAAC,WAAW,EAAE,2BAA2B,CAAC,UAAU,GAAG,OAAO,SAAS,CAAC,cAAc,CAAC,CAAC;IACjG,SAAS,CAAC,WAAW,EAAE,2BAA2B,CAAC,UAAU,GAAG,OAAO,SAAS,CAAC,cAAc,CAAC,CAAC;IACjG,SAAS,CAAC,YAAY,EAAE,2BAA2B,CAAC,QAAQ,CAAC,CAAC;IAC9D,SAAS,CAAC,YAAY,EAAE,2BAA2B,CAAC,QAAQ,CAAC,CAAC;IAC9D,SAAS,CAAC,MAAM,UAAS;IACzB,SAAS,CAAC,WAAW,UAAS;IAI9B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,mBAAmB,CAAqC;IAEhE,OAAO,CAAC,aAAa,CAA2B;gBACpC,GAAG,EAAE,GAAG;IAgBpB;;;;;;OAMG;YACW,kBAAkB;cAgBhB,cAAc;
|
|
1
|
+
{"version":3,"file":"vad.d.ts","sourceRoot":"","sources":["../src/vad.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EACV,cAAc,EACd,2BAA2B,EAC3B,2BAA2B,EAC5B,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAEnE,oBAAY,YAAY;IACtB,eAAe,IAAA;IACf,cAAc,IAAA;IACd,aAAa,IAAA;IACb,iBAAiB,IAAA;CAClB;AAED,MAAM,WAAW,QAAQ;IACvB,oFAAoF;IACpF,IAAI,EAAE,YAAY,CAAC;IACnB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,cAAc,EAAE,MAAM,CAAC;IACvB,uCAAuC;IACvC,eAAe,EAAE,MAAM,CAAC;IACxB;;;;;;;OAOG;IACH,MAAM,EAAE,UAAU,EAAE,CAAC;IACrB,6EAA6E;IAC7E,WAAW,EAAE,MAAM,CAAC;IACpB,0FAA0F;IAC1F,iBAAiB,EAAE,MAAM,CAAC;IAC1B,2DAA2D;IAC3D,QAAQ,EAAE,OAAO,CAAC;IAClB,wCAAwC;IACxC,qBAAqB,EAAE,MAAM,CAAC;IAC9B,uCAAuC;IACvC,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;CACtD,CAAC;kCAE2D,aAAa,YAAY,CAAC;AAAvF,8BAAsB,GAAI,SAAQ,QAAsD;;IAEtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,YAAY,EAAE,eAAe;IAKzC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED;;OAEG;IACH,QAAQ,CAAC,MAAM,IAAI,SAAS;CAC7B;AAED,8BAAsB,SAAU,YAAW,qBAAqB,CAAC,QAAQ,CAAC;;IACxE,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,SAAS,CAAC,KAAK,kEAAyE;IACxF,SAAS,CAAC,MAAM,8BAAqC;IACrD,SAAS,CAAC,WAAW,EAAE,2BAA2B,CAAC,UAAU,GAAG,OAAO,SAAS,CAAC,cAAc,CAAC,CAAC;IACjG,SAAS,CAAC,WAAW,EAAE,2BAA2B,CAAC,UAAU,GAAG,OAAO,SAAS,CAAC,cAAc,CAAC,CAAC;IACjG,SAAS,CAAC,YAAY,EAAE,2BAA2B,CAAC,QAAQ,CAAC,CAAC;IAC9D,SAAS,CAAC,YAAY,EAAE,2BAA2B,CAAC,QAAQ,CAAC,CAAC;IAC9D,SAAS,CAAC,MAAM,UAAS;IACzB,SAAS,CAAC,WAAW,UAAS;IAI9B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,mBAAmB,CAAqC;IAEhE,OAAO,CAAC,aAAa,CAA2B;gBACpC,GAAG,EAAE,GAAG;IAgBpB;;;;;;OAMG;YACW,kBAAkB;cAgBhB,cAAc;IAuC9B,iBAAiB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAIzD,iBAAiB;IAIjB,kDAAkD;IAClD,SAAS,CAAC,KAAK,EAAE,UAAU;IAW3B,KAAK;IAUL,QAAQ;IAWF,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAS/C,KAAK;IAOL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,SAAS;CAGpC"}
|
package/dist/vad.js
CHANGED
|
@@ -96,6 +96,9 @@ class VADStream {
|
|
|
96
96
|
}
|
|
97
97
|
break;
|
|
98
98
|
case 1 /* INFERENCE_DONE */:
|
|
99
|
+
inferenceDurationTotal += value.inferenceDuration;
|
|
100
|
+
this.#lastActivityTime = process.hrtime.bigint();
|
|
101
|
+
break;
|
|
99
102
|
case 2 /* END_OF_SPEECH */:
|
|
100
103
|
this.#lastActivityTime = process.hrtime.bigint();
|
|
101
104
|
break;
|
package/dist/vad.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotal = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTime: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotal,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotal = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAM7B,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAS,yBAAyB;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,aAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,kBAAgE;AAAA,EAC5E,SAAS,IAAI,kBAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,SAAS,IAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,uBAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,yBAAyB;AAC7B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,UAAU,KAAK;AAAA,gBACb,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,qCAAyB;AAAA,UAC3B;AACA;AAAA,QACF,KAAK;AAAA,QACL,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
|
|
1
|
+
{"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotal = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTime: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotal,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotal = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n inferenceDurationTotal += value.inferenceDuration;\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAM7B,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAS,yBAAyB;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,aAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,kBAAgE;AAAA,EAC5E,SAAS,IAAI,kBAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,SAAS,IAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,uBAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,yBAAyB;AAC7B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,UAAU,KAAK;AAAA,gBACb,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,qCAAyB;AAAA,UAC3B;AACA;AAAA,QACF,KAAK;AACH,oCAA0B,MAAM;AAChC,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,QACF,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
|
|
@@ -285,6 +285,9 @@ class AudioRecognition {
|
|
|
285
285
|
this.logger.debug("VAD task: START_OF_SPEECH");
|
|
286
286
|
this.hooks.onStartOfSpeech(ev);
|
|
287
287
|
this.speaking = true;
|
|
288
|
+
if (ev.frames.length > 0 && ev.frames[0]) {
|
|
289
|
+
this.sampleRate = ev.frames[0].sampleRate;
|
|
290
|
+
}
|
|
288
291
|
(_a = this.bounceEOUTask) == null ? void 0 : _a.cancel();
|
|
289
292
|
break;
|
|
290
293
|
case import_vad.VADEventType.INFERENCE_DONE:
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private lastSpeakingTime = 0;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.audioInterimTranscript = '';\n\n if (!this.speaking) {\n if (!this.vad) {\n // Copied from python agents:\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for _last_speaking_time\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.userTurnCommitted = true;\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask = (lastSpeakingTime: number) => async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n // TODO(AJS-74): need to support actual turn detection model plugins for following code to run\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),\n endOfUtteranceDelay: Date.now() - lastSpeakingTime,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n this.speaking = false;\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.lastSpeakingTime = Date.now() - ev.silenceDuration;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAA2B;AAE3B,iBAA+B;AAC/B,0BAAiC;AACjC,iBAAoB;AACpB,6BAAmE;AACnE,gCAAkC;AAClC,oCAAqC;AACrC,iBAAkD;AAClD,mBAA4B;AAC5B,iBAAsD;AAqC/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,aAAS,gBAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,4CAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,qBAAiB,oDAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AAxH5C;AAyHI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,2BAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,2BAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,UAAU;AAClB,cAAI,CAAC,KAAK,KAAK;AAOb,iBAAK,mBAAmB,KAAK,IAAI;AAAA,UACnC;AAEA,cAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,2BAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,2BAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,oBAAoB;AAEzB,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAzMhD;AA0MI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBAAgB,CAAC,qBAA6B,OAAO,eAAgC;AACzF,UAAI,mBAAmB,KAAK;AAG5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,YAAM,aAAa,mBAAmB,mBAAmB,KAAK,IAAI;AAElE,gBAAM,oBAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAElE,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,oBAAoB,KAAK,IAAI,KAAK,0BAA0B,kBAAkB,CAAC;AAAA,QAC/E,qBAAqB,KAAK,IAAI,IAAI;AAAA,MACpC,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGA,eAAK,kBAAL,mBAAoB;AACpB,SAAK,gBAAgB,kBAAK,KAAK,cAAc,KAAK,gBAAgB,CAAC;AAEnE,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,2BAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,gBAAI,mDAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA1VzE;AA2VI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,wBAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAEhB,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,wBAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAChC;AAAA,UACF,KAAK,wBAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAC3B,iBAAK,WAAW;AAEhB,iBAAK,mBAAmB,KAAK,IAAI,IAAI,GAAG;AAExC,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AAtZlB;AAuZI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AAnazC;AAoaI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,2BAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,kBAAM,oBAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,kBAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AA7chB;AA8cI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private lastSpeakingTime = 0;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.audioInterimTranscript = '';\n\n if (!this.speaking) {\n if (!this.vad) {\n // Copied from python agents:\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for _last_speaking_time\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.userTurnCommitted = true;\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask = (lastSpeakingTime: number) => async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n // TODO(AJS-74): need to support actual turn detection model plugins for following code to run\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),\n endOfUtteranceDelay: Date.now() - lastSpeakingTime,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n this.speaking = false;\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.lastSpeakingTime = Date.now() - ev.silenceDuration;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAA2B;AAE3B,iBAA+B;AAC/B,0BAAiC;AACjC,iBAAoB;AACpB,6BAAmE;AACnE,gCAAkC;AAClC,oCAAqC;AACrC,iBAAkD;AAClD,mBAA4B;AAC5B,iBAAsD;AAqC/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,aAAS,gBAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,4CAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,qBAAiB,oDAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AAxH5C;AAyHI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,2BAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,2BAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,UAAU;AAClB,cAAI,CAAC,KAAK,KAAK;AAOb,iBAAK,mBAAmB,KAAK,IAAI;AAAA,UACnC;AAEA,cAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,2BAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,2BAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,oBAAoB;AAEzB,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAzMhD;AA0MI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBAAgB,CAAC,qBAA6B,OAAO,eAAgC;AACzF,UAAI,mBAAmB,KAAK;AAG5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,YAAM,aAAa,mBAAmB,mBAAmB,KAAK,IAAI;AAElE,gBAAM,oBAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAElE,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,oBAAoB,KAAK,IAAI,KAAK,0BAA0B,kBAAkB,CAAC;AAAA,QAC/E,qBAAqB,KAAK,IAAI,IAAI;AAAA,MACpC,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGA,eAAK,kBAAL,mBAAoB;AACpB,SAAK,gBAAgB,kBAAK,KAAK,cAAc,KAAK,gBAAgB,CAAC;AAEnE,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,2BAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,gBAAI,mDAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA1VzE;AA2VI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,wBAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,wBAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAChC;AAAA,UACF,KAAK,wBAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAC3B,iBAAK,WAAW;AAEhB,iBAAK,mBAAmB,KAAK,IAAI,IAAI,GAAG;AAExC,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA3ZlB;AA4ZI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AAxazC;AAyaI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,2BAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,kBAAM,oBAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,kBAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AAldhB;AAmdI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio_recognition.d.ts","sourceRoot":"","sources":["../../src/voice/audio_recognition.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAK1D,OAAO,EAAE,KAAK,WAAW,EAAmB,MAAM,eAAe,CAAC;AAElE,OAAO,EAAE,KAAK,GAAG,EAAE,KAAK,QAAQ,EAAgB,MAAM,WAAW,CAAC;AAClE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAEvC,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAC;IACtB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,eAAe,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3C,aAAa,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACtC,mBAAmB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC/C,iBAAiB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7C,WAAW,EAAE,CAAC,IAAI,EAAE,aAAa,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAEvD,eAAe,EAAE,MAAM,WAAW,CAAC;CACpC;AAED,MAAM,WAAW,aAAa;IAC5B,iBAAiB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;IACtE,gBAAgB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAC1D,gBAAgB,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACzD;AAED,MAAM,WAAW,uBAAuB;IACtC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,iBAAiB,CAAC,EAAE,OAAO,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;IAC9D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,KAAK,CAAmB;IAChC,OAAO,CAAC,GAAG,CAAC,CAAU;IACtB,OAAO,CAAC,GAAG,CAAC,CAAM;IAClB,OAAO,CAAC,YAAY,CAAC,CAAgB;IACrC,OAAO,CAAC,iBAAiB,CAAC,CAA4C;IACtE,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,YAAY,CAAC,CAAS;IAE9B,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,uBAAuB,CAAK;IACpC,OAAO,CAAC,eAAe,CAAM;IAC7B,OAAO,CAAC,sBAAsB,CAAM;IACpC,OAAO,CAAC,gBAAgB,CAAK;IAC7B,OAAO,CAAC,iBAAiB,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,UAAU,CAAC,CAAS;IAE5B,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,qBAAqB,CAAuC;IACpE,OAAO,CAAC,kBAAkB,CAA0C;IAGpE,OAAO,CAAC,aAAa,CAAC,CAAa;IACnC,OAAO,CAAC,kBAAkB,CAAC,CAAa;IACxC,OAAO,CAAC,OAAO,CAAC,CAAa;IAC7B,OAAO,CAAC,OAAO,CAAC,CAAa;gBAEjB,IAAI,EAAE,uBAAuB;IAiBzC;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAK9B;IAEK,KAAK;YAYG,UAAU;IAiFxB,OAAO,CAAC,eAAe;YA4FT,aAAa;YAqDb,aAAa;
|
|
1
|
+
{"version":3,"file":"audio_recognition.d.ts","sourceRoot":"","sources":["../../src/voice/audio_recognition.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,MAAM,wBAAwB,CAAC;AAK1D,OAAO,EAAE,KAAK,WAAW,EAAmB,MAAM,eAAe,CAAC;AAElE,OAAO,EAAE,KAAK,GAAG,EAAE,KAAK,QAAQ,EAAgB,MAAM,WAAW,CAAC;AAClE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAEvC,MAAM,WAAW,aAAa;IAC5B,aAAa,EAAE,MAAM,CAAC;IACtB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,eAAe,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3C,aAAa,EAAE,CAAC,EAAE,EAAE,QAAQ,KAAK,IAAI,CAAC;IACtC,mBAAmB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC/C,iBAAiB,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7C,WAAW,EAAE,CAAC,IAAI,EAAE,aAAa,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAEvD,eAAe,EAAE,MAAM,WAAW,CAAC;CACpC;AAED,MAAM,WAAW,aAAa;IAC5B,iBAAiB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;IACtE,gBAAgB,EAAE,CAAC,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;IAC1D,gBAAgB,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACzD;AAED,MAAM,WAAW,uBAAuB;IACtC,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,GAAG,CAAC,EAAE,GAAG,CAAC;IACV,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,iBAAiB,CAAC,EAAE,OAAO,CAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;IAC9D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,KAAK,CAAmB;IAChC,OAAO,CAAC,GAAG,CAAC,CAAU;IACtB,OAAO,CAAC,GAAG,CAAC,CAAM;IAClB,OAAO,CAAC,YAAY,CAAC,CAAgB;IACrC,OAAO,CAAC,iBAAiB,CAAC,CAA4C;IACtE,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,YAAY,CAAC,CAAS;IAE9B,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,uBAAuB,CAAK;IACpC,OAAO,CAAC,eAAe,CAAM;IAC7B,OAAO,CAAC,sBAAsB,CAAM;IACpC,OAAO,CAAC,gBAAgB,CAAK;IAC7B,OAAO,CAAC,iBAAiB,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,UAAU,CAAC,CAAS;IAE5B,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,cAAc,CAA6B;IACnD,OAAO,CAAC,qBAAqB,CAAuC;IACpE,OAAO,CAAC,kBAAkB,CAA0C;IAGpE,OAAO,CAAC,aAAa,CAAC,CAAa;IACnC,OAAO,CAAC,kBAAkB,CAAC,CAAa;IACxC,OAAO,CAAC,OAAO,CAAC,CAAa;IAC7B,OAAO,CAAC,OAAO,CAAC,CAAa;gBAEjB,IAAI,EAAE,uBAAuB;IAiBzC;;OAEG;IACH,IAAI,iBAAiB,IAAI,MAAM,CAK9B;IAEK,KAAK;YAYG,UAAU;IAiFxB,OAAO,CAAC,eAAe;YA4FT,aAAa;YAqDb,aAAa;IAyD3B,mBAAmB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAI3D,sBAAsB;IAItB,aAAa;IAab,cAAc,CAAC,aAAa,EAAE,OAAO;IA0C/B,KAAK;IAQX,OAAO,KAAK,oBAAoB,GAE/B;CACF"}
|
|
@@ -262,6 +262,9 @@ class AudioRecognition {
|
|
|
262
262
|
this.logger.debug("VAD task: START_OF_SPEECH");
|
|
263
263
|
this.hooks.onStartOfSpeech(ev);
|
|
264
264
|
this.speaking = true;
|
|
265
|
+
if (ev.frames.length > 0 && ev.frames[0]) {
|
|
266
|
+
this.sampleRate = ev.frames[0].sampleRate;
|
|
267
|
+
}
|
|
265
268
|
(_a = this.bounceEOUTask) == null ? void 0 : _a.cancel();
|
|
266
269
|
break;
|
|
267
270
|
case VADEventType.INFERENCE_DONE:
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private lastSpeakingTime = 0;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.audioInterimTranscript = '';\n\n if (!this.speaking) {\n if (!this.vad) {\n // Copied from python agents:\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for _last_speaking_time\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.userTurnCommitted = true;\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask = (lastSpeakingTime: number) => async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n // TODO(AJS-74): need to support actual turn detection model plugins for following code to run\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),\n endOfUtteranceDelay: Date.now() - lastSpeakingTime,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n this.speaking = false;\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.lastSpeakingTime = Date.now() - ev.silenceDuration;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAE3B,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAqC/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AAxH5C;AAyHI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,UAAU;AAClB,cAAI,CAAC,KAAK,KAAK;AAOb,iBAAK,mBAAmB,KAAK,IAAI;AAAA,UACnC;AAEA,cAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,oBAAoB;AAEzB,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAzMhD;AA0MI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBAAgB,CAAC,qBAA6B,OAAO,eAAgC;AACzF,UAAI,mBAAmB,KAAK;AAG5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,YAAM,aAAa,mBAAmB,mBAAmB,KAAK,IAAI;AAElE,YAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAElE,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,oBAAoB,KAAK,IAAI,KAAK,0BAA0B,kBAAkB,CAAC;AAAA,QAC/E,qBAAqB,KAAK,IAAI,IAAI;AAAA,MACpC,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGA,eAAK,kBAAL,mBAAoB;AACpB,SAAK,gBAAgB,KAAK,KAAK,cAAc,KAAK,gBAAgB,CAAC;AAEnE,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA1VzE;AA2VI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAEhB,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAChC;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAC3B,iBAAK,WAAW;AAEhB,iBAAK,mBAAmB,KAAK,IAAI,IAAI,GAAG;AAExC,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AAtZlB;AAuZI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AAnazC;AAoaI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AA7chB;AA8cI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private lastSpeakingTime = 0;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.audioInterimTranscript = '';\n\n if (!this.speaking) {\n if (!this.vad) {\n // Copied from python agents:\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for _last_speaking_time\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.userTurnCommitted = true;\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask = (lastSpeakingTime: number) => async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n // TODO(AJS-74): need to support actual turn detection model plugins for following code to run\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),\n endOfUtteranceDelay: Date.now() - lastSpeakingTime,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n this.speaking = false;\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.lastSpeakingTime = Date.now() - ev.silenceDuration;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAE3B,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAqC/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AAxH5C;AAyHI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,UAAU;AAClB,cAAI,CAAC,KAAK,KAAK;AAOb,iBAAK,mBAAmB,KAAK,IAAI;AAAA,UACnC;AAEA,cAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,oBAAoB;AAEzB,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAzMhD;AA0MI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBAAgB,CAAC,qBAA6B,OAAO,eAAgC;AACzF,UAAI,mBAAmB,KAAK;AAG5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,YAAM,aAAa,mBAAmB,mBAAmB,KAAK,IAAI;AAElE,YAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAElE,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,oBAAoB,KAAK,IAAI,KAAK,0BAA0B,kBAAkB,CAAC;AAAA,QAC/E,qBAAqB,KAAK,IAAI,IAAI;AAAA,MACpC,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGA,eAAK,kBAAL,mBAAoB;AACpB,SAAK,gBAAgB,KAAK,KAAK,cAAc,KAAK,gBAAgB,CAAC;AAEnE,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA1VzE;AA2VI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAChC;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAC3B,iBAAK,WAAW;AAEhB,iBAAK,mBAAmB,KAAK,IAAI,IAAI,GAAG;AAExC,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA3ZlB;AA4ZI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AAxazC;AAyaI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AAldhB;AAmdI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|