@livekit/agents 1.0.50 → 1.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +12 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -13
- package/dist/index.d.ts +13 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -10
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.d.cts +67 -67
- package/dist/inference/api_protos.d.ts +67 -67
- package/dist/inference/llm.cjs +10 -8
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +3 -7
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +20 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +3 -2
- package/dist/inference/stt.d.ts +3 -2
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +20 -12
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +14 -0
- package/dist/inference/stt.test.cjs.map +1 -1
- package/dist/inference/stt.test.js +14 -0
- package/dist/inference/stt.test.js.map +1 -1
- package/dist/inference/tts.cjs +13 -4
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -1
- package/dist/inference/tts.d.ts +2 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +13 -4
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +10 -0
- package/dist/inference/tts.test.cjs.map +1 -1
- package/dist/inference/tts.test.js +10 -0
- package/dist/inference/tts.test.js.map +1 -1
- package/dist/inference/utils.cjs +5 -5
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.js +1 -1
- package/dist/inference/utils.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/stream/deferred_stream.cjs +6 -2
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +6 -2
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +2 -1
- package/dist/stt/stt.d.ts +2 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent_activity.cjs +1 -1
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.js +1 -1
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +3 -2
- package/dist/voice/audio_recognition.d.ts +3 -2
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +3 -2
- package/dist/voice/events.d.ts +3 -2
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +13 -15
- package/src/inference/llm.ts +3 -8
- package/src/inference/stt.test.ts +17 -0
- package/src/inference/stt.ts +22 -14
- package/src/inference/tts.test.ts +12 -0
- package/src/inference/tts.ts +14 -5
- package/src/inference/utils.ts +1 -1
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/stream/deferred_stream.ts +5 -1
- package/src/stt/stt.ts +2 -1
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +1 -1
- package/src/voice/audio_recognition.ts +4 -3
- package/src/voice/events.ts +3 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ParticipantKind } from '@livekit/rtc-node';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport {\n type Context,\n ROOT_CONTEXT,\n type Span,\n context as otelContext,\n trace,\n} from '@opentelemetry/api';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { traceTypes, tracer } from '../telemetry/index.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\nimport { setParticipantSpanAttributes } from './utils.js';\n\nexport interface EndOfTurnInfo {\n /** The new transcript text from the user's speech. */\n newTranscript: string;\n /** Confidence score of the transcript (0-1). */\n transcriptConfidence: number;\n /** Delay from speech stop to final transcription in milliseconds. */\n transcriptionDelay: number;\n /** Delay from speech stop to end of utterance detection in milliseconds. */\n endOfUtteranceDelay: number;\n /** Timestamp when user started speaking (milliseconds since epoch). */\n startedSpeakingAt: number | undefined;\n /** Timestamp when user stopped speaking (milliseconds since epoch). */\n stoppedSpeakingAt: number | undefined;\n}\n\nexport interface PreemptiveGenerationInfo {\n newTranscript: string;\n transcriptConfidence: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n /** Hooks for recognition events. */\n recognitionHooks: RecognitionHooks;\n /** Speech-to-text node. */\n stt?: STTNode;\n /** Voice activity detection. */\n vad?: VAD;\n /** Turn detector for end-of-turn prediction. */\n turnDetector?: _TurnDetector;\n /** Turn detection mode. */\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n /** Minimum endpointing delay in milliseconds. */\n minEndpointingDelay: number;\n /** Maximum endpointing delay in milliseconds. */\n maxEndpointingDelay: number;\n /** Root span context for tracing. */\n rootSpanContext?: Context;\n /** STT model name for tracing */\n sttModel?: string;\n /** STT provider name for tracing */\n sttProvider?: string;\n /** Getter for linked participant for span attribution */\n getLinkedParticipant?: () => ParticipantLike | undefined;\n}\n\n/**\n * Minimal participant shape for span attribution.\n * Compatible with both `LocalParticipant` and `RemoteParticipant` from `@livekit/rtc-node`.\n */\nexport interface ParticipantLike {\n sid: string | undefined;\n identity: string;\n kind: ParticipantKind;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n private rootSpanContext?: Context;\n private sttModel?: string;\n private sttProvider?: string;\n private getLinkedParticipant?: () => ParticipantLike | undefined;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private audioPreflightTranscript = '';\n private finalTranscriptConfidence: number[] = [];\n private lastSpeakingTime: number | undefined;\n private speechStartTime: number | undefined;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private userTurnSpan?: Span;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n this.rootSpanContext = opts.rootSpanContext;\n this.sttModel = opts.sttModel;\n this.sttProvider = opts.sttProvider;\n this.getLinkedParticipant = opts.getLinkedParticipant;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private ensureUserTurnSpan(startTime?: number): Span {\n if (this.userTurnSpan && this.userTurnSpan.isRecording()) {\n return this.userTurnSpan;\n }\n\n this.userTurnSpan = tracer.startSpan({\n name: 'user_turn',\n context: this.rootSpanContext,\n startTime,\n });\n\n const participant = this.getLinkedParticipant?.();\n if (participant) {\n setParticipantSpanAttributes(this.userTurnSpan, participant);\n }\n\n if (this.sttModel) {\n this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, this.sttModel);\n }\n if (this.sttProvider) {\n this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, this.sttProvider);\n }\n\n return this.userTurnSpan;\n }\n\n private userTurnContext(span: Span): Context {\n const base = this.rootSpanContext ?? ROOT_CONTEXT;\n return trace.setSpan(base, span);\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n const transcript = ev.alternatives?.[0]?.text;\n const confidence = ev.alternatives?.[0]?.confidence ?? 0;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.hooks.onFinalTranscript(ev);\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.finalTranscriptConfidence.push(confidence);\n const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for lastSpeakingTime\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n if (transcriptChanged) {\n this.logger.debug(\n { transcript: this.audioTranscript },\n 'triggering preemptive generation (FINAL_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioTranscript,\n transcriptConfidence:\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0,\n });\n }\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.PREFLIGHT_TRANSCRIPT:\n this.hooks.onInterimTranscript(ev);\n const preflightTranscript = ev.alternatives?.[0]?.text ?? '';\n const preflightConfidence = ev.alternatives?.[0]?.confidence ?? 0;\n const preflightLanguage = ev.alternatives?.[0]?.language;\n\n const MIN_LANGUAGE_DETECTION_LENGTH = 5;\n if (\n !this.lastLanguage ||\n (preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH)\n ) {\n this.lastLanguage = preflightLanguage;\n }\n\n if (!preflightTranscript) {\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: preflightTranscript,\n language: this.lastLanguage,\n },\n 'received user preflight transcript',\n );\n\n // still need to increment it as it's used for turn detection,\n this.lastFinalTranscriptTime = Date.now();\n // preflight transcript includes all pre-committed transcripts (including final transcript from the previous STT run)\n this.audioPreflightTranscript =\n `${this.audioTranscript} ${preflightTranscript}`.trimStart();\n this.audioInterimTranscript = preflightTranscript;\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.turnDetectionMode !== 'manual' || this.userTurnCommitted) {\n const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];\n this.logger.debug(\n {\n transcript:\n this.audioPreflightTranscript.length > 100\n ? this.audioPreflightTranscript.slice(0, 100) + '...'\n : this.audioPreflightTranscript,\n },\n 'triggering preemptive generation (PREFLIGHT_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioPreflightTranscript,\n transcriptConfidence:\n confidenceVals.length > 0\n ? confidenceVals.reduce((a, b) => a + b, 0) / confidenceVals.length\n : 0,\n });\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.START_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n {\n const span = this.ensureUserTurnSpan(Date.now());\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => {\n this.hooks.onStartOfSpeech({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: true,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n });\n }\n this.speaking = true;\n this.lastSpeakingTime = Date.now();\n\n this.bounceEOUTask?.cancel();\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n {\n const span = this.ensureUserTurnSpan();\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => {\n this.hooks.onEndOfSpeech({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: false,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n });\n }\n this.speaking = false;\n this.userTurnCommitted = true;\n this.lastSpeakingTime = Date.now();\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask =\n (\n lastSpeakingTime: number | undefined,\n lastFinalTranscriptTime: number,\n speechStartTime: number | undefined,\n ) =>\n async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n const userTurnSpan = this.ensureUserTurnSpan();\n const userTurnCtx = this.userTurnContext(userTurnSpan);\n\n if (turnDetector) {\n await tracer.startActiveSpan(\n async (span) => {\n this.logger.debug('Running turn detector model');\n\n let endOfTurnProbability = 0.0;\n let unlikelyThreshold: number | undefined;\n\n if (!(await turnDetector.supportsLanguage(this.lastLanguage))) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n try {\n endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n\n this.logger.debug(\n { endOfTurnProbability, unlikelyThreshold, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n } catch (error) {\n this.logger.error(error, 'Error predicting end of turn');\n }\n }\n\n span.setAttribute(\n traceTypes.ATTR_CHAT_CTX,\n JSON.stringify(chatCtx.toJSON({ excludeTimestamp: false })),\n );\n span.setAttribute(traceTypes.ATTR_EOU_PROBABILITY, endOfTurnProbability);\n span.setAttribute(traceTypes.ATTR_EOU_UNLIKELY_THRESHOLD, unlikelyThreshold ?? 0);\n span.setAttribute(traceTypes.ATTR_EOU_DELAY, endpointingDelay);\n span.setAttribute(traceTypes.ATTR_EOU_LANGUAGE, this.lastLanguage ?? '');\n },\n {\n name: 'eou_detection',\n context: userTurnCtx,\n },\n );\n }\n\n let extraSleep = endpointingDelay;\n if (lastSpeakingTime !== undefined) {\n extraSleep += lastSpeakingTime - Date.now();\n }\n\n if (extraSleep > 0) {\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n }\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const confidenceAvg =\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0;\n\n let startedSpeakingAt: number | undefined;\n let stoppedSpeakingAt: number | undefined;\n let transcriptionDelay: number | undefined;\n let endOfUtteranceDelay: number | undefined;\n\n // sometimes, we can't calculate the metrics because VAD was unreliable.\n // in this case, we just ignore the calculation, it's better than providing likely wrong values\n if (\n lastFinalTranscriptTime !== 0 &&\n lastSpeakingTime !== undefined &&\n speechStartTime !== undefined\n ) {\n startedSpeakingAt = speechStartTime;\n stoppedSpeakingAt = lastSpeakingTime;\n transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);\n endOfUtteranceDelay = Date.now() - lastSpeakingTime;\n }\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptConfidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n startedSpeakingAt,\n stoppedSpeakingAt,\n });\n\n if (committed) {\n this._endUserTurnSpan({\n transcript: this.audioTranscript,\n confidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n });\n\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n this.finalTranscriptConfidence = [];\n this.lastSpeakingTime = undefined;\n this.lastFinalTranscriptTime = 0;\n this.speechStartTime = undefined;\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n // copy the values before awaiting (the values can change)\n this.bounceEOUTask = Task.from(\n bounceEOUTask(this.lastSpeakingTime, this.lastFinalTranscriptTime, this.speechStartTime),\n );\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n {\n const startTime = Date.now() - ev.speechDuration;\n const span = this.ensureUserTurnSpan(startTime);\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => this.hooks.onStartOfSpeech(ev));\n }\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n // for metrics, get the \"earliest\" signal of speech as possible\n if (ev.rawAccumulatedSpeech > 0.0) {\n this.lastSpeakingTime = Date.now();\n\n if (this.speechStartTime === undefined) {\n this.speechStartTime = Date.now();\n }\n }\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n {\n const span = this.ensureUserTurnSpan();\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => this.hooks.onEndOfSpeech(ev));\n }\n\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.speaking = false;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n this.finalTranscriptConfidence = [];\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.name === 'AbortError') {\n this.logger.debug('User turn commit task cancelled');\n return;\n }\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n this.silenceAudioWriter.releaseLock();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private _endUserTurnSpan({\n transcript,\n confidence,\n transcriptionDelay,\n endOfUtteranceDelay,\n }: {\n transcript: string;\n confidence: number;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n }): void {\n if (this.userTurnSpan) {\n this.userTurnSpan.setAttributes({\n [traceTypes.ATTR_USER_TRANSCRIPT]: transcript,\n [traceTypes.ATTR_TRANSCRIPT_CONFIDENCE]: confidence,\n [traceTypes.ATTR_TRANSCRIPTION_DELAY]: transcriptionDelay,\n [traceTypes.ATTR_END_OF_TURN_DELAY]: endOfUtteranceDelay,\n });\n this.userTurnSpan.end();\n this.userTurnSpan = undefined;\n }\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAIA,SAAS,kBAAkB;AAC3B;AAAA,EAEE;AAAA,EAEA,WAAW;AAAA,EACX;AAAA,OACK;AAEP,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,YAAY,cAAc;AACnC,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAGtD,SAAS,oCAAoC;AA2EtC,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,2BAA2B;AAAA,EAC3B,4BAAsC,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AACpB,SAAK,kBAAkB,KAAK;AAC5B,SAAK,WAAW,KAAK;AACrB,SAAK,cAAc,KAAK;AACxB,SAAK,uBAAuB,KAAK;AAEjC,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEQ,mBAAmB,WAA0B;AAvLvD;AAwLI,QAAI,KAAK,gBAAgB,KAAK,aAAa,YAAY,GAAG;AACxD,aAAO,KAAK;AAAA,IACd;AAEA,SAAK,eAAe,OAAO,UAAU;AAAA,MACnC,MAAM;AAAA,MACN,SAAS,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,UAAM,eAAc,UAAK,yBAAL;AACpB,QAAI,aAAa;AACf,mCAA6B,KAAK,cAAc,WAAW;AAAA,IAC7D;AAEA,QAAI,KAAK,UAAU;AACjB,WAAK,aAAa,aAAa,WAAW,2BAA2B,KAAK,QAAQ;AAAA,IACpF;AACA,QAAI,KAAK,aAAa;AACpB,WAAK,aAAa,aAAa,WAAW,2BAA2B,KAAK,WAAW;AAAA,IACvF;AAEA,WAAO,KAAK;AAAA,EACd;AAAA,EAEQ,gBAAgB,MAAqB;AAC3C,UAAM,OAAO,KAAK,mBAAmB;AACrC,WAAO,MAAM,QAAQ,MAAM,IAAI;AAAA,EACjC;AAAA,EAEA,MAAc,WAAW,IAAiB;AAtN5C;AAuNI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,cAAM,eAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AACvD,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,MAAM,kBAAkB,EAAE;AAE/B,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,0BAA0B,KAAK,UAAU;AAC9C,cAAM,oBAAoB,KAAK,oBAAoB,KAAK;AACxD,aAAK,yBAAyB;AAC9B,aAAK,2BAA2B;AAEhC,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAMpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,cAAI,mBAAmB;AACrB,iBAAK,OAAO;AAAA,cACV,EAAE,YAAY,KAAK,gBAAgB;AAAA,cACnC;AAAA,YACF;AACA,iBAAK,MAAM,uBAAuB;AAAA,cAChC,eAAe,KAAK;AAAA,cACpB,sBACE,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAAA,YACR,CAAC;AAAA,UACH;AAEA,cAAI,CAAC,KAAK,UAAU;AAClB,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,MAAM,oBAAoB,EAAE;AACjC,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC1D,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AAChE,cAAM,qBAAoB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAEhD,cAAM,gCAAgC;AACtC,YACE,CAAC,KAAK,gBACL,qBAAqB,oBAAoB,SAAS,+BACnD;AACA,eAAK,eAAe;AAAA,QACtB;AAEA,YAAI,CAAC,qBAAqB;AACxB;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAGA,aAAK,0BAA0B,KAAK,IAAI;AAExC,aAAK,2BACH,GAAG,KAAK,eAAe,IAAI,mBAAmB,GAAG,UAAU;AAC7D,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAEpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,sBAAsB,YAAY,KAAK,mBAAmB;AACjE,gBAAM,iBAAiB,CAAC,GAAG,KAAK,2BAA2B,mBAAmB;AAC9E,eAAK,OAAO;AAAA,YACV;AAAA,cACE,YACE,KAAK,yBAAyB,SAAS,MACnC,KAAK,yBAAyB,MAAM,GAAG,GAAG,IAAI,QAC9C,KAAK;AAAA,YACb;AAAA,YACA;AAAA,UACF;AACA,eAAK,MAAM,uBAAuB;AAAA,YAChC,eAAe,KAAK;AAAA,YACpB,sBACE,eAAe,SAAS,IACpB,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,SAC3D;AAAA,UACR,CAAC;AAAA,QACH;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC;AACE,gBAAM,OAAO,KAAK,mBAAmB,KAAK,IAAI,CAAC;AAC/C,gBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,sBAAY,KAAK,KAAK,MAAM;AAC1B,iBAAK,MAAM,gBAAgB;AAAA,cACzB,MAAM,aAAa;AAAA,cACnB,cAAc;AAAA,cACd,WAAW,KAAK,IAAI;AAAA,cACpB,gBAAgB;AAAA,cAChB,iBAAiB;AAAA,cACjB,QAAQ,CAAC;AAAA,cACT,aAAa;AAAA,cACb,mBAAmB;AAAA,cACnB,UAAU;AAAA,cACV,uBAAuB;AAAA,cACvB,sBAAsB;AAAA,YACxB,CAAC;AAAA,UACH,CAAC;AAAA,QACH;AACA,aAAK,WAAW;AAChB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,mBAAK,kBAAL,mBAAoB;AACpB;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC;AACE,gBAAM,OAAO,KAAK,mBAAmB;AACrC,gBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,sBAAY,KAAK,KAAK,MAAM;AAC1B,iBAAK,MAAM,cAAc;AAAA,cACvB,MAAM,aAAa;AAAA,cACnB,cAAc;AAAA,cACd,WAAW,KAAK,IAAI;AAAA,cACpB,gBAAgB;AAAA,cAChB,iBAAiB;AAAA,cACjB,QAAQ,CAAC;AAAA,cACT,aAAa;AAAA,cACb,mBAAmB;AAAA,cACnB,UAAU;AAAA,cACV,uBAAuB;AAAA,cACvB,sBAAsB;AAAA,YACxB,CAAC;AAAA,UACH,CAAC;AAAA,QACH;AACA,aAAK,WAAW;AAChB,aAAK,oBAAoB;AACzB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAnahD;AAoaI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBACJ,CACE,kBACA,yBACA,oBAEF,OAAO,eAAgC;AACrC,UAAI,mBAAmB,KAAK;AAE5B,YAAM,eAAe,KAAK,mBAAmB;AAC7C,YAAM,cAAc,KAAK,gBAAgB,YAAY;AAErD,UAAI,cAAc;AAChB,cAAM,OAAO;AAAA,UACX,OAAO,SAAS;AACd,iBAAK,OAAO,MAAM,6BAA6B;AAE/C,gBAAI,uBAAuB;AAC3B,gBAAI;AAEJ,gBAAI,CAAE,MAAM,aAAa,iBAAiB,KAAK,YAAY,GAAI;AAC7D,mBAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,YAClF,OAAO;AACL,kBAAI;AACF,uCAAuB,MAAM,aAAa,iBAAiB,OAAO;AAClE,oCAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAE1E,qBAAK,OAAO;AAAA,kBACV,EAAE,sBAAsB,mBAAmB,UAAU,KAAK,aAAa;AAAA,kBACvE;AAAA,gBACF;AAEA,oBAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,qCAAmB,KAAK;AAAA,gBAC1B;AAAA,cACF,SAAS,OAAO;AACd,qBAAK,OAAO,MAAM,OAAO,8BAA8B;AAAA,cACzD;AAAA,YACF;AAEA,iBAAK;AAAA,cACH,WAAW;AAAA,cACX,KAAK,UAAU,QAAQ,OAAO,EAAE,kBAAkB,MAAM,CAAC,CAAC;AAAA,YAC5D;AACA,iBAAK,aAAa,WAAW,sBAAsB,oBAAoB;AACvE,iBAAK,aAAa,WAAW,6BAA6B,qBAAqB,CAAC;AAChF,iBAAK,aAAa,WAAW,gBAAgB,gBAAgB;AAC7D,iBAAK,aAAa,WAAW,mBAAmB,KAAK,gBAAgB,EAAE;AAAA,UACzE;AAAA,UACA;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,UACX;AAAA,QACF;AAAA,MACF;AAEA,UAAI,aAAa;AACjB,UAAI,qBAAqB,QAAW;AAClC,sBAAc,mBAAmB,KAAK,IAAI;AAAA,MAC5C;AAEA,UAAI,aAAa,GAAG;AAElB,cAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MACpE;AAEA,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,gBACJ,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAEN,UAAI;AACJ,UAAI;AACJ,UAAI;AACJ,UAAI;AAIJ,UACE,4BAA4B,KAC5B,qBAAqB,UACrB,oBAAoB,QACpB;AACA,4BAAoB;AACpB,4BAAoB;AACpB,6BAAqB,KAAK,IAAI,0BAA0B,kBAAkB,CAAC;AAC3E,8BAAsB,KAAK,IAAI,IAAI;AAAA,MACrC;AAEA,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,sBAAsB;AAAA,QACtB,oBAAoB,sBAAsB;AAAA,QAC1C,qBAAqB,uBAAuB;AAAA,QAC5C;AAAA,QACA;AAAA,MACF,CAAC;AAED,UAAI,WAAW;AACb,aAAK,iBAAiB;AAAA,UACpB,YAAY,KAAK;AAAA,UACjB,YAAY;AAAA,UACZ,oBAAoB,sBAAsB;AAAA,UAC1C,qBAAqB,uBAAuB;AAAA,QAC9C,CAAC;AAGD,aAAK,kBAAkB;AACvB,aAAK,4BAA4B,CAAC;AAClC,aAAK,mBAAmB;AACxB,aAAK,0BAA0B;AAC/B,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,kBAAL,mBAAoB;AAEpB,SAAK,gBAAgB,KAAK;AAAA,MACxB,cAAc,KAAK,kBAAkB,KAAK,yBAAyB,KAAK,eAAe;AAAA,IACzF;AAEA,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA3nBzE;AA4nBI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C;AACE,oBAAM,YAAY,KAAK,IAAI,IAAI,GAAG;AAClC,oBAAM,OAAO,KAAK,mBAAmB,SAAS;AAC9C,oBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,0BAAY,KAAK,KAAK,MAAM,KAAK,MAAM,gBAAgB,EAAE,CAAC;AAAA,YAC5D;AACA,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAEhC,gBAAI,GAAG,uBAAuB,GAAK;AACjC,mBAAK,mBAAmB,KAAK,IAAI;AAEjC,kBAAI,KAAK,oBAAoB,QAAW;AACtC,qBAAK,kBAAkB,KAAK,IAAI;AAAA,cAClC;AAAA,YACF;AACA;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C;AACE,oBAAM,OAAO,KAAK,mBAAmB;AACrC,oBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,0BAAY,KAAK,KAAK,MAAM,KAAK,MAAM,cAAc,EAAE,CAAC;AAAA,YAC1D;AAGA,iBAAK,WAAW;AAEhB,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA7sBlB;AA8sBI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,2BAA2B;AAChC,SAAK,4BAA4B,CAAC;AAClC,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AA5tBzC;AA6tBI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,SAAS,cAAc;AACrD,aAAK,OAAO,MAAM,iCAAiC;AACnD;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AA1wBhB;AA2wBI,SAAK,uBAAuB;AAC5B,SAAK,mBAAmB,YAAY;AACpC,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEQ,iBAAiB;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKS;AACP,QAAI,KAAK,cAAc;AACrB,WAAK,aAAa,cAAc;AAAA,QAC9B,CAAC,WAAW,oBAAoB,GAAG;AAAA,QACnC,CAAC,WAAW,0BAA0B,GAAG;AAAA,QACzC,CAAC,WAAW,wBAAwB,GAAG;AAAA,QACvC,CAAC,WAAW,sBAAsB,GAAG;AAAA,MACvC,CAAC;AACD,WAAK,aAAa,IAAI;AACtB,WAAK,eAAe;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ParticipantKind } from '@livekit/rtc-node';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport {\n type Context,\n ROOT_CONTEXT,\n type Span,\n context as otelContext,\n trace,\n} from '@opentelemetry/api';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport type { LanguageCode } from '../language.js';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { traceTypes, tracer } from '../telemetry/index.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\nimport { setParticipantSpanAttributes } from './utils.js';\n\nexport interface EndOfTurnInfo {\n /** The new transcript text from the user's speech. */\n newTranscript: string;\n /** Confidence score of the transcript (0-1). */\n transcriptConfidence: number;\n /** Delay from speech stop to final transcription in milliseconds. */\n transcriptionDelay: number;\n /** Delay from speech stop to end of utterance detection in milliseconds. */\n endOfUtteranceDelay: number;\n /** Timestamp when user started speaking (milliseconds since epoch). */\n startedSpeakingAt: number | undefined;\n /** Timestamp when user stopped speaking (milliseconds since epoch). */\n stoppedSpeakingAt: number | undefined;\n}\n\nexport interface PreemptiveGenerationInfo {\n newTranscript: string;\n transcriptConfidence: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: LanguageCode) => Promise<number | undefined>;\n supportsLanguage: (language?: LanguageCode) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n /** Hooks for recognition events. */\n recognitionHooks: RecognitionHooks;\n /** Speech-to-text node. */\n stt?: STTNode;\n /** Voice activity detection. */\n vad?: VAD;\n /** Turn detector for end-of-turn prediction. */\n turnDetector?: _TurnDetector;\n /** Turn detection mode. */\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n /** Minimum endpointing delay in milliseconds. */\n minEndpointingDelay: number;\n /** Maximum endpointing delay in milliseconds. */\n maxEndpointingDelay: number;\n /** Root span context for tracing. */\n rootSpanContext?: Context;\n /** STT model name for tracing */\n sttModel?: string;\n /** STT provider name for tracing */\n sttProvider?: string;\n /** Getter for linked participant for span attribution */\n getLinkedParticipant?: () => ParticipantLike | undefined;\n}\n\n/**\n * Minimal participant shape for span attribution.\n * Compatible with both `LocalParticipant` and `RemoteParticipant` from `@livekit/rtc-node`.\n */\nexport interface ParticipantLike {\n sid: string | undefined;\n identity: string;\n kind: ParticipantKind;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: LanguageCode;\n private rootSpanContext?: Context;\n private sttModel?: string;\n private sttProvider?: string;\n private getLinkedParticipant?: () => ParticipantLike | undefined;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private audioPreflightTranscript = '';\n private finalTranscriptConfidence: number[] = [];\n private lastSpeakingTime: number | undefined;\n private speechStartTime: number | undefined;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private userTurnSpan?: Span;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n this.rootSpanContext = opts.rootSpanContext;\n this.sttModel = opts.sttModel;\n this.sttProvider = opts.sttProvider;\n this.getLinkedParticipant = opts.getLinkedParticipant;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private ensureUserTurnSpan(startTime?: number): Span {\n if (this.userTurnSpan && this.userTurnSpan.isRecording()) {\n return this.userTurnSpan;\n }\n\n this.userTurnSpan = tracer.startSpan({\n name: 'user_turn',\n context: this.rootSpanContext,\n startTime,\n });\n\n const participant = this.getLinkedParticipant?.();\n if (participant) {\n setParticipantSpanAttributes(this.userTurnSpan, participant);\n }\n\n if (this.sttModel) {\n this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, this.sttModel);\n }\n if (this.sttProvider) {\n this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, this.sttProvider);\n }\n\n return this.userTurnSpan;\n }\n\n private userTurnContext(span: Span): Context {\n const base = this.rootSpanContext ?? ROOT_CONTEXT;\n return trace.setSpan(base, span);\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n const transcript = ev.alternatives?.[0]?.text;\n const confidence = ev.alternatives?.[0]?.confidence ?? 0;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.hooks.onFinalTranscript(ev);\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.finalTranscriptConfidence.push(confidence);\n const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for lastSpeakingTime\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n if (transcriptChanged) {\n this.logger.debug(\n { transcript: this.audioTranscript },\n 'triggering preemptive generation (FINAL_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioTranscript,\n transcriptConfidence:\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0,\n });\n }\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.PREFLIGHT_TRANSCRIPT:\n this.hooks.onInterimTranscript(ev);\n const preflightTranscript = ev.alternatives?.[0]?.text ?? '';\n const preflightConfidence = ev.alternatives?.[0]?.confidence ?? 0;\n const preflightLanguage = ev.alternatives?.[0]?.language;\n\n const MIN_LANGUAGE_DETECTION_LENGTH = 5;\n if (\n !this.lastLanguage ||\n (preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH)\n ) {\n this.lastLanguage = preflightLanguage;\n }\n\n if (!preflightTranscript) {\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: preflightTranscript,\n language: this.lastLanguage,\n },\n 'received user preflight transcript',\n );\n\n // still need to increment it as it's used for turn detection,\n this.lastFinalTranscriptTime = Date.now();\n // preflight transcript includes all pre-committed transcripts (including final transcript from the previous STT run)\n this.audioPreflightTranscript =\n `${this.audioTranscript} ${preflightTranscript}`.trimStart();\n this.audioInterimTranscript = preflightTranscript;\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.turnDetectionMode !== 'manual' || this.userTurnCommitted) {\n const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];\n this.logger.debug(\n {\n transcript:\n this.audioPreflightTranscript.length > 100\n ? this.audioPreflightTranscript.slice(0, 100) + '...'\n : this.audioPreflightTranscript,\n },\n 'triggering preemptive generation (PREFLIGHT_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioPreflightTranscript,\n transcriptConfidence:\n confidenceVals.length > 0\n ? confidenceVals.reduce((a, b) => a + b, 0) / confidenceVals.length\n : 0,\n });\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.START_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n {\n const span = this.ensureUserTurnSpan(Date.now());\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => {\n this.hooks.onStartOfSpeech({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: true,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n });\n }\n this.speaking = true;\n this.lastSpeakingTime = Date.now();\n\n this.bounceEOUTask?.cancel();\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n {\n const span = this.ensureUserTurnSpan();\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => {\n this.hooks.onEndOfSpeech({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: false,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n });\n }\n this.speaking = false;\n this.userTurnCommitted = true;\n this.lastSpeakingTime = Date.now();\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask =\n (\n lastSpeakingTime: number | undefined,\n lastFinalTranscriptTime: number,\n speechStartTime: number | undefined,\n ) =>\n async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n const userTurnSpan = this.ensureUserTurnSpan();\n const userTurnCtx = this.userTurnContext(userTurnSpan);\n\n if (turnDetector) {\n await tracer.startActiveSpan(\n async (span) => {\n this.logger.debug('Running turn detector model');\n\n let endOfTurnProbability = 0.0;\n let unlikelyThreshold: number | undefined;\n\n if (!(await turnDetector.supportsLanguage(this.lastLanguage))) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n try {\n endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n\n this.logger.debug(\n { endOfTurnProbability, unlikelyThreshold, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n } catch (error) {\n this.logger.error(error, 'Error predicting end of turn');\n }\n }\n\n span.setAttribute(\n traceTypes.ATTR_CHAT_CTX,\n JSON.stringify(chatCtx.toJSON({ excludeTimestamp: false })),\n );\n span.setAttribute(traceTypes.ATTR_EOU_PROBABILITY, endOfTurnProbability);\n span.setAttribute(traceTypes.ATTR_EOU_UNLIKELY_THRESHOLD, unlikelyThreshold ?? 0);\n span.setAttribute(traceTypes.ATTR_EOU_DELAY, endpointingDelay);\n span.setAttribute(traceTypes.ATTR_EOU_LANGUAGE, this.lastLanguage ?? '');\n },\n {\n name: 'eou_detection',\n context: userTurnCtx,\n },\n );\n }\n\n let extraSleep = endpointingDelay;\n if (lastSpeakingTime !== undefined) {\n extraSleep += lastSpeakingTime - Date.now();\n }\n\n if (extraSleep > 0) {\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n }\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const confidenceAvg =\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0;\n\n let startedSpeakingAt: number | undefined;\n let stoppedSpeakingAt: number | undefined;\n let transcriptionDelay: number | undefined;\n let endOfUtteranceDelay: number | undefined;\n\n // sometimes, we can't calculate the metrics because VAD was unreliable.\n // in this case, we just ignore the calculation, it's better than providing likely wrong values\n if (\n lastFinalTranscriptTime !== 0 &&\n lastSpeakingTime !== undefined &&\n speechStartTime !== undefined\n ) {\n startedSpeakingAt = speechStartTime;\n stoppedSpeakingAt = lastSpeakingTime;\n transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);\n endOfUtteranceDelay = Date.now() - lastSpeakingTime;\n }\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptConfidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n startedSpeakingAt,\n stoppedSpeakingAt,\n });\n\n if (committed) {\n this._endUserTurnSpan({\n transcript: this.audioTranscript,\n confidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n });\n\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n this.finalTranscriptConfidence = [];\n this.lastSpeakingTime = undefined;\n this.lastFinalTranscriptTime = 0;\n this.speechStartTime = undefined;\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n // copy the values before awaiting (the values can change)\n this.bounceEOUTask = Task.from(\n bounceEOUTask(this.lastSpeakingTime, this.lastFinalTranscriptTime, this.speechStartTime),\n );\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n {\n const startTime = Date.now() - ev.speechDuration;\n const span = this.ensureUserTurnSpan(startTime);\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => this.hooks.onStartOfSpeech(ev));\n }\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n // for metrics, get the \"earliest\" signal of speech as possible\n if (ev.rawAccumulatedSpeech > 0.0) {\n this.lastSpeakingTime = Date.now();\n\n if (this.speechStartTime === undefined) {\n this.speechStartTime = Date.now();\n }\n }\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n {\n const span = this.ensureUserTurnSpan();\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => this.hooks.onEndOfSpeech(ev));\n }\n\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.speaking = false;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n this.finalTranscriptConfidence = [];\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.name === 'AbortError') {\n this.logger.debug('User turn commit task cancelled');\n return;\n }\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n this.silenceAudioWriter.releaseLock();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private _endUserTurnSpan({\n transcript,\n confidence,\n transcriptionDelay,\n endOfUtteranceDelay,\n }: {\n transcript: string;\n confidence: number;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n }): void {\n if (this.userTurnSpan) {\n this.userTurnSpan.setAttributes({\n [traceTypes.ATTR_USER_TRANSCRIPT]: transcript,\n [traceTypes.ATTR_TRANSCRIPT_CONFIDENCE]: confidence,\n [traceTypes.ATTR_TRANSCRIPTION_DELAY]: transcriptionDelay,\n [traceTypes.ATTR_END_OF_TURN_DELAY]: endOfUtteranceDelay,\n });\n this.userTurnSpan.end();\n this.userTurnSpan = undefined;\n }\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAIA,SAAS,kBAAkB;AAC3B;AAAA,EAEE;AAAA,EAEA,WAAW;AAAA,EACX;AAAA,OACK;AAEP,SAAS,sBAAsB;AAE/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,YAAY,cAAc;AACnC,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAGtD,SAAS,oCAAoC;AA2EtC,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,2BAA2B;AAAA,EAC3B,4BAAsC,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AACpB,SAAK,kBAAkB,KAAK;AAC5B,SAAK,WAAW,KAAK;AACrB,SAAK,cAAc,KAAK;AACxB,SAAK,uBAAuB,KAAK;AAEjC,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEQ,mBAAmB,WAA0B;AAxLvD;AAyLI,QAAI,KAAK,gBAAgB,KAAK,aAAa,YAAY,GAAG;AACxD,aAAO,KAAK;AAAA,IACd;AAEA,SAAK,eAAe,OAAO,UAAU;AAAA,MACnC,MAAM;AAAA,MACN,SAAS,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,UAAM,eAAc,UAAK,yBAAL;AACpB,QAAI,aAAa;AACf,mCAA6B,KAAK,cAAc,WAAW;AAAA,IAC7D;AAEA,QAAI,KAAK,UAAU;AACjB,WAAK,aAAa,aAAa,WAAW,2BAA2B,KAAK,QAAQ;AAAA,IACpF;AACA,QAAI,KAAK,aAAa;AACpB,WAAK,aAAa,aAAa,WAAW,2BAA2B,KAAK,WAAW;AAAA,IACvF;AAEA,WAAO,KAAK;AAAA,EACd;AAAA,EAEQ,gBAAgB,MAAqB;AAC3C,UAAM,OAAO,KAAK,mBAAmB;AACrC,WAAO,MAAM,QAAQ,MAAM,IAAI;AAAA,EACjC;AAAA,EAEA,MAAc,WAAW,IAAiB;AAvN5C;AAwNI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,cAAM,eAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AACvD,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,MAAM,kBAAkB,EAAE;AAE/B,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,0BAA0B,KAAK,UAAU;AAC9C,cAAM,oBAAoB,KAAK,oBAAoB,KAAK;AACxD,aAAK,yBAAyB;AAC9B,aAAK,2BAA2B;AAEhC,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAMpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,cAAI,mBAAmB;AACrB,iBAAK,OAAO;AAAA,cACV,EAAE,YAAY,KAAK,gBAAgB;AAAA,cACnC;AAAA,YACF;AACA,iBAAK,MAAM,uBAAuB;AAAA,cAChC,eAAe,KAAK;AAAA,cACpB,sBACE,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAAA,YACR,CAAC;AAAA,UACH;AAEA,cAAI,CAAC,KAAK,UAAU;AAClB,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,MAAM,oBAAoB,EAAE;AACjC,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC1D,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AAChE,cAAM,qBAAoB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAEhD,cAAM,gCAAgC;AACtC,YACE,CAAC,KAAK,gBACL,qBAAqB,oBAAoB,SAAS,+BACnD;AACA,eAAK,eAAe;AAAA,QACtB;AAEA,YAAI,CAAC,qBAAqB;AACxB;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAGA,aAAK,0BAA0B,KAAK,IAAI;AAExC,aAAK,2BACH,GAAG,KAAK,eAAe,IAAI,mBAAmB,GAAG,UAAU;AAC7D,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAEpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,sBAAsB,YAAY,KAAK,mBAAmB;AACjE,gBAAM,iBAAiB,CAAC,GAAG,KAAK,2BAA2B,mBAAmB;AAC9E,eAAK,OAAO;AAAA,YACV;AAAA,cACE,YACE,KAAK,yBAAyB,SAAS,MACnC,KAAK,yBAAyB,MAAM,GAAG,GAAG,IAAI,QAC9C,KAAK;AAAA,YACb;AAAA,YACA;AAAA,UACF;AACA,eAAK,MAAM,uBAAuB;AAAA,YAChC,eAAe,KAAK;AAAA,YACpB,sBACE,eAAe,SAAS,IACpB,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,SAC3D;AAAA,UACR,CAAC;AAAA,QACH;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC;AACE,gBAAM,OAAO,KAAK,mBAAmB,KAAK,IAAI,CAAC;AAC/C,gBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,sBAAY,KAAK,KAAK,MAAM;AAC1B,iBAAK,MAAM,gBAAgB;AAAA,cACzB,MAAM,aAAa;AAAA,cACnB,cAAc;AAAA,cACd,WAAW,KAAK,IAAI;AAAA,cACpB,gBAAgB;AAAA,cAChB,iBAAiB;AAAA,cACjB,QAAQ,CAAC;AAAA,cACT,aAAa;AAAA,cACb,mBAAmB;AAAA,cACnB,UAAU;AAAA,cACV,uBAAuB;AAAA,cACvB,sBAAsB;AAAA,YACxB,CAAC;AAAA,UACH,CAAC;AAAA,QACH;AACA,aAAK,WAAW;AAChB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,mBAAK,kBAAL,mBAAoB;AACpB;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC;AACE,gBAAM,OAAO,KAAK,mBAAmB;AACrC,gBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,sBAAY,KAAK,KAAK,MAAM;AAC1B,iBAAK,MAAM,cAAc;AAAA,cACvB,MAAM,aAAa;AAAA,cACnB,cAAc;AAAA,cACd,WAAW,KAAK,IAAI;AAAA,cACpB,gBAAgB;AAAA,cAChB,iBAAiB;AAAA,cACjB,QAAQ,CAAC;AAAA,cACT,aAAa;AAAA,cACb,mBAAmB;AAAA,cACnB,UAAU;AAAA,cACV,uBAAuB;AAAA,cACvB,sBAAsB;AAAA,YACxB,CAAC;AAAA,UACH,CAAC;AAAA,QACH;AACA,aAAK,WAAW;AAChB,aAAK,oBAAoB;AACzB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AApahD;AAqaI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBACJ,CACE,kBACA,yBACA,oBAEF,OAAO,eAAgC;AACrC,UAAI,mBAAmB,KAAK;AAE5B,YAAM,eAAe,KAAK,mBAAmB;AAC7C,YAAM,cAAc,KAAK,gBAAgB,YAAY;AAErD,UAAI,cAAc;AAChB,cAAM,OAAO;AAAA,UACX,OAAO,SAAS;AACd,iBAAK,OAAO,MAAM,6BAA6B;AAE/C,gBAAI,uBAAuB;AAC3B,gBAAI;AAEJ,gBAAI,CAAE,MAAM,aAAa,iBAAiB,KAAK,YAAY,GAAI;AAC7D,mBAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,YAClF,OAAO;AACL,kBAAI;AACF,uCAAuB,MAAM,aAAa,iBAAiB,OAAO;AAClE,oCAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAE1E,qBAAK,OAAO;AAAA,kBACV,EAAE,sBAAsB,mBAAmB,UAAU,KAAK,aAAa;AAAA,kBACvE;AAAA,gBACF;AAEA,oBAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,qCAAmB,KAAK;AAAA,gBAC1B;AAAA,cACF,SAAS,OAAO;AACd,qBAAK,OAAO,MAAM,OAAO,8BAA8B;AAAA,cACzD;AAAA,YACF;AAEA,iBAAK;AAAA,cACH,WAAW;AAAA,cACX,KAAK,UAAU,QAAQ,OAAO,EAAE,kBAAkB,MAAM,CAAC,CAAC;AAAA,YAC5D;AACA,iBAAK,aAAa,WAAW,sBAAsB,oBAAoB;AACvE,iBAAK,aAAa,WAAW,6BAA6B,qBAAqB,CAAC;AAChF,iBAAK,aAAa,WAAW,gBAAgB,gBAAgB;AAC7D,iBAAK,aAAa,WAAW,mBAAmB,KAAK,gBAAgB,EAAE;AAAA,UACzE;AAAA,UACA;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,UACX;AAAA,QACF;AAAA,MACF;AAEA,UAAI,aAAa;AACjB,UAAI,qBAAqB,QAAW;AAClC,sBAAc,mBAAmB,KAAK,IAAI;AAAA,MAC5C;AAEA,UAAI,aAAa,GAAG;AAElB,cAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MACpE;AAEA,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,gBACJ,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAEN,UAAI;AACJ,UAAI;AACJ,UAAI;AACJ,UAAI;AAIJ,UACE,4BAA4B,KAC5B,qBAAqB,UACrB,oBAAoB,QACpB;AACA,4BAAoB;AACpB,4BAAoB;AACpB,6BAAqB,KAAK,IAAI,0BAA0B,kBAAkB,CAAC;AAC3E,8BAAsB,KAAK,IAAI,IAAI;AAAA,MACrC;AAEA,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,sBAAsB;AAAA,QACtB,oBAAoB,sBAAsB;AAAA,QAC1C,qBAAqB,uBAAuB;AAAA,QAC5C;AAAA,QACA;AAAA,MACF,CAAC;AAED,UAAI,WAAW;AACb,aAAK,iBAAiB;AAAA,UACpB,YAAY,KAAK;AAAA,UACjB,YAAY;AAAA,UACZ,oBAAoB,sBAAsB;AAAA,UAC1C,qBAAqB,uBAAuB;AAAA,QAC9C,CAAC;AAGD,aAAK,kBAAkB;AACvB,aAAK,4BAA4B,CAAC;AAClC,aAAK,mBAAmB;AACxB,aAAK,0BAA0B;AAC/B,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,kBAAL,mBAAoB;AAEpB,SAAK,gBAAgB,KAAK;AAAA,MACxB,cAAc,KAAK,kBAAkB,KAAK,yBAAyB,KAAK,eAAe;AAAA,IACzF;AAEA,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA5nBzE;AA6nBI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C;AACE,oBAAM,YAAY,KAAK,IAAI,IAAI,GAAG;AAClC,oBAAM,OAAO,KAAK,mBAAmB,SAAS;AAC9C,oBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,0BAAY,KAAK,KAAK,MAAM,KAAK,MAAM,gBAAgB,EAAE,CAAC;AAAA,YAC5D;AACA,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAEhC,gBAAI,GAAG,uBAAuB,GAAK;AACjC,mBAAK,mBAAmB,KAAK,IAAI;AAEjC,kBAAI,KAAK,oBAAoB,QAAW;AACtC,qBAAK,kBAAkB,KAAK,IAAI;AAAA,cAClC;AAAA,YACF;AACA;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C;AACE,oBAAM,OAAO,KAAK,mBAAmB;AACrC,oBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,0BAAY,KAAK,KAAK,MAAM,KAAK,MAAM,cAAc,EAAE,CAAC;AAAA,YAC1D;AAGA,iBAAK,WAAW;AAEhB,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA9sBlB;AA+sBI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,2BAA2B;AAChC,SAAK,4BAA4B,CAAC;AAClC,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AA7tBzC;AA8tBI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,SAAS,cAAc;AACrD,aAAK,OAAO,MAAM,iCAAiC;AACnD;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AA3wBhB;AA4wBI,SAAK,uBAAuB;AAC5B,SAAK,mBAAmB,YAAY;AACpC,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEQ,iBAAiB;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKS;AACP,QAAI,KAAK,cAAc;AACrB,WAAK,aAAa,cAAc;AAAA,QAC9B,CAAC,WAAW,oBAAoB,GAAG;AAAA,QACnC,CAAC,WAAW,0BAA0B,GAAG;AAAA,QACzC,CAAC,WAAW,wBAAwB,GAAG;AAAA,QACvC,CAAC,WAAW,sBAAsB,GAAG;AAAA,MACvC,CAAC;AACD,WAAK,aAAa,IAAI;AACtB,WAAK,eAAe;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/voice/events.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n ChatMessage,\n FunctionCall,\n FunctionCallOutput,\n LLM,\n RealtimeModel,\n RealtimeModelError,\n} from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport type { AgentMetrics } from '../metrics/base.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS } from '../tts/index.js';\nimport type { TTSError } from '../tts/tts.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport enum AgentSessionEventTypes {\n UserInputTranscribed = 'user_input_transcribed',\n AgentStateChanged = 'agent_state_changed',\n UserStateChanged = 'user_state_changed',\n ConversationItemAdded = 'conversation_item_added',\n FunctionToolsExecuted = 'function_tools_executed',\n MetricsCollected = 'metrics_collected',\n SpeechCreated = 'speech_created',\n Error = 'error',\n Close = 'close',\n}\n\nexport type UserState = 'speaking' | 'listening' | 'away';\nexport type AgentState = 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';\n\nexport enum CloseReason {\n ERROR = 'error',\n JOB_SHUTDOWN = 'job_shutdown',\n PARTICIPANT_DISCONNECTED = 'participant_disconnected',\n USER_INITIATED = 'user_initiated',\n}\n\nexport type ShutdownReason = CloseReason | string;\n\nexport type SpeechSource = 'say' | 'generate_reply' | 'tool_response';\n\nexport type UserStateChangedEvent = {\n type: 'user_state_changed';\n oldState: UserState;\n newState: UserState;\n createdAt: number;\n};\n\nexport const createUserStateChangedEvent = (\n oldState: UserState,\n newState: UserState,\n createdAt: number = Date.now(),\n): UserStateChangedEvent => ({\n type: 'user_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type AgentStateChangedEvent = {\n type: 'agent_state_changed';\n oldState: AgentState;\n newState: AgentState;\n createdAt: number;\n};\n\nexport const createAgentStateChangedEvent = (\n oldState: AgentState,\n newState: AgentState,\n createdAt: number = Date.now(),\n): AgentStateChangedEvent => ({\n type: 'agent_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type UserInputTranscribedEvent = {\n type: 'user_input_transcribed';\n transcript: string;\n isFinal: boolean;\n // TODO(AJS-106): add multi participant support\n /** Not supported yet. Always null by default. */\n speakerId: string | null;\n createdAt: number;\n language:
|
|
1
|
+
{"version":3,"sources":["../../src/voice/events.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { LanguageCode } from '../language.js';\nimport type {\n ChatMessage,\n FunctionCall,\n FunctionCallOutput,\n LLM,\n RealtimeModel,\n RealtimeModelError,\n} from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport type { AgentMetrics } from '../metrics/base.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS } from '../tts/index.js';\nimport type { TTSError } from '../tts/tts.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport enum AgentSessionEventTypes {\n UserInputTranscribed = 'user_input_transcribed',\n AgentStateChanged = 'agent_state_changed',\n UserStateChanged = 'user_state_changed',\n ConversationItemAdded = 'conversation_item_added',\n FunctionToolsExecuted = 'function_tools_executed',\n MetricsCollected = 'metrics_collected',\n SpeechCreated = 'speech_created',\n Error = 'error',\n Close = 'close',\n}\n\nexport type UserState = 'speaking' | 'listening' | 'away';\nexport type AgentState = 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';\n\nexport enum CloseReason {\n ERROR = 'error',\n JOB_SHUTDOWN = 'job_shutdown',\n PARTICIPANT_DISCONNECTED = 'participant_disconnected',\n USER_INITIATED = 'user_initiated',\n}\n\nexport type ShutdownReason = CloseReason | string;\n\nexport type SpeechSource = 'say' | 'generate_reply' | 'tool_response';\n\nexport type UserStateChangedEvent = {\n type: 'user_state_changed';\n oldState: UserState;\n newState: UserState;\n createdAt: number;\n};\n\nexport const createUserStateChangedEvent = (\n oldState: UserState,\n newState: UserState,\n createdAt: number = Date.now(),\n): UserStateChangedEvent => ({\n type: 'user_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type AgentStateChangedEvent = {\n type: 'agent_state_changed';\n oldState: AgentState;\n newState: AgentState;\n createdAt: number;\n};\n\nexport const createAgentStateChangedEvent = (\n oldState: AgentState,\n newState: AgentState,\n createdAt: number = Date.now(),\n): AgentStateChangedEvent => ({\n type: 'agent_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type UserInputTranscribedEvent = {\n type: 'user_input_transcribed';\n transcript: string;\n isFinal: boolean;\n // TODO(AJS-106): add multi participant support\n /** Not supported yet. Always null by default. */\n speakerId: string | null;\n createdAt: number;\n language: LanguageCode | null;\n};\n\nexport const createUserInputTranscribedEvent = ({\n transcript,\n isFinal,\n speakerId = null,\n language = null,\n createdAt = Date.now(),\n}: {\n transcript: string;\n isFinal: boolean;\n speakerId?: string | null;\n language?: LanguageCode | null;\n createdAt?: number;\n}): UserInputTranscribedEvent => ({\n type: 'user_input_transcribed',\n transcript,\n isFinal,\n speakerId,\n language,\n createdAt,\n});\n\nexport type MetricsCollectedEvent = {\n type: 'metrics_collected';\n metrics: AgentMetrics;\n createdAt: number;\n};\n\nexport const createMetricsCollectedEvent = ({\n metrics,\n createdAt = Date.now(),\n}: {\n metrics: AgentMetrics;\n createdAt?: number;\n}): MetricsCollectedEvent => ({\n type: 'metrics_collected',\n metrics,\n createdAt,\n});\n\nexport type ConversationItemAddedEvent = {\n type: 'conversation_item_added';\n item: ChatMessage;\n createdAt: number;\n};\n\nexport const createConversationItemAddedEvent = (\n item: ChatMessage,\n createdAt: number = Date.now(),\n): ConversationItemAddedEvent => ({\n type: 'conversation_item_added',\n item,\n createdAt,\n});\n\nexport type FunctionToolsExecutedEvent = {\n type: 'function_tools_executed';\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt: number;\n};\n\nexport const createFunctionToolsExecutedEvent = ({\n functionCalls,\n functionCallOutputs,\n createdAt = Date.now(),\n}: {\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt?: number;\n}): FunctionToolsExecutedEvent => {\n return {\n type: 'function_tools_executed',\n functionCalls,\n functionCallOutputs,\n createdAt,\n };\n};\n\nexport const zipFunctionCallsAndOutputs = (\n event: FunctionToolsExecutedEvent,\n): Array<[FunctionCall, FunctionCallOutput]> => {\n return event.functionCalls.map((call, index) => [call, event.functionCallOutputs[index]!]);\n};\n\nexport type SpeechCreatedEvent = {\n type: 'speech_created';\n /**\n * True if the speech was created using public methods like `say` or `generate_reply`\n */\n userInitiated: boolean;\n /**\n * Source indicating how the speech handle was created\n */\n source: SpeechSource;\n /**\n * The speech handle that was created\n */\n // TODO(shubhra): we need to make sure this doesn't get serialized\n speechHandle: SpeechHandle;\n /**\n * The timestamp when the speech handle was created\n */\n createdAt: number;\n};\n\nexport const createSpeechCreatedEvent = ({\n userInitiated,\n source,\n speechHandle,\n createdAt = Date.now(),\n}: {\n userInitiated: boolean;\n source: SpeechSource;\n speechHandle: SpeechHandle;\n createdAt?: number;\n}): SpeechCreatedEvent => ({\n type: 'speech_created',\n userInitiated,\n source,\n speechHandle,\n createdAt,\n});\n\nexport type ErrorEvent = {\n type: 'error';\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown;\n source: LLM | STT | TTS | RealtimeModel | unknown;\n createdAt: number;\n};\n\nexport const createErrorEvent = (\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown,\n source: LLM | STT | TTS | RealtimeModel | unknown,\n createdAt: number = Date.now(),\n): ErrorEvent => ({\n type: 'error',\n error,\n source,\n createdAt,\n});\n\nexport type CloseEvent = {\n type: 'close';\n error: RealtimeModelError | STTError | TTSError | LLMError | null;\n reason: ShutdownReason;\n createdAt: number;\n};\n\nexport const createCloseEvent = (\n reason: ShutdownReason,\n error: RealtimeModelError | STTError | TTSError | LLMError | null = null,\n createdAt: number = Date.now(),\n): CloseEvent => ({\n type: 'close',\n error,\n reason,\n createdAt,\n});\n\nexport type AgentEvent =\n | UserInputTranscribedEvent\n | UserStateChangedEvent\n | AgentStateChangedEvent\n | MetricsCollectedEvent\n | ConversationItemAddedEvent\n | FunctionToolsExecutedEvent\n | SpeechCreatedEvent\n | ErrorEvent\n | CloseEvent;\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAoBO,IAAK,yBAAL,kBAAKA,4BAAL;AACL,EAAAA,wBAAA,0BAAuB;AACvB,EAAAA,wBAAA,uBAAoB;AACpB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,mBAAgB;AAChB,EAAAA,wBAAA,WAAQ;AACR,EAAAA,wBAAA,WAAQ;AATE,SAAAA;AAAA,GAAA;AAeL,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,WAAQ;AACR,EAAAA,aAAA,kBAAe;AACf,EAAAA,aAAA,8BAA2B;AAC3B,EAAAA,aAAA,oBAAiB;AAJP,SAAAA;AAAA,GAAA;AAkBL,MAAM,8BAA8B,CACzC,UACA,UACA,YAAoB,KAAK,IAAI,OACF;AAAA,EAC3B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,+BAA+B,CAC1C,UACA,UACA,YAAoB,KAAK,IAAI,OACD;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AAaO,MAAM,kCAAkC,CAAC;AAAA,EAC9C;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,YAAY,KAAK,IAAI;AACvB,OAMkC;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAQO,MAAM,8BAA8B,CAAC;AAAA,EAC1C;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAG8B;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AACF;AAQO,MAAM,mCAAmC,CAC9C,MACA,YAAoB,KAAK,IAAI,OACG;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AACF;AASO,MAAM,mCAAmC,CAAC;AAAA,EAC/C;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,MAIkC;AAChC,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,MAAM,6BAA6B,CACxC,UAC8C;AAC9C,SAAO,MAAM,cAAc,IAAI,CAAC,MAAM,UAAU,CAAC,MAAM,MAAM,oBAAoB,KAAK,CAAE,CAAC;AAC3F;AAuBO,MAAM,2BAA2B,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAK2B;AAAA,EACzB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,OACA,QACA,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,QACA,QAAoE,MACpE,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;","names":["AgentSessionEventTypes","CloseReason"]}
|
package/dist/voice/events.d.cts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { LanguageCode } from '../language.js';
|
|
1
2
|
import type { ChatMessage, FunctionCall, FunctionCallOutput, LLM, RealtimeModel, RealtimeModelError } from '../llm/index.js';
|
|
2
3
|
import type { LLMError } from '../llm/llm.js';
|
|
3
4
|
import type { AgentMetrics } from '../metrics/base.js';
|
|
@@ -48,13 +49,13 @@ export type UserInputTranscribedEvent = {
|
|
|
48
49
|
/** Not supported yet. Always null by default. */
|
|
49
50
|
speakerId: string | null;
|
|
50
51
|
createdAt: number;
|
|
51
|
-
language:
|
|
52
|
+
language: LanguageCode | null;
|
|
52
53
|
};
|
|
53
54
|
export declare const createUserInputTranscribedEvent: ({ transcript, isFinal, speakerId, language, createdAt, }: {
|
|
54
55
|
transcript: string;
|
|
55
56
|
isFinal: boolean;
|
|
56
57
|
speakerId?: string | null;
|
|
57
|
-
language?:
|
|
58
|
+
language?: LanguageCode | null;
|
|
58
59
|
createdAt?: number;
|
|
59
60
|
}) => UserInputTranscribedEvent;
|
|
60
61
|
export type MetricsCollectedEvent = {
|
package/dist/voice/events.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { LanguageCode } from '../language.js';
|
|
1
2
|
import type { ChatMessage, FunctionCall, FunctionCallOutput, LLM, RealtimeModel, RealtimeModelError } from '../llm/index.js';
|
|
2
3
|
import type { LLMError } from '../llm/llm.js';
|
|
3
4
|
import type { AgentMetrics } from '../metrics/base.js';
|
|
@@ -48,13 +49,13 @@ export type UserInputTranscribedEvent = {
|
|
|
48
49
|
/** Not supported yet. Always null by default. */
|
|
49
50
|
speakerId: string | null;
|
|
50
51
|
createdAt: number;
|
|
51
|
-
language:
|
|
52
|
+
language: LanguageCode | null;
|
|
52
53
|
};
|
|
53
54
|
export declare const createUserInputTranscribedEvent: ({ transcript, isFinal, speakerId, language, createdAt, }: {
|
|
54
55
|
transcript: string;
|
|
55
56
|
isFinal: boolean;
|
|
56
57
|
speakerId?: string | null;
|
|
57
|
-
language?:
|
|
58
|
+
language?: LanguageCode | null;
|
|
58
59
|
createdAt?: number;
|
|
59
60
|
}) => UserInputTranscribedEvent;
|
|
60
61
|
export type MetricsCollectedEvent = {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"events.d.ts","sourceRoot":"","sources":["../../src/voice/events.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,WAAW,EACX,YAAY,EACZ,kBAAkB,EAClB,GAAG,EACH,aAAa,EACb,kBAAkB,EACnB,MAAM,iBAAiB,CAAC;AACzB,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAEvD,oBAAY,sBAAsB;IAChC,oBAAoB,2BAA2B;IAC/C,iBAAiB,wBAAwB;IACzC,gBAAgB,uBAAuB;IACvC,qBAAqB,4BAA4B;IACjD,qBAAqB,4BAA4B;IACjD,gBAAgB,sBAAsB;IACtC,aAAa,mBAAmB;IAChC,KAAK,UAAU;IACf,KAAK,UAAU;CAChB;AAED,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,WAAW,GAAG,MAAM,CAAC;AAC1D,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,UAAU,CAAC;AAEzF,oBAAY,WAAW;IACrB,KAAK,UAAU;IACf,YAAY,iBAAiB;IAC7B,wBAAwB,6BAA6B;IACrD,cAAc,mBAAmB;CAClC;AAED,MAAM,MAAM,cAAc,GAAG,WAAW,GAAG,MAAM,CAAC;AAElD,MAAM,MAAM,YAAY,GAAG,KAAK,GAAG,gBAAgB,GAAG,eAAe,CAAC;AAEtE,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,QAAQ,EAAE,SAAS,CAAC;IACpB,QAAQ,EAAE,SAAS,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,2BAA2B,aAC5B,SAAS,YACT,SAAS,cACR,MAAM,KAChB,qBAKD,CAAC;AAEH,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,qBAAqB,CAAC;IAC5B,QAAQ,EAAE,UAAU,CAAC;IACrB,QAAQ,EAAE,UAAU,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,4BAA4B,aAC7B,UAAU,YACV,UAAU,cACT,MAAM,KAChB,sBAKD,CAAC;AAEH,MAAM,MAAM,yBAAyB,GAAG;IACtC,IAAI,EAAE,wBAAwB,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IAEjB,iDAAiD;IACjD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"events.d.ts","sourceRoot":"","sources":["../../src/voice/events.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,KAAK,EACV,WAAW,EACX,YAAY,EACZ,kBAAkB,EAClB,GAAG,EACH,aAAa,EACb,kBAAkB,EACnB,MAAM,iBAAiB,CAAC;AACzB,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAEvD,oBAAY,sBAAsB;IAChC,oBAAoB,2BAA2B;IAC/C,iBAAiB,wBAAwB;IACzC,gBAAgB,uBAAuB;IACvC,qBAAqB,4BAA4B;IACjD,qBAAqB,4BAA4B;IACjD,gBAAgB,sBAAsB;IACtC,aAAa,mBAAmB;IAChC,KAAK,UAAU;IACf,KAAK,UAAU;CAChB;AAED,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,WAAW,GAAG,MAAM,CAAC;AAC1D,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,UAAU,CAAC;AAEzF,oBAAY,WAAW;IACrB,KAAK,UAAU;IACf,YAAY,iBAAiB;IAC7B,wBAAwB,6BAA6B;IACrD,cAAc,mBAAmB;CAClC;AAED,MAAM,MAAM,cAAc,GAAG,WAAW,GAAG,MAAM,CAAC;AAElD,MAAM,MAAM,YAAY,GAAG,KAAK,GAAG,gBAAgB,GAAG,eAAe,CAAC;AAEtE,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,QAAQ,EAAE,SAAS,CAAC;IACpB,QAAQ,EAAE,SAAS,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,2BAA2B,aAC5B,SAAS,YACT,SAAS,cACR,MAAM,KAChB,qBAKD,CAAC;AAEH,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,qBAAqB,CAAC;IAC5B,QAAQ,EAAE,UAAU,CAAC;IACrB,QAAQ,EAAE,UAAU,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,4BAA4B,aAC7B,UAAU,YACV,UAAU,cACT,MAAM,KAChB,sBAKD,CAAC;AAEH,MAAM,MAAM,yBAAyB,GAAG;IACtC,IAAI,EAAE,wBAAwB,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IAEjB,iDAAiD;IACjD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,YAAY,GAAG,IAAI,CAAC;CAC/B,CAAC;AAEF,eAAO,MAAM,+BAA+B,6DAMzC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,QAAQ,CAAC,EAAE,YAAY,GAAG,IAAI,CAAC;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,yBAOF,CAAC;AAEH,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,2BAA2B,4BAGrC;IACD,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,qBAIF,CAAC;AAEH,MAAM,MAAM,0BAA0B,GAAG;IACvC,IAAI,EAAE,yBAAyB,CAAC;IAChC,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gCAAgC,SACrC,WAAW,cACN,MAAM,KAChB,0BAID,CAAC;AAEH,MAAM,MAAM,0BAA0B,GAAG;IACvC,IAAI,EAAE,yBAAyB,CAAC;IAChC,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,mBAAmB,EAAE,kBAAkB,EAAE,CAAC;IAC1C,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gCAAgC,uDAI1C;IACD,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,mBAAmB,EAAE,kBAAkB,EAAE,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,0BAOH,CAAC;AAEF,eAAO,MAAM,0BAA0B,UAC9B,0BAA0B,KAChC,MAAM,CAAC,YAAY,EAAE,kBAAkB,CAAC,CAE1C,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB;;OAEG;IACH,aAAa,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,MAAM,EAAE,YAAY,CAAC;IACrB;;OAEG;IAEH,YAAY,EAAE,YAAY,CAAC;IAC3B;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,wBAAwB,wDAKlC;IACD,aAAa,EAAE,OAAO,CAAC;IACvB,MAAM,EAAE,YAAY,CAAC;IACrB,YAAY,EAAE,YAAY,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,kBAMF,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;IACrE,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,aAAa,GAAG,OAAO,CAAC;IAClD,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gBAAgB,UACpB,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,UAC5D,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,aAAa,GAAG,OAAO,cACtC,MAAM,KAChB,UAKD,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,IAAI,CAAC;IAClE,MAAM,EAAE,cAAc,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gBAAgB,WACnB,cAAc,UACf,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,IAAI,cACtD,MAAM,KAChB,UAKD,CAAC;AAEH,MAAM,MAAM,UAAU,GAClB,yBAAyB,GACzB,qBAAqB,GACrB,sBAAsB,GACtB,qBAAqB,GACrB,0BAA0B,GAC1B,0BAA0B,GAC1B,kBAAkB,GAClB,UAAU,GACV,UAAU,CAAC"}
|
package/dist/voice/events.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/voice/events.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n ChatMessage,\n FunctionCall,\n FunctionCallOutput,\n LLM,\n RealtimeModel,\n RealtimeModelError,\n} from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport type { AgentMetrics } from '../metrics/base.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS } from '../tts/index.js';\nimport type { TTSError } from '../tts/tts.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport enum AgentSessionEventTypes {\n UserInputTranscribed = 'user_input_transcribed',\n AgentStateChanged = 'agent_state_changed',\n UserStateChanged = 'user_state_changed',\n ConversationItemAdded = 'conversation_item_added',\n FunctionToolsExecuted = 'function_tools_executed',\n MetricsCollected = 'metrics_collected',\n SpeechCreated = 'speech_created',\n Error = 'error',\n Close = 'close',\n}\n\nexport type UserState = 'speaking' | 'listening' | 'away';\nexport type AgentState = 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';\n\nexport enum CloseReason {\n ERROR = 'error',\n JOB_SHUTDOWN = 'job_shutdown',\n PARTICIPANT_DISCONNECTED = 'participant_disconnected',\n USER_INITIATED = 'user_initiated',\n}\n\nexport type ShutdownReason = CloseReason | string;\n\nexport type SpeechSource = 'say' | 'generate_reply' | 'tool_response';\n\nexport type UserStateChangedEvent = {\n type: 'user_state_changed';\n oldState: UserState;\n newState: UserState;\n createdAt: number;\n};\n\nexport const createUserStateChangedEvent = (\n oldState: UserState,\n newState: UserState,\n createdAt: number = Date.now(),\n): UserStateChangedEvent => ({\n type: 'user_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type AgentStateChangedEvent = {\n type: 'agent_state_changed';\n oldState: AgentState;\n newState: AgentState;\n createdAt: number;\n};\n\nexport const createAgentStateChangedEvent = (\n oldState: AgentState,\n newState: AgentState,\n createdAt: number = Date.now(),\n): AgentStateChangedEvent => ({\n type: 'agent_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type UserInputTranscribedEvent = {\n type: 'user_input_transcribed';\n transcript: string;\n isFinal: boolean;\n // TODO(AJS-106): add multi participant support\n /** Not supported yet. Always null by default. */\n speakerId: string | null;\n createdAt: number;\n language:
|
|
1
|
+
{"version":3,"sources":["../../src/voice/events.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { LanguageCode } from '../language.js';\nimport type {\n ChatMessage,\n FunctionCall,\n FunctionCallOutput,\n LLM,\n RealtimeModel,\n RealtimeModelError,\n} from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport type { AgentMetrics } from '../metrics/base.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS } from '../tts/index.js';\nimport type { TTSError } from '../tts/tts.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport enum AgentSessionEventTypes {\n UserInputTranscribed = 'user_input_transcribed',\n AgentStateChanged = 'agent_state_changed',\n UserStateChanged = 'user_state_changed',\n ConversationItemAdded = 'conversation_item_added',\n FunctionToolsExecuted = 'function_tools_executed',\n MetricsCollected = 'metrics_collected',\n SpeechCreated = 'speech_created',\n Error = 'error',\n Close = 'close',\n}\n\nexport type UserState = 'speaking' | 'listening' | 'away';\nexport type AgentState = 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';\n\nexport enum CloseReason {\n ERROR = 'error',\n JOB_SHUTDOWN = 'job_shutdown',\n PARTICIPANT_DISCONNECTED = 'participant_disconnected',\n USER_INITIATED = 'user_initiated',\n}\n\nexport type ShutdownReason = CloseReason | string;\n\nexport type SpeechSource = 'say' | 'generate_reply' | 'tool_response';\n\nexport type UserStateChangedEvent = {\n type: 'user_state_changed';\n oldState: UserState;\n newState: UserState;\n createdAt: number;\n};\n\nexport const createUserStateChangedEvent = (\n oldState: UserState,\n newState: UserState,\n createdAt: number = Date.now(),\n): UserStateChangedEvent => ({\n type: 'user_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type AgentStateChangedEvent = {\n type: 'agent_state_changed';\n oldState: AgentState;\n newState: AgentState;\n createdAt: number;\n};\n\nexport const createAgentStateChangedEvent = (\n oldState: AgentState,\n newState: AgentState,\n createdAt: number = Date.now(),\n): AgentStateChangedEvent => ({\n type: 'agent_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type UserInputTranscribedEvent = {\n type: 'user_input_transcribed';\n transcript: string;\n isFinal: boolean;\n // TODO(AJS-106): add multi participant support\n /** Not supported yet. Always null by default. */\n speakerId: string | null;\n createdAt: number;\n language: LanguageCode | null;\n};\n\nexport const createUserInputTranscribedEvent = ({\n transcript,\n isFinal,\n speakerId = null,\n language = null,\n createdAt = Date.now(),\n}: {\n transcript: string;\n isFinal: boolean;\n speakerId?: string | null;\n language?: LanguageCode | null;\n createdAt?: number;\n}): UserInputTranscribedEvent => ({\n type: 'user_input_transcribed',\n transcript,\n isFinal,\n speakerId,\n language,\n createdAt,\n});\n\nexport type MetricsCollectedEvent = {\n type: 'metrics_collected';\n metrics: AgentMetrics;\n createdAt: number;\n};\n\nexport const createMetricsCollectedEvent = ({\n metrics,\n createdAt = Date.now(),\n}: {\n metrics: AgentMetrics;\n createdAt?: number;\n}): MetricsCollectedEvent => ({\n type: 'metrics_collected',\n metrics,\n createdAt,\n});\n\nexport type ConversationItemAddedEvent = {\n type: 'conversation_item_added';\n item: ChatMessage;\n createdAt: number;\n};\n\nexport const createConversationItemAddedEvent = (\n item: ChatMessage,\n createdAt: number = Date.now(),\n): ConversationItemAddedEvent => ({\n type: 'conversation_item_added',\n item,\n createdAt,\n});\n\nexport type FunctionToolsExecutedEvent = {\n type: 'function_tools_executed';\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt: number;\n};\n\nexport const createFunctionToolsExecutedEvent = ({\n functionCalls,\n functionCallOutputs,\n createdAt = Date.now(),\n}: {\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt?: number;\n}): FunctionToolsExecutedEvent => {\n return {\n type: 'function_tools_executed',\n functionCalls,\n functionCallOutputs,\n createdAt,\n };\n};\n\nexport const zipFunctionCallsAndOutputs = (\n event: FunctionToolsExecutedEvent,\n): Array<[FunctionCall, FunctionCallOutput]> => {\n return event.functionCalls.map((call, index) => [call, event.functionCallOutputs[index]!]);\n};\n\nexport type SpeechCreatedEvent = {\n type: 'speech_created';\n /**\n * True if the speech was created using public methods like `say` or `generate_reply`\n */\n userInitiated: boolean;\n /**\n * Source indicating how the speech handle was created\n */\n source: SpeechSource;\n /**\n * The speech handle that was created\n */\n // TODO(shubhra): we need to make sure this doesn't get serialized\n speechHandle: SpeechHandle;\n /**\n * The timestamp when the speech handle was created\n */\n createdAt: number;\n};\n\nexport const createSpeechCreatedEvent = ({\n userInitiated,\n source,\n speechHandle,\n createdAt = Date.now(),\n}: {\n userInitiated: boolean;\n source: SpeechSource;\n speechHandle: SpeechHandle;\n createdAt?: number;\n}): SpeechCreatedEvent => ({\n type: 'speech_created',\n userInitiated,\n source,\n speechHandle,\n createdAt,\n});\n\nexport type ErrorEvent = {\n type: 'error';\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown;\n source: LLM | STT | TTS | RealtimeModel | unknown;\n createdAt: number;\n};\n\nexport const createErrorEvent = (\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown,\n source: LLM | STT | TTS | RealtimeModel | unknown,\n createdAt: number = Date.now(),\n): ErrorEvent => ({\n type: 'error',\n error,\n source,\n createdAt,\n});\n\nexport type CloseEvent = {\n type: 'close';\n error: RealtimeModelError | STTError | TTSError | LLMError | null;\n reason: ShutdownReason;\n createdAt: number;\n};\n\nexport const createCloseEvent = (\n reason: ShutdownReason,\n error: RealtimeModelError | STTError | TTSError | LLMError | null = null,\n createdAt: number = Date.now(),\n): CloseEvent => ({\n type: 'close',\n error,\n reason,\n createdAt,\n});\n\nexport type AgentEvent =\n | UserInputTranscribedEvent\n | UserStateChangedEvent\n | AgentStateChangedEvent\n | MetricsCollectedEvent\n | ConversationItemAddedEvent\n | FunctionToolsExecutedEvent\n | SpeechCreatedEvent\n | ErrorEvent\n | CloseEvent;\n"],"mappings":"AAoBO,IAAK,yBAAL,kBAAKA,4BAAL;AACL,EAAAA,wBAAA,0BAAuB;AACvB,EAAAA,wBAAA,uBAAoB;AACpB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,mBAAgB;AAChB,EAAAA,wBAAA,WAAQ;AACR,EAAAA,wBAAA,WAAQ;AATE,SAAAA;AAAA,GAAA;AAeL,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,WAAQ;AACR,EAAAA,aAAA,kBAAe;AACf,EAAAA,aAAA,8BAA2B;AAC3B,EAAAA,aAAA,oBAAiB;AAJP,SAAAA;AAAA,GAAA;AAkBL,MAAM,8BAA8B,CACzC,UACA,UACA,YAAoB,KAAK,IAAI,OACF;AAAA,EAC3B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,+BAA+B,CAC1C,UACA,UACA,YAAoB,KAAK,IAAI,OACD;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AAaO,MAAM,kCAAkC,CAAC;AAAA,EAC9C;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,YAAY,KAAK,IAAI;AACvB,OAMkC;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAQO,MAAM,8BAA8B,CAAC;AAAA,EAC1C;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAG8B;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AACF;AAQO,MAAM,mCAAmC,CAC9C,MACA,YAAoB,KAAK,IAAI,OACG;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AACF;AASO,MAAM,mCAAmC,CAAC;AAAA,EAC/C;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,MAIkC;AAChC,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,MAAM,6BAA6B,CACxC,UAC8C;AAC9C,SAAO,MAAM,cAAc,IAAI,CAAC,MAAM,UAAU,CAAC,MAAM,MAAM,oBAAoB,KAAK,CAAE,CAAC;AAC3F;AAuBO,MAAM,2BAA2B,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAK2B;AAAA,EACzB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,OACA,QACA,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,QACA,QAAoE,MACpE,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;","names":["AgentSessionEventTypes","CloseReason"]}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -9,33 +9,31 @@
|
|
|
9
9
|
* @see {@link https://docs.livekit.io/agents/overview | LiveKit Agents documentation}
|
|
10
10
|
* @packageDocumentation
|
|
11
11
|
*/
|
|
12
|
-
import * as beta from './beta/index.js';
|
|
13
|
-
import * as cli from './cli.js';
|
|
14
|
-
import * as inference from './inference/index.js';
|
|
15
|
-
import * as ipc from './ipc/index.js';
|
|
16
|
-
import * as llm from './llm/index.js';
|
|
17
|
-
import * as metrics from './metrics/index.js';
|
|
18
|
-
import * as stream from './stream/index.js';
|
|
19
|
-
import * as stt from './stt/index.js';
|
|
20
|
-
import * as telemetry from './telemetry/index.js';
|
|
21
|
-
import * as tokenize from './tokenize/index.js';
|
|
22
|
-
import * as tts from './tts/index.js';
|
|
23
|
-
import * as voice from './voice/index.js';
|
|
24
|
-
|
|
25
12
|
export * from './_exceptions.js';
|
|
26
13
|
export * from './audio.js';
|
|
14
|
+
export * as beta from './beta/index.js';
|
|
15
|
+
export * as cli from './cli.js';
|
|
27
16
|
export * from './connection_pool.js';
|
|
28
17
|
export * from './generator.js';
|
|
18
|
+
export * as inference from './inference/index.js';
|
|
29
19
|
export * from './inference_runner.js';
|
|
20
|
+
export * as ipc from './ipc/index.js';
|
|
30
21
|
export * from './job.js';
|
|
22
|
+
export * from './language.js';
|
|
23
|
+
export * as llm from './llm/index.js';
|
|
31
24
|
export * from './log.js';
|
|
25
|
+
export * as metrics from './metrics/index.js';
|
|
32
26
|
export * from './plugin.js';
|
|
27
|
+
export * as stream from './stream/index.js';
|
|
28
|
+
export * as stt from './stt/index.js';
|
|
29
|
+
export * as telemetry from './telemetry/index.js';
|
|
30
|
+
export * as tokenize from './tokenize/index.js';
|
|
33
31
|
export * from './transcription.js';
|
|
32
|
+
export * as tts from './tts/index.js';
|
|
34
33
|
export * from './types.js';
|
|
35
34
|
export * from './utils.js';
|
|
36
35
|
export * from './vad.js';
|
|
37
36
|
export * from './version.js';
|
|
37
|
+
export * as voice from './voice/index.js';
|
|
38
38
|
export { createTimedString, isTimedString, type TimedString } from './voice/io.js';
|
|
39
39
|
export * from './worker.js';
|
|
40
|
-
|
|
41
|
-
export { beta, cli, inference, ipc, llm, metrics, stream, stt, telemetry, tokenize, tts, voice };
|
package/src/inference/llm.ts
CHANGED
|
@@ -2,16 +2,11 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import OpenAI from 'openai';
|
|
5
|
-
import {
|
|
6
|
-
APIConnectionError,
|
|
7
|
-
APIStatusError,
|
|
8
|
-
APITimeoutError,
|
|
9
|
-
DEFAULT_API_CONNECT_OPTIONS,
|
|
10
|
-
type Expand,
|
|
11
|
-
toError,
|
|
12
|
-
} from '../index.js';
|
|
5
|
+
import { APIConnectionError, APIStatusError, APITimeoutError } from '../_exceptions.js';
|
|
13
6
|
import * as llm from '../llm/index.js';
|
|
7
|
+
import { DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
14
8
|
import type { APIConnectOptions } from '../types.js';
|
|
9
|
+
import { type Expand, toError } from '../utils.js';
|
|
15
10
|
import { type AnyString, createAccessToken } from './utils.js';
|
|
16
11
|
|
|
17
12
|
const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { beforeAll, describe, expect, it } from 'vitest';
|
|
5
|
+
import { normalizeLanguage } from '../language.js';
|
|
5
6
|
import { initializeLogger } from '../log.js';
|
|
6
7
|
import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
7
8
|
import { STT, type STTFallbackModel, normalizeSTTFallback, parseSTTModelString } from './stt.js';
|
|
@@ -34,6 +35,12 @@ describe('parseSTTModelString', () => {
|
|
|
34
35
|
expect(language).toBe('en');
|
|
35
36
|
});
|
|
36
37
|
|
|
38
|
+
it('normalizes language suffixes', () => {
|
|
39
|
+
const [model, language] = parseSTTModelString('deepgram:english');
|
|
40
|
+
expect(model).toBe('deepgram');
|
|
41
|
+
expect(language).toBe('en');
|
|
42
|
+
});
|
|
43
|
+
|
|
37
44
|
it('provider/model format without language', () => {
|
|
38
45
|
const [model, language] = parseSTTModelString('deepgram/nova-3');
|
|
39
46
|
expect(model).toBe('deepgram/nova-3');
|
|
@@ -151,6 +158,16 @@ describe('normalizeSTTFallback', () => {
|
|
|
151
158
|
});
|
|
152
159
|
|
|
153
160
|
describe('STT constructor fallback and connOptions', () => {
|
|
161
|
+
it('normalizes language in constructor and model string', () => {
|
|
162
|
+
const stt = makeStt({ model: 'deepgram/nova-3:english' });
|
|
163
|
+
expect(stt['opts'].language).toBe('en');
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
it('prefers explicit normalized language over model suffix', () => {
|
|
167
|
+
const stt = makeStt({ model: 'deepgram/nova-3:english', language: 'en_US' });
|
|
168
|
+
expect(stt['opts'].language).toBe(normalizeLanguage('en_US'));
|
|
169
|
+
});
|
|
170
|
+
|
|
154
171
|
it('fallback not given defaults to undefined', () => {
|
|
155
172
|
const stt = makeStt();
|
|
156
173
|
expect(stt['opts'].fallback).toBeUndefined();
|
package/src/inference/stt.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { type AudioFrame } from '@livekit/rtc-node';
|
|
|
5
5
|
import type { WebSocket } from 'ws';
|
|
6
6
|
import { APIError, APIStatusError } from '../_exceptions.js';
|
|
7
7
|
import { AudioByteStream } from '../audio.js';
|
|
8
|
+
import { type LanguageCode, areLanguagesEquivalent, normalizeLanguage } from '../language.js';
|
|
8
9
|
import { log } from '../log.js';
|
|
9
10
|
import { createStreamChannel } from '../stream/stream_channel.js';
|
|
10
11
|
import {
|
|
@@ -121,10 +122,10 @@ export interface STTFallbackModel {
|
|
|
121
122
|
export type STTFallbackModelType = STTFallbackModel | string;
|
|
122
123
|
|
|
123
124
|
/** Parse a model string into [model, language]. Language is undefined if not specified. */
|
|
124
|
-
export function parseSTTModelString(model: string): [string,
|
|
125
|
+
export function parseSTTModelString(model: string): [string, LanguageCode | undefined] {
|
|
125
126
|
const idx = model.lastIndexOf(':');
|
|
126
127
|
if (idx !== -1) {
|
|
127
|
-
return [model.slice(0, idx), model.slice(idx + 1)];
|
|
128
|
+
return [model.slice(0, idx), normalizeLanguage(model.slice(idx + 1))];
|
|
128
129
|
}
|
|
129
130
|
return [model, undefined];
|
|
130
131
|
}
|
|
@@ -156,7 +157,7 @@ const DEFAULT_CANCEL_TIMEOUT = 5000;
|
|
|
156
157
|
|
|
157
158
|
export interface InferenceSTTOptions<TModel extends STTModels> {
|
|
158
159
|
model?: TModel;
|
|
159
|
-
language?:
|
|
160
|
+
language?: LanguageCode;
|
|
160
161
|
encoding: STTEncoding;
|
|
161
162
|
sampleRate: number;
|
|
162
163
|
baseURL: string;
|
|
@@ -219,25 +220,24 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
219
220
|
let nextModel = model;
|
|
220
221
|
let nextLanguage = language;
|
|
221
222
|
if (typeof nextModel === 'string') {
|
|
222
|
-
const
|
|
223
|
-
if (
|
|
224
|
-
|
|
225
|
-
if (nextLanguage && nextLanguage !== languageFromModel) {
|
|
223
|
+
const [parsedModel, parsedLanguage] = parseSTTModelString(nextModel);
|
|
224
|
+
if (parsedLanguage !== undefined) {
|
|
225
|
+
if (nextLanguage && !areLanguagesEquivalent(nextLanguage, parsedLanguage)) {
|
|
226
226
|
this.#logger.warn(
|
|
227
227
|
'`language` is provided via both argument and model, using the one from the argument',
|
|
228
228
|
{ language: nextLanguage, model: nextModel },
|
|
229
229
|
);
|
|
230
230
|
} else {
|
|
231
|
-
nextLanguage =
|
|
231
|
+
nextLanguage = parsedLanguage as STTLanguages;
|
|
232
232
|
}
|
|
233
|
-
nextModel =
|
|
233
|
+
nextModel = parsedModel as TModel;
|
|
234
234
|
}
|
|
235
235
|
}
|
|
236
236
|
const normalizedFallback = fallback ? normalizeSTTFallback(fallback) : undefined;
|
|
237
237
|
|
|
238
238
|
this.opts = {
|
|
239
239
|
model: nextModel as TModel,
|
|
240
|
-
language: nextLanguage,
|
|
240
|
+
language: nextLanguage ? normalizeLanguage(nextLanguage) : undefined,
|
|
241
241
|
encoding,
|
|
242
242
|
sampleRate,
|
|
243
243
|
baseURL: lkBaseURL,
|
|
@@ -263,7 +263,11 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
263
263
|
}
|
|
264
264
|
|
|
265
265
|
updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
|
|
266
|
-
this.opts = {
|
|
266
|
+
this.opts = {
|
|
267
|
+
...this.opts,
|
|
268
|
+
...opts,
|
|
269
|
+
language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
|
|
270
|
+
};
|
|
267
271
|
|
|
268
272
|
for (const stream of this.streams) {
|
|
269
273
|
stream.updateOptions(opts);
|
|
@@ -278,7 +282,7 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
278
282
|
options || {};
|
|
279
283
|
const streamOpts = {
|
|
280
284
|
...this.opts,
|
|
281
|
-
language: language
|
|
285
|
+
language: language !== undefined ? normalizeLanguage(language) : this.opts.language,
|
|
282
286
|
} as InferenceSTTOptions<TModel>;
|
|
283
287
|
|
|
284
288
|
const stream = new SpeechStream(this, streamOpts, connOptions);
|
|
@@ -364,7 +368,11 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
|
|
|
364
368
|
}
|
|
365
369
|
|
|
366
370
|
updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
|
|
367
|
-
this.opts = {
|
|
371
|
+
this.opts = {
|
|
372
|
+
...this.opts,
|
|
373
|
+
...opts,
|
|
374
|
+
language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
|
|
375
|
+
};
|
|
368
376
|
this.reconnectEvent.set();
|
|
369
377
|
}
|
|
370
378
|
|
|
@@ -569,7 +577,7 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
|
|
|
569
577
|
|
|
570
578
|
const requestId = data.session_id || this.requestId;
|
|
571
579
|
const text = data.transcript;
|
|
572
|
-
const language = data.language || this.opts.language || 'en';
|
|
580
|
+
const language = normalizeLanguage(data.language || this.opts.language || 'en');
|
|
573
581
|
|
|
574
582
|
if (!text && !isFinal) return;
|
|
575
583
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { beforeAll, describe, expect, it } from 'vitest';
|
|
5
|
+
import { normalizeLanguage } from '../language.js';
|
|
5
6
|
import { initializeLogger } from '../log.js';
|
|
6
7
|
import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
7
8
|
import { TTS, type TTSFallbackModel, normalizeTTSFallback, parseTTSModelString } from './tts.js';
|
|
@@ -165,6 +166,17 @@ describe('normalizeTTSFallback', () => {
|
|
|
165
166
|
});
|
|
166
167
|
|
|
167
168
|
describe('TTS constructor fallback and connOptions', () => {
|
|
169
|
+
it('normalizes language in constructor', () => {
|
|
170
|
+
const tts = makeTts({ language: 'english' });
|
|
171
|
+
expect(tts['opts'].language).toBe('en');
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
it('normalizes updated language values', () => {
|
|
175
|
+
const tts = makeTts();
|
|
176
|
+
tts.updateOptions({ language: 'en_US' });
|
|
177
|
+
expect(tts['opts'].language).toBe(normalizeLanguage('en_US'));
|
|
178
|
+
});
|
|
179
|
+
|
|
168
180
|
it('fallback not given defaults to undefined', () => {
|
|
169
181
|
const tts = makeTts();
|
|
170
182
|
expect(tts['opts'].fallback).toBeUndefined();
|