@livekit/agents 1.0.50 → 1.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/dist/index.cjs +12 -10
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +13 -13
  4. package/dist/index.d.ts +13 -13
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +11 -10
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/api_protos.d.cts +67 -67
  9. package/dist/inference/api_protos.d.ts +67 -67
  10. package/dist/inference/llm.cjs +10 -8
  11. package/dist/inference/llm.cjs.map +1 -1
  12. package/dist/inference/llm.d.cts +1 -1
  13. package/dist/inference/llm.d.ts +1 -1
  14. package/dist/inference/llm.d.ts.map +1 -1
  15. package/dist/inference/llm.js +3 -7
  16. package/dist/inference/llm.js.map +1 -1
  17. package/dist/inference/stt.cjs +20 -12
  18. package/dist/inference/stt.cjs.map +1 -1
  19. package/dist/inference/stt.d.cts +3 -2
  20. package/dist/inference/stt.d.ts +3 -2
  21. package/dist/inference/stt.d.ts.map +1 -1
  22. package/dist/inference/stt.js +20 -12
  23. package/dist/inference/stt.js.map +1 -1
  24. package/dist/inference/stt.test.cjs +14 -0
  25. package/dist/inference/stt.test.cjs.map +1 -1
  26. package/dist/inference/stt.test.js +14 -0
  27. package/dist/inference/stt.test.js.map +1 -1
  28. package/dist/inference/tts.cjs +13 -4
  29. package/dist/inference/tts.cjs.map +1 -1
  30. package/dist/inference/tts.d.cts +2 -1
  31. package/dist/inference/tts.d.ts +2 -1
  32. package/dist/inference/tts.d.ts.map +1 -1
  33. package/dist/inference/tts.js +13 -4
  34. package/dist/inference/tts.js.map +1 -1
  35. package/dist/inference/tts.test.cjs +10 -0
  36. package/dist/inference/tts.test.cjs.map +1 -1
  37. package/dist/inference/tts.test.js +10 -0
  38. package/dist/inference/tts.test.js.map +1 -1
  39. package/dist/inference/utils.cjs +5 -5
  40. package/dist/inference/utils.cjs.map +1 -1
  41. package/dist/inference/utils.js +1 -1
  42. package/dist/inference/utils.js.map +1 -1
  43. package/dist/language.cjs +394 -0
  44. package/dist/language.cjs.map +1 -0
  45. package/dist/language.d.cts +15 -0
  46. package/dist/language.d.ts +15 -0
  47. package/dist/language.d.ts.map +1 -0
  48. package/dist/language.js +363 -0
  49. package/dist/language.js.map +1 -0
  50. package/dist/language.test.cjs +43 -0
  51. package/dist/language.test.cjs.map +1 -0
  52. package/dist/language.test.js +49 -0
  53. package/dist/language.test.js.map +1 -0
  54. package/dist/stream/deferred_stream.cjs +6 -2
  55. package/dist/stream/deferred_stream.cjs.map +1 -1
  56. package/dist/stream/deferred_stream.d.ts.map +1 -1
  57. package/dist/stream/deferred_stream.js +6 -2
  58. package/dist/stream/deferred_stream.js.map +1 -1
  59. package/dist/stt/stt.cjs.map +1 -1
  60. package/dist/stt/stt.d.cts +2 -1
  61. package/dist/stt/stt.d.ts +2 -1
  62. package/dist/stt/stt.d.ts.map +1 -1
  63. package/dist/stt/stt.js.map +1 -1
  64. package/dist/version.cjs +1 -1
  65. package/dist/version.js +1 -1
  66. package/dist/voice/agent_activity.cjs +1 -1
  67. package/dist/voice/agent_activity.cjs.map +1 -1
  68. package/dist/voice/agent_activity.js +1 -1
  69. package/dist/voice/agent_activity.js.map +1 -1
  70. package/dist/voice/agent_activity.test.cjs +135 -0
  71. package/dist/voice/agent_activity.test.cjs.map +1 -0
  72. package/dist/voice/agent_activity.test.js +134 -0
  73. package/dist/voice/agent_activity.test.js.map +1 -0
  74. package/dist/voice/audio_recognition.cjs.map +1 -1
  75. package/dist/voice/audio_recognition.d.cts +3 -2
  76. package/dist/voice/audio_recognition.d.ts +3 -2
  77. package/dist/voice/audio_recognition.d.ts.map +1 -1
  78. package/dist/voice/audio_recognition.js.map +1 -1
  79. package/dist/voice/events.cjs.map +1 -1
  80. package/dist/voice/events.d.cts +3 -2
  81. package/dist/voice/events.d.ts +3 -2
  82. package/dist/voice/events.d.ts.map +1 -1
  83. package/dist/voice/events.js.map +1 -1
  84. package/package.json +1 -1
  85. package/src/index.ts +13 -15
  86. package/src/inference/llm.ts +3 -8
  87. package/src/inference/stt.test.ts +17 -0
  88. package/src/inference/stt.ts +22 -14
  89. package/src/inference/tts.test.ts +12 -0
  90. package/src/inference/tts.ts +14 -5
  91. package/src/inference/utils.ts +1 -1
  92. package/src/language.test.ts +62 -0
  93. package/src/language.ts +380 -0
  94. package/src/stream/deferred_stream.ts +5 -1
  95. package/src/stt/stt.ts +2 -1
  96. package/src/voice/agent_activity.test.ts +194 -0
  97. package/src/voice/agent_activity.ts +1 -1
  98. package/src/voice/audio_recognition.ts +4 -3
  99. package/src/voice/events.ts +3 -2
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ParticipantKind } from '@livekit/rtc-node';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport {\n type Context,\n ROOT_CONTEXT,\n type Span,\n context as otelContext,\n trace,\n} from '@opentelemetry/api';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { traceTypes, tracer } from '../telemetry/index.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\nimport { setParticipantSpanAttributes } from './utils.js';\n\nexport interface EndOfTurnInfo {\n /** The new transcript text from the user's speech. */\n newTranscript: string;\n /** Confidence score of the transcript (0-1). */\n transcriptConfidence: number;\n /** Delay from speech stop to final transcription in milliseconds. */\n transcriptionDelay: number;\n /** Delay from speech stop to end of utterance detection in milliseconds. */\n endOfUtteranceDelay: number;\n /** Timestamp when user started speaking (milliseconds since epoch). */\n startedSpeakingAt: number | undefined;\n /** Timestamp when user stopped speaking (milliseconds since epoch). */\n stoppedSpeakingAt: number | undefined;\n}\n\nexport interface PreemptiveGenerationInfo {\n newTranscript: string;\n transcriptConfidence: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n /** Hooks for recognition events. */\n recognitionHooks: RecognitionHooks;\n /** Speech-to-text node. */\n stt?: STTNode;\n /** Voice activity detection. */\n vad?: VAD;\n /** Turn detector for end-of-turn prediction. */\n turnDetector?: _TurnDetector;\n /** Turn detection mode. */\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n /** Minimum endpointing delay in milliseconds. */\n minEndpointingDelay: number;\n /** Maximum endpointing delay in milliseconds. */\n maxEndpointingDelay: number;\n /** Root span context for tracing. */\n rootSpanContext?: Context;\n /** STT model name for tracing */\n sttModel?: string;\n /** STT provider name for tracing */\n sttProvider?: string;\n /** Getter for linked participant for span attribution */\n getLinkedParticipant?: () => ParticipantLike | undefined;\n}\n\n/**\n * Minimal participant shape for span attribution.\n * Compatible with both `LocalParticipant` and `RemoteParticipant` from `@livekit/rtc-node`.\n */\nexport interface ParticipantLike {\n sid: string | undefined;\n identity: string;\n kind: ParticipantKind;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n private rootSpanContext?: Context;\n private sttModel?: string;\n private sttProvider?: string;\n private getLinkedParticipant?: () => ParticipantLike | undefined;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private audioPreflightTranscript = '';\n private finalTranscriptConfidence: number[] = [];\n private lastSpeakingTime: number | undefined;\n private speechStartTime: number | undefined;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private userTurnSpan?: Span;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n this.rootSpanContext = opts.rootSpanContext;\n this.sttModel = opts.sttModel;\n this.sttProvider = opts.sttProvider;\n this.getLinkedParticipant = opts.getLinkedParticipant;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private ensureUserTurnSpan(startTime?: number): Span {\n if (this.userTurnSpan && this.userTurnSpan.isRecording()) {\n return this.userTurnSpan;\n }\n\n this.userTurnSpan = tracer.startSpan({\n name: 'user_turn',\n context: this.rootSpanContext,\n startTime,\n });\n\n const participant = this.getLinkedParticipant?.();\n if (participant) {\n setParticipantSpanAttributes(this.userTurnSpan, participant);\n }\n\n if (this.sttModel) {\n this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, this.sttModel);\n }\n if (this.sttProvider) {\n this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, this.sttProvider);\n }\n\n return this.userTurnSpan;\n }\n\n private userTurnContext(span: Span): Context {\n const base = this.rootSpanContext ?? ROOT_CONTEXT;\n return trace.setSpan(base, span);\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n const transcript = ev.alternatives?.[0]?.text;\n const confidence = ev.alternatives?.[0]?.confidence ?? 0;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.hooks.onFinalTranscript(ev);\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.finalTranscriptConfidence.push(confidence);\n const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for lastSpeakingTime\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n if (transcriptChanged) {\n this.logger.debug(\n { transcript: this.audioTranscript },\n 'triggering preemptive generation (FINAL_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioTranscript,\n transcriptConfidence:\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0,\n });\n }\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.PREFLIGHT_TRANSCRIPT:\n this.hooks.onInterimTranscript(ev);\n const preflightTranscript = ev.alternatives?.[0]?.text ?? '';\n const preflightConfidence = ev.alternatives?.[0]?.confidence ?? 0;\n const preflightLanguage = ev.alternatives?.[0]?.language;\n\n const MIN_LANGUAGE_DETECTION_LENGTH = 5;\n if (\n !this.lastLanguage ||\n (preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH)\n ) {\n this.lastLanguage = preflightLanguage;\n }\n\n if (!preflightTranscript) {\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: preflightTranscript,\n language: this.lastLanguage,\n },\n 'received user preflight transcript',\n );\n\n // still need to increment it as it's used for turn detection,\n this.lastFinalTranscriptTime = Date.now();\n // preflight transcript includes all pre-committed transcripts (including final transcript from the previous STT run)\n this.audioPreflightTranscript =\n `${this.audioTranscript} ${preflightTranscript}`.trimStart();\n this.audioInterimTranscript = preflightTranscript;\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.turnDetectionMode !== 'manual' || this.userTurnCommitted) {\n const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];\n this.logger.debug(\n {\n transcript:\n this.audioPreflightTranscript.length > 100\n ? this.audioPreflightTranscript.slice(0, 100) + '...'\n : this.audioPreflightTranscript,\n },\n 'triggering preemptive generation (PREFLIGHT_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioPreflightTranscript,\n transcriptConfidence:\n confidenceVals.length > 0\n ? confidenceVals.reduce((a, b) => a + b, 0) / confidenceVals.length\n : 0,\n });\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.START_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n {\n const span = this.ensureUserTurnSpan(Date.now());\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => {\n this.hooks.onStartOfSpeech({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: true,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n });\n }\n this.speaking = true;\n this.lastSpeakingTime = Date.now();\n\n this.bounceEOUTask?.cancel();\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n {\n const span = this.ensureUserTurnSpan();\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => {\n this.hooks.onEndOfSpeech({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: false,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n });\n }\n this.speaking = false;\n this.userTurnCommitted = true;\n this.lastSpeakingTime = Date.now();\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask =\n (\n lastSpeakingTime: number | undefined,\n lastFinalTranscriptTime: number,\n speechStartTime: number | undefined,\n ) =>\n async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n const userTurnSpan = this.ensureUserTurnSpan();\n const userTurnCtx = this.userTurnContext(userTurnSpan);\n\n if (turnDetector) {\n await tracer.startActiveSpan(\n async (span) => {\n this.logger.debug('Running turn detector model');\n\n let endOfTurnProbability = 0.0;\n let unlikelyThreshold: number | undefined;\n\n if (!(await turnDetector.supportsLanguage(this.lastLanguage))) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n try {\n endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n\n this.logger.debug(\n { endOfTurnProbability, unlikelyThreshold, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n } catch (error) {\n this.logger.error(error, 'Error predicting end of turn');\n }\n }\n\n span.setAttribute(\n traceTypes.ATTR_CHAT_CTX,\n JSON.stringify(chatCtx.toJSON({ excludeTimestamp: false })),\n );\n span.setAttribute(traceTypes.ATTR_EOU_PROBABILITY, endOfTurnProbability);\n span.setAttribute(traceTypes.ATTR_EOU_UNLIKELY_THRESHOLD, unlikelyThreshold ?? 0);\n span.setAttribute(traceTypes.ATTR_EOU_DELAY, endpointingDelay);\n span.setAttribute(traceTypes.ATTR_EOU_LANGUAGE, this.lastLanguage ?? '');\n },\n {\n name: 'eou_detection',\n context: userTurnCtx,\n },\n );\n }\n\n let extraSleep = endpointingDelay;\n if (lastSpeakingTime !== undefined) {\n extraSleep += lastSpeakingTime - Date.now();\n }\n\n if (extraSleep > 0) {\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n }\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const confidenceAvg =\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0;\n\n let startedSpeakingAt: number | undefined;\n let stoppedSpeakingAt: number | undefined;\n let transcriptionDelay: number | undefined;\n let endOfUtteranceDelay: number | undefined;\n\n // sometimes, we can't calculate the metrics because VAD was unreliable.\n // in this case, we just ignore the calculation, it's better than providing likely wrong values\n if (\n lastFinalTranscriptTime !== 0 &&\n lastSpeakingTime !== undefined &&\n speechStartTime !== undefined\n ) {\n startedSpeakingAt = speechStartTime;\n stoppedSpeakingAt = lastSpeakingTime;\n transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);\n endOfUtteranceDelay = Date.now() - lastSpeakingTime;\n }\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptConfidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n startedSpeakingAt,\n stoppedSpeakingAt,\n });\n\n if (committed) {\n this._endUserTurnSpan({\n transcript: this.audioTranscript,\n confidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n });\n\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n this.finalTranscriptConfidence = [];\n this.lastSpeakingTime = undefined;\n this.lastFinalTranscriptTime = 0;\n this.speechStartTime = undefined;\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n // copy the values before awaiting (the values can change)\n this.bounceEOUTask = Task.from(\n bounceEOUTask(this.lastSpeakingTime, this.lastFinalTranscriptTime, this.speechStartTime),\n );\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n {\n const startTime = Date.now() - ev.speechDuration;\n const span = this.ensureUserTurnSpan(startTime);\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => this.hooks.onStartOfSpeech(ev));\n }\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n // for metrics, get the \"earliest\" signal of speech as possible\n if (ev.rawAccumulatedSpeech > 0.0) {\n this.lastSpeakingTime = Date.now();\n\n if (this.speechStartTime === undefined) {\n this.speechStartTime = Date.now();\n }\n }\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n {\n const span = this.ensureUserTurnSpan();\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => this.hooks.onEndOfSpeech(ev));\n }\n\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.speaking = false;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n this.finalTranscriptConfidence = [];\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.name === 'AbortError') {\n this.logger.debug('User turn commit task cancelled');\n return;\n }\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n this.silenceAudioWriter.releaseLock();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private _endUserTurnSpan({\n transcript,\n confidence,\n transcriptionDelay,\n endOfUtteranceDelay,\n }: {\n transcript: string;\n confidence: number;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n }): void {\n if (this.userTurnSpan) {\n this.userTurnSpan.setAttributes({\n [traceTypes.ATTR_USER_TRANSCRIPT]: transcript,\n [traceTypes.ATTR_TRANSCRIPT_CONFIDENCE]: confidence,\n [traceTypes.ATTR_TRANSCRIPTION_DELAY]: transcriptionDelay,\n [traceTypes.ATTR_END_OF_TURN_DELAY]: endOfUtteranceDelay,\n });\n this.userTurnSpan.end();\n this.userTurnSpan = undefined;\n }\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAIA,SAAS,kBAAkB;AAC3B;AAAA,EAEE;AAAA,EAEA,WAAW;AAAA,EACX;AAAA,OACK;AAEP,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,YAAY,cAAc;AACnC,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAGtD,SAAS,oCAAoC;AA2EtC,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,2BAA2B;AAAA,EAC3B,4BAAsC,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AACpB,SAAK,kBAAkB,KAAK;AAC5B,SAAK,WAAW,KAAK;AACrB,SAAK,cAAc,KAAK;AACxB,SAAK,uBAAuB,KAAK;AAEjC,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEQ,mBAAmB,WAA0B;AAvLvD;AAwLI,QAAI,KAAK,gBAAgB,KAAK,aAAa,YAAY,GAAG;AACxD,aAAO,KAAK;AAAA,IACd;AAEA,SAAK,eAAe,OAAO,UAAU;AAAA,MACnC,MAAM;AAAA,MACN,SAAS,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,UAAM,eAAc,UAAK,yBAAL;AACpB,QAAI,aAAa;AACf,mCAA6B,KAAK,cAAc,WAAW;AAAA,IAC7D;AAEA,QAAI,KAAK,UAAU;AACjB,WAAK,aAAa,aAAa,WAAW,2BAA2B,KAAK,QAAQ;AAAA,IACpF;AACA,QAAI,KAAK,aAAa;AACpB,WAAK,aAAa,aAAa,WAAW,2BAA2B,KAAK,WAAW;AAAA,IACvF;AAEA,WAAO,KAAK;AAAA,EACd;AAAA,EAEQ,gBAAgB,MAAqB;AAC3C,UAAM,OAAO,KAAK,mBAAmB;AACrC,WAAO,MAAM,QAAQ,MAAM,IAAI;AAAA,EACjC;AAAA,EAEA,MAAc,WAAW,IAAiB;AAtN5C;AAuNI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,cAAM,eAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AACvD,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,MAAM,kBAAkB,EAAE;AAE/B,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,0BAA0B,KAAK,UAAU;AAC9C,cAAM,oBAAoB,KAAK,oBAAoB,KAAK;AACxD,aAAK,yBAAyB;AAC9B,aAAK,2BAA2B;AAEhC,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAMpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,cAAI,mBAAmB;AACrB,iBAAK,OAAO;AAAA,cACV,EAAE,YAAY,KAAK,gBAAgB;AAAA,cACnC;AAAA,YACF;AACA,iBAAK,MAAM,uBAAuB;AAAA,cAChC,eAAe,KAAK;AAAA,cACpB,sBACE,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAAA,YACR,CAAC;AAAA,UACH;AAEA,cAAI,CAAC,KAAK,UAAU;AAClB,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,MAAM,oBAAoB,EAAE;AACjC,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC1D,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AAChE,cAAM,qBAAoB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAEhD,cAAM,gCAAgC;AACtC,YACE,CAAC,KAAK,gBACL,qBAAqB,oBAAoB,SAAS,+BACnD;AACA,eAAK,eAAe;AAAA,QACtB;AAEA,YAAI,CAAC,qBAAqB;AACxB;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAGA,aAAK,0BAA0B,KAAK,IAAI;AAExC,aAAK,2BACH,GAAG,KAAK,eAAe,IAAI,mBAAmB,GAAG,UAAU;AAC7D,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAEpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,sBAAsB,YAAY,KAAK,mBAAmB;AACjE,gBAAM,iBAAiB,CAAC,GAAG,KAAK,2BAA2B,mBAAmB;AAC9E,eAAK,OAAO;AAAA,YACV;AAAA,cACE,YACE,KAAK,yBAAyB,SAAS,MACnC,KAAK,yBAAyB,MAAM,GAAG,GAAG,IAAI,QAC9C,KAAK;AAAA,YACb;AAAA,YACA;AAAA,UACF;AACA,eAAK,MAAM,uBAAuB;AAAA,YAChC,eAAe,KAAK;AAAA,YACpB,sBACE,eAAe,SAAS,IACpB,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,SAC3D;AAAA,UACR,CAAC;AAAA,QACH;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC;AACE,gBAAM,OAAO,KAAK,mBAAmB,KAAK,IAAI,CAAC;AAC/C,gBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,sBAAY,KAAK,KAAK,MAAM;AAC1B,iBAAK,MAAM,gBAAgB;AAAA,cACzB,MAAM,aAAa;AAAA,cACnB,cAAc;AAAA,cACd,WAAW,KAAK,IAAI;AAAA,cACpB,gBAAgB;AAAA,cAChB,iBAAiB;AAAA,cACjB,QAAQ,CAAC;AAAA,cACT,aAAa;AAAA,cACb,mBAAmB;AAAA,cACnB,UAAU;AAAA,cACV,uBAAuB;AAAA,cACvB,sBAAsB;AAAA,YACxB,CAAC;AAAA,UACH,CAAC;AAAA,QACH;AACA,aAAK,WAAW;AAChB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,mBAAK,kBAAL,mBAAoB;AACpB;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC;AACE,gBAAM,OAAO,KAAK,mBAAmB;AACrC,gBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,sBAAY,KAAK,KAAK,MAAM;AAC1B,iBAAK,MAAM,cAAc;AAAA,cACvB,MAAM,aAAa;AAAA,cACnB,cAAc;AAAA,cACd,WAAW,KAAK,IAAI;AAAA,cACpB,gBAAgB;AAAA,cAChB,iBAAiB;AAAA,cACjB,QAAQ,CAAC;AAAA,cACT,aAAa;AAAA,cACb,mBAAmB;AAAA,cACnB,UAAU;AAAA,cACV,uBAAuB;AAAA,cACvB,sBAAsB;AAAA,YACxB,CAAC;AAAA,UACH,CAAC;AAAA,QACH;AACA,aAAK,WAAW;AAChB,aAAK,oBAAoB;AACzB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAnahD;AAoaI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBACJ,CACE,kBACA,yBACA,oBAEF,OAAO,eAAgC;AACrC,UAAI,mBAAmB,KAAK;AAE5B,YAAM,eAAe,KAAK,mBAAmB;AAC7C,YAAM,cAAc,KAAK,gBAAgB,YAAY;AAErD,UAAI,cAAc;AAChB,cAAM,OAAO;AAAA,UACX,OAAO,SAAS;AACd,iBAAK,OAAO,MAAM,6BAA6B;AAE/C,gBAAI,uBAAuB;AAC3B,gBAAI;AAEJ,gBAAI,CAAE,MAAM,aAAa,iBAAiB,KAAK,YAAY,GAAI;AAC7D,mBAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,YAClF,OAAO;AACL,kBAAI;AACF,uCAAuB,MAAM,aAAa,iBAAiB,OAAO;AAClE,oCAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAE1E,qBAAK,OAAO;AAAA,kBACV,EAAE,sBAAsB,mBAAmB,UAAU,KAAK,aAAa;AAAA,kBACvE;AAAA,gBACF;AAEA,oBAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,qCAAmB,KAAK;AAAA,gBAC1B;AAAA,cACF,SAAS,OAAO;AACd,qBAAK,OAAO,MAAM,OAAO,8BAA8B;AAAA,cACzD;AAAA,YACF;AAEA,iBAAK;AAAA,cACH,WAAW;AAAA,cACX,KAAK,UAAU,QAAQ,OAAO,EAAE,kBAAkB,MAAM,CAAC,CAAC;AAAA,YAC5D;AACA,iBAAK,aAAa,WAAW,sBAAsB,oBAAoB;AACvE,iBAAK,aAAa,WAAW,6BAA6B,qBAAqB,CAAC;AAChF,iBAAK,aAAa,WAAW,gBAAgB,gBAAgB;AAC7D,iBAAK,aAAa,WAAW,mBAAmB,KAAK,gBAAgB,EAAE;AAAA,UACzE;AAAA,UACA;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,UACX;AAAA,QACF;AAAA,MACF;AAEA,UAAI,aAAa;AACjB,UAAI,qBAAqB,QAAW;AAClC,sBAAc,mBAAmB,KAAK,IAAI;AAAA,MAC5C;AAEA,UAAI,aAAa,GAAG;AAElB,cAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MACpE;AAEA,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,gBACJ,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAEN,UAAI;AACJ,UAAI;AACJ,UAAI;AACJ,UAAI;AAIJ,UACE,4BAA4B,KAC5B,qBAAqB,UACrB,oBAAoB,QACpB;AACA,4BAAoB;AACpB,4BAAoB;AACpB,6BAAqB,KAAK,IAAI,0BAA0B,kBAAkB,CAAC;AAC3E,8BAAsB,KAAK,IAAI,IAAI;AAAA,MACrC;AAEA,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,sBAAsB;AAAA,QACtB,oBAAoB,sBAAsB;AAAA,QAC1C,qBAAqB,uBAAuB;AAAA,QAC5C;AAAA,QACA;AAAA,MACF,CAAC;AAED,UAAI,WAAW;AACb,aAAK,iBAAiB;AAAA,UACpB,YAAY,KAAK;AAAA,UACjB,YAAY;AAAA,UACZ,oBAAoB,sBAAsB;AAAA,UAC1C,qBAAqB,uBAAuB;AAAA,QAC9C,CAAC;AAGD,aAAK,kBAAkB;AACvB,aAAK,4BAA4B,CAAC;AAClC,aAAK,mBAAmB;AACxB,aAAK,0BAA0B;AAC/B,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,kBAAL,mBAAoB;AAEpB,SAAK,gBAAgB,KAAK;AAAA,MACxB,cAAc,KAAK,kBAAkB,KAAK,yBAAyB,KAAK,eAAe;AAAA,IACzF;AAEA,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA3nBzE;AA4nBI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C;AACE,oBAAM,YAAY,KAAK,IAAI,IAAI,GAAG;AAClC,oBAAM,OAAO,KAAK,mBAAmB,SAAS;AAC9C,oBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,0BAAY,KAAK,KAAK,MAAM,KAAK,MAAM,gBAAgB,EAAE,CAAC;AAAA,YAC5D;AACA,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAEhC,gBAAI,GAAG,uBAAuB,GAAK;AACjC,mBAAK,mBAAmB,KAAK,IAAI;AAEjC,kBAAI,KAAK,oBAAoB,QAAW;AACtC,qBAAK,kBAAkB,KAAK,IAAI;AAAA,cAClC;AAAA,YACF;AACA;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C;AACE,oBAAM,OAAO,KAAK,mBAAmB;AACrC,oBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,0BAAY,KAAK,KAAK,MAAM,KAAK,MAAM,cAAc,EAAE,CAAC;AAAA,YAC1D;AAGA,iBAAK,WAAW;AAEhB,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA7sBlB;AA8sBI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,2BAA2B;AAChC,SAAK,4BAA4B,CAAC;AAClC,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AA5tBzC;AA6tBI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,SAAS,cAAc;AACrD,aAAK,OAAO,MAAM,iCAAiC;AACnD;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AA1wBhB;AA2wBI,SAAK,uBAAuB;AAC5B,SAAK,mBAAmB,YAAY;AACpC,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEQ,iBAAiB;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKS;AACP,QAAI,KAAK,cAAc;AACrB,WAAK,aAAa,cAAc;AAAA,QAC9B,CAAC,WAAW,oBAAoB,GAAG;AAAA,QACnC,CAAC,WAAW,0BAA0B,GAAG;AAAA,QACzC,CAAC,WAAW,wBAAwB,GAAG;AAAA,QACvC,CAAC,WAAW,sBAAsB,GAAG;AAAA,MACvC,CAAC;AACD,WAAK,aAAa,IAAI;AACtB,WAAK,eAAe;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ParticipantKind } from '@livekit/rtc-node';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport {\n type Context,\n ROOT_CONTEXT,\n type Span,\n context as otelContext,\n trace,\n} from '@opentelemetry/api';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport type { LanguageCode } from '../language.js';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { traceTypes, tracer } from '../telemetry/index.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\nimport { setParticipantSpanAttributes } from './utils.js';\n\nexport interface EndOfTurnInfo {\n /** The new transcript text from the user's speech. */\n newTranscript: string;\n /** Confidence score of the transcript (0-1). */\n transcriptConfidence: number;\n /** Delay from speech stop to final transcription in milliseconds. */\n transcriptionDelay: number;\n /** Delay from speech stop to end of utterance detection in milliseconds. */\n endOfUtteranceDelay: number;\n /** Timestamp when user started speaking (milliseconds since epoch). */\n startedSpeakingAt: number | undefined;\n /** Timestamp when user stopped speaking (milliseconds since epoch). */\n stoppedSpeakingAt: number | undefined;\n}\n\nexport interface PreemptiveGenerationInfo {\n newTranscript: string;\n transcriptConfidence: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: LanguageCode) => Promise<number | undefined>;\n supportsLanguage: (language?: LanguageCode) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n /** Hooks for recognition events. */\n recognitionHooks: RecognitionHooks;\n /** Speech-to-text node. */\n stt?: STTNode;\n /** Voice activity detection. */\n vad?: VAD;\n /** Turn detector for end-of-turn prediction. */\n turnDetector?: _TurnDetector;\n /** Turn detection mode. */\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n /** Minimum endpointing delay in milliseconds. */\n minEndpointingDelay: number;\n /** Maximum endpointing delay in milliseconds. */\n maxEndpointingDelay: number;\n /** Root span context for tracing. */\n rootSpanContext?: Context;\n /** STT model name for tracing */\n sttModel?: string;\n /** STT provider name for tracing */\n sttProvider?: string;\n /** Getter for linked participant for span attribution */\n getLinkedParticipant?: () => ParticipantLike | undefined;\n}\n\n/**\n * Minimal participant shape for span attribution.\n * Compatible with both `LocalParticipant` and `RemoteParticipant` from `@livekit/rtc-node`.\n */\nexport interface ParticipantLike {\n sid: string | undefined;\n identity: string;\n kind: ParticipantKind;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: LanguageCode;\n private rootSpanContext?: Context;\n private sttModel?: string;\n private sttProvider?: string;\n private getLinkedParticipant?: () => ParticipantLike | undefined;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private audioPreflightTranscript = '';\n private finalTranscriptConfidence: number[] = [];\n private lastSpeakingTime: number | undefined;\n private speechStartTime: number | undefined;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private userTurnSpan?: Span;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n this.rootSpanContext = opts.rootSpanContext;\n this.sttModel = opts.sttModel;\n this.sttProvider = opts.sttProvider;\n this.getLinkedParticipant = opts.getLinkedParticipant;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private ensureUserTurnSpan(startTime?: number): Span {\n if (this.userTurnSpan && this.userTurnSpan.isRecording()) {\n return this.userTurnSpan;\n }\n\n this.userTurnSpan = tracer.startSpan({\n name: 'user_turn',\n context: this.rootSpanContext,\n startTime,\n });\n\n const participant = this.getLinkedParticipant?.();\n if (participant) {\n setParticipantSpanAttributes(this.userTurnSpan, participant);\n }\n\n if (this.sttModel) {\n this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, this.sttModel);\n }\n if (this.sttProvider) {\n this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, this.sttProvider);\n }\n\n return this.userTurnSpan;\n }\n\n private userTurnContext(span: Span): Context {\n const base = this.rootSpanContext ?? ROOT_CONTEXT;\n return trace.setSpan(base, span);\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n const transcript = ev.alternatives?.[0]?.text;\n const confidence = ev.alternatives?.[0]?.confidence ?? 0;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.hooks.onFinalTranscript(ev);\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.finalTranscriptConfidence.push(confidence);\n const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for lastSpeakingTime\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n if (transcriptChanged) {\n this.logger.debug(\n { transcript: this.audioTranscript },\n 'triggering preemptive generation (FINAL_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioTranscript,\n transcriptConfidence:\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0,\n });\n }\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.PREFLIGHT_TRANSCRIPT:\n this.hooks.onInterimTranscript(ev);\n const preflightTranscript = ev.alternatives?.[0]?.text ?? '';\n const preflightConfidence = ev.alternatives?.[0]?.confidence ?? 0;\n const preflightLanguage = ev.alternatives?.[0]?.language;\n\n const MIN_LANGUAGE_DETECTION_LENGTH = 5;\n if (\n !this.lastLanguage ||\n (preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH)\n ) {\n this.lastLanguage = preflightLanguage;\n }\n\n if (!preflightTranscript) {\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: preflightTranscript,\n language: this.lastLanguage,\n },\n 'received user preflight transcript',\n );\n\n // still need to increment it as it's used for turn detection,\n this.lastFinalTranscriptTime = Date.now();\n // preflight transcript includes all pre-committed transcripts (including final transcript from the previous STT run)\n this.audioPreflightTranscript =\n `${this.audioTranscript} ${preflightTranscript}`.trimStart();\n this.audioInterimTranscript = preflightTranscript;\n\n if (!this.vad || this.lastSpeakingTime === undefined) {\n // vad disabled, use stt timestamp\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.turnDetectionMode !== 'manual' || this.userTurnCommitted) {\n const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];\n this.logger.debug(\n {\n transcript:\n this.audioPreflightTranscript.length > 100\n ? this.audioPreflightTranscript.slice(0, 100) + '...'\n : this.audioPreflightTranscript,\n },\n 'triggering preemptive generation (PREFLIGHT_TRANSCRIPT)',\n );\n this.hooks.onPreemptiveGeneration({\n newTranscript: this.audioPreflightTranscript,\n transcriptConfidence:\n confidenceVals.length > 0\n ? confidenceVals.reduce((a, b) => a + b, 0) / confidenceVals.length\n : 0,\n });\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.START_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n {\n const span = this.ensureUserTurnSpan(Date.now());\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => {\n this.hooks.onStartOfSpeech({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: true,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n });\n }\n this.speaking = true;\n this.lastSpeakingTime = Date.now();\n\n this.bounceEOUTask?.cancel();\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n {\n const span = this.ensureUserTurnSpan();\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => {\n this.hooks.onEndOfSpeech({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: 0,\n timestamp: Date.now(),\n speechDuration: 0,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: false,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n });\n }\n this.speaking = false;\n this.userTurnCommitted = true;\n this.lastSpeakingTime = Date.now();\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask =\n (\n lastSpeakingTime: number | undefined,\n lastFinalTranscriptTime: number,\n speechStartTime: number | undefined,\n ) =>\n async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n const userTurnSpan = this.ensureUserTurnSpan();\n const userTurnCtx = this.userTurnContext(userTurnSpan);\n\n if (turnDetector) {\n await tracer.startActiveSpan(\n async (span) => {\n this.logger.debug('Running turn detector model');\n\n let endOfTurnProbability = 0.0;\n let unlikelyThreshold: number | undefined;\n\n if (!(await turnDetector.supportsLanguage(this.lastLanguage))) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n try {\n endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n\n this.logger.debug(\n { endOfTurnProbability, unlikelyThreshold, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n } catch (error) {\n this.logger.error(error, 'Error predicting end of turn');\n }\n }\n\n span.setAttribute(\n traceTypes.ATTR_CHAT_CTX,\n JSON.stringify(chatCtx.toJSON({ excludeTimestamp: false })),\n );\n span.setAttribute(traceTypes.ATTR_EOU_PROBABILITY, endOfTurnProbability);\n span.setAttribute(traceTypes.ATTR_EOU_UNLIKELY_THRESHOLD, unlikelyThreshold ?? 0);\n span.setAttribute(traceTypes.ATTR_EOU_DELAY, endpointingDelay);\n span.setAttribute(traceTypes.ATTR_EOU_LANGUAGE, this.lastLanguage ?? '');\n },\n {\n name: 'eou_detection',\n context: userTurnCtx,\n },\n );\n }\n\n let extraSleep = endpointingDelay;\n if (lastSpeakingTime !== undefined) {\n extraSleep += lastSpeakingTime - Date.now();\n }\n\n if (extraSleep > 0) {\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n }\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const confidenceAvg =\n this.finalTranscriptConfidence.length > 0\n ? this.finalTranscriptConfidence.reduce((a, b) => a + b, 0) /\n this.finalTranscriptConfidence.length\n : 0;\n\n let startedSpeakingAt: number | undefined;\n let stoppedSpeakingAt: number | undefined;\n let transcriptionDelay: number | undefined;\n let endOfUtteranceDelay: number | undefined;\n\n // sometimes, we can't calculate the metrics because VAD was unreliable.\n // in this case, we just ignore the calculation, it's better than providing likely wrong values\n if (\n lastFinalTranscriptTime !== 0 &&\n lastSpeakingTime !== undefined &&\n speechStartTime !== undefined\n ) {\n startedSpeakingAt = speechStartTime;\n stoppedSpeakingAt = lastSpeakingTime;\n transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);\n endOfUtteranceDelay = Date.now() - lastSpeakingTime;\n }\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptConfidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n startedSpeakingAt,\n stoppedSpeakingAt,\n });\n\n if (committed) {\n this._endUserTurnSpan({\n transcript: this.audioTranscript,\n confidence: confidenceAvg,\n transcriptionDelay: transcriptionDelay ?? 0,\n endOfUtteranceDelay: endOfUtteranceDelay ?? 0,\n });\n\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n this.finalTranscriptConfidence = [];\n this.lastSpeakingTime = undefined;\n this.lastFinalTranscriptTime = 0;\n this.speechStartTime = undefined;\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n // copy the values before awaiting (the values can change)\n this.bounceEOUTask = Task.from(\n bounceEOUTask(this.lastSpeakingTime, this.lastFinalTranscriptTime, this.speechStartTime),\n );\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n {\n const startTime = Date.now() - ev.speechDuration;\n const span = this.ensureUserTurnSpan(startTime);\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => this.hooks.onStartOfSpeech(ev));\n }\n this.speaking = true;\n\n // Capture sample rate from the first VAD event if not already set\n if (ev.frames.length > 0 && ev.frames[0]) {\n this.sampleRate = ev.frames[0].sampleRate;\n }\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n // for metrics, get the \"earliest\" signal of speech as possible\n if (ev.rawAccumulatedSpeech > 0.0) {\n this.lastSpeakingTime = Date.now();\n\n if (this.speechStartTime === undefined) {\n this.speechStartTime = Date.now();\n }\n }\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n {\n const span = this.ensureUserTurnSpan();\n const ctx = this.userTurnContext(span);\n otelContext.with(ctx, () => this.hooks.onEndOfSpeech(ev));\n }\n\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.speaking = false;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.audioPreflightTranscript = '';\n this.finalTranscriptConfidence = [];\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.name === 'AbortError') {\n this.logger.debug('User turn commit task cancelled');\n return;\n }\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n this.silenceAudioWriter.releaseLock();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private _endUserTurnSpan({\n transcript,\n confidence,\n transcriptionDelay,\n endOfUtteranceDelay,\n }: {\n transcript: string;\n confidence: number;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n }): void {\n if (this.userTurnSpan) {\n this.userTurnSpan.setAttributes({\n [traceTypes.ATTR_USER_TRANSCRIPT]: transcript,\n [traceTypes.ATTR_TRANSCRIPT_CONFIDENCE]: confidence,\n [traceTypes.ATTR_TRANSCRIPTION_DELAY]: transcriptionDelay,\n [traceTypes.ATTR_END_OF_TURN_DELAY]: endOfUtteranceDelay,\n });\n this.userTurnSpan.end();\n this.userTurnSpan = undefined;\n }\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAIA,SAAS,kBAAkB;AAC3B;AAAA,EAEE;AAAA,EAEA,WAAW;AAAA,EACX;AAAA,OACK;AAEP,SAAS,sBAAsB;AAE/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,YAAY,cAAc;AACnC,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAGtD,SAAS,oCAAoC;AA2EtC,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,2BAA2B;AAAA,EAC3B,4BAAsC,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AACpB,SAAK,kBAAkB,KAAK;AAC5B,SAAK,WAAW,KAAK;AACrB,SAAK,cAAc,KAAK;AACxB,SAAK,uBAAuB,KAAK;AAEjC,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEQ,mBAAmB,WAA0B;AAxLvD;AAyLI,QAAI,KAAK,gBAAgB,KAAK,aAAa,YAAY,GAAG;AACxD,aAAO,KAAK;AAAA,IACd;AAEA,SAAK,eAAe,OAAO,UAAU;AAAA,MACnC,MAAM;AAAA,MACN,SAAS,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,UAAM,eAAc,UAAK,yBAAL;AACpB,QAAI,aAAa;AACf,mCAA6B,KAAK,cAAc,WAAW;AAAA,IAC7D;AAEA,QAAI,KAAK,UAAU;AACjB,WAAK,aAAa,aAAa,WAAW,2BAA2B,KAAK,QAAQ;AAAA,IACpF;AACA,QAAI,KAAK,aAAa;AACpB,WAAK,aAAa,aAAa,WAAW,2BAA2B,KAAK,WAAW;AAAA,IACvF;AAEA,WAAO,KAAK;AAAA,EACd;AAAA,EAEQ,gBAAgB,MAAqB;AAC3C,UAAM,OAAO,KAAK,mBAAmB;AACrC,WAAO,MAAM,QAAQ,MAAM,IAAI;AAAA,EACjC;AAAA,EAEA,MAAc,WAAW,IAAiB;AAvN5C;AAwNI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,cAAM,eAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AACvD,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,MAAM,kBAAkB,EAAE;AAE/B,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,0BAA0B,KAAK,UAAU;AAC9C,cAAM,oBAAoB,KAAK,oBAAoB,KAAK;AACxD,aAAK,yBAAyB;AAC9B,aAAK,2BAA2B;AAEhC,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAMpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,cAAI,mBAAmB;AACrB,iBAAK,OAAO;AAAA,cACV,EAAE,YAAY,KAAK,gBAAgB;AAAA,cACnC;AAAA,YACF;AACA,iBAAK,MAAM,uBAAuB;AAAA,cAChC,eAAe,KAAK;AAAA,cACpB,sBACE,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAAA,YACR,CAAC;AAAA,UACH;AAEA,cAAI,CAAC,KAAK,UAAU;AAClB,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,MAAM,oBAAoB,EAAE;AACjC,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC1D,cAAM,wBAAsB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,eAAc;AAChE,cAAM,qBAAoB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAEhD,cAAM,gCAAgC;AACtC,YACE,CAAC,KAAK,gBACL,qBAAqB,oBAAoB,SAAS,+BACnD;AACA,eAAK,eAAe;AAAA,QACtB;AAEA,YAAI,CAAC,qBAAqB;AACxB;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAGA,aAAK,0BAA0B,KAAK,IAAI;AAExC,aAAK,2BACH,GAAG,KAAK,eAAe,IAAI,mBAAmB,GAAG,UAAU;AAC7D,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,OAAO,KAAK,qBAAqB,QAAW;AAEpD,eAAK,mBAAmB,KAAK,IAAI;AAAA,QACnC;AAEA,YAAI,KAAK,sBAAsB,YAAY,KAAK,mBAAmB;AACjE,gBAAM,iBAAiB,CAAC,GAAG,KAAK,2BAA2B,mBAAmB;AAC9E,eAAK,OAAO;AAAA,YACV;AAAA,cACE,YACE,KAAK,yBAAyB,SAAS,MACnC,KAAK,yBAAyB,MAAM,GAAG,GAAG,IAAI,QAC9C,KAAK;AAAA,YACb;AAAA,YACA;AAAA,UACF;AACA,eAAK,MAAM,uBAAuB;AAAA,YAChC,eAAe,KAAK;AAAA,YACpB,sBACE,eAAe,SAAS,IACpB,eAAe,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IAAI,eAAe,SAC3D;AAAA,UACR,CAAC;AAAA,QACH;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC;AACE,gBAAM,OAAO,KAAK,mBAAmB,KAAK,IAAI,CAAC;AAC/C,gBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,sBAAY,KAAK,KAAK,MAAM;AAC1B,iBAAK,MAAM,gBAAgB;AAAA,cACzB,MAAM,aAAa;AAAA,cACnB,cAAc;AAAA,cACd,WAAW,KAAK,IAAI;AAAA,cACpB,gBAAgB;AAAA,cAChB,iBAAiB;AAAA,cACjB,QAAQ,CAAC;AAAA,cACT,aAAa;AAAA,cACb,mBAAmB;AAAA,cACnB,UAAU;AAAA,cACV,uBAAuB;AAAA,cACvB,sBAAsB;AAAA,YACxB,CAAC;AAAA,UACH,CAAC;AAAA,QACH;AACA,aAAK,WAAW;AAChB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,mBAAK,kBAAL,mBAAoB;AACpB;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC;AACE,gBAAM,OAAO,KAAK,mBAAmB;AACrC,gBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,sBAAY,KAAK,KAAK,MAAM;AAC1B,iBAAK,MAAM,cAAc;AAAA,cACvB,MAAM,aAAa;AAAA,cACnB,cAAc;AAAA,cACd,WAAW,KAAK,IAAI;AAAA,cACpB,gBAAgB;AAAA,cAChB,iBAAiB;AAAA,cACjB,QAAQ,CAAC;AAAA,cACT,aAAa;AAAA,cACb,mBAAmB;AAAA,cACnB,UAAU;AAAA,cACV,uBAAuB;AAAA,cACvB,sBAAsB;AAAA,YACxB,CAAC;AAAA,UACH,CAAC;AAAA,QACH;AACA,aAAK,WAAW;AAChB,aAAK,oBAAoB;AACzB,aAAK,mBAAmB,KAAK,IAAI;AAEjC,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AApahD;AAqaI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBACJ,CACE,kBACA,yBACA,oBAEF,OAAO,eAAgC;AACrC,UAAI,mBAAmB,KAAK;AAE5B,YAAM,eAAe,KAAK,mBAAmB;AAC7C,YAAM,cAAc,KAAK,gBAAgB,YAAY;AAErD,UAAI,cAAc;AAChB,cAAM,OAAO;AAAA,UACX,OAAO,SAAS;AACd,iBAAK,OAAO,MAAM,6BAA6B;AAE/C,gBAAI,uBAAuB;AAC3B,gBAAI;AAEJ,gBAAI,CAAE,MAAM,aAAa,iBAAiB,KAAK,YAAY,GAAI;AAC7D,mBAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,YAClF,OAAO;AACL,kBAAI;AACF,uCAAuB,MAAM,aAAa,iBAAiB,OAAO;AAClE,oCAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAE1E,qBAAK,OAAO;AAAA,kBACV,EAAE,sBAAsB,mBAAmB,UAAU,KAAK,aAAa;AAAA,kBACvE;AAAA,gBACF;AAEA,oBAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,qCAAmB,KAAK;AAAA,gBAC1B;AAAA,cACF,SAAS,OAAO;AACd,qBAAK,OAAO,MAAM,OAAO,8BAA8B;AAAA,cACzD;AAAA,YACF;AAEA,iBAAK;AAAA,cACH,WAAW;AAAA,cACX,KAAK,UAAU,QAAQ,OAAO,EAAE,kBAAkB,MAAM,CAAC,CAAC;AAAA,YAC5D;AACA,iBAAK,aAAa,WAAW,sBAAsB,oBAAoB;AACvE,iBAAK,aAAa,WAAW,6BAA6B,qBAAqB,CAAC;AAChF,iBAAK,aAAa,WAAW,gBAAgB,gBAAgB;AAC7D,iBAAK,aAAa,WAAW,mBAAmB,KAAK,gBAAgB,EAAE;AAAA,UACzE;AAAA,UACA;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,UACX;AAAA,QACF;AAAA,MACF;AAEA,UAAI,aAAa;AACjB,UAAI,qBAAqB,QAAW;AAClC,sBAAc,mBAAmB,KAAK,IAAI;AAAA,MAC5C;AAEA,UAAI,aAAa,GAAG;AAElB,cAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MACpE;AAEA,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,gBACJ,KAAK,0BAA0B,SAAS,IACpC,KAAK,0BAA0B,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC,IACxD,KAAK,0BAA0B,SAC/B;AAEN,UAAI;AACJ,UAAI;AACJ,UAAI;AACJ,UAAI;AAIJ,UACE,4BAA4B,KAC5B,qBAAqB,UACrB,oBAAoB,QACpB;AACA,4BAAoB;AACpB,4BAAoB;AACpB,6BAAqB,KAAK,IAAI,0BAA0B,kBAAkB,CAAC;AAC3E,8BAAsB,KAAK,IAAI,IAAI;AAAA,MACrC;AAEA,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,sBAAsB;AAAA,QACtB,oBAAoB,sBAAsB;AAAA,QAC1C,qBAAqB,uBAAuB;AAAA,QAC5C;AAAA,QACA;AAAA,MACF,CAAC;AAED,UAAI,WAAW;AACb,aAAK,iBAAiB;AAAA,UACpB,YAAY,KAAK;AAAA,UACjB,YAAY;AAAA,UACZ,oBAAoB,sBAAsB;AAAA,UAC1C,qBAAqB,uBAAuB;AAAA,QAC9C,CAAC;AAGD,aAAK,kBAAkB;AACvB,aAAK,4BAA4B,CAAC;AAClC,aAAK,mBAAmB;AACxB,aAAK,0BAA0B;AAC/B,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,kBAAL,mBAAoB;AAEpB,SAAK,gBAAgB,KAAK;AAAA,MACxB,cAAc,KAAK,kBAAkB,KAAK,yBAAyB,KAAK,eAAe;AAAA,IACzF;AAEA,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA5nBzE;AA6nBI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C;AACE,oBAAM,YAAY,KAAK,IAAI,IAAI,GAAG;AAClC,oBAAM,OAAO,KAAK,mBAAmB,SAAS;AAC9C,oBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,0BAAY,KAAK,KAAK,MAAM,KAAK,MAAM,gBAAgB,EAAE,CAAC;AAAA,YAC5D;AACA,iBAAK,WAAW;AAGhB,gBAAI,GAAG,OAAO,SAAS,KAAK,GAAG,OAAO,CAAC,GAAG;AACxC,mBAAK,aAAa,GAAG,OAAO,CAAC,EAAE;AAAA,YACjC;AAEA,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAEhC,gBAAI,GAAG,uBAAuB,GAAK;AACjC,mBAAK,mBAAmB,KAAK,IAAI;AAEjC,kBAAI,KAAK,oBAAoB,QAAW;AACtC,qBAAK,kBAAkB,KAAK,IAAI;AAAA,cAClC;AAAA,YACF;AACA;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C;AACE,oBAAM,OAAO,KAAK,mBAAmB;AACrC,oBAAM,MAAM,KAAK,gBAAgB,IAAI;AACrC,0BAAY,KAAK,KAAK,MAAM,KAAK,MAAM,cAAc,EAAE,CAAC;AAAA,YAC1D;AAGA,iBAAK,WAAW;AAEhB,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AA9sBlB;AA+sBI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,2BAA2B;AAChC,SAAK,4BAA4B,CAAC;AAClC,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AA7tBzC;AA8tBI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,SAAS,cAAc;AACrD,aAAK,OAAO,MAAM,iCAAiC;AACnD;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AA3wBhB;AA4wBI,SAAK,uBAAuB;AAC5B,SAAK,mBAAmB,YAAY;AACpC,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEQ,iBAAiB;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAKS;AACP,QAAI,KAAK,cAAc;AACrB,WAAK,aAAa,cAAc;AAAA,QAC9B,CAAC,WAAW,oBAAoB,GAAG;AAAA,QACnC,CAAC,WAAW,0BAA0B,GAAG;AAAA,QACzC,CAAC,WAAW,wBAAwB,GAAG;AAAA,QACvC,CAAC,WAAW,sBAAsB,GAAG;AAAA,MACvC,CAAC;AACD,WAAK,aAAa,IAAI;AACtB,WAAK,eAAe;AAAA,IACtB;AAAA,EACF;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/voice/events.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n ChatMessage,\n FunctionCall,\n FunctionCallOutput,\n LLM,\n RealtimeModel,\n RealtimeModelError,\n} from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport type { AgentMetrics } from '../metrics/base.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS } from '../tts/index.js';\nimport type { TTSError } from '../tts/tts.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport enum AgentSessionEventTypes {\n UserInputTranscribed = 'user_input_transcribed',\n AgentStateChanged = 'agent_state_changed',\n UserStateChanged = 'user_state_changed',\n ConversationItemAdded = 'conversation_item_added',\n FunctionToolsExecuted = 'function_tools_executed',\n MetricsCollected = 'metrics_collected',\n SpeechCreated = 'speech_created',\n Error = 'error',\n Close = 'close',\n}\n\nexport type UserState = 'speaking' | 'listening' | 'away';\nexport type AgentState = 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';\n\nexport enum CloseReason {\n ERROR = 'error',\n JOB_SHUTDOWN = 'job_shutdown',\n PARTICIPANT_DISCONNECTED = 'participant_disconnected',\n USER_INITIATED = 'user_initiated',\n}\n\nexport type ShutdownReason = CloseReason | string;\n\nexport type SpeechSource = 'say' | 'generate_reply' | 'tool_response';\n\nexport type UserStateChangedEvent = {\n type: 'user_state_changed';\n oldState: UserState;\n newState: UserState;\n createdAt: number;\n};\n\nexport const createUserStateChangedEvent = (\n oldState: UserState,\n newState: UserState,\n createdAt: number = Date.now(),\n): UserStateChangedEvent => ({\n type: 'user_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type AgentStateChangedEvent = {\n type: 'agent_state_changed';\n oldState: AgentState;\n newState: AgentState;\n createdAt: number;\n};\n\nexport const createAgentStateChangedEvent = (\n oldState: AgentState,\n newState: AgentState,\n createdAt: number = Date.now(),\n): AgentStateChangedEvent => ({\n type: 'agent_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type UserInputTranscribedEvent = {\n type: 'user_input_transcribed';\n transcript: string;\n isFinal: boolean;\n // TODO(AJS-106): add multi participant support\n /** Not supported yet. Always null by default. */\n speakerId: string | null;\n createdAt: number;\n language: string | null;\n};\n\nexport const createUserInputTranscribedEvent = ({\n transcript,\n isFinal,\n speakerId = null,\n language = null,\n createdAt = Date.now(),\n}: {\n transcript: string;\n isFinal: boolean;\n speakerId?: string | null;\n language?: string | null;\n createdAt?: number;\n}): UserInputTranscribedEvent => ({\n type: 'user_input_transcribed',\n transcript,\n isFinal,\n speakerId,\n language,\n createdAt,\n});\n\nexport type MetricsCollectedEvent = {\n type: 'metrics_collected';\n metrics: AgentMetrics;\n createdAt: number;\n};\n\nexport const createMetricsCollectedEvent = ({\n metrics,\n createdAt = Date.now(),\n}: {\n metrics: AgentMetrics;\n createdAt?: number;\n}): MetricsCollectedEvent => ({\n type: 'metrics_collected',\n metrics,\n createdAt,\n});\n\nexport type ConversationItemAddedEvent = {\n type: 'conversation_item_added';\n item: ChatMessage;\n createdAt: number;\n};\n\nexport const createConversationItemAddedEvent = (\n item: ChatMessage,\n createdAt: number = Date.now(),\n): ConversationItemAddedEvent => ({\n type: 'conversation_item_added',\n item,\n createdAt,\n});\n\nexport type FunctionToolsExecutedEvent = {\n type: 'function_tools_executed';\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt: number;\n};\n\nexport const createFunctionToolsExecutedEvent = ({\n functionCalls,\n functionCallOutputs,\n createdAt = Date.now(),\n}: {\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt?: number;\n}): FunctionToolsExecutedEvent => {\n return {\n type: 'function_tools_executed',\n functionCalls,\n functionCallOutputs,\n createdAt,\n };\n};\n\nexport const zipFunctionCallsAndOutputs = (\n event: FunctionToolsExecutedEvent,\n): Array<[FunctionCall, FunctionCallOutput]> => {\n return event.functionCalls.map((call, index) => [call, event.functionCallOutputs[index]!]);\n};\n\nexport type SpeechCreatedEvent = {\n type: 'speech_created';\n /**\n * True if the speech was created using public methods like `say` or `generate_reply`\n */\n userInitiated: boolean;\n /**\n * Source indicating how the speech handle was created\n */\n source: SpeechSource;\n /**\n * The speech handle that was created\n */\n // TODO(shubhra): we need to make sure this doesn't get serialized\n speechHandle: SpeechHandle;\n /**\n * The timestamp when the speech handle was created\n */\n createdAt: number;\n};\n\nexport const createSpeechCreatedEvent = ({\n userInitiated,\n source,\n speechHandle,\n createdAt = Date.now(),\n}: {\n userInitiated: boolean;\n source: SpeechSource;\n speechHandle: SpeechHandle;\n createdAt?: number;\n}): SpeechCreatedEvent => ({\n type: 'speech_created',\n userInitiated,\n source,\n speechHandle,\n createdAt,\n});\n\nexport type ErrorEvent = {\n type: 'error';\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown;\n source: LLM | STT | TTS | RealtimeModel | unknown;\n createdAt: number;\n};\n\nexport const createErrorEvent = (\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown,\n source: LLM | STT | TTS | RealtimeModel | unknown,\n createdAt: number = Date.now(),\n): ErrorEvent => ({\n type: 'error',\n error,\n source,\n createdAt,\n});\n\nexport type CloseEvent = {\n type: 'close';\n error: RealtimeModelError | STTError | TTSError | LLMError | null;\n reason: ShutdownReason;\n createdAt: number;\n};\n\nexport const createCloseEvent = (\n reason: ShutdownReason,\n error: RealtimeModelError | STTError | TTSError | LLMError | null = null,\n createdAt: number = Date.now(),\n): CloseEvent => ({\n type: 'close',\n error,\n reason,\n createdAt,\n});\n\nexport type AgentEvent =\n | UserInputTranscribedEvent\n | UserStateChangedEvent\n | AgentStateChangedEvent\n | MetricsCollectedEvent\n | ConversationItemAddedEvent\n | FunctionToolsExecutedEvent\n | SpeechCreatedEvent\n | ErrorEvent\n | CloseEvent;\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAmBO,IAAK,yBAAL,kBAAKA,4BAAL;AACL,EAAAA,wBAAA,0BAAuB;AACvB,EAAAA,wBAAA,uBAAoB;AACpB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,mBAAgB;AAChB,EAAAA,wBAAA,WAAQ;AACR,EAAAA,wBAAA,WAAQ;AATE,SAAAA;AAAA,GAAA;AAeL,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,WAAQ;AACR,EAAAA,aAAA,kBAAe;AACf,EAAAA,aAAA,8BAA2B;AAC3B,EAAAA,aAAA,oBAAiB;AAJP,SAAAA;AAAA,GAAA;AAkBL,MAAM,8BAA8B,CACzC,UACA,UACA,YAAoB,KAAK,IAAI,OACF;AAAA,EAC3B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,+BAA+B,CAC1C,UACA,UACA,YAAoB,KAAK,IAAI,OACD;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AAaO,MAAM,kCAAkC,CAAC;AAAA,EAC9C;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,YAAY,KAAK,IAAI;AACvB,OAMkC;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAQO,MAAM,8BAA8B,CAAC;AAAA,EAC1C;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAG8B;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AACF;AAQO,MAAM,mCAAmC,CAC9C,MACA,YAAoB,KAAK,IAAI,OACG;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AACF;AASO,MAAM,mCAAmC,CAAC;AAAA,EAC/C;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,MAIkC;AAChC,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,MAAM,6BAA6B,CACxC,UAC8C;AAC9C,SAAO,MAAM,cAAc,IAAI,CAAC,MAAM,UAAU,CAAC,MAAM,MAAM,oBAAoB,KAAK,CAAE,CAAC;AAC3F;AAuBO,MAAM,2BAA2B,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAK2B;AAAA,EACzB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,OACA,QACA,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,QACA,QAAoE,MACpE,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;","names":["AgentSessionEventTypes","CloseReason"]}
1
+ {"version":3,"sources":["../../src/voice/events.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { LanguageCode } from '../language.js';\nimport type {\n ChatMessage,\n FunctionCall,\n FunctionCallOutput,\n LLM,\n RealtimeModel,\n RealtimeModelError,\n} from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport type { AgentMetrics } from '../metrics/base.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS } from '../tts/index.js';\nimport type { TTSError } from '../tts/tts.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport enum AgentSessionEventTypes {\n UserInputTranscribed = 'user_input_transcribed',\n AgentStateChanged = 'agent_state_changed',\n UserStateChanged = 'user_state_changed',\n ConversationItemAdded = 'conversation_item_added',\n FunctionToolsExecuted = 'function_tools_executed',\n MetricsCollected = 'metrics_collected',\n SpeechCreated = 'speech_created',\n Error = 'error',\n Close = 'close',\n}\n\nexport type UserState = 'speaking' | 'listening' | 'away';\nexport type AgentState = 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';\n\nexport enum CloseReason {\n ERROR = 'error',\n JOB_SHUTDOWN = 'job_shutdown',\n PARTICIPANT_DISCONNECTED = 'participant_disconnected',\n USER_INITIATED = 'user_initiated',\n}\n\nexport type ShutdownReason = CloseReason | string;\n\nexport type SpeechSource = 'say' | 'generate_reply' | 'tool_response';\n\nexport type UserStateChangedEvent = {\n type: 'user_state_changed';\n oldState: UserState;\n newState: UserState;\n createdAt: number;\n};\n\nexport const createUserStateChangedEvent = (\n oldState: UserState,\n newState: UserState,\n createdAt: number = Date.now(),\n): UserStateChangedEvent => ({\n type: 'user_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type AgentStateChangedEvent = {\n type: 'agent_state_changed';\n oldState: AgentState;\n newState: AgentState;\n createdAt: number;\n};\n\nexport const createAgentStateChangedEvent = (\n oldState: AgentState,\n newState: AgentState,\n createdAt: number = Date.now(),\n): AgentStateChangedEvent => ({\n type: 'agent_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type UserInputTranscribedEvent = {\n type: 'user_input_transcribed';\n transcript: string;\n isFinal: boolean;\n // TODO(AJS-106): add multi participant support\n /** Not supported yet. Always null by default. */\n speakerId: string | null;\n createdAt: number;\n language: LanguageCode | null;\n};\n\nexport const createUserInputTranscribedEvent = ({\n transcript,\n isFinal,\n speakerId = null,\n language = null,\n createdAt = Date.now(),\n}: {\n transcript: string;\n isFinal: boolean;\n speakerId?: string | null;\n language?: LanguageCode | null;\n createdAt?: number;\n}): UserInputTranscribedEvent => ({\n type: 'user_input_transcribed',\n transcript,\n isFinal,\n speakerId,\n language,\n createdAt,\n});\n\nexport type MetricsCollectedEvent = {\n type: 'metrics_collected';\n metrics: AgentMetrics;\n createdAt: number;\n};\n\nexport const createMetricsCollectedEvent = ({\n metrics,\n createdAt = Date.now(),\n}: {\n metrics: AgentMetrics;\n createdAt?: number;\n}): MetricsCollectedEvent => ({\n type: 'metrics_collected',\n metrics,\n createdAt,\n});\n\nexport type ConversationItemAddedEvent = {\n type: 'conversation_item_added';\n item: ChatMessage;\n createdAt: number;\n};\n\nexport const createConversationItemAddedEvent = (\n item: ChatMessage,\n createdAt: number = Date.now(),\n): ConversationItemAddedEvent => ({\n type: 'conversation_item_added',\n item,\n createdAt,\n});\n\nexport type FunctionToolsExecutedEvent = {\n type: 'function_tools_executed';\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt: number;\n};\n\nexport const createFunctionToolsExecutedEvent = ({\n functionCalls,\n functionCallOutputs,\n createdAt = Date.now(),\n}: {\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt?: number;\n}): FunctionToolsExecutedEvent => {\n return {\n type: 'function_tools_executed',\n functionCalls,\n functionCallOutputs,\n createdAt,\n };\n};\n\nexport const zipFunctionCallsAndOutputs = (\n event: FunctionToolsExecutedEvent,\n): Array<[FunctionCall, FunctionCallOutput]> => {\n return event.functionCalls.map((call, index) => [call, event.functionCallOutputs[index]!]);\n};\n\nexport type SpeechCreatedEvent = {\n type: 'speech_created';\n /**\n * True if the speech was created using public methods like `say` or `generate_reply`\n */\n userInitiated: boolean;\n /**\n * Source indicating how the speech handle was created\n */\n source: SpeechSource;\n /**\n * The speech handle that was created\n */\n // TODO(shubhra): we need to make sure this doesn't get serialized\n speechHandle: SpeechHandle;\n /**\n * The timestamp when the speech handle was created\n */\n createdAt: number;\n};\n\nexport const createSpeechCreatedEvent = ({\n userInitiated,\n source,\n speechHandle,\n createdAt = Date.now(),\n}: {\n userInitiated: boolean;\n source: SpeechSource;\n speechHandle: SpeechHandle;\n createdAt?: number;\n}): SpeechCreatedEvent => ({\n type: 'speech_created',\n userInitiated,\n source,\n speechHandle,\n createdAt,\n});\n\nexport type ErrorEvent = {\n type: 'error';\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown;\n source: LLM | STT | TTS | RealtimeModel | unknown;\n createdAt: number;\n};\n\nexport const createErrorEvent = (\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown,\n source: LLM | STT | TTS | RealtimeModel | unknown,\n createdAt: number = Date.now(),\n): ErrorEvent => ({\n type: 'error',\n error,\n source,\n createdAt,\n});\n\nexport type CloseEvent = {\n type: 'close';\n error: RealtimeModelError | STTError | TTSError | LLMError | null;\n reason: ShutdownReason;\n createdAt: number;\n};\n\nexport const createCloseEvent = (\n reason: ShutdownReason,\n error: RealtimeModelError | STTError | TTSError | LLMError | null = null,\n createdAt: number = Date.now(),\n): CloseEvent => ({\n type: 'close',\n error,\n reason,\n createdAt,\n});\n\nexport type AgentEvent =\n | UserInputTranscribedEvent\n | UserStateChangedEvent\n | AgentStateChangedEvent\n | MetricsCollectedEvent\n | ConversationItemAddedEvent\n | FunctionToolsExecutedEvent\n | SpeechCreatedEvent\n | ErrorEvent\n | CloseEvent;\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAoBO,IAAK,yBAAL,kBAAKA,4BAAL;AACL,EAAAA,wBAAA,0BAAuB;AACvB,EAAAA,wBAAA,uBAAoB;AACpB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,mBAAgB;AAChB,EAAAA,wBAAA,WAAQ;AACR,EAAAA,wBAAA,WAAQ;AATE,SAAAA;AAAA,GAAA;AAeL,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,WAAQ;AACR,EAAAA,aAAA,kBAAe;AACf,EAAAA,aAAA,8BAA2B;AAC3B,EAAAA,aAAA,oBAAiB;AAJP,SAAAA;AAAA,GAAA;AAkBL,MAAM,8BAA8B,CACzC,UACA,UACA,YAAoB,KAAK,IAAI,OACF;AAAA,EAC3B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,+BAA+B,CAC1C,UACA,UACA,YAAoB,KAAK,IAAI,OACD;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AAaO,MAAM,kCAAkC,CAAC;AAAA,EAC9C;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,YAAY,KAAK,IAAI;AACvB,OAMkC;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAQO,MAAM,8BAA8B,CAAC;AAAA,EAC1C;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAG8B;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AACF;AAQO,MAAM,mCAAmC,CAC9C,MACA,YAAoB,KAAK,IAAI,OACG;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AACF;AASO,MAAM,mCAAmC,CAAC;AAAA,EAC/C;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,MAIkC;AAChC,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,MAAM,6BAA6B,CACxC,UAC8C;AAC9C,SAAO,MAAM,cAAc,IAAI,CAAC,MAAM,UAAU,CAAC,MAAM,MAAM,oBAAoB,KAAK,CAAE,CAAC;AAC3F;AAuBO,MAAM,2BAA2B,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAK2B;AAAA,EACzB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,OACA,QACA,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,QACA,QAAoE,MACpE,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;","names":["AgentSessionEventTypes","CloseReason"]}
@@ -1,3 +1,4 @@
1
+ import type { LanguageCode } from '../language.js';
1
2
  import type { ChatMessage, FunctionCall, FunctionCallOutput, LLM, RealtimeModel, RealtimeModelError } from '../llm/index.js';
2
3
  import type { LLMError } from '../llm/llm.js';
3
4
  import type { AgentMetrics } from '../metrics/base.js';
@@ -48,13 +49,13 @@ export type UserInputTranscribedEvent = {
48
49
  /** Not supported yet. Always null by default. */
49
50
  speakerId: string | null;
50
51
  createdAt: number;
51
- language: string | null;
52
+ language: LanguageCode | null;
52
53
  };
53
54
  export declare const createUserInputTranscribedEvent: ({ transcript, isFinal, speakerId, language, createdAt, }: {
54
55
  transcript: string;
55
56
  isFinal: boolean;
56
57
  speakerId?: string | null;
57
- language?: string | null;
58
+ language?: LanguageCode | null;
58
59
  createdAt?: number;
59
60
  }) => UserInputTranscribedEvent;
60
61
  export type MetricsCollectedEvent = {
@@ -1,3 +1,4 @@
1
+ import type { LanguageCode } from '../language.js';
1
2
  import type { ChatMessage, FunctionCall, FunctionCallOutput, LLM, RealtimeModel, RealtimeModelError } from '../llm/index.js';
2
3
  import type { LLMError } from '../llm/llm.js';
3
4
  import type { AgentMetrics } from '../metrics/base.js';
@@ -48,13 +49,13 @@ export type UserInputTranscribedEvent = {
48
49
  /** Not supported yet. Always null by default. */
49
50
  speakerId: string | null;
50
51
  createdAt: number;
51
- language: string | null;
52
+ language: LanguageCode | null;
52
53
  };
53
54
  export declare const createUserInputTranscribedEvent: ({ transcript, isFinal, speakerId, language, createdAt, }: {
54
55
  transcript: string;
55
56
  isFinal: boolean;
56
57
  speakerId?: string | null;
57
- language?: string | null;
58
+ language?: LanguageCode | null;
58
59
  createdAt?: number;
59
60
  }) => UserInputTranscribedEvent;
60
61
  export type MetricsCollectedEvent = {
@@ -1 +1 @@
1
- {"version":3,"file":"events.d.ts","sourceRoot":"","sources":["../../src/voice/events.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,WAAW,EACX,YAAY,EACZ,kBAAkB,EAClB,GAAG,EACH,aAAa,EACb,kBAAkB,EACnB,MAAM,iBAAiB,CAAC;AACzB,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAEvD,oBAAY,sBAAsB;IAChC,oBAAoB,2BAA2B;IAC/C,iBAAiB,wBAAwB;IACzC,gBAAgB,uBAAuB;IACvC,qBAAqB,4BAA4B;IACjD,qBAAqB,4BAA4B;IACjD,gBAAgB,sBAAsB;IACtC,aAAa,mBAAmB;IAChC,KAAK,UAAU;IACf,KAAK,UAAU;CAChB;AAED,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,WAAW,GAAG,MAAM,CAAC;AAC1D,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,UAAU,CAAC;AAEzF,oBAAY,WAAW;IACrB,KAAK,UAAU;IACf,YAAY,iBAAiB;IAC7B,wBAAwB,6BAA6B;IACrD,cAAc,mBAAmB;CAClC;AAED,MAAM,MAAM,cAAc,GAAG,WAAW,GAAG,MAAM,CAAC;AAElD,MAAM,MAAM,YAAY,GAAG,KAAK,GAAG,gBAAgB,GAAG,eAAe,CAAC;AAEtE,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,QAAQ,EAAE,SAAS,CAAC;IACpB,QAAQ,EAAE,SAAS,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,2BAA2B,aAC5B,SAAS,YACT,SAAS,cACR,MAAM,KAChB,qBAKD,CAAC;AAEH,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,qBAAqB,CAAC;IAC5B,QAAQ,EAAE,UAAU,CAAC;IACrB,QAAQ,EAAE,UAAU,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,4BAA4B,aAC7B,UAAU,YACV,UAAU,cACT,MAAM,KAChB,sBAKD,CAAC;AAEH,MAAM,MAAM,yBAAyB,GAAG;IACtC,IAAI,EAAE,wBAAwB,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IAEjB,iDAAiD;IACjD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;CACzB,CAAC;AAEF,eAAO,MAAM,+BAA+B,6DAMzC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,yBAOF,CAAC;AAEH,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,2BAA2B,4BAGrC;IACD,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,qBAIF,CAAC;AAEH,MAAM,MAAM,0BAA0B,GAAG;IACvC,IAAI,EAAE,yBAAyB,CAAC;IAChC,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gCAAgC,SACrC,WAAW,cACN,MAAM,KAChB,0BAID,CAAC;AAEH,MAAM,MAAM,0BAA0B,GAAG;IACvC,IAAI,EAAE,yBAAyB,CAAC;IAChC,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,mBAAmB,EAAE,kBAAkB,EAAE,CAAC;IAC1C,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gCAAgC,uDAI1C;IACD,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,mBAAmB,EAAE,kBAAkB,EAAE,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,0BAOH,CAAC;AAEF,eAAO,MAAM,0BAA0B,UAC9B,0BAA0B,KAChC,MAAM,CAAC,YAAY,EAAE,kBAAkB,CAAC,CAE1C,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB;;OAEG;IACH,aAAa,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,MAAM,EAAE,YAAY,CAAC;IACrB;;OAEG;IAEH,YAAY,EAAE,YAAY,CAAC;IAC3B;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,wBAAwB,wDAKlC;IACD,aAAa,EAAE,OAAO,CAAC;IACvB,MAAM,EAAE,YAAY,CAAC;IACrB,YAAY,EAAE,YAAY,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,kBAMF,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;IACrE,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,aAAa,GAAG,OAAO,CAAC;IAClD,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gBAAgB,UACpB,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,UAC5D,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,aAAa,GAAG,OAAO,cACtC,MAAM,KAChB,UAKD,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,IAAI,CAAC;IAClE,MAAM,EAAE,cAAc,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gBAAgB,WACnB,cAAc,UACf,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,IAAI,cACtD,MAAM,KAChB,UAKD,CAAC;AAEH,MAAM,MAAM,UAAU,GAClB,yBAAyB,GACzB,qBAAqB,GACrB,sBAAsB,GACtB,qBAAqB,GACrB,0BAA0B,GAC1B,0BAA0B,GAC1B,kBAAkB,GAClB,UAAU,GACV,UAAU,CAAC"}
1
+ {"version":3,"file":"events.d.ts","sourceRoot":"","sources":["../../src/voice/events.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,KAAK,EACV,WAAW,EACX,YAAY,EACZ,kBAAkB,EAClB,GAAG,EACH,aAAa,EACb,kBAAkB,EACnB,MAAM,iBAAiB,CAAC;AACzB,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAEvD,oBAAY,sBAAsB;IAChC,oBAAoB,2BAA2B;IAC/C,iBAAiB,wBAAwB;IACzC,gBAAgB,uBAAuB;IACvC,qBAAqB,4BAA4B;IACjD,qBAAqB,4BAA4B;IACjD,gBAAgB,sBAAsB;IACtC,aAAa,mBAAmB;IAChC,KAAK,UAAU;IACf,KAAK,UAAU;CAChB;AAED,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,WAAW,GAAG,MAAM,CAAC;AAC1D,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,UAAU,CAAC;AAEzF,oBAAY,WAAW;IACrB,KAAK,UAAU;IACf,YAAY,iBAAiB;IAC7B,wBAAwB,6BAA6B;IACrD,cAAc,mBAAmB;CAClC;AAED,MAAM,MAAM,cAAc,GAAG,WAAW,GAAG,MAAM,CAAC;AAElD,MAAM,MAAM,YAAY,GAAG,KAAK,GAAG,gBAAgB,GAAG,eAAe,CAAC;AAEtE,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,oBAAoB,CAAC;IAC3B,QAAQ,EAAE,SAAS,CAAC;IACpB,QAAQ,EAAE,SAAS,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,2BAA2B,aAC5B,SAAS,YACT,SAAS,cACR,MAAM,KAChB,qBAKD,CAAC;AAEH,MAAM,MAAM,sBAAsB,GAAG;IACnC,IAAI,EAAE,qBAAqB,CAAC;IAC5B,QAAQ,EAAE,UAAU,CAAC;IACrB,QAAQ,EAAE,UAAU,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,4BAA4B,aAC7B,UAAU,YACV,UAAU,cACT,MAAM,KAChB,sBAKD,CAAC;AAEH,MAAM,MAAM,yBAAyB,GAAG;IACtC,IAAI,EAAE,wBAAwB,CAAC;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IAEjB,iDAAiD;IACjD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,YAAY,GAAG,IAAI,CAAC;CAC/B,CAAC;AAEF,eAAO,MAAM,+BAA+B,6DAMzC;IACD,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,QAAQ,CAAC,EAAE,YAAY,GAAG,IAAI,CAAC;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,yBAOF,CAAC;AAEH,MAAM,MAAM,qBAAqB,GAAG;IAClC,IAAI,EAAE,mBAAmB,CAAC;IAC1B,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,2BAA2B,4BAGrC;IACD,OAAO,EAAE,YAAY,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,qBAIF,CAAC;AAEH,MAAM,MAAM,0BAA0B,GAAG;IACvC,IAAI,EAAE,yBAAyB,CAAC;IAChC,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gCAAgC,SACrC,WAAW,cACN,MAAM,KAChB,0BAID,CAAC;AAEH,MAAM,MAAM,0BAA0B,GAAG;IACvC,IAAI,EAAE,yBAAyB,CAAC;IAChC,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,mBAAmB,EAAE,kBAAkB,EAAE,CAAC;IAC1C,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gCAAgC,uDAI1C;IACD,aAAa,EAAE,YAAY,EAAE,CAAC;IAC9B,mBAAmB,EAAE,kBAAkB,EAAE,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,0BAOH,CAAC;AAEF,eAAO,MAAM,0BAA0B,UAC9B,0BAA0B,KAChC,MAAM,CAAC,YAAY,EAAE,kBAAkB,CAAC,CAE1C,CAAC;AAEF,MAAM,MAAM,kBAAkB,GAAG;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB;;OAEG;IACH,aAAa,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,MAAM,EAAE,YAAY,CAAC;IACrB;;OAEG;IAEH,YAAY,EAAE,YAAY,CAAC;IAC3B;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,wBAAwB,wDAKlC;IACD,aAAa,EAAE,OAAO,CAAC;IACvB,MAAM,EAAE,YAAY,CAAC;IACrB,YAAY,EAAE,YAAY,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB,KAAG,kBAMF,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,CAAC;IACrE,MAAM,EAAE,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,aAAa,GAAG,OAAO,CAAC;IAClD,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gBAAgB,UACpB,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,OAAO,UAC5D,GAAG,GAAG,GAAG,GAAG,GAAG,GAAG,aAAa,GAAG,OAAO,cACtC,MAAM,KAChB,UAKD,CAAC;AAEH,MAAM,MAAM,UAAU,GAAG;IACvB,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,IAAI,CAAC;IAClE,MAAM,EAAE,cAAc,CAAC;IACvB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,eAAO,MAAM,gBAAgB,WACnB,cAAc,UACf,kBAAkB,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,GAAG,IAAI,cACtD,MAAM,KAChB,UAKD,CAAC;AAEH,MAAM,MAAM,UAAU,GAClB,yBAAyB,GACzB,qBAAqB,GACrB,sBAAsB,GACtB,qBAAqB,GACrB,0BAA0B,GAC1B,0BAA0B,GAC1B,kBAAkB,GAClB,UAAU,GACV,UAAU,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/voice/events.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n ChatMessage,\n FunctionCall,\n FunctionCallOutput,\n LLM,\n RealtimeModel,\n RealtimeModelError,\n} from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport type { AgentMetrics } from '../metrics/base.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS } from '../tts/index.js';\nimport type { TTSError } from '../tts/tts.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport enum AgentSessionEventTypes {\n UserInputTranscribed = 'user_input_transcribed',\n AgentStateChanged = 'agent_state_changed',\n UserStateChanged = 'user_state_changed',\n ConversationItemAdded = 'conversation_item_added',\n FunctionToolsExecuted = 'function_tools_executed',\n MetricsCollected = 'metrics_collected',\n SpeechCreated = 'speech_created',\n Error = 'error',\n Close = 'close',\n}\n\nexport type UserState = 'speaking' | 'listening' | 'away';\nexport type AgentState = 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';\n\nexport enum CloseReason {\n ERROR = 'error',\n JOB_SHUTDOWN = 'job_shutdown',\n PARTICIPANT_DISCONNECTED = 'participant_disconnected',\n USER_INITIATED = 'user_initiated',\n}\n\nexport type ShutdownReason = CloseReason | string;\n\nexport type SpeechSource = 'say' | 'generate_reply' | 'tool_response';\n\nexport type UserStateChangedEvent = {\n type: 'user_state_changed';\n oldState: UserState;\n newState: UserState;\n createdAt: number;\n};\n\nexport const createUserStateChangedEvent = (\n oldState: UserState,\n newState: UserState,\n createdAt: number = Date.now(),\n): UserStateChangedEvent => ({\n type: 'user_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type AgentStateChangedEvent = {\n type: 'agent_state_changed';\n oldState: AgentState;\n newState: AgentState;\n createdAt: number;\n};\n\nexport const createAgentStateChangedEvent = (\n oldState: AgentState,\n newState: AgentState,\n createdAt: number = Date.now(),\n): AgentStateChangedEvent => ({\n type: 'agent_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type UserInputTranscribedEvent = {\n type: 'user_input_transcribed';\n transcript: string;\n isFinal: boolean;\n // TODO(AJS-106): add multi participant support\n /** Not supported yet. Always null by default. */\n speakerId: string | null;\n createdAt: number;\n language: string | null;\n};\n\nexport const createUserInputTranscribedEvent = ({\n transcript,\n isFinal,\n speakerId = null,\n language = null,\n createdAt = Date.now(),\n}: {\n transcript: string;\n isFinal: boolean;\n speakerId?: string | null;\n language?: string | null;\n createdAt?: number;\n}): UserInputTranscribedEvent => ({\n type: 'user_input_transcribed',\n transcript,\n isFinal,\n speakerId,\n language,\n createdAt,\n});\n\nexport type MetricsCollectedEvent = {\n type: 'metrics_collected';\n metrics: AgentMetrics;\n createdAt: number;\n};\n\nexport const createMetricsCollectedEvent = ({\n metrics,\n createdAt = Date.now(),\n}: {\n metrics: AgentMetrics;\n createdAt?: number;\n}): MetricsCollectedEvent => ({\n type: 'metrics_collected',\n metrics,\n createdAt,\n});\n\nexport type ConversationItemAddedEvent = {\n type: 'conversation_item_added';\n item: ChatMessage;\n createdAt: number;\n};\n\nexport const createConversationItemAddedEvent = (\n item: ChatMessage,\n createdAt: number = Date.now(),\n): ConversationItemAddedEvent => ({\n type: 'conversation_item_added',\n item,\n createdAt,\n});\n\nexport type FunctionToolsExecutedEvent = {\n type: 'function_tools_executed';\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt: number;\n};\n\nexport const createFunctionToolsExecutedEvent = ({\n functionCalls,\n functionCallOutputs,\n createdAt = Date.now(),\n}: {\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt?: number;\n}): FunctionToolsExecutedEvent => {\n return {\n type: 'function_tools_executed',\n functionCalls,\n functionCallOutputs,\n createdAt,\n };\n};\n\nexport const zipFunctionCallsAndOutputs = (\n event: FunctionToolsExecutedEvent,\n): Array<[FunctionCall, FunctionCallOutput]> => {\n return event.functionCalls.map((call, index) => [call, event.functionCallOutputs[index]!]);\n};\n\nexport type SpeechCreatedEvent = {\n type: 'speech_created';\n /**\n * True if the speech was created using public methods like `say` or `generate_reply`\n */\n userInitiated: boolean;\n /**\n * Source indicating how the speech handle was created\n */\n source: SpeechSource;\n /**\n * The speech handle that was created\n */\n // TODO(shubhra): we need to make sure this doesn't get serialized\n speechHandle: SpeechHandle;\n /**\n * The timestamp when the speech handle was created\n */\n createdAt: number;\n};\n\nexport const createSpeechCreatedEvent = ({\n userInitiated,\n source,\n speechHandle,\n createdAt = Date.now(),\n}: {\n userInitiated: boolean;\n source: SpeechSource;\n speechHandle: SpeechHandle;\n createdAt?: number;\n}): SpeechCreatedEvent => ({\n type: 'speech_created',\n userInitiated,\n source,\n speechHandle,\n createdAt,\n});\n\nexport type ErrorEvent = {\n type: 'error';\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown;\n source: LLM | STT | TTS | RealtimeModel | unknown;\n createdAt: number;\n};\n\nexport const createErrorEvent = (\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown,\n source: LLM | STT | TTS | RealtimeModel | unknown,\n createdAt: number = Date.now(),\n): ErrorEvent => ({\n type: 'error',\n error,\n source,\n createdAt,\n});\n\nexport type CloseEvent = {\n type: 'close';\n error: RealtimeModelError | STTError | TTSError | LLMError | null;\n reason: ShutdownReason;\n createdAt: number;\n};\n\nexport const createCloseEvent = (\n reason: ShutdownReason,\n error: RealtimeModelError | STTError | TTSError | LLMError | null = null,\n createdAt: number = Date.now(),\n): CloseEvent => ({\n type: 'close',\n error,\n reason,\n createdAt,\n});\n\nexport type AgentEvent =\n | UserInputTranscribedEvent\n | UserStateChangedEvent\n | AgentStateChangedEvent\n | MetricsCollectedEvent\n | ConversationItemAddedEvent\n | FunctionToolsExecutedEvent\n | SpeechCreatedEvent\n | ErrorEvent\n | CloseEvent;\n"],"mappings":"AAmBO,IAAK,yBAAL,kBAAKA,4BAAL;AACL,EAAAA,wBAAA,0BAAuB;AACvB,EAAAA,wBAAA,uBAAoB;AACpB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,mBAAgB;AAChB,EAAAA,wBAAA,WAAQ;AACR,EAAAA,wBAAA,WAAQ;AATE,SAAAA;AAAA,GAAA;AAeL,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,WAAQ;AACR,EAAAA,aAAA,kBAAe;AACf,EAAAA,aAAA,8BAA2B;AAC3B,EAAAA,aAAA,oBAAiB;AAJP,SAAAA;AAAA,GAAA;AAkBL,MAAM,8BAA8B,CACzC,UACA,UACA,YAAoB,KAAK,IAAI,OACF;AAAA,EAC3B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,+BAA+B,CAC1C,UACA,UACA,YAAoB,KAAK,IAAI,OACD;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AAaO,MAAM,kCAAkC,CAAC;AAAA,EAC9C;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,YAAY,KAAK,IAAI;AACvB,OAMkC;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAQO,MAAM,8BAA8B,CAAC;AAAA,EAC1C;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAG8B;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AACF;AAQO,MAAM,mCAAmC,CAC9C,MACA,YAAoB,KAAK,IAAI,OACG;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AACF;AASO,MAAM,mCAAmC,CAAC;AAAA,EAC/C;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,MAIkC;AAChC,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,MAAM,6BAA6B,CACxC,UAC8C;AAC9C,SAAO,MAAM,cAAc,IAAI,CAAC,MAAM,UAAU,CAAC,MAAM,MAAM,oBAAoB,KAAK,CAAE,CAAC;AAC3F;AAuBO,MAAM,2BAA2B,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAK2B;AAAA,EACzB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,OACA,QACA,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,QACA,QAAoE,MACpE,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;","names":["AgentSessionEventTypes","CloseReason"]}
1
+ {"version":3,"sources":["../../src/voice/events.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { LanguageCode } from '../language.js';\nimport type {\n ChatMessage,\n FunctionCall,\n FunctionCallOutput,\n LLM,\n RealtimeModel,\n RealtimeModelError,\n} from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport type { AgentMetrics } from '../metrics/base.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS } from '../tts/index.js';\nimport type { TTSError } from '../tts/tts.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport enum AgentSessionEventTypes {\n UserInputTranscribed = 'user_input_transcribed',\n AgentStateChanged = 'agent_state_changed',\n UserStateChanged = 'user_state_changed',\n ConversationItemAdded = 'conversation_item_added',\n FunctionToolsExecuted = 'function_tools_executed',\n MetricsCollected = 'metrics_collected',\n SpeechCreated = 'speech_created',\n Error = 'error',\n Close = 'close',\n}\n\nexport type UserState = 'speaking' | 'listening' | 'away';\nexport type AgentState = 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';\n\nexport enum CloseReason {\n ERROR = 'error',\n JOB_SHUTDOWN = 'job_shutdown',\n PARTICIPANT_DISCONNECTED = 'participant_disconnected',\n USER_INITIATED = 'user_initiated',\n}\n\nexport type ShutdownReason = CloseReason | string;\n\nexport type SpeechSource = 'say' | 'generate_reply' | 'tool_response';\n\nexport type UserStateChangedEvent = {\n type: 'user_state_changed';\n oldState: UserState;\n newState: UserState;\n createdAt: number;\n};\n\nexport const createUserStateChangedEvent = (\n oldState: UserState,\n newState: UserState,\n createdAt: number = Date.now(),\n): UserStateChangedEvent => ({\n type: 'user_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type AgentStateChangedEvent = {\n type: 'agent_state_changed';\n oldState: AgentState;\n newState: AgentState;\n createdAt: number;\n};\n\nexport const createAgentStateChangedEvent = (\n oldState: AgentState,\n newState: AgentState,\n createdAt: number = Date.now(),\n): AgentStateChangedEvent => ({\n type: 'agent_state_changed',\n oldState,\n newState,\n createdAt,\n});\n\nexport type UserInputTranscribedEvent = {\n type: 'user_input_transcribed';\n transcript: string;\n isFinal: boolean;\n // TODO(AJS-106): add multi participant support\n /** Not supported yet. Always null by default. */\n speakerId: string | null;\n createdAt: number;\n language: LanguageCode | null;\n};\n\nexport const createUserInputTranscribedEvent = ({\n transcript,\n isFinal,\n speakerId = null,\n language = null,\n createdAt = Date.now(),\n}: {\n transcript: string;\n isFinal: boolean;\n speakerId?: string | null;\n language?: LanguageCode | null;\n createdAt?: number;\n}): UserInputTranscribedEvent => ({\n type: 'user_input_transcribed',\n transcript,\n isFinal,\n speakerId,\n language,\n createdAt,\n});\n\nexport type MetricsCollectedEvent = {\n type: 'metrics_collected';\n metrics: AgentMetrics;\n createdAt: number;\n};\n\nexport const createMetricsCollectedEvent = ({\n metrics,\n createdAt = Date.now(),\n}: {\n metrics: AgentMetrics;\n createdAt?: number;\n}): MetricsCollectedEvent => ({\n type: 'metrics_collected',\n metrics,\n createdAt,\n});\n\nexport type ConversationItemAddedEvent = {\n type: 'conversation_item_added';\n item: ChatMessage;\n createdAt: number;\n};\n\nexport const createConversationItemAddedEvent = (\n item: ChatMessage,\n createdAt: number = Date.now(),\n): ConversationItemAddedEvent => ({\n type: 'conversation_item_added',\n item,\n createdAt,\n});\n\nexport type FunctionToolsExecutedEvent = {\n type: 'function_tools_executed';\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt: number;\n};\n\nexport const createFunctionToolsExecutedEvent = ({\n functionCalls,\n functionCallOutputs,\n createdAt = Date.now(),\n}: {\n functionCalls: FunctionCall[];\n functionCallOutputs: FunctionCallOutput[];\n createdAt?: number;\n}): FunctionToolsExecutedEvent => {\n return {\n type: 'function_tools_executed',\n functionCalls,\n functionCallOutputs,\n createdAt,\n };\n};\n\nexport const zipFunctionCallsAndOutputs = (\n event: FunctionToolsExecutedEvent,\n): Array<[FunctionCall, FunctionCallOutput]> => {\n return event.functionCalls.map((call, index) => [call, event.functionCallOutputs[index]!]);\n};\n\nexport type SpeechCreatedEvent = {\n type: 'speech_created';\n /**\n * True if the speech was created using public methods like `say` or `generate_reply`\n */\n userInitiated: boolean;\n /**\n * Source indicating how the speech handle was created\n */\n source: SpeechSource;\n /**\n * The speech handle that was created\n */\n // TODO(shubhra): we need to make sure this doesn't get serialized\n speechHandle: SpeechHandle;\n /**\n * The timestamp when the speech handle was created\n */\n createdAt: number;\n};\n\nexport const createSpeechCreatedEvent = ({\n userInitiated,\n source,\n speechHandle,\n createdAt = Date.now(),\n}: {\n userInitiated: boolean;\n source: SpeechSource;\n speechHandle: SpeechHandle;\n createdAt?: number;\n}): SpeechCreatedEvent => ({\n type: 'speech_created',\n userInitiated,\n source,\n speechHandle,\n createdAt,\n});\n\nexport type ErrorEvent = {\n type: 'error';\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown;\n source: LLM | STT | TTS | RealtimeModel | unknown;\n createdAt: number;\n};\n\nexport const createErrorEvent = (\n error: RealtimeModelError | STTError | TTSError | LLMError | unknown,\n source: LLM | STT | TTS | RealtimeModel | unknown,\n createdAt: number = Date.now(),\n): ErrorEvent => ({\n type: 'error',\n error,\n source,\n createdAt,\n});\n\nexport type CloseEvent = {\n type: 'close';\n error: RealtimeModelError | STTError | TTSError | LLMError | null;\n reason: ShutdownReason;\n createdAt: number;\n};\n\nexport const createCloseEvent = (\n reason: ShutdownReason,\n error: RealtimeModelError | STTError | TTSError | LLMError | null = null,\n createdAt: number = Date.now(),\n): CloseEvent => ({\n type: 'close',\n error,\n reason,\n createdAt,\n});\n\nexport type AgentEvent =\n | UserInputTranscribedEvent\n | UserStateChangedEvent\n | AgentStateChangedEvent\n | MetricsCollectedEvent\n | ConversationItemAddedEvent\n | FunctionToolsExecutedEvent\n | SpeechCreatedEvent\n | ErrorEvent\n | CloseEvent;\n"],"mappings":"AAoBO,IAAK,yBAAL,kBAAKA,4BAAL;AACL,EAAAA,wBAAA,0BAAuB;AACvB,EAAAA,wBAAA,uBAAoB;AACpB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,2BAAwB;AACxB,EAAAA,wBAAA,sBAAmB;AACnB,EAAAA,wBAAA,mBAAgB;AAChB,EAAAA,wBAAA,WAAQ;AACR,EAAAA,wBAAA,WAAQ;AATE,SAAAA;AAAA,GAAA;AAeL,IAAK,cAAL,kBAAKC,iBAAL;AACL,EAAAA,aAAA,WAAQ;AACR,EAAAA,aAAA,kBAAe;AACf,EAAAA,aAAA,8BAA2B;AAC3B,EAAAA,aAAA,oBAAiB;AAJP,SAAAA;AAAA,GAAA;AAkBL,MAAM,8BAA8B,CACzC,UACA,UACA,YAAoB,KAAK,IAAI,OACF;AAAA,EAC3B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,+BAA+B,CAC1C,UACA,UACA,YAAoB,KAAK,IAAI,OACD;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AAaO,MAAM,kCAAkC,CAAC;AAAA,EAC9C;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,YAAY,KAAK,IAAI;AACvB,OAMkC;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAQO,MAAM,8BAA8B,CAAC;AAAA,EAC1C;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAG8B;AAAA,EAC5B,MAAM;AAAA,EACN;AAAA,EACA;AACF;AAQO,MAAM,mCAAmC,CAC9C,MACA,YAAoB,KAAK,IAAI,OACG;AAAA,EAChC,MAAM;AAAA,EACN;AAAA,EACA;AACF;AASO,MAAM,mCAAmC,CAAC;AAAA,EAC/C;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,MAIkC;AAChC,SAAO;AAAA,IACL,MAAM;AAAA,IACN;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAEO,MAAM,6BAA6B,CACxC,UAC8C;AAC9C,SAAO,MAAM,cAAc,IAAI,CAAC,MAAM,UAAU,CAAC,MAAM,MAAM,oBAAoB,KAAK,CAAE,CAAC;AAC3F;AAuBO,MAAM,2BAA2B,CAAC;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY,KAAK,IAAI;AACvB,OAK2B;AAAA,EACzB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,OACA,QACA,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;AASO,MAAM,mBAAmB,CAC9B,QACA,QAAoE,MACpE,YAAoB,KAAK,IAAI,OACb;AAAA,EAChB,MAAM;AAAA,EACN;AAAA,EACA;AAAA,EACA;AACF;","names":["AgentSessionEventTypes","CloseReason"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents",
3
- "version": "1.0.50",
3
+ "version": "1.0.51",
4
4
  "description": "LiveKit Agents - Node.js",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
package/src/index.ts CHANGED
@@ -9,33 +9,31 @@
9
9
  * @see {@link https://docs.livekit.io/agents/overview | LiveKit Agents documentation}
10
10
  * @packageDocumentation
11
11
  */
12
- import * as beta from './beta/index.js';
13
- import * as cli from './cli.js';
14
- import * as inference from './inference/index.js';
15
- import * as ipc from './ipc/index.js';
16
- import * as llm from './llm/index.js';
17
- import * as metrics from './metrics/index.js';
18
- import * as stream from './stream/index.js';
19
- import * as stt from './stt/index.js';
20
- import * as telemetry from './telemetry/index.js';
21
- import * as tokenize from './tokenize/index.js';
22
- import * as tts from './tts/index.js';
23
- import * as voice from './voice/index.js';
24
-
25
12
  export * from './_exceptions.js';
26
13
  export * from './audio.js';
14
+ export * as beta from './beta/index.js';
15
+ export * as cli from './cli.js';
27
16
  export * from './connection_pool.js';
28
17
  export * from './generator.js';
18
+ export * as inference from './inference/index.js';
29
19
  export * from './inference_runner.js';
20
+ export * as ipc from './ipc/index.js';
30
21
  export * from './job.js';
22
+ export * from './language.js';
23
+ export * as llm from './llm/index.js';
31
24
  export * from './log.js';
25
+ export * as metrics from './metrics/index.js';
32
26
  export * from './plugin.js';
27
+ export * as stream from './stream/index.js';
28
+ export * as stt from './stt/index.js';
29
+ export * as telemetry from './telemetry/index.js';
30
+ export * as tokenize from './tokenize/index.js';
33
31
  export * from './transcription.js';
32
+ export * as tts from './tts/index.js';
34
33
  export * from './types.js';
35
34
  export * from './utils.js';
36
35
  export * from './vad.js';
37
36
  export * from './version.js';
37
+ export * as voice from './voice/index.js';
38
38
  export { createTimedString, isTimedString, type TimedString } from './voice/io.js';
39
39
  export * from './worker.js';
40
-
41
- export { beta, cli, inference, ipc, llm, metrics, stream, stt, telemetry, tokenize, tts, voice };
@@ -2,16 +2,11 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import OpenAI from 'openai';
5
- import {
6
- APIConnectionError,
7
- APIStatusError,
8
- APITimeoutError,
9
- DEFAULT_API_CONNECT_OPTIONS,
10
- type Expand,
11
- toError,
12
- } from '../index.js';
5
+ import { APIConnectionError, APIStatusError, APITimeoutError } from '../_exceptions.js';
13
6
  import * as llm from '../llm/index.js';
7
+ import { DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
14
8
  import type { APIConnectOptions } from '../types.js';
9
+ import { type Expand, toError } from '../utils.js';
15
10
  import { type AnyString, createAccessToken } from './utils.js';
16
11
 
17
12
  const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { beforeAll, describe, expect, it } from 'vitest';
5
+ import { normalizeLanguage } from '../language.js';
5
6
  import { initializeLogger } from '../log.js';
6
7
  import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
7
8
  import { STT, type STTFallbackModel, normalizeSTTFallback, parseSTTModelString } from './stt.js';
@@ -34,6 +35,12 @@ describe('parseSTTModelString', () => {
34
35
  expect(language).toBe('en');
35
36
  });
36
37
 
38
+ it('normalizes language suffixes', () => {
39
+ const [model, language] = parseSTTModelString('deepgram:english');
40
+ expect(model).toBe('deepgram');
41
+ expect(language).toBe('en');
42
+ });
43
+
37
44
  it('provider/model format without language', () => {
38
45
  const [model, language] = parseSTTModelString('deepgram/nova-3');
39
46
  expect(model).toBe('deepgram/nova-3');
@@ -151,6 +158,16 @@ describe('normalizeSTTFallback', () => {
151
158
  });
152
159
 
153
160
  describe('STT constructor fallback and connOptions', () => {
161
+ it('normalizes language in constructor and model string', () => {
162
+ const stt = makeStt({ model: 'deepgram/nova-3:english' });
163
+ expect(stt['opts'].language).toBe('en');
164
+ });
165
+
166
+ it('prefers explicit normalized language over model suffix', () => {
167
+ const stt = makeStt({ model: 'deepgram/nova-3:english', language: 'en_US' });
168
+ expect(stt['opts'].language).toBe(normalizeLanguage('en_US'));
169
+ });
170
+
154
171
  it('fallback not given defaults to undefined', () => {
155
172
  const stt = makeStt();
156
173
  expect(stt['opts'].fallback).toBeUndefined();
@@ -5,6 +5,7 @@ import { type AudioFrame } from '@livekit/rtc-node';
5
5
  import type { WebSocket } from 'ws';
6
6
  import { APIError, APIStatusError } from '../_exceptions.js';
7
7
  import { AudioByteStream } from '../audio.js';
8
+ import { type LanguageCode, areLanguagesEquivalent, normalizeLanguage } from '../language.js';
8
9
  import { log } from '../log.js';
9
10
  import { createStreamChannel } from '../stream/stream_channel.js';
10
11
  import {
@@ -121,10 +122,10 @@ export interface STTFallbackModel {
121
122
  export type STTFallbackModelType = STTFallbackModel | string;
122
123
 
123
124
  /** Parse a model string into [model, language]. Language is undefined if not specified. */
124
- export function parseSTTModelString(model: string): [string, string | undefined] {
125
+ export function parseSTTModelString(model: string): [string, LanguageCode | undefined] {
125
126
  const idx = model.lastIndexOf(':');
126
127
  if (idx !== -1) {
127
- return [model.slice(0, idx), model.slice(idx + 1)];
128
+ return [model.slice(0, idx), normalizeLanguage(model.slice(idx + 1))];
128
129
  }
129
130
  return [model, undefined];
130
131
  }
@@ -156,7 +157,7 @@ const DEFAULT_CANCEL_TIMEOUT = 5000;
156
157
 
157
158
  export interface InferenceSTTOptions<TModel extends STTModels> {
158
159
  model?: TModel;
159
- language?: STTLanguages;
160
+ language?: LanguageCode;
160
161
  encoding: STTEncoding;
161
162
  sampleRate: number;
162
163
  baseURL: string;
@@ -219,25 +220,24 @@ export class STT<TModel extends STTModels> extends BaseSTT {
219
220
  let nextModel = model;
220
221
  let nextLanguage = language;
221
222
  if (typeof nextModel === 'string') {
222
- const idx = nextModel.lastIndexOf(':');
223
- if (idx !== -1) {
224
- const languageFromModel = nextModel.slice(idx + 1) as STTLanguages;
225
- if (nextLanguage && nextLanguage !== languageFromModel) {
223
+ const [parsedModel, parsedLanguage] = parseSTTModelString(nextModel);
224
+ if (parsedLanguage !== undefined) {
225
+ if (nextLanguage && !areLanguagesEquivalent(nextLanguage, parsedLanguage)) {
226
226
  this.#logger.warn(
227
227
  '`language` is provided via both argument and model, using the one from the argument',
228
228
  { language: nextLanguage, model: nextModel },
229
229
  );
230
230
  } else {
231
- nextLanguage = languageFromModel;
231
+ nextLanguage = parsedLanguage as STTLanguages;
232
232
  }
233
- nextModel = nextModel.slice(0, idx) as TModel;
233
+ nextModel = parsedModel as TModel;
234
234
  }
235
235
  }
236
236
  const normalizedFallback = fallback ? normalizeSTTFallback(fallback) : undefined;
237
237
 
238
238
  this.opts = {
239
239
  model: nextModel as TModel,
240
- language: nextLanguage,
240
+ language: nextLanguage ? normalizeLanguage(nextLanguage) : undefined,
241
241
  encoding,
242
242
  sampleRate,
243
243
  baseURL: lkBaseURL,
@@ -263,7 +263,11 @@ export class STT<TModel extends STTModels> extends BaseSTT {
263
263
  }
264
264
 
265
265
  updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
266
- this.opts = { ...this.opts, ...opts };
266
+ this.opts = {
267
+ ...this.opts,
268
+ ...opts,
269
+ language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
270
+ };
267
271
 
268
272
  for (const stream of this.streams) {
269
273
  stream.updateOptions(opts);
@@ -278,7 +282,7 @@ export class STT<TModel extends STTModels> extends BaseSTT {
278
282
  options || {};
279
283
  const streamOpts = {
280
284
  ...this.opts,
281
- language: language ?? this.opts.language,
285
+ language: language !== undefined ? normalizeLanguage(language) : this.opts.language,
282
286
  } as InferenceSTTOptions<TModel>;
283
287
 
284
288
  const stream = new SpeechStream(this, streamOpts, connOptions);
@@ -364,7 +368,11 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
364
368
  }
365
369
 
366
370
  updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
367
- this.opts = { ...this.opts, ...opts };
371
+ this.opts = {
372
+ ...this.opts,
373
+ ...opts,
374
+ language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
375
+ };
368
376
  this.reconnectEvent.set();
369
377
  }
370
378
 
@@ -569,7 +577,7 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
569
577
 
570
578
  const requestId = data.session_id || this.requestId;
571
579
  const text = data.transcript;
572
- const language = data.language || this.opts.language || 'en';
580
+ const language = normalizeLanguage(data.language || this.opts.language || 'en');
573
581
 
574
582
  if (!text && !isFinal) return;
575
583
 
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { beforeAll, describe, expect, it } from 'vitest';
5
+ import { normalizeLanguage } from '../language.js';
5
6
  import { initializeLogger } from '../log.js';
6
7
  import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
7
8
  import { TTS, type TTSFallbackModel, normalizeTTSFallback, parseTTSModelString } from './tts.js';
@@ -165,6 +166,17 @@ describe('normalizeTTSFallback', () => {
165
166
  });
166
167
 
167
168
  describe('TTS constructor fallback and connOptions', () => {
169
+ it('normalizes language in constructor', () => {
170
+ const tts = makeTts({ language: 'english' });
171
+ expect(tts['opts'].language).toBe('en');
172
+ });
173
+
174
+ it('normalizes updated language values', () => {
175
+ const tts = makeTts();
176
+ tts.updateOptions({ language: 'en_US' });
177
+ expect(tts['opts'].language).toBe(normalizeLanguage('en_US'));
178
+ });
179
+
168
180
  it('fallback not given defaults to undefined', () => {
169
181
  const tts = makeTts();
170
182
  expect(tts['opts'].fallback).toBeUndefined();