@livekit/agents 1.1.0-dev.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +2 -0
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +2 -0
- package/dist/cli.js.map +1 -1
- package/dist/constants.cjs +3 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +1 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +2 -0
- package/dist/constants.js.map +1 -1
- package/dist/cpu.cjs +189 -0
- package/dist/cpu.cjs.map +1 -0
- package/dist/cpu.d.cts +24 -0
- package/dist/cpu.d.ts +24 -0
- package/dist/cpu.d.ts.map +1 -0
- package/dist/cpu.js +152 -0
- package/dist/cpu.js.map +1 -0
- package/dist/cpu.test.cjs +227 -0
- package/dist/cpu.test.cjs.map +1 -0
- package/dist/cpu.test.js +204 -0
- package/dist/cpu.test.js.map +1 -0
- package/dist/index.cjs +12 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -13
- package/dist/index.d.ts +13 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -10
- package/dist/index.js.map +1 -1
- package/dist/inference/interruption/defaults.cjs +1 -1
- package/dist/inference/interruption/defaults.cjs.map +1 -1
- package/dist/inference/interruption/defaults.d.cts +1 -1
- package/dist/inference/interruption/defaults.d.ts +1 -1
- package/dist/inference/interruption/defaults.d.ts.map +1 -1
- package/dist/inference/interruption/defaults.js +1 -1
- package/dist/inference/interruption/defaults.js.map +1 -1
- package/dist/inference/interruption/http_transport.cjs +44 -28
- package/dist/inference/interruption/http_transport.cjs.map +1 -1
- package/dist/inference/interruption/http_transport.d.ts.map +1 -1
- package/dist/inference/interruption/http_transport.js +45 -29
- package/dist/inference/interruption/http_transport.js.map +1 -1
- package/dist/inference/interruption/interruption_detector.cjs +22 -5
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
- package/dist/inference/interruption/interruption_detector.d.cts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
- package/dist/inference/interruption/interruption_detector.js +22 -5
- package/dist/inference/interruption/interruption_detector.js.map +1 -1
- package/dist/inference/interruption/interruption_stream.cjs +4 -4
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
- package/dist/inference/interruption/interruption_stream.js +4 -4
- package/dist/inference/interruption/interruption_stream.js.map +1 -1
- package/dist/inference/interruption/types.cjs.map +1 -1
- package/dist/inference/interruption/types.d.cts +2 -2
- package/dist/inference/interruption/types.d.ts +2 -2
- package/dist/inference/interruption/types.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.cjs +60 -47
- package/dist/inference/interruption/ws_transport.cjs.map +1 -1
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.js +60 -47
- package/dist/inference/interruption/ws_transport.js.map +1 -1
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +20 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +3 -2
- package/dist/inference/stt.d.ts +3 -2
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +20 -12
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +14 -0
- package/dist/inference/stt.test.cjs.map +1 -1
- package/dist/inference/stt.test.js +14 -0
- package/dist/inference/stt.test.js.map +1 -1
- package/dist/inference/tts.cjs +13 -4
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +8 -1
- package/dist/inference/tts.d.ts +8 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +13 -4
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +10 -0
- package/dist/inference/tts.test.cjs.map +1 -1
- package/dist/inference/tts.test.js +10 -0
- package/dist/inference/tts.test.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +41 -23
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +41 -23
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +1 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.js +1 -1
- package/dist/job.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +6 -2
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +6 -2
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +2 -1
- package/dist/stt/stt.d.ts +2 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/utils.cjs +15 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +8 -0
- package/dist/utils.d.ts +8 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +13 -0
- package/dist/utils.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +14 -17
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -11
- package/dist/voice/agent.d.ts +10 -11
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +15 -18
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +194 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +195 -1
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +116 -39
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +2 -0
- package/dist/voice/agent_activity.d.ts +2 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +117 -40
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/agent_session.cjs +38 -38
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +65 -56
- package/dist/voice/agent_session.d.ts +65 -56
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +37 -37
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +106 -52
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +4 -2
- package/dist/voice/audio_recognition.d.ts +4 -2
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +106 -52
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +84 -22
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
- package/dist/voice/audio_recognition_span.test.js +90 -23
- package/dist/voice/audio_recognition_span.test.js.map +1 -1
- package/dist/voice/events.cjs +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +4 -3
- package/dist/voice/events.d.ts +4 -3
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -1
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/index.cjs +9 -1
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +10 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/remote_session.cjs +922 -0
- package/dist/voice/remote_session.cjs.map +1 -0
- package/dist/voice/remote_session.d.cts +108 -0
- package/dist/voice/remote_session.d.ts +108 -0
- package/dist/voice/remote_session.d.ts.map +1 -0
- package/dist/voice/remote_session.js +887 -0
- package/dist/voice/remote_session.js.map +1 -0
- package/dist/voice/report.cjs +11 -10
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -3
- package/dist/voice/report.d.ts +5 -3
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +11 -10
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +15 -0
- package/dist/voice/report.test.cjs.map +1 -1
- package/dist/voice/report.test.js +15 -0
- package/dist/voice/report.test.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +39 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +40 -1
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/interruption.cjs.map +1 -1
- package/dist/voice/turn_config/interruption.d.cts +1 -1
- package/dist/voice/turn_config/interruption.d.ts +1 -1
- package/dist/voice/turn_config/interruption.d.ts.map +1 -1
- package/dist/voice/turn_config/interruption.js.map +1 -1
- package/dist/voice/turn_config/utils.cjs +95 -35
- package/dist/voice/turn_config/utils.cjs.map +1 -1
- package/dist/voice/turn_config/utils.d.cts +17 -5
- package/dist/voice/turn_config/utils.d.ts +17 -5
- package/dist/voice/turn_config/utils.d.ts.map +1 -1
- package/dist/voice/turn_config/utils.js +93 -35
- package/dist/voice/turn_config/utils.js.map +1 -1
- package/dist/voice/turn_config/utils.test.cjs +83 -41
- package/dist/voice/turn_config/utils.test.cjs.map +1 -1
- package/dist/voice/turn_config/utils.test.js +84 -42
- package/dist/voice/turn_config/utils.test.js.map +1 -1
- package/dist/worker.cjs +6 -29
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +6 -19
- package/dist/worker.js.map +1 -1
- package/package.json +3 -2
- package/src/cli.ts +2 -0
- package/src/constants.ts +1 -0
- package/src/cpu.test.ts +239 -0
- package/src/cpu.ts +173 -0
- package/src/index.ts +13 -15
- package/src/inference/interruption/defaults.ts +1 -1
- package/src/inference/interruption/http_transport.ts +49 -30
- package/src/inference/interruption/interruption_detector.ts +22 -6
- package/src/inference/interruption/interruption_stream.ts +4 -4
- package/src/inference/interruption/types.ts +2 -2
- package/src/inference/interruption/ws_transport.ts +63 -59
- package/src/inference/llm.ts +3 -1
- package/src/inference/stt.test.ts +17 -0
- package/src/inference/stt.ts +22 -14
- package/src/inference/tts.test.ts +12 -0
- package/src/inference/tts.ts +22 -6
- package/src/ipc/job_proc_lazy_main.ts +44 -24
- package/src/job.ts +1 -1
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/llm/index.ts +2 -0
- package/src/stream/deferred_stream.ts +5 -1
- package/src/stt/stt.ts +2 -1
- package/src/utils.ts +20 -0
- package/src/voice/agent.test.ts +208 -1
- package/src/voice/agent.ts +21 -22
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +161 -43
- package/src/voice/agent_session.ts +103 -92
- package/src/voice/audio_recognition.ts +124 -61
- package/src/voice/audio_recognition_span.test.ts +115 -35
- package/src/voice/events.ts +4 -3
- package/src/voice/index.ts +10 -1
- package/src/voice/remote_session.ts +1083 -0
- package/src/voice/report.test.ts +22 -3
- package/src/voice/report.ts +31 -14
- package/src/voice/room_io/room_io.ts +52 -2
- package/src/voice/turn_config/interruption.ts +1 -1
- package/src/voice/turn_config/utils.test.ts +91 -43
- package/src/voice/turn_config/utils.ts +120 -56
- package/src/worker.ts +34 -50
- package/dist/voice/client_events.cjs +0 -554
- package/dist/voice/client_events.cjs.map +0 -1
- package/dist/voice/client_events.d.cts +0 -195
- package/dist/voice/client_events.d.ts +0 -195
- package/dist/voice/client_events.d.ts.map +0 -1
- package/dist/voice/client_events.js +0 -548
- package/dist/voice/client_events.js.map +0 -1
- package/dist/voice/wire_format.cjs +0 -798
- package/dist/voice/wire_format.cjs.map +0 -1
- package/dist/voice/wire_format.d.cts +0 -5503
- package/dist/voice/wire_format.d.ts +0 -5503
- package/dist/voice/wire_format.d.ts.map +0 -1
- package/dist/voice/wire_format.js +0 -728
- package/dist/voice/wire_format.js.map +0 -1
- package/src/voice/client_events.ts +0 -838
- package/src/voice/wire_format.ts +0 -827
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/inference/interruption/http_transport.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { ofetch } from 'ofetch';\nimport { TransformStream } from 'stream/web';\nimport { z } from 'zod';\nimport { log } from '../../log.js';\nimport { createAccessToken } from '../utils.js';\nimport { intervalForRetry } from './defaults.js';\nimport { InterruptionCacheEntry } from './interruption_cache_entry.js';\nimport type { OverlappingSpeechEvent } from './types.js';\nimport type { BoundedCache } from './utils.js';\n\nexport interface PostOptions {\n baseUrl: string;\n token: string;\n signal?: AbortSignal;\n timeout?: number;\n maxRetries?: number;\n}\n\nexport interface PredictOptions {\n threshold: number;\n minFrames: number;\n}\n\nexport const predictEndpointResponseSchema = z.object({\n created_at: z.number(),\n is_bargein: z.boolean(),\n probabilities: z.array(z.number()),\n});\n\nexport type PredictEndpointResponse = z.infer<typeof predictEndpointResponseSchema>;\n\nexport interface PredictResponse {\n createdAt: number;\n isBargein: boolean;\n probabilities: number[];\n predictionDurationInS: number;\n}\n\nexport async function predictHTTP(\n data: Int16Array,\n predictOptions: PredictOptions,\n options: PostOptions,\n): Promise<PredictResponse> {\n const createdAt = performance.now();\n const url = new URL(`/bargein`, options.baseUrl);\n url.searchParams.append('threshold', predictOptions.threshold.toString());\n url.searchParams.append('min_frames', predictOptions.minFrames.toFixed());\n url.searchParams.append('created_at', createdAt.toFixed());\n\n let retryCount = 0;\n const response = await ofetch(url.toString(), {\n retry: options.maxRetries ?? 3,\n retryDelay: () => {\n const delay = intervalForRetry(retryCount);\n retryCount++;\n return delay;\n },\n headers: {\n 'Content-Type': 'application/octet-stream',\n Authorization: `Bearer ${options.token}`,\n },\n signal: options.signal,\n timeout: options.timeout,\n method: 'POST',\n body: data,\n });\n const { created_at, is_bargein, probabilities } = predictEndpointResponseSchema.parse(response);\n\n return {\n createdAt: created_at,\n isBargein: is_bargein,\n probabilities,\n predictionDurationInS: (performance.now() - createdAt) / 1000,\n };\n}\n\nexport interface HttpTransportOptions {\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n threshold: number;\n minFrames: number;\n timeout: number;\n maxRetries?: number;\n}\n\nexport interface HttpTransportState {\n overlapSpeechStarted: boolean;\n overlapSpeechStartedAt: number | undefined;\n cache: BoundedCache<number, InterruptionCacheEntry>;\n}\n\n/**\n * Creates an HTTP transport TransformStream for interruption detection.\n *\n * This transport receives Int16Array audio slices and outputs InterruptionEvents.\n * Each audio slice triggers an HTTP POST request.\n *\n * @param options - Transport options object. This is read on each request, so mutations\n * to threshold/minFrames will be picked up dynamically.\n */\nexport function createHttpTransport(\n options: HttpTransportOptions,\n getState: () => HttpTransportState,\n setState: (partial: Partial<HttpTransportState>) => void,\n updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void,\n getAndResetNumRequests?: () => number,\n): TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent> {\n const logger = log();\n\n return new TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>(\n {\n async transform(chunk, controller) {\n if (!(chunk instanceof Int16Array)) {\n controller.enqueue(chunk);\n return;\n }\n\n const state = getState();\n const overlapSpeechStartedAt = state.overlapSpeechStartedAt;\n if (overlapSpeechStartedAt === undefined || !state.overlapSpeechStarted) return;\n\n try {\n const resp = await predictHTTP(\n chunk,\n { threshold: options.threshold, minFrames: options.minFrames },\n {\n baseUrl: options.baseUrl,\n timeout: options.timeout,\n maxRetries: options.maxRetries,\n token: await createAccessToken(options.apiKey, options.apiSecret),\n },\n );\n\n const { createdAt, isBargein, probabilities, predictionDurationInS } = resp;\n const entry = state.cache.setOrUpdate(\n createdAt,\n () => new InterruptionCacheEntry({ createdAt }),\n {\n probabilities,\n isInterruption: isBargein,\n speechInput: chunk,\n totalDurationInS: (performance.now() - createdAt) / 1000,\n detectionDelayInS: (Date.now() - overlapSpeechStartedAt) / 1000,\n predictionDurationInS,\n },\n );\n\n if (state.overlapSpeechStarted && entry.isInterruption) {\n if (updateUserSpeakingSpan) {\n updateUserSpeakingSpan(entry);\n }\n const event: OverlappingSpeechEvent = {\n type: 'user_overlapping_speech',\n timestamp: Date.now(),\n overlapStartedAt: overlapSpeechStartedAt,\n isInterruption: entry.isInterruption,\n speechInput: entry.speechInput,\n probabilities: entry.probabilities,\n totalDurationInS: entry.totalDurationInS,\n predictionDurationInS: entry.predictionDurationInS,\n detectionDelayInS: entry.detectionDelayInS,\n probability: entry.probability,\n numRequests: getAndResetNumRequests?.() ?? 0,\n };\n logger.debug(\n {\n detectionDelayInS: entry.detectionDelayInS,\n totalDurationInS: entry.totalDurationInS,\n },\n 'interruption detected',\n );\n setState({ overlapSpeechStarted: false });\n controller.enqueue(event);\n }\n } catch (err) {\n logger.error({ err }, 'Failed to send audio data over HTTP');\n }\n },\n },\n { highWaterMark: 2 },\n { highWaterMark: 2 },\n );\n}\n"],"mappings":"AAGA,SAAS,cAAc;AACvB,SAAS,uBAAuB;AAChC,SAAS,SAAS;AAClB,SAAS,WAAW;AACpB,SAAS,yBAAyB;AAClC,SAAS,wBAAwB;AACjC,SAAS,8BAA8B;AAiBhC,MAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,YAAY,EAAE,OAAO;AAAA,EACrB,YAAY,EAAE,QAAQ;AAAA,EACtB,eAAe,EAAE,MAAM,EAAE,OAAO,CAAC;AACnC,CAAC;AAWD,eAAsB,YACpB,MACA,gBACA,SAC0B;AAC1B,QAAM,YAAY,YAAY,IAAI;AAClC,QAAM,MAAM,IAAI,IAAI,YAAY,QAAQ,OAAO;AAC/C,MAAI,aAAa,OAAO,aAAa,eAAe,UAAU,SAAS,CAAC;AACxE,MAAI,aAAa,OAAO,cAAc,eAAe,UAAU,QAAQ,CAAC;AACxE,MAAI,aAAa,OAAO,cAAc,UAAU,QAAQ,CAAC;AAEzD,MAAI,aAAa;AACjB,QAAM,WAAW,MAAM,OAAO,IAAI,SAAS,GAAG;AAAA,IAC5C,OAAO,QAAQ,cAAc;AAAA,IAC7B,YAAY,MAAM;AAChB,YAAM,QAAQ,iBAAiB,UAAU;AACzC;AACA,aAAO;AAAA,IACT;AAAA,IACA,SAAS;AAAA,MACP,gBAAgB;AAAA,MAChB,eAAe,UAAU,QAAQ,KAAK;AAAA,IACxC;AAAA,IACA,QAAQ,QAAQ;AAAA,IAChB,SAAS,QAAQ;AAAA,IACjB,QAAQ;AAAA,IACR,MAAM;AAAA,EACR,CAAC;AACD,QAAM,EAAE,YAAY,YAAY,cAAc,IAAI,8BAA8B,MAAM,QAAQ;AAE9F,SAAO;AAAA,IACL,WAAW;AAAA,IACX,WAAW;AAAA,IACX;AAAA,IACA,wBAAwB,YAAY,IAAI,IAAI,aAAa;AAAA,EAC3D;AACF;AA2BO,SAAS,oBACd,SACA,UACA,UACA,wBACA,wBAC8E;AAC9E,QAAM,SAAS,IAAI;AAEnB,SAAO,IAAI;AAAA,IACT;AAAA,MACE,MAAM,UAAU,OAAO,YAAY;AACjC,YAAI,EAAE,iBAAiB,aAAa;AAClC,qBAAW,QAAQ,KAAK;AACxB;AAAA,QACF;AAEA,cAAM,QAAQ,SAAS;AACvB,cAAM,yBAAyB,MAAM;AACrC,YAAI,2BAA2B,UAAa,CAAC,MAAM,qBAAsB;AAEzE,YAAI;AACF,gBAAM,OAAO,MAAM;AAAA,YACjB;AAAA,YACA,EAAE,WAAW,QAAQ,WAAW,WAAW,QAAQ,UAAU;AAAA,YAC7D;AAAA,cACE,SAAS,QAAQ;AAAA,cACjB,SAAS,QAAQ;AAAA,cACjB,YAAY,QAAQ;AAAA,cACpB,OAAO,MAAM,kBAAkB,QAAQ,QAAQ,QAAQ,SAAS;AAAA,YAClE;AAAA,UACF;AAEA,gBAAM,EAAE,WAAW,WAAW,eAAe,sBAAsB,IAAI;AACvE,gBAAM,QAAQ,MAAM,MAAM;AAAA,YACxB;AAAA,YACA,MAAM,IAAI,uBAAuB,EAAE,UAAU,CAAC;AAAA,YAC9C;AAAA,cACE;AAAA,cACA,gBAAgB;AAAA,cAChB,aAAa;AAAA,cACb,mBAAmB,YAAY,IAAI,IAAI,aAAa;AAAA,cACpD,oBAAoB,KAAK,IAAI,IAAI,0BAA0B;AAAA,cAC3D;AAAA,YACF;AAAA,UACF;AAEA,cAAI,MAAM,wBAAwB,MAAM,gBAAgB;AACtD,gBAAI,wBAAwB;AAC1B,qCAAuB,KAAK;AAAA,YAC9B;AACA,kBAAM,QAAgC;AAAA,cACpC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,kBAAkB;AAAA,cAClB,gBAAgB,MAAM;AAAA,cACtB,aAAa,MAAM;AAAA,cACnB,eAAe,MAAM;AAAA,cACrB,kBAAkB,MAAM;AAAA,cACxB,uBAAuB,MAAM;AAAA,cAC7B,mBAAmB,MAAM;AAAA,cACzB,aAAa,MAAM;AAAA,cACnB,cAAa,uEAA8B;AAAA,YAC7C;AACA,mBAAO;AAAA,cACL;AAAA,gBACE,mBAAmB,MAAM;AAAA,gBACzB,kBAAkB,MAAM;AAAA,cAC1B;AAAA,cACA;AAAA,YACF;AACA,qBAAS,EAAE,sBAAsB,MAAM,CAAC;AACxC,uBAAW,QAAQ,KAAK;AAAA,UAC1B;AAAA,QACF,SAAS,KAAK;AACZ,iBAAO,MAAM,EAAE,IAAI,GAAG,qCAAqC;AAAA,QAC7D;AAAA,MACF;AAAA,IACF;AAAA,IACA,EAAE,eAAe,EAAE;AAAA,IACnB,EAAE,eAAe,EAAE;AAAA,EACrB;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../../src/inference/interruption/http_transport.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { FetchError, ofetch } from 'ofetch';\nimport { TransformStream } from 'stream/web';\nimport { z } from 'zod';\nimport { APIConnectionError, APIError, APIStatusError, isAPIError } from '../../_exceptions.js';\nimport { log } from '../../log.js';\nimport { createAccessToken } from '../utils.js';\nimport { InterruptionCacheEntry } from './interruption_cache_entry.js';\nimport type { OverlappingSpeechEvent } from './types.js';\nimport type { BoundedCache } from './utils.js';\n\nexport interface PostOptions {\n baseUrl: string;\n token: string;\n signal?: AbortSignal;\n timeout?: number;\n maxRetries?: number;\n}\n\nexport interface PredictOptions {\n threshold: number;\n minFrames: number;\n}\n\nexport const predictEndpointResponseSchema = z.object({\n created_at: z.number(),\n is_bargein: z.boolean(),\n probabilities: z.array(z.number()),\n});\n\nexport type PredictEndpointResponse = z.infer<typeof predictEndpointResponseSchema>;\n\nexport interface PredictResponse {\n createdAt: number;\n isBargein: boolean;\n probabilities: number[];\n predictionDurationInS: number;\n}\n\nexport async function predictHTTP(\n data: Int16Array,\n predictOptions: PredictOptions,\n options: PostOptions,\n): Promise<PredictResponse> {\n const createdAt = performance.now();\n const url = new URL(`/bargein`, options.baseUrl);\n url.searchParams.append('threshold', predictOptions.threshold.toString());\n url.searchParams.append('min_frames', predictOptions.minFrames.toFixed());\n url.searchParams.append('created_at', createdAt.toFixed());\n\n try {\n const response = await ofetch(url.toString(), {\n retry: 0,\n headers: {\n 'Content-Type': 'application/octet-stream',\n Authorization: `Bearer ${options.token}`,\n },\n signal: options.signal,\n timeout: options.timeout,\n method: 'POST',\n body: data,\n });\n const { created_at, is_bargein, probabilities } = predictEndpointResponseSchema.parse(response);\n\n return {\n createdAt: created_at,\n isBargein: is_bargein,\n probabilities,\n predictionDurationInS: (performance.now() - createdAt) / 1000,\n };\n } catch (err) {\n if (isAPIError(err)) throw err;\n if (err instanceof FetchError) {\n if (err.statusCode) {\n throw new APIStatusError({\n message: `error during interruption prediction: ${err.message}`,\n options: { statusCode: err.statusCode, body: err.data },\n });\n }\n if (\n err.cause instanceof Error &&\n (err.cause.name === 'TimeoutError' || err.cause.name === 'AbortError')\n ) {\n throw new APIStatusError({\n message: `interruption inference timeout: ${err.message}`,\n options: { statusCode: 408, retryable: false },\n });\n }\n throw new APIConnectionError({\n message: `interruption inference connection error: ${err.message}`,\n });\n }\n throw new APIError(`error during interruption prediction: ${err}`);\n }\n}\n\nexport interface HttpTransportOptions {\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n threshold: number;\n minFrames: number;\n timeout: number;\n maxRetries?: number;\n}\n\nexport interface HttpTransportState {\n overlapSpeechStarted: boolean;\n overlapSpeechStartedAt: number | undefined;\n cache: BoundedCache<number, InterruptionCacheEntry>;\n}\n\n/**\n * Creates an HTTP transport TransformStream for interruption detection.\n *\n * This transport receives Int16Array audio slices and outputs InterruptionEvents.\n * Each audio slice triggers an HTTP POST request.\n *\n * @param options - Transport options object. This is read on each request, so mutations\n * to threshold/minFrames will be picked up dynamically.\n */\nexport function createHttpTransport(\n options: HttpTransportOptions,\n getState: () => HttpTransportState,\n setState: (partial: Partial<HttpTransportState>) => void,\n updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void,\n getAndResetNumRequests?: () => number,\n): TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent> {\n const logger = log();\n\n return new TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>(\n {\n async transform(chunk, controller) {\n if (!(chunk instanceof Int16Array)) {\n controller.enqueue(chunk);\n return;\n }\n\n const state = getState();\n const overlapSpeechStartedAt = state.overlapSpeechStartedAt;\n if (overlapSpeechStartedAt === undefined || !state.overlapSpeechStarted) return;\n\n try {\n const resp = await predictHTTP(\n chunk,\n { threshold: options.threshold, minFrames: options.minFrames },\n {\n baseUrl: options.baseUrl,\n timeout: options.timeout,\n maxRetries: options.maxRetries,\n token: await createAccessToken(options.apiKey, options.apiSecret),\n },\n );\n\n const { createdAt, isBargein, probabilities, predictionDurationInS } = resp;\n const entry = state.cache.setOrUpdate(\n createdAt,\n () => new InterruptionCacheEntry({ createdAt }),\n {\n probabilities,\n isInterruption: isBargein,\n speechInput: chunk,\n totalDurationInS: (performance.now() - createdAt) / 1000,\n detectionDelayInS: (Date.now() - overlapSpeechStartedAt) / 1000,\n predictionDurationInS,\n },\n );\n\n if (state.overlapSpeechStarted && entry.isInterruption) {\n if (updateUserSpeakingSpan) {\n updateUserSpeakingSpan(entry);\n }\n const event: OverlappingSpeechEvent = {\n type: 'overlapping_speech',\n detectedAt: Date.now(),\n overlapStartedAt: overlapSpeechStartedAt,\n isInterruption: entry.isInterruption,\n speechInput: entry.speechInput,\n probabilities: entry.probabilities,\n totalDurationInS: entry.totalDurationInS,\n predictionDurationInS: entry.predictionDurationInS,\n detectionDelayInS: entry.detectionDelayInS,\n probability: entry.probability,\n numRequests: getAndResetNumRequests?.() ?? 0,\n };\n logger.debug(\n {\n detectionDelayInS: entry.detectionDelayInS,\n totalDurationInS: entry.totalDurationInS,\n },\n 'interruption detected',\n );\n setState({ overlapSpeechStarted: false });\n controller.enqueue(event);\n }\n } catch (err) {\n controller.error(err);\n }\n },\n },\n { highWaterMark: 2 },\n { highWaterMark: 2 },\n );\n}\n"],"mappings":"AAGA,SAAS,YAAY,cAAc;AACnC,SAAS,uBAAuB;AAChC,SAAS,SAAS;AAClB,SAAS,oBAAoB,UAAU,gBAAgB,kBAAkB;AACzE,SAAS,WAAW;AACpB,SAAS,yBAAyB;AAClC,SAAS,8BAA8B;AAiBhC,MAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,YAAY,EAAE,OAAO;AAAA,EACrB,YAAY,EAAE,QAAQ;AAAA,EACtB,eAAe,EAAE,MAAM,EAAE,OAAO,CAAC;AACnC,CAAC;AAWD,eAAsB,YACpB,MACA,gBACA,SAC0B;AAC1B,QAAM,YAAY,YAAY,IAAI;AAClC,QAAM,MAAM,IAAI,IAAI,YAAY,QAAQ,OAAO;AAC/C,MAAI,aAAa,OAAO,aAAa,eAAe,UAAU,SAAS,CAAC;AACxE,MAAI,aAAa,OAAO,cAAc,eAAe,UAAU,QAAQ,CAAC;AACxE,MAAI,aAAa,OAAO,cAAc,UAAU,QAAQ,CAAC;AAEzD,MAAI;AACF,UAAM,WAAW,MAAM,OAAO,IAAI,SAAS,GAAG;AAAA,MAC5C,OAAO;AAAA,MACP,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,eAAe,UAAU,QAAQ,KAAK;AAAA,MACxC;AAAA,MACA,QAAQ,QAAQ;AAAA,MAChB,SAAS,QAAQ;AAAA,MACjB,QAAQ;AAAA,MACR,MAAM;AAAA,IACR,CAAC;AACD,UAAM,EAAE,YAAY,YAAY,cAAc,IAAI,8BAA8B,MAAM,QAAQ;AAE9F,WAAO;AAAA,MACL,WAAW;AAAA,MACX,WAAW;AAAA,MACX;AAAA,MACA,wBAAwB,YAAY,IAAI,IAAI,aAAa;AAAA,IAC3D;AAAA,EACF,SAAS,KAAK;AACZ,QAAI,WAAW,GAAG,EAAG,OAAM;AAC3B,QAAI,eAAe,YAAY;AAC7B,UAAI,IAAI,YAAY;AAClB,cAAM,IAAI,eAAe;AAAA,UACvB,SAAS,yCAAyC,IAAI,OAAO;AAAA,UAC7D,SAAS,EAAE,YAAY,IAAI,YAAY,MAAM,IAAI,KAAK;AAAA,QACxD,CAAC;AAAA,MACH;AACA,UACE,IAAI,iBAAiB,UACpB,IAAI,MAAM,SAAS,kBAAkB,IAAI,MAAM,SAAS,eACzD;AACA,cAAM,IAAI,eAAe;AAAA,UACvB,SAAS,mCAAmC,IAAI,OAAO;AAAA,UACvD,SAAS,EAAE,YAAY,KAAK,WAAW,MAAM;AAAA,QAC/C,CAAC;AAAA,MACH;AACA,YAAM,IAAI,mBAAmB;AAAA,QAC3B,SAAS,4CAA4C,IAAI,OAAO;AAAA,MAClE,CAAC;AAAA,IACH;AACA,UAAM,IAAI,SAAS,yCAAyC,GAAG,EAAE;AAAA,EACnE;AACF;AA2BO,SAAS,oBACd,SACA,UACA,UACA,wBACA,wBAC8E;AAC9E,QAAM,SAAS,IAAI;AAEnB,SAAO,IAAI;AAAA,IACT;AAAA,MACE,MAAM,UAAU,OAAO,YAAY;AACjC,YAAI,EAAE,iBAAiB,aAAa;AAClC,qBAAW,QAAQ,KAAK;AACxB;AAAA,QACF;AAEA,cAAM,QAAQ,SAAS;AACvB,cAAM,yBAAyB,MAAM;AACrC,YAAI,2BAA2B,UAAa,CAAC,MAAM,qBAAsB;AAEzE,YAAI;AACF,gBAAM,OAAO,MAAM;AAAA,YACjB;AAAA,YACA,EAAE,WAAW,QAAQ,WAAW,WAAW,QAAQ,UAAU;AAAA,YAC7D;AAAA,cACE,SAAS,QAAQ;AAAA,cACjB,SAAS,QAAQ;AAAA,cACjB,YAAY,QAAQ;AAAA,cACpB,OAAO,MAAM,kBAAkB,QAAQ,QAAQ,QAAQ,SAAS;AAAA,YAClE;AAAA,UACF;AAEA,gBAAM,EAAE,WAAW,WAAW,eAAe,sBAAsB,IAAI;AACvE,gBAAM,QAAQ,MAAM,MAAM;AAAA,YACxB;AAAA,YACA,MAAM,IAAI,uBAAuB,EAAE,UAAU,CAAC;AAAA,YAC9C;AAAA,cACE;AAAA,cACA,gBAAgB;AAAA,cAChB,aAAa;AAAA,cACb,mBAAmB,YAAY,IAAI,IAAI,aAAa;AAAA,cACpD,oBAAoB,KAAK,IAAI,IAAI,0BAA0B;AAAA,cAC3D;AAAA,YACF;AAAA,UACF;AAEA,cAAI,MAAM,wBAAwB,MAAM,gBAAgB;AACtD,gBAAI,wBAAwB;AAC1B,qCAAuB,KAAK;AAAA,YAC9B;AACA,kBAAM,QAAgC;AAAA,cACpC,MAAM;AAAA,cACN,YAAY,KAAK,IAAI;AAAA,cACrB,kBAAkB;AAAA,cAClB,gBAAgB,MAAM;AAAA,cACtB,aAAa,MAAM;AAAA,cACnB,eAAe,MAAM;AAAA,cACrB,kBAAkB,MAAM;AAAA,cACxB,uBAAuB,MAAM;AAAA,cAC7B,mBAAmB,MAAM;AAAA,cACzB,aAAa,MAAM;AAAA,cACnB,cAAa,uEAA8B;AAAA,YAC7C;AACA,mBAAO;AAAA,cACL;AAAA,gBACE,mBAAmB,MAAM;AAAA,gBACzB,kBAAkB,MAAM;AAAA,cAC1B;AAAA,cACA;AAAA,YACF;AACA,qBAAS,EAAE,sBAAsB,MAAM,CAAC;AACxC,uBAAW,QAAQ,KAAK;AAAA,UAC1B;AAAA,QACF,SAAS,KAAK;AACZ,qBAAW,MAAM,GAAG;AAAA,QACtB;AAAA,MACF;AAAA,IACF;AAAA,IACA,EAAE,eAAe,EAAE;AAAA,IACnB,EAAE,eAAe,EAAE;AAAA,EACrB;AACF;","names":[]}
|
|
@@ -35,6 +35,7 @@ var import_events = __toESM(require("events"), 1);
|
|
|
35
35
|
var import_log = require("../../log.cjs");
|
|
36
36
|
var import_utils = require("../utils.cjs");
|
|
37
37
|
var import_defaults = require("./defaults.cjs");
|
|
38
|
+
var import_errors = require("./errors.cjs");
|
|
38
39
|
var import_interruption_stream = require("./interruption_stream.cjs");
|
|
39
40
|
class AdaptiveInterruptionDetector extends import_events.default {
|
|
40
41
|
options;
|
|
@@ -80,6 +81,15 @@ class AdaptiveInterruptionDetector extends import_events.default {
|
|
|
80
81
|
} else {
|
|
81
82
|
useProxy = false;
|
|
82
83
|
}
|
|
84
|
+
const transport = useProxy ? "websocket" : "http";
|
|
85
|
+
this.logger.debug(
|
|
86
|
+
{
|
|
87
|
+
baseUrl: lkBaseUrl,
|
|
88
|
+
useProxy,
|
|
89
|
+
transport
|
|
90
|
+
},
|
|
91
|
+
"=== Resolved interruption detector transport configuration"
|
|
92
|
+
);
|
|
83
93
|
this.options = {
|
|
84
94
|
sampleRate: import_defaults.SAMPLE_RATE,
|
|
85
95
|
threshold,
|
|
@@ -104,9 +114,10 @@ class AdaptiveInterruptionDetector extends import_events.default {
|
|
|
104
114
|
minFrames: this.options.minFrames,
|
|
105
115
|
threshold: this.options.threshold,
|
|
106
116
|
inferenceTimeout: this.options.inferenceTimeout,
|
|
107
|
-
useProxy: this.options.useProxy
|
|
117
|
+
useProxy: this.options.useProxy,
|
|
118
|
+
transport
|
|
108
119
|
},
|
|
109
|
-
"
|
|
120
|
+
"=== Adaptive interruption detector initialized"
|
|
110
121
|
);
|
|
111
122
|
}
|
|
112
123
|
/**
|
|
@@ -145,9 +156,15 @@ class AdaptiveInterruptionDetector extends import_events.default {
|
|
|
145
156
|
* Use this when you need direct access to the stream for pushing frames.
|
|
146
157
|
*/
|
|
147
158
|
createStream() {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
159
|
+
try {
|
|
160
|
+
const streamBase = new import_interruption_stream.InterruptionStreamBase(this, {});
|
|
161
|
+
this.streams.add(streamBase);
|
|
162
|
+
return streamBase;
|
|
163
|
+
} catch (e) {
|
|
164
|
+
const cause = e instanceof Error ? e : new Error(String(e));
|
|
165
|
+
this.emitError(new import_errors.InterruptionDetectionError(cause.message, Date.now(), this._label, false));
|
|
166
|
+
throw e;
|
|
167
|
+
}
|
|
151
168
|
}
|
|
152
169
|
/**
|
|
153
170
|
* Remove a stream from tracking (called when stream is closed).
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/inference/interruption/interruption_detector.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'events';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';\nimport { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';\nimport
|
|
1
|
+
{"version":3,"sources":["../../../src/inference/interruption/interruption_detector.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'events';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';\nimport { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';\nimport { InterruptionDetectionError } from './errors.js';\nimport { InterruptionStreamBase } from './interruption_stream.js';\nimport type { InterruptionOptions, OverlappingSpeechEvent } from './types.js';\n\ntype InterruptionCallbacks = {\n overlapping_speech: (event: OverlappingSpeechEvent) => void;\n metrics_collected: (metrics: InterruptionMetrics) => void;\n error: (error: InterruptionDetectionError) => void;\n};\n\nexport type AdaptiveInterruptionDetectorOptions = Omit<Partial<InterruptionOptions>, 'useProxy'>;\n\nexport class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter<InterruptionCallbacks>) {\n options: InterruptionOptions;\n private readonly _label: string;\n private logger = log();\n // Use Set instead of WeakSet to allow iteration for propagating option updates\n private streams: Set<InterruptionStreamBase> = new Set();\n\n constructor(options: AdaptiveInterruptionDetectorOptions = {}) {\n super();\n\n const {\n maxAudioDurationInS,\n baseUrl,\n apiKey,\n apiSecret,\n audioPrefixDurationInS,\n threshold,\n detectionIntervalInS,\n inferenceTimeout,\n minInterruptionDurationInS,\n } = { ...interruptionOptionDefaults, ...options };\n\n if (maxAudioDurationInS > 3.0) {\n throw new RangeError('maxAudioDurationInS must be less than or equal to 3.0 seconds');\n }\n\n const lkBaseUrl = baseUrl ?? process.env.LIVEKIT_REMOTE_EOT_URL ?? getDefaultInferenceUrl();\n let lkApiKey = apiKey ?? '';\n let lkApiSecret = apiSecret ?? '';\n let useProxy: boolean;\n\n // Use LiveKit credentials if using the inference service (production or staging)\n const isInferenceUrl =\n lkBaseUrl === DEFAULT_INFERENCE_URL || lkBaseUrl === STAGING_INFERENCE_URL;\n if (isInferenceUrl) {\n lkApiKey =\n apiKey ?? process.env.LIVEKIT_INFERENCE_API_KEY ?? process.env.LIVEKIT_API_KEY ?? '';\n if (!lkApiKey) {\n throw new TypeError(\n 'apiKey is required, either as argument or set LIVEKIT_API_KEY environmental variable',\n );\n }\n\n lkApiSecret =\n apiSecret ??\n process.env.LIVEKIT_INFERENCE_API_SECRET ??\n process.env.LIVEKIT_API_SECRET ??\n '';\n if (!lkApiSecret) {\n throw new TypeError(\n 'apiSecret is required, either as argument or set LIVEKIT_API_SECRET environmental variable',\n );\n }\n useProxy = true;\n } else {\n useProxy = false;\n }\n const transport = useProxy ? 'websocket' : 'http';\n this.logger.debug(\n {\n baseUrl: lkBaseUrl,\n useProxy,\n transport,\n },\n '=== Resolved interruption detector transport configuration',\n );\n\n this.options = {\n sampleRate: SAMPLE_RATE,\n threshold,\n minFrames: Math.ceil(minInterruptionDurationInS * FRAMES_PER_SECOND),\n maxAudioDurationInS,\n audioPrefixDurationInS,\n detectionIntervalInS,\n inferenceTimeout,\n baseUrl: lkBaseUrl,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n useProxy,\n minInterruptionDurationInS,\n };\n\n this._label = `${this.constructor.name}`;\n\n this.logger.debug(\n {\n baseUrl: this.options.baseUrl,\n detectionIntervalInS: this.options.detectionIntervalInS,\n audioPrefixDurationInS: this.options.audioPrefixDurationInS,\n maxAudioDurationInS: this.options.maxAudioDurationInS,\n minFrames: this.options.minFrames,\n threshold: this.options.threshold,\n inferenceTimeout: this.options.inferenceTimeout,\n useProxy: this.options.useProxy,\n transport,\n },\n '=== Adaptive interruption detector initialized',\n );\n }\n\n /**\n * The model identifier for this detector.\n */\n get model(): string {\n return 'adaptive interruption';\n }\n\n /**\n * The provider identifier for this detector.\n */\n get provider(): string {\n return 'livekit';\n }\n\n /**\n * The label for this detector instance.\n */\n get label(): string {\n return this._label;\n }\n\n /**\n * The sample rate used for audio processing.\n */\n get sampleRate(): number {\n return this.options.sampleRate;\n }\n\n /**\n * Emit an error event from the detector.\n */\n emitError(error: InterruptionDetectionError): void {\n this.emit('error', error);\n }\n\n /**\n * Creates a new InterruptionStreamBase for internal use.\n * The stream can receive audio frames and sentinels via pushFrame().\n * Use this when you need direct access to the stream for pushing frames.\n */\n createStream(): InterruptionStreamBase {\n try {\n const streamBase = new InterruptionStreamBase(this, {});\n this.streams.add(streamBase);\n return streamBase;\n } catch (e) {\n const cause = e instanceof Error ? e : new Error(String(e));\n this.emitError(new InterruptionDetectionError(cause.message, Date.now(), this._label, false));\n throw e;\n }\n }\n\n /**\n * Remove a stream from tracking (called when stream is closed).\n */\n removeStream(stream: InterruptionStreamBase): void {\n this.streams.delete(stream);\n }\n\n /**\n * Update options for the detector and propagate to all active streams.\n * For WebSocket streams, this triggers a reconnection with new settings.\n */\n async updateOptions(options: {\n threshold?: number;\n minInterruptionDurationInS?: number;\n }): Promise<void> {\n if (options.threshold !== undefined) {\n this.options.threshold = options.threshold;\n }\n if (options.minInterruptionDurationInS !== undefined) {\n this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;\n this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);\n }\n\n // Propagate option updates to all active streams (matching Python behavior)\n const updatePromises: Promise<void>[] = [];\n for (const stream of this.streams) {\n updatePromises.push(stream.updateOptions(options));\n }\n await Promise.all(updatePromises);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,oBAAyB;AACzB,iBAAoB;AAEpB,mBAAqF;AACrF,sBAA2E;AAC3E,oBAA2C;AAC3C,iCAAuC;AAWhC,MAAM,qCAAsC,cAAAA,QAAoE;AAAA,EACrH;AAAA,EACiB;AAAA,EACT,aAAS,gBAAI;AAAA;AAAA,EAEb,UAAuC,oBAAI,IAAI;AAAA,EAEvD,YAAY,UAA+C,CAAC,GAAG;AAC7D,UAAM;AAEN,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,IAAI,EAAE,GAAG,4CAA4B,GAAG,QAAQ;AAEhD,QAAI,sBAAsB,GAAK;AAC7B,YAAM,IAAI,WAAW,+DAA+D;AAAA,IACtF;AAEA,UAAM,YAAY,WAAW,QAAQ,IAAI,8BAA0B,qCAAuB;AAC1F,QAAI,WAAW,UAAU;AACzB,QAAI,cAAc,aAAa;AAC/B,QAAI;AAGJ,UAAM,iBACJ,cAAc,sCAAyB,cAAc;AACvD,QAAI,gBAAgB;AAClB,iBACE,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI,mBAAmB;AACpF,UAAI,CAAC,UAAU;AACb,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAEA,oBACE,aACA,QAAQ,IAAI,gCACZ,QAAQ,IAAI,sBACZ;AACF,UAAI,CAAC,aAAa;AAChB,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AACA,iBAAW;AAAA,IACb,OAAO;AACL,iBAAW;AAAA,IACb;AACA,UAAM,YAAY,WAAW,cAAc;AAC3C,SAAK,OAAO;AAAA,MACV;AAAA,QACE,SAAS;AAAA,QACT;AAAA,QACA;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,SAAK,UAAU;AAAA,MACb,YAAY;AAAA,MACZ;AAAA,MACA,WAAW,KAAK,KAAK,6BAA6B,iCAAiB;AAAA,MACnE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,MACA;AAAA,IACF;AAEA,SAAK,SAAS,GAAG,KAAK,YAAY,IAAI;AAEtC,SAAK,OAAO;AAAA,MACV;AAAA,QACE,SAAS,KAAK,QAAQ;AAAA,QACtB,sBAAsB,KAAK,QAAQ;AAAA,QACnC,wBAAwB,KAAK,QAAQ;AAAA,QACrC,qBAAqB,KAAK,QAAQ;AAAA,QAClC,WAAW,KAAK,QAAQ;AAAA,QACxB,WAAW,KAAK,QAAQ;AAAA,QACxB,kBAAkB,KAAK,QAAQ;AAAA,QAC/B,UAAU,KAAK,QAAQ;AAAA,QACvB;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,WAAmB;AACrB,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,QAAgB;AAClB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,aAAqB;AACvB,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA,EAKA,UAAU,OAAyC;AACjD,SAAK,KAAK,SAAS,KAAK;AAAA,EAC1B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,eAAuC;AACrC,QAAI;AACF,YAAM,aAAa,IAAI,kDAAuB,MAAM,CAAC,CAAC;AACtD,WAAK,QAAQ,IAAI,UAAU;AAC3B,aAAO;AAAA,IACT,SAAS,GAAG;AACV,YAAM,QAAQ,aAAa,QAAQ,IAAI,IAAI,MAAM,OAAO,CAAC,CAAC;AAC1D,WAAK,UAAU,IAAI,yCAA2B,MAAM,SAAS,KAAK,IAAI,GAAG,KAAK,QAAQ,KAAK,CAAC;AAC5F,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,aAAa,QAAsC;AACjD,SAAK,QAAQ,OAAO,MAAM;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,cAAc,SAGF;AAChB,QAAI,QAAQ,cAAc,QAAW;AACnC,WAAK,QAAQ,YAAY,QAAQ;AAAA,IACnC;AACA,QAAI,QAAQ,+BAA+B,QAAW;AACpD,WAAK,QAAQ,6BAA6B,QAAQ;AAClD,WAAK,QAAQ,YAAY,KAAK,KAAK,QAAQ,6BAA6B,iCAAiB;AAAA,IAC3F;AAGA,UAAM,iBAAkC,CAAC;AACzC,eAAW,UAAU,KAAK,SAAS;AACjC,qBAAe,KAAK,OAAO,cAAc,OAAO,CAAC;AAAA,IACnD;AACA,UAAM,QAAQ,IAAI,cAAc;AAAA,EAClC;AACF;","names":["EventEmitter"]}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import type { TypedEventEmitter } from '@livekit/typed-emitter';
|
|
2
2
|
import type { InterruptionMetrics } from '../../metrics/base.js';
|
|
3
|
-
import
|
|
3
|
+
import { InterruptionDetectionError } from './errors.js';
|
|
4
4
|
import { InterruptionStreamBase } from './interruption_stream.js';
|
|
5
5
|
import type { InterruptionOptions, OverlappingSpeechEvent } from './types.js';
|
|
6
6
|
type InterruptionCallbacks = {
|
|
7
|
-
|
|
7
|
+
overlapping_speech: (event: OverlappingSpeechEvent) => void;
|
|
8
8
|
metrics_collected: (metrics: InterruptionMetrics) => void;
|
|
9
9
|
error: (error: InterruptionDetectionError) => void;
|
|
10
10
|
};
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import type { TypedEventEmitter } from '@livekit/typed-emitter';
|
|
2
2
|
import type { InterruptionMetrics } from '../../metrics/base.js';
|
|
3
|
-
import
|
|
3
|
+
import { InterruptionDetectionError } from './errors.js';
|
|
4
4
|
import { InterruptionStreamBase } from './interruption_stream.js';
|
|
5
5
|
import type { InterruptionOptions, OverlappingSpeechEvent } from './types.js';
|
|
6
6
|
type InterruptionCallbacks = {
|
|
7
|
-
|
|
7
|
+
overlapping_speech: (event: OverlappingSpeechEvent) => void;
|
|
8
8
|
metrics_collected: (metrics: InterruptionMetrics) => void;
|
|
9
9
|
error: (error: InterruptionDetectionError) => void;
|
|
10
10
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"interruption_detector.d.ts","sourceRoot":"","sources":["../../../src/inference/interruption/interruption_detector.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAGhE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAGjE,OAAO,
|
|
1
|
+
{"version":3,"file":"interruption_detector.d.ts","sourceRoot":"","sources":["../../../src/inference/interruption/interruption_detector.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAGhE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAGjE,OAAO,EAAE,0BAA0B,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAClE,OAAO,KAAK,EAAE,mBAAmB,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAE9E,KAAK,qBAAqB,GAAG;IAC3B,kBAAkB,EAAE,CAAC,KAAK,EAAE,sBAAsB,KAAK,IAAI,CAAC;IAC5D,iBAAiB,EAAE,CAAC,OAAO,EAAE,mBAAmB,KAAK,IAAI,CAAC;IAC1D,KAAK,EAAE,CAAC,KAAK,EAAE,0BAA0B,KAAK,IAAI,CAAC;CACpD,CAAC;AAEF,MAAM,MAAM,mCAAmC,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,CAAC,EAAE,UAAU,CAAC,CAAC;2DAEpB,kBAAkB,qBAAqB,CAAC;AAArH,qBAAa,4BAA6B,SAAQ,iCAAoE;IACpH,OAAO,EAAE,mBAAmB,CAAC;IAC7B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,MAAM,CAAS;IAEvB,OAAO,CAAC,OAAO,CAA0C;gBAE7C,OAAO,GAAE,mCAAwC;IA6F7D;;OAEG;IACH,IAAI,KAAK,IAAI,MAAM,CAElB;IAED;;OAEG;IACH,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED;;OAEG;IACH,IAAI,KAAK,IAAI,MAAM,CAElB;IAED;;OAEG;IACH,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED;;OAEG;IACH,SAAS,CAAC,KAAK,EAAE,0BAA0B,GAAG,IAAI;IAIlD;;;;OAIG;IACH,YAAY,IAAI,sBAAsB;IAYtC;;OAEG;IACH,YAAY,CAAC,MAAM,EAAE,sBAAsB,GAAG,IAAI;IAIlD;;;OAGG;IACG,aAAa,CAAC,OAAO,EAAE;QAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,0BAA0B,CAAC,EAAE,MAAM,CAAC;KACrC,GAAG,OAAO,CAAC,IAAI,CAAC;CAgBlB"}
|
|
@@ -2,6 +2,7 @@ import EventEmitter from "events";
|
|
|
2
2
|
import { log } from "../../log.js";
|
|
3
3
|
import { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from "../utils.js";
|
|
4
4
|
import { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from "./defaults.js";
|
|
5
|
+
import { InterruptionDetectionError } from "./errors.js";
|
|
5
6
|
import { InterruptionStreamBase } from "./interruption_stream.js";
|
|
6
7
|
class AdaptiveInterruptionDetector extends EventEmitter {
|
|
7
8
|
options;
|
|
@@ -47,6 +48,15 @@ class AdaptiveInterruptionDetector extends EventEmitter {
|
|
|
47
48
|
} else {
|
|
48
49
|
useProxy = false;
|
|
49
50
|
}
|
|
51
|
+
const transport = useProxy ? "websocket" : "http";
|
|
52
|
+
this.logger.debug(
|
|
53
|
+
{
|
|
54
|
+
baseUrl: lkBaseUrl,
|
|
55
|
+
useProxy,
|
|
56
|
+
transport
|
|
57
|
+
},
|
|
58
|
+
"=== Resolved interruption detector transport configuration"
|
|
59
|
+
);
|
|
50
60
|
this.options = {
|
|
51
61
|
sampleRate: SAMPLE_RATE,
|
|
52
62
|
threshold,
|
|
@@ -71,9 +81,10 @@ class AdaptiveInterruptionDetector extends EventEmitter {
|
|
|
71
81
|
minFrames: this.options.minFrames,
|
|
72
82
|
threshold: this.options.threshold,
|
|
73
83
|
inferenceTimeout: this.options.inferenceTimeout,
|
|
74
|
-
useProxy: this.options.useProxy
|
|
84
|
+
useProxy: this.options.useProxy,
|
|
85
|
+
transport
|
|
75
86
|
},
|
|
76
|
-
"
|
|
87
|
+
"=== Adaptive interruption detector initialized"
|
|
77
88
|
);
|
|
78
89
|
}
|
|
79
90
|
/**
|
|
@@ -112,9 +123,15 @@ class AdaptiveInterruptionDetector extends EventEmitter {
|
|
|
112
123
|
* Use this when you need direct access to the stream for pushing frames.
|
|
113
124
|
*/
|
|
114
125
|
createStream() {
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
126
|
+
try {
|
|
127
|
+
const streamBase = new InterruptionStreamBase(this, {});
|
|
128
|
+
this.streams.add(streamBase);
|
|
129
|
+
return streamBase;
|
|
130
|
+
} catch (e) {
|
|
131
|
+
const cause = e instanceof Error ? e : new Error(String(e));
|
|
132
|
+
this.emitError(new InterruptionDetectionError(cause.message, Date.now(), this._label, false));
|
|
133
|
+
throw e;
|
|
134
|
+
}
|
|
118
135
|
}
|
|
119
136
|
/**
|
|
120
137
|
* Remove a stream from tracking (called when stream is closed).
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/inference/interruption/interruption_detector.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'events';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';\nimport { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';\nimport
|
|
1
|
+
{"version":3,"sources":["../../../src/inference/interruption/interruption_detector.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { TypedEventEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'events';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';\nimport { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';\nimport { InterruptionDetectionError } from './errors.js';\nimport { InterruptionStreamBase } from './interruption_stream.js';\nimport type { InterruptionOptions, OverlappingSpeechEvent } from './types.js';\n\ntype InterruptionCallbacks = {\n overlapping_speech: (event: OverlappingSpeechEvent) => void;\n metrics_collected: (metrics: InterruptionMetrics) => void;\n error: (error: InterruptionDetectionError) => void;\n};\n\nexport type AdaptiveInterruptionDetectorOptions = Omit<Partial<InterruptionOptions>, 'useProxy'>;\n\nexport class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter<InterruptionCallbacks>) {\n options: InterruptionOptions;\n private readonly _label: string;\n private logger = log();\n // Use Set instead of WeakSet to allow iteration for propagating option updates\n private streams: Set<InterruptionStreamBase> = new Set();\n\n constructor(options: AdaptiveInterruptionDetectorOptions = {}) {\n super();\n\n const {\n maxAudioDurationInS,\n baseUrl,\n apiKey,\n apiSecret,\n audioPrefixDurationInS,\n threshold,\n detectionIntervalInS,\n inferenceTimeout,\n minInterruptionDurationInS,\n } = { ...interruptionOptionDefaults, ...options };\n\n if (maxAudioDurationInS > 3.0) {\n throw new RangeError('maxAudioDurationInS must be less than or equal to 3.0 seconds');\n }\n\n const lkBaseUrl = baseUrl ?? process.env.LIVEKIT_REMOTE_EOT_URL ?? getDefaultInferenceUrl();\n let lkApiKey = apiKey ?? '';\n let lkApiSecret = apiSecret ?? '';\n let useProxy: boolean;\n\n // Use LiveKit credentials if using the inference service (production or staging)\n const isInferenceUrl =\n lkBaseUrl === DEFAULT_INFERENCE_URL || lkBaseUrl === STAGING_INFERENCE_URL;\n if (isInferenceUrl) {\n lkApiKey =\n apiKey ?? process.env.LIVEKIT_INFERENCE_API_KEY ?? process.env.LIVEKIT_API_KEY ?? '';\n if (!lkApiKey) {\n throw new TypeError(\n 'apiKey is required, either as argument or set LIVEKIT_API_KEY environmental variable',\n );\n }\n\n lkApiSecret =\n apiSecret ??\n process.env.LIVEKIT_INFERENCE_API_SECRET ??\n process.env.LIVEKIT_API_SECRET ??\n '';\n if (!lkApiSecret) {\n throw new TypeError(\n 'apiSecret is required, either as argument or set LIVEKIT_API_SECRET environmental variable',\n );\n }\n useProxy = true;\n } else {\n useProxy = false;\n }\n const transport = useProxy ? 'websocket' : 'http';\n this.logger.debug(\n {\n baseUrl: lkBaseUrl,\n useProxy,\n transport,\n },\n '=== Resolved interruption detector transport configuration',\n );\n\n this.options = {\n sampleRate: SAMPLE_RATE,\n threshold,\n minFrames: Math.ceil(minInterruptionDurationInS * FRAMES_PER_SECOND),\n maxAudioDurationInS,\n audioPrefixDurationInS,\n detectionIntervalInS,\n inferenceTimeout,\n baseUrl: lkBaseUrl,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n useProxy,\n minInterruptionDurationInS,\n };\n\n this._label = `${this.constructor.name}`;\n\n this.logger.debug(\n {\n baseUrl: this.options.baseUrl,\n detectionIntervalInS: this.options.detectionIntervalInS,\n audioPrefixDurationInS: this.options.audioPrefixDurationInS,\n maxAudioDurationInS: this.options.maxAudioDurationInS,\n minFrames: this.options.minFrames,\n threshold: this.options.threshold,\n inferenceTimeout: this.options.inferenceTimeout,\n useProxy: this.options.useProxy,\n transport,\n },\n '=== Adaptive interruption detector initialized',\n );\n }\n\n /**\n * The model identifier for this detector.\n */\n get model(): string {\n return 'adaptive interruption';\n }\n\n /**\n * The provider identifier for this detector.\n */\n get provider(): string {\n return 'livekit';\n }\n\n /**\n * The label for this detector instance.\n */\n get label(): string {\n return this._label;\n }\n\n /**\n * The sample rate used for audio processing.\n */\n get sampleRate(): number {\n return this.options.sampleRate;\n }\n\n /**\n * Emit an error event from the detector.\n */\n emitError(error: InterruptionDetectionError): void {\n this.emit('error', error);\n }\n\n /**\n * Creates a new InterruptionStreamBase for internal use.\n * The stream can receive audio frames and sentinels via pushFrame().\n * Use this when you need direct access to the stream for pushing frames.\n */\n createStream(): InterruptionStreamBase {\n try {\n const streamBase = new InterruptionStreamBase(this, {});\n this.streams.add(streamBase);\n return streamBase;\n } catch (e) {\n const cause = e instanceof Error ? e : new Error(String(e));\n this.emitError(new InterruptionDetectionError(cause.message, Date.now(), this._label, false));\n throw e;\n }\n }\n\n /**\n * Remove a stream from tracking (called when stream is closed).\n */\n removeStream(stream: InterruptionStreamBase): void {\n this.streams.delete(stream);\n }\n\n /**\n * Update options for the detector and propagate to all active streams.\n * For WebSocket streams, this triggers a reconnection with new settings.\n */\n async updateOptions(options: {\n threshold?: number;\n minInterruptionDurationInS?: number;\n }): Promise<void> {\n if (options.threshold !== undefined) {\n this.options.threshold = options.threshold;\n }\n if (options.minInterruptionDurationInS !== undefined) {\n this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;\n this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);\n }\n\n // Propagate option updates to all active streams (matching Python behavior)\n const updatePromises: Promise<void>[] = [];\n for (const stream of this.streams) {\n updatePromises.push(stream.updateOptions(options));\n }\n await Promise.all(updatePromises);\n }\n}\n"],"mappings":"AAIA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AAEpB,SAAS,uBAAuB,uBAAuB,8BAA8B;AACrF,SAAS,mBAAmB,aAAa,kCAAkC;AAC3E,SAAS,kCAAkC;AAC3C,SAAS,8BAA8B;AAWhC,MAAM,qCAAsC,aAAoE;AAAA,EACrH;AAAA,EACiB;AAAA,EACT,SAAS,IAAI;AAAA;AAAA,EAEb,UAAuC,oBAAI,IAAI;AAAA,EAEvD,YAAY,UAA+C,CAAC,GAAG;AAC7D,UAAM;AAEN,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,IAAI,EAAE,GAAG,4BAA4B,GAAG,QAAQ;AAEhD,QAAI,sBAAsB,GAAK;AAC7B,YAAM,IAAI,WAAW,+DAA+D;AAAA,IACtF;AAEA,UAAM,YAAY,WAAW,QAAQ,IAAI,0BAA0B,uBAAuB;AAC1F,QAAI,WAAW,UAAU;AACzB,QAAI,cAAc,aAAa;AAC/B,QAAI;AAGJ,UAAM,iBACJ,cAAc,yBAAyB,cAAc;AACvD,QAAI,gBAAgB;AAClB,iBACE,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI,mBAAmB;AACpF,UAAI,CAAC,UAAU;AACb,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAEA,oBACE,aACA,QAAQ,IAAI,gCACZ,QAAQ,IAAI,sBACZ;AACF,UAAI,CAAC,aAAa;AAChB,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AACA,iBAAW;AAAA,IACb,OAAO;AACL,iBAAW;AAAA,IACb;AACA,UAAM,YAAY,WAAW,cAAc;AAC3C,SAAK,OAAO;AAAA,MACV;AAAA,QACE,SAAS;AAAA,QACT;AAAA,QACA;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAEA,SAAK,UAAU;AAAA,MACb,YAAY;AAAA,MACZ;AAAA,MACA,WAAW,KAAK,KAAK,6BAA6B,iBAAiB;AAAA,MACnE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,MACA;AAAA,IACF;AAEA,SAAK,SAAS,GAAG,KAAK,YAAY,IAAI;AAEtC,SAAK,OAAO;AAAA,MACV;AAAA,QACE,SAAS,KAAK,QAAQ;AAAA,QACtB,sBAAsB,KAAK,QAAQ;AAAA,QACnC,wBAAwB,KAAK,QAAQ;AAAA,QACrC,qBAAqB,KAAK,QAAQ;AAAA,QAClC,WAAW,KAAK,QAAQ;AAAA,QACxB,WAAW,KAAK,QAAQ;AAAA,QACxB,kBAAkB,KAAK,QAAQ;AAAA,QAC/B,UAAU,KAAK,QAAQ;AAAA,QACvB;AAAA,MACF;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,WAAmB;AACrB,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,QAAgB;AAClB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,aAAqB;AACvB,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA,EAKA,UAAU,OAAyC;AACjD,SAAK,KAAK,SAAS,KAAK;AAAA,EAC1B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,eAAuC;AACrC,QAAI;AACF,YAAM,aAAa,IAAI,uBAAuB,MAAM,CAAC,CAAC;AACtD,WAAK,QAAQ,IAAI,UAAU;AAC3B,aAAO;AAAA,IACT,SAAS,GAAG;AACV,YAAM,QAAQ,aAAa,QAAQ,IAAI,IAAI,MAAM,OAAO,CAAC,CAAC;AAC1D,WAAK,UAAU,IAAI,2BAA2B,MAAM,SAAS,KAAK,IAAI,GAAG,KAAK,QAAQ,KAAK,CAAC;AAC5F,YAAM;AAAA,IACR;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,aAAa,QAAsC;AACjD,SAAK,QAAQ,OAAO,MAAM;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,cAAc,SAGF;AAChB,QAAI,QAAQ,cAAc,QAAW;AACnC,WAAK,QAAQ,YAAY,QAAQ;AAAA,IACnC;AACA,QAAI,QAAQ,+BAA+B,QAAW;AACpD,WAAK,QAAQ,6BAA6B,QAAQ;AAClD,WAAK,QAAQ,YAAY,KAAK,KAAK,QAAQ,6BAA6B,iBAAiB;AAAA,IAC3F;AAGA,UAAM,iBAAkC,CAAC;AACzC,eAAW,UAAU,KAAK,SAAS;AACjC,qBAAe,KAAK,OAAO,cAAc,OAAO,CAAC;AAAA,IACnD;AACA,UAAM,QAAQ,IAAI,cAAc;AAAA,EAClC;AACF;","names":[]}
|
|
@@ -216,8 +216,8 @@ class InterruptionStreamBase {
|
|
|
216
216
|
}
|
|
217
217
|
const e = latestEntry ?? import_interruption_cache_entry.InterruptionCacheEntry.default();
|
|
218
218
|
const event = {
|
|
219
|
-
type: "
|
|
220
|
-
|
|
219
|
+
type: "overlapping_speech",
|
|
220
|
+
detectedAt: chunk.endedAt,
|
|
221
221
|
isInterruption: false,
|
|
222
222
|
overlapStartedAt: this.overlapSpeechStartedAt,
|
|
223
223
|
speechInput: e.speechInput,
|
|
@@ -264,10 +264,10 @@ class InterruptionStreamBase {
|
|
|
264
264
|
}
|
|
265
265
|
const eventEmitter = new import_web.TransformStream({
|
|
266
266
|
transform: (chunk, controller) => {
|
|
267
|
-
this.model.emit("
|
|
267
|
+
this.model.emit("overlapping_speech", chunk);
|
|
268
268
|
const metrics = {
|
|
269
269
|
type: "interruption_metrics",
|
|
270
|
-
timestamp: chunk.
|
|
270
|
+
timestamp: chunk.detectedAt,
|
|
271
271
|
totalDuration: chunk.totalDurationInS * 1e3,
|
|
272
272
|
predictionDuration: chunk.predictionDurationInS * 1e3,
|
|
273
273
|
detectionDelay: chunk.detectionDelayInS * 1e3,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/inference/interruption/interruption_stream.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { Span } from '@opentelemetry/api';\nimport { type ReadableStream, TransformStream } from 'stream/web';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';\nimport { traceTypes } from '../../telemetry/index.js';\nimport { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js';\nimport type { InterruptionDetectionError } from './errors.js';\nimport { createHttpTransport } from './http_transport.js';\nimport { InterruptionCacheEntry } from './interruption_cache_entry.js';\nimport type { AdaptiveInterruptionDetector } from './interruption_detector.js';\nimport {\n type AgentSpeechEnded,\n type AgentSpeechStarted,\n type ApiConnectOptions,\n type Flush,\n type InterruptionOptions,\n type InterruptionSentinel,\n type OverlapSpeechEnded,\n type OverlapSpeechStarted,\n type OverlappingSpeechEvent,\n} from './types.js';\nimport { BoundedCache } from './utils.js';\nimport { createWsTransport } from './ws_transport.js';\n\n// Re-export sentinel types for backwards compatibility\nexport type {\n AgentSpeechEnded,\n AgentSpeechStarted,\n ApiConnectOptions,\n Flush,\n InterruptionSentinel,\n OverlapSpeechEnded,\n OverlapSpeechStarted,\n};\n\nexport class InterruptionStreamSentinel {\n static agentSpeechStarted(): AgentSpeechStarted {\n return { type: 'agent-speech-started' };\n }\n\n static agentSpeechEnded(): AgentSpeechEnded {\n return { type: 'agent-speech-ended' };\n }\n\n static overlapSpeechStarted(\n speechDuration: number,\n startedAt: number,\n userSpeakingSpan?: Span,\n ): OverlapSpeechStarted {\n return { type: 'overlap-speech-started', speechDuration, startedAt, userSpeakingSpan };\n }\n\n static overlapSpeechEnded(endedAt: number): OverlapSpeechEnded {\n return { type: 'overlap-speech-ended', endedAt };\n }\n\n static flush(): Flush {\n return { type: 'flush' };\n }\n}\n\nfunction updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) {\n span.setAttribute(\n traceTypes.ATTR_IS_INTERRUPTION,\n (entry.isInterruption ?? false).toString().toLowerCase(),\n );\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS);\n}\n\nexport class InterruptionStreamBase {\n private inputStream: StreamChannel<InterruptionSentinel | AudioFrame, InterruptionDetectionError>;\n\n private eventStream: ReadableStream<OverlappingSpeechEvent>;\n\n private resampler?: AudioResampler;\n\n private numRequests = 0;\n\n private userSpeakingSpan: Span | undefined;\n\n private overlapSpeechStartedAt: number | undefined;\n\n private options: InterruptionOptions;\n\n private apiOptions: ApiConnectOptions;\n\n private model: AdaptiveInterruptionDetector;\n\n private logger = log();\n\n // Store reconnect function for WebSocket transport\n private wsReconnect?: () => Promise<void>;\n\n // Mutable transport options that can be updated via updateOptions()\n private transportOptions: {\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n sampleRate: number;\n threshold: number;\n minFrames: number;\n timeout: number;\n maxRetries: number;\n };\n\n constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial<ApiConnectOptions>) {\n this.inputStream = createStreamChannel<\n InterruptionSentinel | AudioFrame,\n InterruptionDetectionError\n >();\n\n this.model = model;\n this.options = { ...model.options };\n this.apiOptions = { ...apiConnectDefaults, ...apiOptions };\n\n // Initialize mutable transport options\n this.transportOptions = {\n baseUrl: this.options.baseUrl,\n apiKey: this.options.apiKey,\n apiSecret: this.options.apiSecret,\n sampleRate: this.options.sampleRate,\n threshold: this.options.threshold,\n minFrames: this.options.minFrames,\n timeout: this.options.inferenceTimeout,\n maxRetries: this.apiOptions.maxRetries,\n };\n\n this.eventStream = this.setupTransform();\n }\n\n /**\n * Update stream options. For WebSocket transport, this triggers a reconnection.\n */\n async updateOptions(options: {\n threshold?: number;\n minInterruptionDurationInS?: number;\n }): Promise<void> {\n if (options.threshold !== undefined) {\n this.options.threshold = options.threshold;\n this.transportOptions.threshold = options.threshold;\n }\n if (options.minInterruptionDurationInS !== undefined) {\n this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;\n this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);\n this.transportOptions.minFrames = this.options.minFrames;\n }\n // Trigger WebSocket reconnection if using proxy (WebSocket transport)\n if (this.options.useProxy && this.wsReconnect) {\n await this.wsReconnect();\n }\n }\n\n private setupTransform(): ReadableStream<OverlappingSpeechEvent> {\n let agentSpeechStarted = false;\n let startIdx = 0;\n let accumulatedSamples = 0;\n let overlapSpeechStarted = false;\n let overlapCount = 0;\n const cache = new BoundedCache<number, InterruptionCacheEntry>(10);\n const inferenceS16Data = new Int16Array(\n Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate),\n ).fill(0);\n\n // State accessors for transport\n const getState = () => ({\n overlapSpeechStarted,\n overlapSpeechStartedAt: this.overlapSpeechStartedAt,\n cache,\n overlapCount,\n });\n const setState = (partial: { overlapSpeechStarted?: boolean }) => {\n if (partial.overlapSpeechStarted !== undefined) {\n overlapSpeechStarted = partial.overlapSpeechStarted;\n }\n };\n const handleSpanUpdate = (entry: InterruptionCacheEntry) => {\n if (this.userSpeakingSpan) {\n updateUserSpeakingSpan(this.userSpeakingSpan, entry);\n this.userSpeakingSpan = undefined;\n }\n };\n\n const onRequestSent = () => {\n this.numRequests++;\n };\n\n const getAndResetNumRequests = (): number => {\n const n = this.numRequests;\n this.numRequests = 0;\n return n;\n };\n\n // First transform: process input frames/sentinels and output audio slices or events\n const audioTransformer = new TransformStream<\n InterruptionSentinel | AudioFrame,\n Int16Array | OverlappingSpeechEvent\n >(\n {\n transform: (chunk, controller) => {\n if (chunk instanceof AudioFrame) {\n if (!agentSpeechStarted) {\n return;\n }\n if (this.options.sampleRate !== chunk.sampleRate) {\n controller.error('the sample rate of the input frames must be consistent');\n this.logger.error('the sample rate of the input frames must be consistent');\n return;\n }\n const result = writeToInferenceS16Data(\n chunk,\n startIdx,\n inferenceS16Data,\n this.options.maxAudioDurationInS,\n );\n startIdx = result.startIdx;\n accumulatedSamples += result.samplesWritten;\n\n if (\n accumulatedSamples >=\n Math.floor(this.options.detectionIntervalInS * this.options.sampleRate) &&\n overlapSpeechStarted\n ) {\n const audioSlice = inferenceS16Data.slice(0, startIdx);\n accumulatedSamples = 0;\n controller.enqueue(audioSlice);\n }\n } else if (chunk.type === 'agent-speech-started') {\n this.logger.debug('agent speech started');\n agentSpeechStarted = true;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'agent-speech-ended') {\n this.logger.debug('agent speech ended');\n agentSpeechStarted = false;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) {\n this.overlapSpeechStartedAt = chunk.startedAt;\n this.userSpeakingSpan = chunk.userSpeakingSpan;\n this.logger.debug('overlap speech started, starting interruption inference');\n overlapSpeechStarted = true;\n accumulatedSamples = 0;\n overlapCount += 1;\n if (overlapCount <= 1) {\n const keepSize =\n Math.round((chunk.speechDuration / 1000) * this.options.sampleRate) +\n Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate);\n const shiftCount = Math.max(0, startIdx - keepSize);\n inferenceS16Data.copyWithin(0, shiftCount, startIdx);\n startIdx -= shiftCount;\n }\n cache.clear();\n } else if (chunk.type === 'overlap-speech-ended') {\n this.logger.debug('overlap speech ended');\n if (overlapSpeechStarted) {\n this.userSpeakingSpan = undefined;\n let latestEntry = cache.pop(\n (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0,\n );\n if (!latestEntry) {\n this.logger.debug('no request made for overlap speech');\n latestEntry = InterruptionCacheEntry.default();\n }\n const e = latestEntry ?? InterruptionCacheEntry.default();\n const event: OverlappingSpeechEvent = {\n type: 'user_overlapping_speech',\n timestamp: chunk.endedAt,\n isInterruption: false,\n overlapStartedAt: this.overlapSpeechStartedAt,\n speechInput: e.speechInput,\n probabilities: e.probabilities,\n totalDurationInS: e.totalDurationInS,\n detectionDelayInS: e.detectionDelayInS,\n predictionDurationInS: e.predictionDurationInS,\n probability: e.probability,\n numRequests: getAndResetNumRequests(),\n };\n controller.enqueue(event);\n overlapSpeechStarted = false;\n accumulatedSamples = 0;\n }\n this.overlapSpeechStartedAt = undefined;\n } else if (chunk.type === 'flush') {\n // no-op\n }\n },\n },\n { highWaterMark: 32 },\n { highWaterMark: 32 },\n );\n\n // Second transform: transport layer (HTTP or WebSocket based on useProxy)\n const transportOptions = this.transportOptions;\n\n let transport: TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>;\n if (this.options.useProxy) {\n const wsResult = createWsTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n onRequestSent,\n getAndResetNumRequests,\n );\n transport = wsResult.transport;\n this.wsReconnect = wsResult.reconnect;\n } else {\n transport = createHttpTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n getAndResetNumRequests,\n );\n }\n\n const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({\n transform: (chunk, controller) => {\n this.model.emit('user_overlapping_speech', chunk);\n\n const metrics: InterruptionMetrics = {\n type: 'interruption_metrics',\n timestamp: chunk.timestamp,\n totalDuration: chunk.totalDurationInS * 1000,\n predictionDuration: chunk.predictionDurationInS * 1000,\n detectionDelay: chunk.detectionDelayInS * 1000,\n numInterruptions: chunk.isInterruption ? 1 : 0,\n numBackchannels: chunk.isInterruption ? 0 : 1,\n numRequests: chunk.numRequests,\n metadata: {\n modelProvider: this.model.provider,\n modelName: this.model.model,\n },\n };\n this.model.emit('metrics_collected', metrics);\n\n controller.enqueue(chunk);\n },\n });\n\n // Pipeline: input -> audioTransformer -> transport -> eventEmitter -> eventStream\n return this.inputStream\n .stream()\n .pipeThrough(audioTransformer)\n .pipeThrough(transport)\n .pipeThrough(eventEmitter);\n }\n\n private ensureInputNotEnded() {\n if (this.inputStream.closed) {\n throw new Error('input stream is closed');\n }\n }\n\n private ensureStreamsNotEnded() {\n this.ensureInputNotEnded();\n }\n\n private getResamplerFor(inputSampleRate: number): AudioResampler {\n if (!this.resampler) {\n this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate);\n }\n return this.resampler;\n }\n\n stream(): ReadableStream<OverlappingSpeechEvent> {\n return this.eventStream;\n }\n\n async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise<void> {\n this.ensureStreamsNotEnded();\n if (!(frame instanceof AudioFrame)) {\n return this.inputStream.write(frame);\n } else if (this.options.sampleRate !== frame.sampleRate) {\n const resampler = this.getResamplerFor(frame.sampleRate);\n if (resampler.inputRate !== frame.sampleRate) {\n throw new Error('the sample rate of the input frames must be consistent');\n }\n for (const resampledFrame of resampler.push(frame)) {\n await this.inputStream.write(resampledFrame);\n }\n } else {\n await this.inputStream.write(frame);\n }\n }\n\n async flush(): Promise<void> {\n this.ensureStreamsNotEnded();\n await this.inputStream.write(InterruptionStreamSentinel.flush());\n }\n\n async endInput(): Promise<void> {\n await this.flush();\n await this.inputStream.close();\n }\n\n async close(): Promise<void> {\n if (!this.inputStream.closed) await this.inputStream.close();\n this.model.removeStream(this);\n }\n}\n\n/**\n * Write the audio frame to the output data array and return the new start index\n * and the number of samples written.\n */\nfunction writeToInferenceS16Data(\n frame: AudioFrame,\n startIdx: number,\n outData: Int16Array,\n maxAudioDuration: number,\n): { startIdx: number; samplesWritten: number } {\n const maxWindowSize = Math.floor(maxAudioDuration * frame.sampleRate);\n\n if (frame.samplesPerChannel > outData.length) {\n throw new Error('frame samples are greater than the max window size');\n }\n\n // Shift the data to the left if the window would overflow\n const shift = startIdx + frame.samplesPerChannel - maxWindowSize;\n if (shift > 0) {\n outData.copyWithin(0, shift, startIdx);\n startIdx -= shift;\n }\n\n // Get the frame data as Int16Array\n const frameData = new Int16Array(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.samplesPerChannel * frame.channels,\n );\n\n if (frame.channels > 1) {\n // Mix down multiple channels to mono by averaging\n for (let i = 0; i < frame.samplesPerChannel; i++) {\n let sum = 0;\n for (let ch = 0; ch < frame.channels; ch++) {\n sum += frameData[i * frame.channels + ch] ?? 0;\n }\n outData[startIdx + i] = Math.floor(sum / frame.channels);\n }\n } else {\n // Single channel - copy directly\n outData.set(frameData, startIdx);\n }\n\n startIdx += frame.samplesPerChannel;\n return { startIdx, samplesWritten: frame.samplesPerChannel };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAA2C;AAE3C,iBAAqD;AACrD,iBAAoB;AAEpB,4BAAwD;AACxD,uBAA2B;AAC3B,sBAAsD;AAEtD,4BAAoC;AACpC,sCAAuC;AAEvC,mBAUO;AACP,mBAA6B;AAC7B,0BAAkC;AAa3B,MAAM,2BAA2B;AAAA,EACtC,OAAO,qBAAyC;AAC9C,WAAO,EAAE,MAAM,uBAAuB;AAAA,EACxC;AAAA,EAEA,OAAO,mBAAqC;AAC1C,WAAO,EAAE,MAAM,qBAAqB;AAAA,EACtC;AAAA,EAEA,OAAO,qBACL,gBACA,WACA,kBACsB;AACtB,WAAO,EAAE,MAAM,0BAA0B,gBAAgB,WAAW,iBAAiB;AAAA,EACvF;AAAA,EAEA,OAAO,mBAAmB,SAAqC;AAC7D,WAAO,EAAE,MAAM,wBAAwB,QAAQ;AAAA,EACjD;AAAA,EAEA,OAAO,QAAe;AACpB,WAAO,EAAE,MAAM,QAAQ;AAAA,EACzB;AACF;AAEA,SAAS,uBAAuB,MAAY,OAA+B;AACzE,OAAK;AAAA,IACH,4BAAW;AAAA,KACV,MAAM,kBAAkB,OAAO,SAAS,EAAE,YAAY;AAAA,EACzD;AACA,OAAK,aAAa,4BAAW,+BAA+B,MAAM,WAAW;AAC7E,OAAK,aAAa,4BAAW,kCAAkC,MAAM,gBAAgB;AACrF,OAAK,aAAa,4BAAW,uCAAuC,MAAM,qBAAqB;AAC/F,OAAK,aAAa,4BAAW,mCAAmC,MAAM,iBAAiB;AACzF;AAEO,MAAM,uBAAuB;AAAA,EAC1B;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,cAAc;AAAA,EAEd;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,aAAS,gBAAI;AAAA;AAAA,EAGb;AAAA;AAAA,EAGA;AAAA,EAWR,YAAY,OAAqC,YAAwC;AACvF,SAAK,kBAAc,2CAGjB;AAEF,SAAK,QAAQ;AACb,SAAK,UAAU,EAAE,GAAG,MAAM,QAAQ;AAClC,SAAK,aAAa,EAAE,GAAG,oCAAoB,GAAG,WAAW;AAGzD,SAAK,mBAAmB;AAAA,MACtB,SAAS,KAAK,QAAQ;AAAA,MACtB,QAAQ,KAAK,QAAQ;AAAA,MACrB,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,QAAQ;AAAA,MACzB,WAAW,KAAK,QAAQ;AAAA,MACxB,WAAW,KAAK,QAAQ;AAAA,MACxB,SAAS,KAAK,QAAQ;AAAA,MACtB,YAAY,KAAK,WAAW;AAAA,IAC9B;AAEA,SAAK,cAAc,KAAK,eAAe;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,cAAc,SAGF;AAChB,QAAI,QAAQ,cAAc,QAAW;AACnC,WAAK,QAAQ,YAAY,QAAQ;AACjC,WAAK,iBAAiB,YAAY,QAAQ;AAAA,IAC5C;AACA,QAAI,QAAQ,+BAA+B,QAAW;AACpD,WAAK,QAAQ,6BAA6B,QAAQ;AAClD,WAAK,QAAQ,YAAY,KAAK,KAAK,QAAQ,6BAA6B,iCAAiB;AACzF,WAAK,iBAAiB,YAAY,KAAK,QAAQ;AAAA,IACjD;AAEA,QAAI,KAAK,QAAQ,YAAY,KAAK,aAAa;AAC7C,YAAM,KAAK,YAAY;AAAA,IACzB;AAAA,EACF;AAAA,EAEQ,iBAAyD;AAC/D,QAAI,qBAAqB;AACzB,QAAI,WAAW;AACf,QAAI,qBAAqB;AACzB,QAAI,uBAAuB;AAC3B,QAAI,eAAe;AACnB,UAAM,QAAQ,IAAI,0BAA6C,EAAE;AACjE,UAAM,mBAAmB,IAAI;AAAA,MAC3B,KAAK,KAAK,KAAK,QAAQ,sBAAsB,KAAK,QAAQ,UAAU;AAAA,IACtE,EAAE,KAAK,CAAC;AAGR,UAAM,WAAW,OAAO;AAAA,MACtB;AAAA,MACA,wBAAwB,KAAK;AAAA,MAC7B;AAAA,MACA;AAAA,IACF;AACA,UAAM,WAAW,CAAC,YAAgD;AAChE,UAAI,QAAQ,yBAAyB,QAAW;AAC9C,+BAAuB,QAAQ;AAAA,MACjC;AAAA,IACF;AACA,UAAM,mBAAmB,CAAC,UAAkC;AAC1D,UAAI,KAAK,kBAAkB;AACzB,+BAAuB,KAAK,kBAAkB,KAAK;AACnD,aAAK,mBAAmB;AAAA,MAC1B;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM;AAC1B,WAAK;AAAA,IACP;AAEA,UAAM,yBAAyB,MAAc;AAC3C,YAAM,IAAI,KAAK;AACf,WAAK,cAAc;AACnB,aAAO;AAAA,IACT;AAGA,UAAM,mBAAmB,IAAI;AAAA,MAI3B;AAAA,QACE,WAAW,CAAC,OAAO,eAAe;AAChC,cAAI,iBAAiB,4BAAY;AAC/B,gBAAI,CAAC,oBAAoB;AACvB;AAAA,YACF;AACA,gBAAI,KAAK,QAAQ,eAAe,MAAM,YAAY;AAChD,yBAAW,MAAM,wDAAwD;AACzE,mBAAK,OAAO,MAAM,wDAAwD;AAC1E;AAAA,YACF;AACA,kBAAM,SAAS;AAAA,cACb;AAAA,cACA;AAAA,cACA;AAAA,cACA,KAAK,QAAQ;AAAA,YACf;AACA,uBAAW,OAAO;AAClB,kCAAsB,OAAO;AAE7B,gBACE,sBACE,KAAK,MAAM,KAAK,QAAQ,uBAAuB,KAAK,QAAQ,UAAU,KACxE,sBACA;AACA,oBAAM,aAAa,iBAAiB,MAAM,GAAG,QAAQ;AACrD,mCAAqB;AACrB,yBAAW,QAAQ,UAAU;AAAA,YAC/B;AAAA,UACF,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,sBAAsB;AAC9C,iBAAK,OAAO,MAAM,oBAAoB;AACtC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,4BAA4B,oBAAoB;AACxE,iBAAK,yBAAyB,MAAM;AACpC,iBAAK,mBAAmB,MAAM;AAC9B,iBAAK,OAAO,MAAM,yDAAyD;AAC3E,mCAAuB;AACvB,iCAAqB;AACrB,4BAAgB;AAChB,gBAAI,gBAAgB,GAAG;AACrB,oBAAM,WACJ,KAAK,MAAO,MAAM,iBAAiB,MAAQ,KAAK,QAAQ,UAAU,IAClE,KAAK,MAAM,KAAK,QAAQ,yBAAyB,KAAK,QAAQ,UAAU;AAC1E,oBAAM,aAAa,KAAK,IAAI,GAAG,WAAW,QAAQ;AAClD,+BAAiB,WAAW,GAAG,YAAY,QAAQ;AACnD,0BAAY;AAAA,YACd;AACA,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,gBAAI,sBAAsB;AACxB,mBAAK,mBAAmB;AACxB,kBAAI,cAAc,MAAM;AAAA,gBACtB,CAAC,UAAU,MAAM,qBAAqB,UAAa,MAAM,mBAAmB;AAAA,cAC9E;AACA,kBAAI,CAAC,aAAa;AAChB,qBAAK,OAAO,MAAM,oCAAoC;AACtD,8BAAc,uDAAuB,QAAQ;AAAA,cAC/C;AACA,oBAAM,IAAI,eAAe,uDAAuB,QAAQ;AACxD,oBAAM,QAAgC;AAAA,gBACpC,MAAM;AAAA,gBACN,WAAW,MAAM;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,kBAAkB,KAAK;AAAA,gBACvB,aAAa,EAAE;AAAA,gBACf,eAAe,EAAE;AAAA,gBACjB,kBAAkB,EAAE;AAAA,gBACpB,mBAAmB,EAAE;AAAA,gBACrB,uBAAuB,EAAE;AAAA,gBACzB,aAAa,EAAE;AAAA,gBACf,aAAa,uBAAuB;AAAA,cACtC;AACA,yBAAW,QAAQ,KAAK;AACxB,qCAAuB;AACvB,mCAAqB;AAAA,YACvB;AACA,iBAAK,yBAAyB;AAAA,UAChC,WAAW,MAAM,SAAS,SAAS;AAAA,UAEnC;AAAA,QACF;AAAA,MACF;AAAA,MACA,EAAE,eAAe,GAAG;AAAA,MACpB,EAAE,eAAe,GAAG;AAAA,IACtB;AAGA,UAAM,mBAAmB,KAAK;AAE9B,QAAI;AACJ,QAAI,KAAK,QAAQ,UAAU;AACzB,YAAM,eAAW;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,kBAAY,SAAS;AACrB,WAAK,cAAc,SAAS;AAAA,IAC9B,OAAO;AACL,sBAAY;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,IAAI,2BAAgE;AAAA,MACvF,WAAW,CAAC,OAAO,eAAe;AAChC,aAAK,MAAM,KAAK,2BAA2B,KAAK;AAEhD,cAAM,UAA+B;AAAA,UACnC,MAAM;AAAA,UACN,WAAW,MAAM;AAAA,UACjB,eAAe,MAAM,mBAAmB;AAAA,UACxC,oBAAoB,MAAM,wBAAwB;AAAA,UAClD,gBAAgB,MAAM,oBAAoB;AAAA,UAC1C,kBAAkB,MAAM,iBAAiB,IAAI;AAAA,UAC7C,iBAAiB,MAAM,iBAAiB,IAAI;AAAA,UAC5C,aAAa,MAAM;AAAA,UACnB,UAAU;AAAA,YACR,eAAe,KAAK,MAAM;AAAA,YAC1B,WAAW,KAAK,MAAM;AAAA,UACxB;AAAA,QACF;AACA,aAAK,MAAM,KAAK,qBAAqB,OAAO;AAE5C,mBAAW,QAAQ,KAAK;AAAA,MAC1B;AAAA,IACF,CAAC;AAGD,WAAO,KAAK,YACT,OAAO,EACP,YAAY,gBAAgB,EAC5B,YAAY,SAAS,EACrB,YAAY,YAAY;AAAA,EAC7B;AAAA,EAEQ,sBAAsB;AAC5B,QAAI,KAAK,YAAY,QAAQ;AAC3B,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C;AAAA,EACF;AAAA,EAEQ,wBAAwB;AAC9B,SAAK,oBAAoB;AAAA,EAC3B;AAAA,EAEQ,gBAAgB,iBAAyC;AAC/D,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY,IAAI,+BAAe,iBAAiB,KAAK,QAAQ,UAAU;AAAA,IAC9E;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,SAAiD;AAC/C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,UAAU,OAAyD;AACvE,SAAK,sBAAsB;AAC3B,QAAI,EAAE,iBAAiB,6BAAa;AAClC,aAAO,KAAK,YAAY,MAAM,KAAK;AAAA,IACrC,WAAW,KAAK,QAAQ,eAAe,MAAM,YAAY;AACvD,YAAM,YAAY,KAAK,gBAAgB,MAAM,UAAU;AACvD,UAAI,UAAU,cAAc,MAAM,YAAY;AAC5C,cAAM,IAAI,MAAM,wDAAwD;AAAA,MAC1E;AACA,iBAAW,kBAAkB,UAAU,KAAK,KAAK,GAAG;AAClD,cAAM,KAAK,YAAY,MAAM,cAAc;AAAA,MAC7C;AAAA,IACF,OAAO;AACL,YAAM,KAAK,YAAY,MAAM,KAAK;AAAA,IACpC;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,sBAAsB;AAC3B,UAAM,KAAK,YAAY,MAAM,2BAA2B,MAAM,CAAC;AAAA,EACjE;AAAA,EAEA,MAAM,WAA0B;AAC9B,UAAM,KAAK,MAAM;AACjB,UAAM,KAAK,YAAY,MAAM;AAAA,EAC/B;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,CAAC,KAAK,YAAY,OAAQ,OAAM,KAAK,YAAY,MAAM;AAC3D,SAAK,MAAM,aAAa,IAAI;AAAA,EAC9B;AACF;AAMA,SAAS,wBACP,OACA,UACA,SACA,kBAC8C;AAC9C,QAAM,gBAAgB,KAAK,MAAM,mBAAmB,MAAM,UAAU;AAEpE,MAAI,MAAM,oBAAoB,QAAQ,QAAQ;AAC5C,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACtE;AAGA,QAAM,QAAQ,WAAW,MAAM,oBAAoB;AACnD,MAAI,QAAQ,GAAG;AACb,YAAQ,WAAW,GAAG,OAAO,QAAQ;AACrC,gBAAY;AAAA,EACd;AAGA,QAAM,YAAY,IAAI;AAAA,IACpB,MAAM,KAAK;AAAA,IACX,MAAM,KAAK;AAAA,IACX,MAAM,oBAAoB,MAAM;AAAA,EAClC;AAEA,MAAI,MAAM,WAAW,GAAG;AAEtB,aAAS,IAAI,GAAG,IAAI,MAAM,mBAAmB,KAAK;AAChD,UAAI,MAAM;AACV,eAAS,KAAK,GAAG,KAAK,MAAM,UAAU,MAAM;AAC1C,eAAO,UAAU,IAAI,MAAM,WAAW,EAAE,KAAK;AAAA,MAC/C;AACA,cAAQ,WAAW,CAAC,IAAI,KAAK,MAAM,MAAM,MAAM,QAAQ;AAAA,IACzD;AAAA,EACF,OAAO;AAEL,YAAQ,IAAI,WAAW,QAAQ;AAAA,EACjC;AAEA,cAAY,MAAM;AAClB,SAAO,EAAE,UAAU,gBAAgB,MAAM,kBAAkB;AAC7D;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../../src/inference/interruption/interruption_stream.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { Span } from '@opentelemetry/api';\nimport { type ReadableStream, TransformStream } from 'stream/web';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';\nimport { traceTypes } from '../../telemetry/index.js';\nimport { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js';\nimport type { InterruptionDetectionError } from './errors.js';\nimport { createHttpTransport } from './http_transport.js';\nimport { InterruptionCacheEntry } from './interruption_cache_entry.js';\nimport type { AdaptiveInterruptionDetector } from './interruption_detector.js';\nimport {\n type AgentSpeechEnded,\n type AgentSpeechStarted,\n type ApiConnectOptions,\n type Flush,\n type InterruptionOptions,\n type InterruptionSentinel,\n type OverlapSpeechEnded,\n type OverlapSpeechStarted,\n type OverlappingSpeechEvent,\n} from './types.js';\nimport { BoundedCache } from './utils.js';\nimport { createWsTransport } from './ws_transport.js';\n\n// Re-export sentinel types for backwards compatibility\nexport type {\n AgentSpeechEnded,\n AgentSpeechStarted,\n ApiConnectOptions,\n Flush,\n InterruptionSentinel,\n OverlapSpeechEnded,\n OverlapSpeechStarted,\n};\n\nexport class InterruptionStreamSentinel {\n static agentSpeechStarted(): AgentSpeechStarted {\n return { type: 'agent-speech-started' };\n }\n\n static agentSpeechEnded(): AgentSpeechEnded {\n return { type: 'agent-speech-ended' };\n }\n\n static overlapSpeechStarted(\n speechDuration: number,\n startedAt: number,\n userSpeakingSpan?: Span,\n ): OverlapSpeechStarted {\n return { type: 'overlap-speech-started', speechDuration, startedAt, userSpeakingSpan };\n }\n\n static overlapSpeechEnded(endedAt: number): OverlapSpeechEnded {\n return { type: 'overlap-speech-ended', endedAt };\n }\n\n static flush(): Flush {\n return { type: 'flush' };\n }\n}\n\nfunction updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) {\n span.setAttribute(\n traceTypes.ATTR_IS_INTERRUPTION,\n (entry.isInterruption ?? false).toString().toLowerCase(),\n );\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS);\n}\n\nexport class InterruptionStreamBase {\n private inputStream: StreamChannel<InterruptionSentinel | AudioFrame, InterruptionDetectionError>;\n\n private eventStream: ReadableStream<OverlappingSpeechEvent>;\n\n private resampler?: AudioResampler;\n\n private numRequests = 0;\n\n private userSpeakingSpan: Span | undefined;\n\n private overlapSpeechStartedAt: number | undefined;\n\n private options: InterruptionOptions;\n\n private apiOptions: ApiConnectOptions;\n\n private model: AdaptiveInterruptionDetector;\n\n private logger = log();\n\n // Store reconnect function for WebSocket transport\n private wsReconnect?: () => Promise<void>;\n\n // Mutable transport options that can be updated via updateOptions()\n private transportOptions: {\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n sampleRate: number;\n threshold: number;\n minFrames: number;\n timeout: number;\n maxRetries: number;\n };\n\n constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial<ApiConnectOptions>) {\n this.inputStream = createStreamChannel<\n InterruptionSentinel | AudioFrame,\n InterruptionDetectionError\n >();\n\n this.model = model;\n this.options = { ...model.options };\n this.apiOptions = { ...apiConnectDefaults, ...apiOptions };\n\n // Initialize mutable transport options\n this.transportOptions = {\n baseUrl: this.options.baseUrl,\n apiKey: this.options.apiKey,\n apiSecret: this.options.apiSecret,\n sampleRate: this.options.sampleRate,\n threshold: this.options.threshold,\n minFrames: this.options.minFrames,\n timeout: this.options.inferenceTimeout,\n maxRetries: this.apiOptions.maxRetries,\n };\n\n this.eventStream = this.setupTransform();\n }\n\n /**\n * Update stream options. For WebSocket transport, this triggers a reconnection.\n */\n async updateOptions(options: {\n threshold?: number;\n minInterruptionDurationInS?: number;\n }): Promise<void> {\n if (options.threshold !== undefined) {\n this.options.threshold = options.threshold;\n this.transportOptions.threshold = options.threshold;\n }\n if (options.minInterruptionDurationInS !== undefined) {\n this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;\n this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);\n this.transportOptions.minFrames = this.options.minFrames;\n }\n // Trigger WebSocket reconnection if using proxy (WebSocket transport)\n if (this.options.useProxy && this.wsReconnect) {\n await this.wsReconnect();\n }\n }\n\n private setupTransform(): ReadableStream<OverlappingSpeechEvent> {\n let agentSpeechStarted = false;\n let startIdx = 0;\n let accumulatedSamples = 0;\n let overlapSpeechStarted = false;\n let overlapCount = 0;\n const cache = new BoundedCache<number, InterruptionCacheEntry>(10);\n const inferenceS16Data = new Int16Array(\n Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate),\n ).fill(0);\n\n // State accessors for transport\n const getState = () => ({\n overlapSpeechStarted,\n overlapSpeechStartedAt: this.overlapSpeechStartedAt,\n cache,\n overlapCount,\n });\n const setState = (partial: { overlapSpeechStarted?: boolean }) => {\n if (partial.overlapSpeechStarted !== undefined) {\n overlapSpeechStarted = partial.overlapSpeechStarted;\n }\n };\n const handleSpanUpdate = (entry: InterruptionCacheEntry) => {\n if (this.userSpeakingSpan) {\n updateUserSpeakingSpan(this.userSpeakingSpan, entry);\n this.userSpeakingSpan = undefined;\n }\n };\n\n const onRequestSent = () => {\n this.numRequests++;\n };\n\n const getAndResetNumRequests = (): number => {\n const n = this.numRequests;\n this.numRequests = 0;\n return n;\n };\n\n // First transform: process input frames/sentinels and output audio slices or events\n const audioTransformer = new TransformStream<\n InterruptionSentinel | AudioFrame,\n Int16Array | OverlappingSpeechEvent\n >(\n {\n transform: (chunk, controller) => {\n if (chunk instanceof AudioFrame) {\n if (!agentSpeechStarted) {\n return;\n }\n if (this.options.sampleRate !== chunk.sampleRate) {\n controller.error('the sample rate of the input frames must be consistent');\n this.logger.error('the sample rate of the input frames must be consistent');\n return;\n }\n const result = writeToInferenceS16Data(\n chunk,\n startIdx,\n inferenceS16Data,\n this.options.maxAudioDurationInS,\n );\n startIdx = result.startIdx;\n accumulatedSamples += result.samplesWritten;\n\n if (\n accumulatedSamples >=\n Math.floor(this.options.detectionIntervalInS * this.options.sampleRate) &&\n overlapSpeechStarted\n ) {\n const audioSlice = inferenceS16Data.slice(0, startIdx);\n accumulatedSamples = 0;\n controller.enqueue(audioSlice);\n }\n } else if (chunk.type === 'agent-speech-started') {\n this.logger.debug('agent speech started');\n agentSpeechStarted = true;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'agent-speech-ended') {\n this.logger.debug('agent speech ended');\n agentSpeechStarted = false;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) {\n this.overlapSpeechStartedAt = chunk.startedAt;\n this.userSpeakingSpan = chunk.userSpeakingSpan;\n this.logger.debug('overlap speech started, starting interruption inference');\n overlapSpeechStarted = true;\n accumulatedSamples = 0;\n overlapCount += 1;\n if (overlapCount <= 1) {\n const keepSize =\n Math.round((chunk.speechDuration / 1000) * this.options.sampleRate) +\n Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate);\n const shiftCount = Math.max(0, startIdx - keepSize);\n inferenceS16Data.copyWithin(0, shiftCount, startIdx);\n startIdx -= shiftCount;\n }\n cache.clear();\n } else if (chunk.type === 'overlap-speech-ended') {\n this.logger.debug('overlap speech ended');\n if (overlapSpeechStarted) {\n this.userSpeakingSpan = undefined;\n let latestEntry = cache.pop(\n (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0,\n );\n if (!latestEntry) {\n this.logger.debug('no request made for overlap speech');\n latestEntry = InterruptionCacheEntry.default();\n }\n const e = latestEntry ?? InterruptionCacheEntry.default();\n const event: OverlappingSpeechEvent = {\n type: 'overlapping_speech',\n detectedAt: chunk.endedAt,\n isInterruption: false,\n overlapStartedAt: this.overlapSpeechStartedAt,\n speechInput: e.speechInput,\n probabilities: e.probabilities,\n totalDurationInS: e.totalDurationInS,\n detectionDelayInS: e.detectionDelayInS,\n predictionDurationInS: e.predictionDurationInS,\n probability: e.probability,\n numRequests: getAndResetNumRequests(),\n };\n controller.enqueue(event);\n overlapSpeechStarted = false;\n accumulatedSamples = 0;\n }\n this.overlapSpeechStartedAt = undefined;\n } else if (chunk.type === 'flush') {\n // no-op\n }\n },\n },\n { highWaterMark: 32 },\n { highWaterMark: 32 },\n );\n\n // Second transform: transport layer (HTTP or WebSocket based on useProxy)\n const transportOptions = this.transportOptions;\n\n let transport: TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>;\n if (this.options.useProxy) {\n const wsResult = createWsTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n onRequestSent,\n getAndResetNumRequests,\n );\n transport = wsResult.transport;\n this.wsReconnect = wsResult.reconnect;\n } else {\n transport = createHttpTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n getAndResetNumRequests,\n );\n }\n\n const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({\n transform: (chunk, controller) => {\n this.model.emit('overlapping_speech', chunk);\n\n const metrics: InterruptionMetrics = {\n type: 'interruption_metrics',\n timestamp: chunk.detectedAt,\n totalDuration: chunk.totalDurationInS * 1000,\n predictionDuration: chunk.predictionDurationInS * 1000,\n detectionDelay: chunk.detectionDelayInS * 1000,\n numInterruptions: chunk.isInterruption ? 1 : 0,\n numBackchannels: chunk.isInterruption ? 0 : 1,\n numRequests: chunk.numRequests,\n metadata: {\n modelProvider: this.model.provider,\n modelName: this.model.model,\n },\n };\n this.model.emit('metrics_collected', metrics);\n\n controller.enqueue(chunk);\n },\n });\n\n // Pipeline: input -> audioTransformer -> transport -> eventEmitter -> eventStream\n return this.inputStream\n .stream()\n .pipeThrough(audioTransformer)\n .pipeThrough(transport)\n .pipeThrough(eventEmitter);\n }\n\n private ensureInputNotEnded() {\n if (this.inputStream.closed) {\n throw new Error('input stream is closed');\n }\n }\n\n private ensureStreamsNotEnded() {\n this.ensureInputNotEnded();\n }\n\n private getResamplerFor(inputSampleRate: number): AudioResampler {\n if (!this.resampler) {\n this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate);\n }\n return this.resampler;\n }\n\n stream(): ReadableStream<OverlappingSpeechEvent> {\n return this.eventStream;\n }\n\n async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise<void> {\n this.ensureStreamsNotEnded();\n if (!(frame instanceof AudioFrame)) {\n return this.inputStream.write(frame);\n } else if (this.options.sampleRate !== frame.sampleRate) {\n const resampler = this.getResamplerFor(frame.sampleRate);\n if (resampler.inputRate !== frame.sampleRate) {\n throw new Error('the sample rate of the input frames must be consistent');\n }\n for (const resampledFrame of resampler.push(frame)) {\n await this.inputStream.write(resampledFrame);\n }\n } else {\n await this.inputStream.write(frame);\n }\n }\n\n async flush(): Promise<void> {\n this.ensureStreamsNotEnded();\n await this.inputStream.write(InterruptionStreamSentinel.flush());\n }\n\n async endInput(): Promise<void> {\n await this.flush();\n await this.inputStream.close();\n }\n\n async close(): Promise<void> {\n if (!this.inputStream.closed) await this.inputStream.close();\n this.model.removeStream(this);\n }\n}\n\n/**\n * Write the audio frame to the output data array and return the new start index\n * and the number of samples written.\n */\nfunction writeToInferenceS16Data(\n frame: AudioFrame,\n startIdx: number,\n outData: Int16Array,\n maxAudioDuration: number,\n): { startIdx: number; samplesWritten: number } {\n const maxWindowSize = Math.floor(maxAudioDuration * frame.sampleRate);\n\n if (frame.samplesPerChannel > outData.length) {\n throw new Error('frame samples are greater than the max window size');\n }\n\n // Shift the data to the left if the window would overflow\n const shift = startIdx + frame.samplesPerChannel - maxWindowSize;\n if (shift > 0) {\n outData.copyWithin(0, shift, startIdx);\n startIdx -= shift;\n }\n\n // Get the frame data as Int16Array\n const frameData = new Int16Array(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.samplesPerChannel * frame.channels,\n );\n\n if (frame.channels > 1) {\n // Mix down multiple channels to mono by averaging\n for (let i = 0; i < frame.samplesPerChannel; i++) {\n let sum = 0;\n for (let ch = 0; ch < frame.channels; ch++) {\n sum += frameData[i * frame.channels + ch] ?? 0;\n }\n outData[startIdx + i] = Math.floor(sum / frame.channels);\n }\n } else {\n // Single channel - copy directly\n outData.set(frameData, startIdx);\n }\n\n startIdx += frame.samplesPerChannel;\n return { startIdx, samplesWritten: frame.samplesPerChannel };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAA2C;AAE3C,iBAAqD;AACrD,iBAAoB;AAEpB,4BAAwD;AACxD,uBAA2B;AAC3B,sBAAsD;AAEtD,4BAAoC;AACpC,sCAAuC;AAEvC,mBAUO;AACP,mBAA6B;AAC7B,0BAAkC;AAa3B,MAAM,2BAA2B;AAAA,EACtC,OAAO,qBAAyC;AAC9C,WAAO,EAAE,MAAM,uBAAuB;AAAA,EACxC;AAAA,EAEA,OAAO,mBAAqC;AAC1C,WAAO,EAAE,MAAM,qBAAqB;AAAA,EACtC;AAAA,EAEA,OAAO,qBACL,gBACA,WACA,kBACsB;AACtB,WAAO,EAAE,MAAM,0BAA0B,gBAAgB,WAAW,iBAAiB;AAAA,EACvF;AAAA,EAEA,OAAO,mBAAmB,SAAqC;AAC7D,WAAO,EAAE,MAAM,wBAAwB,QAAQ;AAAA,EACjD;AAAA,EAEA,OAAO,QAAe;AACpB,WAAO,EAAE,MAAM,QAAQ;AAAA,EACzB;AACF;AAEA,SAAS,uBAAuB,MAAY,OAA+B;AACzE,OAAK;AAAA,IACH,4BAAW;AAAA,KACV,MAAM,kBAAkB,OAAO,SAAS,EAAE,YAAY;AAAA,EACzD;AACA,OAAK,aAAa,4BAAW,+BAA+B,MAAM,WAAW;AAC7E,OAAK,aAAa,4BAAW,kCAAkC,MAAM,gBAAgB;AACrF,OAAK,aAAa,4BAAW,uCAAuC,MAAM,qBAAqB;AAC/F,OAAK,aAAa,4BAAW,mCAAmC,MAAM,iBAAiB;AACzF;AAEO,MAAM,uBAAuB;AAAA,EAC1B;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,cAAc;AAAA,EAEd;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,aAAS,gBAAI;AAAA;AAAA,EAGb;AAAA;AAAA,EAGA;AAAA,EAWR,YAAY,OAAqC,YAAwC;AACvF,SAAK,kBAAc,2CAGjB;AAEF,SAAK,QAAQ;AACb,SAAK,UAAU,EAAE,GAAG,MAAM,QAAQ;AAClC,SAAK,aAAa,EAAE,GAAG,oCAAoB,GAAG,WAAW;AAGzD,SAAK,mBAAmB;AAAA,MACtB,SAAS,KAAK,QAAQ;AAAA,MACtB,QAAQ,KAAK,QAAQ;AAAA,MACrB,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,QAAQ;AAAA,MACzB,WAAW,KAAK,QAAQ;AAAA,MACxB,WAAW,KAAK,QAAQ;AAAA,MACxB,SAAS,KAAK,QAAQ;AAAA,MACtB,YAAY,KAAK,WAAW;AAAA,IAC9B;AAEA,SAAK,cAAc,KAAK,eAAe;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,cAAc,SAGF;AAChB,QAAI,QAAQ,cAAc,QAAW;AACnC,WAAK,QAAQ,YAAY,QAAQ;AACjC,WAAK,iBAAiB,YAAY,QAAQ;AAAA,IAC5C;AACA,QAAI,QAAQ,+BAA+B,QAAW;AACpD,WAAK,QAAQ,6BAA6B,QAAQ;AAClD,WAAK,QAAQ,YAAY,KAAK,KAAK,QAAQ,6BAA6B,iCAAiB;AACzF,WAAK,iBAAiB,YAAY,KAAK,QAAQ;AAAA,IACjD;AAEA,QAAI,KAAK,QAAQ,YAAY,KAAK,aAAa;AAC7C,YAAM,KAAK,YAAY;AAAA,IACzB;AAAA,EACF;AAAA,EAEQ,iBAAyD;AAC/D,QAAI,qBAAqB;AACzB,QAAI,WAAW;AACf,QAAI,qBAAqB;AACzB,QAAI,uBAAuB;AAC3B,QAAI,eAAe;AACnB,UAAM,QAAQ,IAAI,0BAA6C,EAAE;AACjE,UAAM,mBAAmB,IAAI;AAAA,MAC3B,KAAK,KAAK,KAAK,QAAQ,sBAAsB,KAAK,QAAQ,UAAU;AAAA,IACtE,EAAE,KAAK,CAAC;AAGR,UAAM,WAAW,OAAO;AAAA,MACtB;AAAA,MACA,wBAAwB,KAAK;AAAA,MAC7B;AAAA,MACA;AAAA,IACF;AACA,UAAM,WAAW,CAAC,YAAgD;AAChE,UAAI,QAAQ,yBAAyB,QAAW;AAC9C,+BAAuB,QAAQ;AAAA,MACjC;AAAA,IACF;AACA,UAAM,mBAAmB,CAAC,UAAkC;AAC1D,UAAI,KAAK,kBAAkB;AACzB,+BAAuB,KAAK,kBAAkB,KAAK;AACnD,aAAK,mBAAmB;AAAA,MAC1B;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM;AAC1B,WAAK;AAAA,IACP;AAEA,UAAM,yBAAyB,MAAc;AAC3C,YAAM,IAAI,KAAK;AACf,WAAK,cAAc;AACnB,aAAO;AAAA,IACT;AAGA,UAAM,mBAAmB,IAAI;AAAA,MAI3B;AAAA,QACE,WAAW,CAAC,OAAO,eAAe;AAChC,cAAI,iBAAiB,4BAAY;AAC/B,gBAAI,CAAC,oBAAoB;AACvB;AAAA,YACF;AACA,gBAAI,KAAK,QAAQ,eAAe,MAAM,YAAY;AAChD,yBAAW,MAAM,wDAAwD;AACzE,mBAAK,OAAO,MAAM,wDAAwD;AAC1E;AAAA,YACF;AACA,kBAAM,SAAS;AAAA,cACb;AAAA,cACA;AAAA,cACA;AAAA,cACA,KAAK,QAAQ;AAAA,YACf;AACA,uBAAW,OAAO;AAClB,kCAAsB,OAAO;AAE7B,gBACE,sBACE,KAAK,MAAM,KAAK,QAAQ,uBAAuB,KAAK,QAAQ,UAAU,KACxE,sBACA;AACA,oBAAM,aAAa,iBAAiB,MAAM,GAAG,QAAQ;AACrD,mCAAqB;AACrB,yBAAW,QAAQ,UAAU;AAAA,YAC/B;AAAA,UACF,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,sBAAsB;AAC9C,iBAAK,OAAO,MAAM,oBAAoB;AACtC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,4BAA4B,oBAAoB;AACxE,iBAAK,yBAAyB,MAAM;AACpC,iBAAK,mBAAmB,MAAM;AAC9B,iBAAK,OAAO,MAAM,yDAAyD;AAC3E,mCAAuB;AACvB,iCAAqB;AACrB,4BAAgB;AAChB,gBAAI,gBAAgB,GAAG;AACrB,oBAAM,WACJ,KAAK,MAAO,MAAM,iBAAiB,MAAQ,KAAK,QAAQ,UAAU,IAClE,KAAK,MAAM,KAAK,QAAQ,yBAAyB,KAAK,QAAQ,UAAU;AAC1E,oBAAM,aAAa,KAAK,IAAI,GAAG,WAAW,QAAQ;AAClD,+BAAiB,WAAW,GAAG,YAAY,QAAQ;AACnD,0BAAY;AAAA,YACd;AACA,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,gBAAI,sBAAsB;AACxB,mBAAK,mBAAmB;AACxB,kBAAI,cAAc,MAAM;AAAA,gBACtB,CAAC,UAAU,MAAM,qBAAqB,UAAa,MAAM,mBAAmB;AAAA,cAC9E;AACA,kBAAI,CAAC,aAAa;AAChB,qBAAK,OAAO,MAAM,oCAAoC;AACtD,8BAAc,uDAAuB,QAAQ;AAAA,cAC/C;AACA,oBAAM,IAAI,eAAe,uDAAuB,QAAQ;AACxD,oBAAM,QAAgC;AAAA,gBACpC,MAAM;AAAA,gBACN,YAAY,MAAM;AAAA,gBAClB,gBAAgB;AAAA,gBAChB,kBAAkB,KAAK;AAAA,gBACvB,aAAa,EAAE;AAAA,gBACf,eAAe,EAAE;AAAA,gBACjB,kBAAkB,EAAE;AAAA,gBACpB,mBAAmB,EAAE;AAAA,gBACrB,uBAAuB,EAAE;AAAA,gBACzB,aAAa,EAAE;AAAA,gBACf,aAAa,uBAAuB;AAAA,cACtC;AACA,yBAAW,QAAQ,KAAK;AACxB,qCAAuB;AACvB,mCAAqB;AAAA,YACvB;AACA,iBAAK,yBAAyB;AAAA,UAChC,WAAW,MAAM,SAAS,SAAS;AAAA,UAEnC;AAAA,QACF;AAAA,MACF;AAAA,MACA,EAAE,eAAe,GAAG;AAAA,MACpB,EAAE,eAAe,GAAG;AAAA,IACtB;AAGA,UAAM,mBAAmB,KAAK;AAE9B,QAAI;AACJ,QAAI,KAAK,QAAQ,UAAU;AACzB,YAAM,eAAW;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,kBAAY,SAAS;AACrB,WAAK,cAAc,SAAS;AAAA,IAC9B,OAAO;AACL,sBAAY;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,IAAI,2BAAgE;AAAA,MACvF,WAAW,CAAC,OAAO,eAAe;AAChC,aAAK,MAAM,KAAK,sBAAsB,KAAK;AAE3C,cAAM,UAA+B;AAAA,UACnC,MAAM;AAAA,UACN,WAAW,MAAM;AAAA,UACjB,eAAe,MAAM,mBAAmB;AAAA,UACxC,oBAAoB,MAAM,wBAAwB;AAAA,UAClD,gBAAgB,MAAM,oBAAoB;AAAA,UAC1C,kBAAkB,MAAM,iBAAiB,IAAI;AAAA,UAC7C,iBAAiB,MAAM,iBAAiB,IAAI;AAAA,UAC5C,aAAa,MAAM;AAAA,UACnB,UAAU;AAAA,YACR,eAAe,KAAK,MAAM;AAAA,YAC1B,WAAW,KAAK,MAAM;AAAA,UACxB;AAAA,QACF;AACA,aAAK,MAAM,KAAK,qBAAqB,OAAO;AAE5C,mBAAW,QAAQ,KAAK;AAAA,MAC1B;AAAA,IACF,CAAC;AAGD,WAAO,KAAK,YACT,OAAO,EACP,YAAY,gBAAgB,EAC5B,YAAY,SAAS,EACrB,YAAY,YAAY;AAAA,EAC7B;AAAA,EAEQ,sBAAsB;AAC5B,QAAI,KAAK,YAAY,QAAQ;AAC3B,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C;AAAA,EACF;AAAA,EAEQ,wBAAwB;AAC9B,SAAK,oBAAoB;AAAA,EAC3B;AAAA,EAEQ,gBAAgB,iBAAyC;AAC/D,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY,IAAI,+BAAe,iBAAiB,KAAK,QAAQ,UAAU;AAAA,IAC9E;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,SAAiD;AAC/C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,UAAU,OAAyD;AACvE,SAAK,sBAAsB;AAC3B,QAAI,EAAE,iBAAiB,6BAAa;AAClC,aAAO,KAAK,YAAY,MAAM,KAAK;AAAA,IACrC,WAAW,KAAK,QAAQ,eAAe,MAAM,YAAY;AACvD,YAAM,YAAY,KAAK,gBAAgB,MAAM,UAAU;AACvD,UAAI,UAAU,cAAc,MAAM,YAAY;AAC5C,cAAM,IAAI,MAAM,wDAAwD;AAAA,MAC1E;AACA,iBAAW,kBAAkB,UAAU,KAAK,KAAK,GAAG;AAClD,cAAM,KAAK,YAAY,MAAM,cAAc;AAAA,MAC7C;AAAA,IACF,OAAO;AACL,YAAM,KAAK,YAAY,MAAM,KAAK;AAAA,IACpC;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,sBAAsB;AAC3B,UAAM,KAAK,YAAY,MAAM,2BAA2B,MAAM,CAAC;AAAA,EACjE;AAAA,EAEA,MAAM,WAA0B;AAC9B,UAAM,KAAK,MAAM;AACjB,UAAM,KAAK,YAAY,MAAM;AAAA,EAC/B;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,CAAC,KAAK,YAAY,OAAQ,OAAM,KAAK,YAAY,MAAM;AAC3D,SAAK,MAAM,aAAa,IAAI;AAAA,EAC9B;AACF;AAMA,SAAS,wBACP,OACA,UACA,SACA,kBAC8C;AAC9C,QAAM,gBAAgB,KAAK,MAAM,mBAAmB,MAAM,UAAU;AAEpE,MAAI,MAAM,oBAAoB,QAAQ,QAAQ;AAC5C,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACtE;AAGA,QAAM,QAAQ,WAAW,MAAM,oBAAoB;AACnD,MAAI,QAAQ,GAAG;AACb,YAAQ,WAAW,GAAG,OAAO,QAAQ;AACrC,gBAAY;AAAA,EACd;AAGA,QAAM,YAAY,IAAI;AAAA,IACpB,MAAM,KAAK;AAAA,IACX,MAAM,KAAK;AAAA,IACX,MAAM,oBAAoB,MAAM;AAAA,EAClC;AAEA,MAAI,MAAM,WAAW,GAAG;AAEtB,aAAS,IAAI,GAAG,IAAI,MAAM,mBAAmB,KAAK;AAChD,UAAI,MAAM;AACV,eAAS,KAAK,GAAG,KAAK,MAAM,UAAU,MAAM;AAC1C,eAAO,UAAU,IAAI,MAAM,WAAW,EAAE,KAAK;AAAA,MAC/C;AACA,cAAQ,WAAW,CAAC,IAAI,KAAK,MAAM,MAAM,MAAM,QAAQ;AAAA,IACzD;AAAA,EACF,OAAO;AAEL,YAAQ,IAAI,WAAW,QAAQ;AAAA,EACjC;AAEA,cAAY,MAAM;AAClB,SAAO,EAAE,UAAU,gBAAgB,MAAM,kBAAkB;AAC7D;","names":[]}
|
|
@@ -193,8 +193,8 @@ class InterruptionStreamBase {
|
|
|
193
193
|
}
|
|
194
194
|
const e = latestEntry ?? InterruptionCacheEntry.default();
|
|
195
195
|
const event = {
|
|
196
|
-
type: "
|
|
197
|
-
|
|
196
|
+
type: "overlapping_speech",
|
|
197
|
+
detectedAt: chunk.endedAt,
|
|
198
198
|
isInterruption: false,
|
|
199
199
|
overlapStartedAt: this.overlapSpeechStartedAt,
|
|
200
200
|
speechInput: e.speechInput,
|
|
@@ -241,10 +241,10 @@ class InterruptionStreamBase {
|
|
|
241
241
|
}
|
|
242
242
|
const eventEmitter = new TransformStream({
|
|
243
243
|
transform: (chunk, controller) => {
|
|
244
|
-
this.model.emit("
|
|
244
|
+
this.model.emit("overlapping_speech", chunk);
|
|
245
245
|
const metrics = {
|
|
246
246
|
type: "interruption_metrics",
|
|
247
|
-
timestamp: chunk.
|
|
247
|
+
timestamp: chunk.detectedAt,
|
|
248
248
|
totalDuration: chunk.totalDurationInS * 1e3,
|
|
249
249
|
predictionDuration: chunk.predictionDurationInS * 1e3,
|
|
250
250
|
detectionDelay: chunk.detectionDelayInS * 1e3,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/inference/interruption/interruption_stream.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { Span } from '@opentelemetry/api';\nimport { type ReadableStream, TransformStream } from 'stream/web';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';\nimport { traceTypes } from '../../telemetry/index.js';\nimport { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js';\nimport type { InterruptionDetectionError } from './errors.js';\nimport { createHttpTransport } from './http_transport.js';\nimport { InterruptionCacheEntry } from './interruption_cache_entry.js';\nimport type { AdaptiveInterruptionDetector } from './interruption_detector.js';\nimport {\n type AgentSpeechEnded,\n type AgentSpeechStarted,\n type ApiConnectOptions,\n type Flush,\n type InterruptionOptions,\n type InterruptionSentinel,\n type OverlapSpeechEnded,\n type OverlapSpeechStarted,\n type OverlappingSpeechEvent,\n} from './types.js';\nimport { BoundedCache } from './utils.js';\nimport { createWsTransport } from './ws_transport.js';\n\n// Re-export sentinel types for backwards compatibility\nexport type {\n AgentSpeechEnded,\n AgentSpeechStarted,\n ApiConnectOptions,\n Flush,\n InterruptionSentinel,\n OverlapSpeechEnded,\n OverlapSpeechStarted,\n};\n\nexport class InterruptionStreamSentinel {\n static agentSpeechStarted(): AgentSpeechStarted {\n return { type: 'agent-speech-started' };\n }\n\n static agentSpeechEnded(): AgentSpeechEnded {\n return { type: 'agent-speech-ended' };\n }\n\n static overlapSpeechStarted(\n speechDuration: number,\n startedAt: number,\n userSpeakingSpan?: Span,\n ): OverlapSpeechStarted {\n return { type: 'overlap-speech-started', speechDuration, startedAt, userSpeakingSpan };\n }\n\n static overlapSpeechEnded(endedAt: number): OverlapSpeechEnded {\n return { type: 'overlap-speech-ended', endedAt };\n }\n\n static flush(): Flush {\n return { type: 'flush' };\n }\n}\n\nfunction updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) {\n span.setAttribute(\n traceTypes.ATTR_IS_INTERRUPTION,\n (entry.isInterruption ?? false).toString().toLowerCase(),\n );\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS);\n}\n\nexport class InterruptionStreamBase {\n private inputStream: StreamChannel<InterruptionSentinel | AudioFrame, InterruptionDetectionError>;\n\n private eventStream: ReadableStream<OverlappingSpeechEvent>;\n\n private resampler?: AudioResampler;\n\n private numRequests = 0;\n\n private userSpeakingSpan: Span | undefined;\n\n private overlapSpeechStartedAt: number | undefined;\n\n private options: InterruptionOptions;\n\n private apiOptions: ApiConnectOptions;\n\n private model: AdaptiveInterruptionDetector;\n\n private logger = log();\n\n // Store reconnect function for WebSocket transport\n private wsReconnect?: () => Promise<void>;\n\n // Mutable transport options that can be updated via updateOptions()\n private transportOptions: {\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n sampleRate: number;\n threshold: number;\n minFrames: number;\n timeout: number;\n maxRetries: number;\n };\n\n constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial<ApiConnectOptions>) {\n this.inputStream = createStreamChannel<\n InterruptionSentinel | AudioFrame,\n InterruptionDetectionError\n >();\n\n this.model = model;\n this.options = { ...model.options };\n this.apiOptions = { ...apiConnectDefaults, ...apiOptions };\n\n // Initialize mutable transport options\n this.transportOptions = {\n baseUrl: this.options.baseUrl,\n apiKey: this.options.apiKey,\n apiSecret: this.options.apiSecret,\n sampleRate: this.options.sampleRate,\n threshold: this.options.threshold,\n minFrames: this.options.minFrames,\n timeout: this.options.inferenceTimeout,\n maxRetries: this.apiOptions.maxRetries,\n };\n\n this.eventStream = this.setupTransform();\n }\n\n /**\n * Update stream options. For WebSocket transport, this triggers a reconnection.\n */\n async updateOptions(options: {\n threshold?: number;\n minInterruptionDurationInS?: number;\n }): Promise<void> {\n if (options.threshold !== undefined) {\n this.options.threshold = options.threshold;\n this.transportOptions.threshold = options.threshold;\n }\n if (options.minInterruptionDurationInS !== undefined) {\n this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;\n this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);\n this.transportOptions.minFrames = this.options.minFrames;\n }\n // Trigger WebSocket reconnection if using proxy (WebSocket transport)\n if (this.options.useProxy && this.wsReconnect) {\n await this.wsReconnect();\n }\n }\n\n private setupTransform(): ReadableStream<OverlappingSpeechEvent> {\n let agentSpeechStarted = false;\n let startIdx = 0;\n let accumulatedSamples = 0;\n let overlapSpeechStarted = false;\n let overlapCount = 0;\n const cache = new BoundedCache<number, InterruptionCacheEntry>(10);\n const inferenceS16Data = new Int16Array(\n Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate),\n ).fill(0);\n\n // State accessors for transport\n const getState = () => ({\n overlapSpeechStarted,\n overlapSpeechStartedAt: this.overlapSpeechStartedAt,\n cache,\n overlapCount,\n });\n const setState = (partial: { overlapSpeechStarted?: boolean }) => {\n if (partial.overlapSpeechStarted !== undefined) {\n overlapSpeechStarted = partial.overlapSpeechStarted;\n }\n };\n const handleSpanUpdate = (entry: InterruptionCacheEntry) => {\n if (this.userSpeakingSpan) {\n updateUserSpeakingSpan(this.userSpeakingSpan, entry);\n this.userSpeakingSpan = undefined;\n }\n };\n\n const onRequestSent = () => {\n this.numRequests++;\n };\n\n const getAndResetNumRequests = (): number => {\n const n = this.numRequests;\n this.numRequests = 0;\n return n;\n };\n\n // First transform: process input frames/sentinels and output audio slices or events\n const audioTransformer = new TransformStream<\n InterruptionSentinel | AudioFrame,\n Int16Array | OverlappingSpeechEvent\n >(\n {\n transform: (chunk, controller) => {\n if (chunk instanceof AudioFrame) {\n if (!agentSpeechStarted) {\n return;\n }\n if (this.options.sampleRate !== chunk.sampleRate) {\n controller.error('the sample rate of the input frames must be consistent');\n this.logger.error('the sample rate of the input frames must be consistent');\n return;\n }\n const result = writeToInferenceS16Data(\n chunk,\n startIdx,\n inferenceS16Data,\n this.options.maxAudioDurationInS,\n );\n startIdx = result.startIdx;\n accumulatedSamples += result.samplesWritten;\n\n if (\n accumulatedSamples >=\n Math.floor(this.options.detectionIntervalInS * this.options.sampleRate) &&\n overlapSpeechStarted\n ) {\n const audioSlice = inferenceS16Data.slice(0, startIdx);\n accumulatedSamples = 0;\n controller.enqueue(audioSlice);\n }\n } else if (chunk.type === 'agent-speech-started') {\n this.logger.debug('agent speech started');\n agentSpeechStarted = true;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'agent-speech-ended') {\n this.logger.debug('agent speech ended');\n agentSpeechStarted = false;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) {\n this.overlapSpeechStartedAt = chunk.startedAt;\n this.userSpeakingSpan = chunk.userSpeakingSpan;\n this.logger.debug('overlap speech started, starting interruption inference');\n overlapSpeechStarted = true;\n accumulatedSamples = 0;\n overlapCount += 1;\n if (overlapCount <= 1) {\n const keepSize =\n Math.round((chunk.speechDuration / 1000) * this.options.sampleRate) +\n Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate);\n const shiftCount = Math.max(0, startIdx - keepSize);\n inferenceS16Data.copyWithin(0, shiftCount, startIdx);\n startIdx -= shiftCount;\n }\n cache.clear();\n } else if (chunk.type === 'overlap-speech-ended') {\n this.logger.debug('overlap speech ended');\n if (overlapSpeechStarted) {\n this.userSpeakingSpan = undefined;\n let latestEntry = cache.pop(\n (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0,\n );\n if (!latestEntry) {\n this.logger.debug('no request made for overlap speech');\n latestEntry = InterruptionCacheEntry.default();\n }\n const e = latestEntry ?? InterruptionCacheEntry.default();\n const event: OverlappingSpeechEvent = {\n type: 'user_overlapping_speech',\n timestamp: chunk.endedAt,\n isInterruption: false,\n overlapStartedAt: this.overlapSpeechStartedAt,\n speechInput: e.speechInput,\n probabilities: e.probabilities,\n totalDurationInS: e.totalDurationInS,\n detectionDelayInS: e.detectionDelayInS,\n predictionDurationInS: e.predictionDurationInS,\n probability: e.probability,\n numRequests: getAndResetNumRequests(),\n };\n controller.enqueue(event);\n overlapSpeechStarted = false;\n accumulatedSamples = 0;\n }\n this.overlapSpeechStartedAt = undefined;\n } else if (chunk.type === 'flush') {\n // no-op\n }\n },\n },\n { highWaterMark: 32 },\n { highWaterMark: 32 },\n );\n\n // Second transform: transport layer (HTTP or WebSocket based on useProxy)\n const transportOptions = this.transportOptions;\n\n let transport: TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>;\n if (this.options.useProxy) {\n const wsResult = createWsTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n onRequestSent,\n getAndResetNumRequests,\n );\n transport = wsResult.transport;\n this.wsReconnect = wsResult.reconnect;\n } else {\n transport = createHttpTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n getAndResetNumRequests,\n );\n }\n\n const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({\n transform: (chunk, controller) => {\n this.model.emit('user_overlapping_speech', chunk);\n\n const metrics: InterruptionMetrics = {\n type: 'interruption_metrics',\n timestamp: chunk.timestamp,\n totalDuration: chunk.totalDurationInS * 1000,\n predictionDuration: chunk.predictionDurationInS * 1000,\n detectionDelay: chunk.detectionDelayInS * 1000,\n numInterruptions: chunk.isInterruption ? 1 : 0,\n numBackchannels: chunk.isInterruption ? 0 : 1,\n numRequests: chunk.numRequests,\n metadata: {\n modelProvider: this.model.provider,\n modelName: this.model.model,\n },\n };\n this.model.emit('metrics_collected', metrics);\n\n controller.enqueue(chunk);\n },\n });\n\n // Pipeline: input -> audioTransformer -> transport -> eventEmitter -> eventStream\n return this.inputStream\n .stream()\n .pipeThrough(audioTransformer)\n .pipeThrough(transport)\n .pipeThrough(eventEmitter);\n }\n\n private ensureInputNotEnded() {\n if (this.inputStream.closed) {\n throw new Error('input stream is closed');\n }\n }\n\n private ensureStreamsNotEnded() {\n this.ensureInputNotEnded();\n }\n\n private getResamplerFor(inputSampleRate: number): AudioResampler {\n if (!this.resampler) {\n this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate);\n }\n return this.resampler;\n }\n\n stream(): ReadableStream<OverlappingSpeechEvent> {\n return this.eventStream;\n }\n\n async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise<void> {\n this.ensureStreamsNotEnded();\n if (!(frame instanceof AudioFrame)) {\n return this.inputStream.write(frame);\n } else if (this.options.sampleRate !== frame.sampleRate) {\n const resampler = this.getResamplerFor(frame.sampleRate);\n if (resampler.inputRate !== frame.sampleRate) {\n throw new Error('the sample rate of the input frames must be consistent');\n }\n for (const resampledFrame of resampler.push(frame)) {\n await this.inputStream.write(resampledFrame);\n }\n } else {\n await this.inputStream.write(frame);\n }\n }\n\n async flush(): Promise<void> {\n this.ensureStreamsNotEnded();\n await this.inputStream.write(InterruptionStreamSentinel.flush());\n }\n\n async endInput(): Promise<void> {\n await this.flush();\n await this.inputStream.close();\n }\n\n async close(): Promise<void> {\n if (!this.inputStream.closed) await this.inputStream.close();\n this.model.removeStream(this);\n }\n}\n\n/**\n * Write the audio frame to the output data array and return the new start index\n * and the number of samples written.\n */\nfunction writeToInferenceS16Data(\n frame: AudioFrame,\n startIdx: number,\n outData: Int16Array,\n maxAudioDuration: number,\n): { startIdx: number; samplesWritten: number } {\n const maxWindowSize = Math.floor(maxAudioDuration * frame.sampleRate);\n\n if (frame.samplesPerChannel > outData.length) {\n throw new Error('frame samples are greater than the max window size');\n }\n\n // Shift the data to the left if the window would overflow\n const shift = startIdx + frame.samplesPerChannel - maxWindowSize;\n if (shift > 0) {\n outData.copyWithin(0, shift, startIdx);\n startIdx -= shift;\n }\n\n // Get the frame data as Int16Array\n const frameData = new Int16Array(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.samplesPerChannel * frame.channels,\n );\n\n if (frame.channels > 1) {\n // Mix down multiple channels to mono by averaging\n for (let i = 0; i < frame.samplesPerChannel; i++) {\n let sum = 0;\n for (let ch = 0; ch < frame.channels; ch++) {\n sum += frameData[i * frame.channels + ch] ?? 0;\n }\n outData[startIdx + i] = Math.floor(sum / frame.channels);\n }\n } else {\n // Single channel - copy directly\n outData.set(frameData, startIdx);\n }\n\n startIdx += frame.samplesPerChannel;\n return { startIdx, samplesWritten: frame.samplesPerChannel };\n}\n"],"mappings":"AAGA,SAAS,YAAY,sBAAsB;AAE3C,SAA8B,uBAAuB;AACrD,SAAS,WAAW;AAEpB,SAA6B,2BAA2B;AACxD,SAAS,kBAAkB;AAC3B,SAAS,mBAAmB,0BAA0B;AAEtD,SAAS,2BAA2B;AACpC,SAAS,8BAA8B;AAEvC;AAAA,OAUO;AACP,SAAS,oBAAoB;AAC7B,SAAS,yBAAyB;AAa3B,MAAM,2BAA2B;AAAA,EACtC,OAAO,qBAAyC;AAC9C,WAAO,EAAE,MAAM,uBAAuB;AAAA,EACxC;AAAA,EAEA,OAAO,mBAAqC;AAC1C,WAAO,EAAE,MAAM,qBAAqB;AAAA,EACtC;AAAA,EAEA,OAAO,qBACL,gBACA,WACA,kBACsB;AACtB,WAAO,EAAE,MAAM,0BAA0B,gBAAgB,WAAW,iBAAiB;AAAA,EACvF;AAAA,EAEA,OAAO,mBAAmB,SAAqC;AAC7D,WAAO,EAAE,MAAM,wBAAwB,QAAQ;AAAA,EACjD;AAAA,EAEA,OAAO,QAAe;AACpB,WAAO,EAAE,MAAM,QAAQ;AAAA,EACzB;AACF;AAEA,SAAS,uBAAuB,MAAY,OAA+B;AACzE,OAAK;AAAA,IACH,WAAW;AAAA,KACV,MAAM,kBAAkB,OAAO,SAAS,EAAE,YAAY;AAAA,EACzD;AACA,OAAK,aAAa,WAAW,+BAA+B,MAAM,WAAW;AAC7E,OAAK,aAAa,WAAW,kCAAkC,MAAM,gBAAgB;AACrF,OAAK,aAAa,WAAW,uCAAuC,MAAM,qBAAqB;AAC/F,OAAK,aAAa,WAAW,mCAAmC,MAAM,iBAAiB;AACzF;AAEO,MAAM,uBAAuB;AAAA,EAC1B;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,cAAc;AAAA,EAEd;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,SAAS,IAAI;AAAA;AAAA,EAGb;AAAA;AAAA,EAGA;AAAA,EAWR,YAAY,OAAqC,YAAwC;AACvF,SAAK,cAAc,oBAGjB;AAEF,SAAK,QAAQ;AACb,SAAK,UAAU,EAAE,GAAG,MAAM,QAAQ;AAClC,SAAK,aAAa,EAAE,GAAG,oBAAoB,GAAG,WAAW;AAGzD,SAAK,mBAAmB;AAAA,MACtB,SAAS,KAAK,QAAQ;AAAA,MACtB,QAAQ,KAAK,QAAQ;AAAA,MACrB,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,QAAQ;AAAA,MACzB,WAAW,KAAK,QAAQ;AAAA,MACxB,WAAW,KAAK,QAAQ;AAAA,MACxB,SAAS,KAAK,QAAQ;AAAA,MACtB,YAAY,KAAK,WAAW;AAAA,IAC9B;AAEA,SAAK,cAAc,KAAK,eAAe;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,cAAc,SAGF;AAChB,QAAI,QAAQ,cAAc,QAAW;AACnC,WAAK,QAAQ,YAAY,QAAQ;AACjC,WAAK,iBAAiB,YAAY,QAAQ;AAAA,IAC5C;AACA,QAAI,QAAQ,+BAA+B,QAAW;AACpD,WAAK,QAAQ,6BAA6B,QAAQ;AAClD,WAAK,QAAQ,YAAY,KAAK,KAAK,QAAQ,6BAA6B,iBAAiB;AACzF,WAAK,iBAAiB,YAAY,KAAK,QAAQ;AAAA,IACjD;AAEA,QAAI,KAAK,QAAQ,YAAY,KAAK,aAAa;AAC7C,YAAM,KAAK,YAAY;AAAA,IACzB;AAAA,EACF;AAAA,EAEQ,iBAAyD;AAC/D,QAAI,qBAAqB;AACzB,QAAI,WAAW;AACf,QAAI,qBAAqB;AACzB,QAAI,uBAAuB;AAC3B,QAAI,eAAe;AACnB,UAAM,QAAQ,IAAI,aAA6C,EAAE;AACjE,UAAM,mBAAmB,IAAI;AAAA,MAC3B,KAAK,KAAK,KAAK,QAAQ,sBAAsB,KAAK,QAAQ,UAAU;AAAA,IACtE,EAAE,KAAK,CAAC;AAGR,UAAM,WAAW,OAAO;AAAA,MACtB;AAAA,MACA,wBAAwB,KAAK;AAAA,MAC7B;AAAA,MACA;AAAA,IACF;AACA,UAAM,WAAW,CAAC,YAAgD;AAChE,UAAI,QAAQ,yBAAyB,QAAW;AAC9C,+BAAuB,QAAQ;AAAA,MACjC;AAAA,IACF;AACA,UAAM,mBAAmB,CAAC,UAAkC;AAC1D,UAAI,KAAK,kBAAkB;AACzB,+BAAuB,KAAK,kBAAkB,KAAK;AACnD,aAAK,mBAAmB;AAAA,MAC1B;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM;AAC1B,WAAK;AAAA,IACP;AAEA,UAAM,yBAAyB,MAAc;AAC3C,YAAM,IAAI,KAAK;AACf,WAAK,cAAc;AACnB,aAAO;AAAA,IACT;AAGA,UAAM,mBAAmB,IAAI;AAAA,MAI3B;AAAA,QACE,WAAW,CAAC,OAAO,eAAe;AAChC,cAAI,iBAAiB,YAAY;AAC/B,gBAAI,CAAC,oBAAoB;AACvB;AAAA,YACF;AACA,gBAAI,KAAK,QAAQ,eAAe,MAAM,YAAY;AAChD,yBAAW,MAAM,wDAAwD;AACzE,mBAAK,OAAO,MAAM,wDAAwD;AAC1E;AAAA,YACF;AACA,kBAAM,SAAS;AAAA,cACb;AAAA,cACA;AAAA,cACA;AAAA,cACA,KAAK,QAAQ;AAAA,YACf;AACA,uBAAW,OAAO;AAClB,kCAAsB,OAAO;AAE7B,gBACE,sBACE,KAAK,MAAM,KAAK,QAAQ,uBAAuB,KAAK,QAAQ,UAAU,KACxE,sBACA;AACA,oBAAM,aAAa,iBAAiB,MAAM,GAAG,QAAQ;AACrD,mCAAqB;AACrB,yBAAW,QAAQ,UAAU;AAAA,YAC/B;AAAA,UACF,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,sBAAsB;AAC9C,iBAAK,OAAO,MAAM,oBAAoB;AACtC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,4BAA4B,oBAAoB;AACxE,iBAAK,yBAAyB,MAAM;AACpC,iBAAK,mBAAmB,MAAM;AAC9B,iBAAK,OAAO,MAAM,yDAAyD;AAC3E,mCAAuB;AACvB,iCAAqB;AACrB,4BAAgB;AAChB,gBAAI,gBAAgB,GAAG;AACrB,oBAAM,WACJ,KAAK,MAAO,MAAM,iBAAiB,MAAQ,KAAK,QAAQ,UAAU,IAClE,KAAK,MAAM,KAAK,QAAQ,yBAAyB,KAAK,QAAQ,UAAU;AAC1E,oBAAM,aAAa,KAAK,IAAI,GAAG,WAAW,QAAQ;AAClD,+BAAiB,WAAW,GAAG,YAAY,QAAQ;AACnD,0BAAY;AAAA,YACd;AACA,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,gBAAI,sBAAsB;AACxB,mBAAK,mBAAmB;AACxB,kBAAI,cAAc,MAAM;AAAA,gBACtB,CAAC,UAAU,MAAM,qBAAqB,UAAa,MAAM,mBAAmB;AAAA,cAC9E;AACA,kBAAI,CAAC,aAAa;AAChB,qBAAK,OAAO,MAAM,oCAAoC;AACtD,8BAAc,uBAAuB,QAAQ;AAAA,cAC/C;AACA,oBAAM,IAAI,eAAe,uBAAuB,QAAQ;AACxD,oBAAM,QAAgC;AAAA,gBACpC,MAAM;AAAA,gBACN,WAAW,MAAM;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,kBAAkB,KAAK;AAAA,gBACvB,aAAa,EAAE;AAAA,gBACf,eAAe,EAAE;AAAA,gBACjB,kBAAkB,EAAE;AAAA,gBACpB,mBAAmB,EAAE;AAAA,gBACrB,uBAAuB,EAAE;AAAA,gBACzB,aAAa,EAAE;AAAA,gBACf,aAAa,uBAAuB;AAAA,cACtC;AACA,yBAAW,QAAQ,KAAK;AACxB,qCAAuB;AACvB,mCAAqB;AAAA,YACvB;AACA,iBAAK,yBAAyB;AAAA,UAChC,WAAW,MAAM,SAAS,SAAS;AAAA,UAEnC;AAAA,QACF;AAAA,MACF;AAAA,MACA,EAAE,eAAe,GAAG;AAAA,MACpB,EAAE,eAAe,GAAG;AAAA,IACtB;AAGA,UAAM,mBAAmB,KAAK;AAE9B,QAAI;AACJ,QAAI,KAAK,QAAQ,UAAU;AACzB,YAAM,WAAW;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,kBAAY,SAAS;AACrB,WAAK,cAAc,SAAS;AAAA,IAC9B,OAAO;AACL,kBAAY;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,IAAI,gBAAgE;AAAA,MACvF,WAAW,CAAC,OAAO,eAAe;AAChC,aAAK,MAAM,KAAK,2BAA2B,KAAK;AAEhD,cAAM,UAA+B;AAAA,UACnC,MAAM;AAAA,UACN,WAAW,MAAM;AAAA,UACjB,eAAe,MAAM,mBAAmB;AAAA,UACxC,oBAAoB,MAAM,wBAAwB;AAAA,UAClD,gBAAgB,MAAM,oBAAoB;AAAA,UAC1C,kBAAkB,MAAM,iBAAiB,IAAI;AAAA,UAC7C,iBAAiB,MAAM,iBAAiB,IAAI;AAAA,UAC5C,aAAa,MAAM;AAAA,UACnB,UAAU;AAAA,YACR,eAAe,KAAK,MAAM;AAAA,YAC1B,WAAW,KAAK,MAAM;AAAA,UACxB;AAAA,QACF;AACA,aAAK,MAAM,KAAK,qBAAqB,OAAO;AAE5C,mBAAW,QAAQ,KAAK;AAAA,MAC1B;AAAA,IACF,CAAC;AAGD,WAAO,KAAK,YACT,OAAO,EACP,YAAY,gBAAgB,EAC5B,YAAY,SAAS,EACrB,YAAY,YAAY;AAAA,EAC7B;AAAA,EAEQ,sBAAsB;AAC5B,QAAI,KAAK,YAAY,QAAQ;AAC3B,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C;AAAA,EACF;AAAA,EAEQ,wBAAwB;AAC9B,SAAK,oBAAoB;AAAA,EAC3B;AAAA,EAEQ,gBAAgB,iBAAyC;AAC/D,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY,IAAI,eAAe,iBAAiB,KAAK,QAAQ,UAAU;AAAA,IAC9E;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,SAAiD;AAC/C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,UAAU,OAAyD;AACvE,SAAK,sBAAsB;AAC3B,QAAI,EAAE,iBAAiB,aAAa;AAClC,aAAO,KAAK,YAAY,MAAM,KAAK;AAAA,IACrC,WAAW,KAAK,QAAQ,eAAe,MAAM,YAAY;AACvD,YAAM,YAAY,KAAK,gBAAgB,MAAM,UAAU;AACvD,UAAI,UAAU,cAAc,MAAM,YAAY;AAC5C,cAAM,IAAI,MAAM,wDAAwD;AAAA,MAC1E;AACA,iBAAW,kBAAkB,UAAU,KAAK,KAAK,GAAG;AAClD,cAAM,KAAK,YAAY,MAAM,cAAc;AAAA,MAC7C;AAAA,IACF,OAAO;AACL,YAAM,KAAK,YAAY,MAAM,KAAK;AAAA,IACpC;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,sBAAsB;AAC3B,UAAM,KAAK,YAAY,MAAM,2BAA2B,MAAM,CAAC;AAAA,EACjE;AAAA,EAEA,MAAM,WAA0B;AAC9B,UAAM,KAAK,MAAM;AACjB,UAAM,KAAK,YAAY,MAAM;AAAA,EAC/B;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,CAAC,KAAK,YAAY,OAAQ,OAAM,KAAK,YAAY,MAAM;AAC3D,SAAK,MAAM,aAAa,IAAI;AAAA,EAC9B;AACF;AAMA,SAAS,wBACP,OACA,UACA,SACA,kBAC8C;AAC9C,QAAM,gBAAgB,KAAK,MAAM,mBAAmB,MAAM,UAAU;AAEpE,MAAI,MAAM,oBAAoB,QAAQ,QAAQ;AAC5C,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACtE;AAGA,QAAM,QAAQ,WAAW,MAAM,oBAAoB;AACnD,MAAI,QAAQ,GAAG;AACb,YAAQ,WAAW,GAAG,OAAO,QAAQ;AACrC,gBAAY;AAAA,EACd;AAGA,QAAM,YAAY,IAAI;AAAA,IACpB,MAAM,KAAK;AAAA,IACX,MAAM,KAAK;AAAA,IACX,MAAM,oBAAoB,MAAM;AAAA,EAClC;AAEA,MAAI,MAAM,WAAW,GAAG;AAEtB,aAAS,IAAI,GAAG,IAAI,MAAM,mBAAmB,KAAK;AAChD,UAAI,MAAM;AACV,eAAS,KAAK,GAAG,KAAK,MAAM,UAAU,MAAM;AAC1C,eAAO,UAAU,IAAI,MAAM,WAAW,EAAE,KAAK;AAAA,MAC/C;AACA,cAAQ,WAAW,CAAC,IAAI,KAAK,MAAM,MAAM,MAAM,QAAQ;AAAA,IACzD;AAAA,EACF,OAAO;AAEL,YAAQ,IAAI,WAAW,QAAQ;AAAA,EACjC;AAEA,cAAY,MAAM;AAClB,SAAO,EAAE,UAAU,gBAAgB,MAAM,kBAAkB;AAC7D;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../../src/inference/interruption/interruption_stream.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { Span } from '@opentelemetry/api';\nimport { type ReadableStream, TransformStream } from 'stream/web';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';\nimport { traceTypes } from '../../telemetry/index.js';\nimport { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js';\nimport type { InterruptionDetectionError } from './errors.js';\nimport { createHttpTransport } from './http_transport.js';\nimport { InterruptionCacheEntry } from './interruption_cache_entry.js';\nimport type { AdaptiveInterruptionDetector } from './interruption_detector.js';\nimport {\n type AgentSpeechEnded,\n type AgentSpeechStarted,\n type ApiConnectOptions,\n type Flush,\n type InterruptionOptions,\n type InterruptionSentinel,\n type OverlapSpeechEnded,\n type OverlapSpeechStarted,\n type OverlappingSpeechEvent,\n} from './types.js';\nimport { BoundedCache } from './utils.js';\nimport { createWsTransport } from './ws_transport.js';\n\n// Re-export sentinel types for backwards compatibility\nexport type {\n AgentSpeechEnded,\n AgentSpeechStarted,\n ApiConnectOptions,\n Flush,\n InterruptionSentinel,\n OverlapSpeechEnded,\n OverlapSpeechStarted,\n};\n\nexport class InterruptionStreamSentinel {\n static agentSpeechStarted(): AgentSpeechStarted {\n return { type: 'agent-speech-started' };\n }\n\n static agentSpeechEnded(): AgentSpeechEnded {\n return { type: 'agent-speech-ended' };\n }\n\n static overlapSpeechStarted(\n speechDuration: number,\n startedAt: number,\n userSpeakingSpan?: Span,\n ): OverlapSpeechStarted {\n return { type: 'overlap-speech-started', speechDuration, startedAt, userSpeakingSpan };\n }\n\n static overlapSpeechEnded(endedAt: number): OverlapSpeechEnded {\n return { type: 'overlap-speech-ended', endedAt };\n }\n\n static flush(): Flush {\n return { type: 'flush' };\n }\n}\n\nfunction updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) {\n span.setAttribute(\n traceTypes.ATTR_IS_INTERRUPTION,\n (entry.isInterruption ?? false).toString().toLowerCase(),\n );\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS);\n}\n\nexport class InterruptionStreamBase {\n private inputStream: StreamChannel<InterruptionSentinel | AudioFrame, InterruptionDetectionError>;\n\n private eventStream: ReadableStream<OverlappingSpeechEvent>;\n\n private resampler?: AudioResampler;\n\n private numRequests = 0;\n\n private userSpeakingSpan: Span | undefined;\n\n private overlapSpeechStartedAt: number | undefined;\n\n private options: InterruptionOptions;\n\n private apiOptions: ApiConnectOptions;\n\n private model: AdaptiveInterruptionDetector;\n\n private logger = log();\n\n // Store reconnect function for WebSocket transport\n private wsReconnect?: () => Promise<void>;\n\n // Mutable transport options that can be updated via updateOptions()\n private transportOptions: {\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n sampleRate: number;\n threshold: number;\n minFrames: number;\n timeout: number;\n maxRetries: number;\n };\n\n constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial<ApiConnectOptions>) {\n this.inputStream = createStreamChannel<\n InterruptionSentinel | AudioFrame,\n InterruptionDetectionError\n >();\n\n this.model = model;\n this.options = { ...model.options };\n this.apiOptions = { ...apiConnectDefaults, ...apiOptions };\n\n // Initialize mutable transport options\n this.transportOptions = {\n baseUrl: this.options.baseUrl,\n apiKey: this.options.apiKey,\n apiSecret: this.options.apiSecret,\n sampleRate: this.options.sampleRate,\n threshold: this.options.threshold,\n minFrames: this.options.minFrames,\n timeout: this.options.inferenceTimeout,\n maxRetries: this.apiOptions.maxRetries,\n };\n\n this.eventStream = this.setupTransform();\n }\n\n /**\n * Update stream options. For WebSocket transport, this triggers a reconnection.\n */\n async updateOptions(options: {\n threshold?: number;\n minInterruptionDurationInS?: number;\n }): Promise<void> {\n if (options.threshold !== undefined) {\n this.options.threshold = options.threshold;\n this.transportOptions.threshold = options.threshold;\n }\n if (options.minInterruptionDurationInS !== undefined) {\n this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;\n this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);\n this.transportOptions.minFrames = this.options.minFrames;\n }\n // Trigger WebSocket reconnection if using proxy (WebSocket transport)\n if (this.options.useProxy && this.wsReconnect) {\n await this.wsReconnect();\n }\n }\n\n private setupTransform(): ReadableStream<OverlappingSpeechEvent> {\n let agentSpeechStarted = false;\n let startIdx = 0;\n let accumulatedSamples = 0;\n let overlapSpeechStarted = false;\n let overlapCount = 0;\n const cache = new BoundedCache<number, InterruptionCacheEntry>(10);\n const inferenceS16Data = new Int16Array(\n Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate),\n ).fill(0);\n\n // State accessors for transport\n const getState = () => ({\n overlapSpeechStarted,\n overlapSpeechStartedAt: this.overlapSpeechStartedAt,\n cache,\n overlapCount,\n });\n const setState = (partial: { overlapSpeechStarted?: boolean }) => {\n if (partial.overlapSpeechStarted !== undefined) {\n overlapSpeechStarted = partial.overlapSpeechStarted;\n }\n };\n const handleSpanUpdate = (entry: InterruptionCacheEntry) => {\n if (this.userSpeakingSpan) {\n updateUserSpeakingSpan(this.userSpeakingSpan, entry);\n this.userSpeakingSpan = undefined;\n }\n };\n\n const onRequestSent = () => {\n this.numRequests++;\n };\n\n const getAndResetNumRequests = (): number => {\n const n = this.numRequests;\n this.numRequests = 0;\n return n;\n };\n\n // First transform: process input frames/sentinels and output audio slices or events\n const audioTransformer = new TransformStream<\n InterruptionSentinel | AudioFrame,\n Int16Array | OverlappingSpeechEvent\n >(\n {\n transform: (chunk, controller) => {\n if (chunk instanceof AudioFrame) {\n if (!agentSpeechStarted) {\n return;\n }\n if (this.options.sampleRate !== chunk.sampleRate) {\n controller.error('the sample rate of the input frames must be consistent');\n this.logger.error('the sample rate of the input frames must be consistent');\n return;\n }\n const result = writeToInferenceS16Data(\n chunk,\n startIdx,\n inferenceS16Data,\n this.options.maxAudioDurationInS,\n );\n startIdx = result.startIdx;\n accumulatedSamples += result.samplesWritten;\n\n if (\n accumulatedSamples >=\n Math.floor(this.options.detectionIntervalInS * this.options.sampleRate) &&\n overlapSpeechStarted\n ) {\n const audioSlice = inferenceS16Data.slice(0, startIdx);\n accumulatedSamples = 0;\n controller.enqueue(audioSlice);\n }\n } else if (chunk.type === 'agent-speech-started') {\n this.logger.debug('agent speech started');\n agentSpeechStarted = true;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'agent-speech-ended') {\n this.logger.debug('agent speech ended');\n agentSpeechStarted = false;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) {\n this.overlapSpeechStartedAt = chunk.startedAt;\n this.userSpeakingSpan = chunk.userSpeakingSpan;\n this.logger.debug('overlap speech started, starting interruption inference');\n overlapSpeechStarted = true;\n accumulatedSamples = 0;\n overlapCount += 1;\n if (overlapCount <= 1) {\n const keepSize =\n Math.round((chunk.speechDuration / 1000) * this.options.sampleRate) +\n Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate);\n const shiftCount = Math.max(0, startIdx - keepSize);\n inferenceS16Data.copyWithin(0, shiftCount, startIdx);\n startIdx -= shiftCount;\n }\n cache.clear();\n } else if (chunk.type === 'overlap-speech-ended') {\n this.logger.debug('overlap speech ended');\n if (overlapSpeechStarted) {\n this.userSpeakingSpan = undefined;\n let latestEntry = cache.pop(\n (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0,\n );\n if (!latestEntry) {\n this.logger.debug('no request made for overlap speech');\n latestEntry = InterruptionCacheEntry.default();\n }\n const e = latestEntry ?? InterruptionCacheEntry.default();\n const event: OverlappingSpeechEvent = {\n type: 'overlapping_speech',\n detectedAt: chunk.endedAt,\n isInterruption: false,\n overlapStartedAt: this.overlapSpeechStartedAt,\n speechInput: e.speechInput,\n probabilities: e.probabilities,\n totalDurationInS: e.totalDurationInS,\n detectionDelayInS: e.detectionDelayInS,\n predictionDurationInS: e.predictionDurationInS,\n probability: e.probability,\n numRequests: getAndResetNumRequests(),\n };\n controller.enqueue(event);\n overlapSpeechStarted = false;\n accumulatedSamples = 0;\n }\n this.overlapSpeechStartedAt = undefined;\n } else if (chunk.type === 'flush') {\n // no-op\n }\n },\n },\n { highWaterMark: 32 },\n { highWaterMark: 32 },\n );\n\n // Second transform: transport layer (HTTP or WebSocket based on useProxy)\n const transportOptions = this.transportOptions;\n\n let transport: TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>;\n if (this.options.useProxy) {\n const wsResult = createWsTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n onRequestSent,\n getAndResetNumRequests,\n );\n transport = wsResult.transport;\n this.wsReconnect = wsResult.reconnect;\n } else {\n transport = createHttpTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n getAndResetNumRequests,\n );\n }\n\n const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({\n transform: (chunk, controller) => {\n this.model.emit('overlapping_speech', chunk);\n\n const metrics: InterruptionMetrics = {\n type: 'interruption_metrics',\n timestamp: chunk.detectedAt,\n totalDuration: chunk.totalDurationInS * 1000,\n predictionDuration: chunk.predictionDurationInS * 1000,\n detectionDelay: chunk.detectionDelayInS * 1000,\n numInterruptions: chunk.isInterruption ? 1 : 0,\n numBackchannels: chunk.isInterruption ? 0 : 1,\n numRequests: chunk.numRequests,\n metadata: {\n modelProvider: this.model.provider,\n modelName: this.model.model,\n },\n };\n this.model.emit('metrics_collected', metrics);\n\n controller.enqueue(chunk);\n },\n });\n\n // Pipeline: input -> audioTransformer -> transport -> eventEmitter -> eventStream\n return this.inputStream\n .stream()\n .pipeThrough(audioTransformer)\n .pipeThrough(transport)\n .pipeThrough(eventEmitter);\n }\n\n private ensureInputNotEnded() {\n if (this.inputStream.closed) {\n throw new Error('input stream is closed');\n }\n }\n\n private ensureStreamsNotEnded() {\n this.ensureInputNotEnded();\n }\n\n private getResamplerFor(inputSampleRate: number): AudioResampler {\n if (!this.resampler) {\n this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate);\n }\n return this.resampler;\n }\n\n stream(): ReadableStream<OverlappingSpeechEvent> {\n return this.eventStream;\n }\n\n async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise<void> {\n this.ensureStreamsNotEnded();\n if (!(frame instanceof AudioFrame)) {\n return this.inputStream.write(frame);\n } else if (this.options.sampleRate !== frame.sampleRate) {\n const resampler = this.getResamplerFor(frame.sampleRate);\n if (resampler.inputRate !== frame.sampleRate) {\n throw new Error('the sample rate of the input frames must be consistent');\n }\n for (const resampledFrame of resampler.push(frame)) {\n await this.inputStream.write(resampledFrame);\n }\n } else {\n await this.inputStream.write(frame);\n }\n }\n\n async flush(): Promise<void> {\n this.ensureStreamsNotEnded();\n await this.inputStream.write(InterruptionStreamSentinel.flush());\n }\n\n async endInput(): Promise<void> {\n await this.flush();\n await this.inputStream.close();\n }\n\n async close(): Promise<void> {\n if (!this.inputStream.closed) await this.inputStream.close();\n this.model.removeStream(this);\n }\n}\n\n/**\n * Write the audio frame to the output data array and return the new start index\n * and the number of samples written.\n */\nfunction writeToInferenceS16Data(\n frame: AudioFrame,\n startIdx: number,\n outData: Int16Array,\n maxAudioDuration: number,\n): { startIdx: number; samplesWritten: number } {\n const maxWindowSize = Math.floor(maxAudioDuration * frame.sampleRate);\n\n if (frame.samplesPerChannel > outData.length) {\n throw new Error('frame samples are greater than the max window size');\n }\n\n // Shift the data to the left if the window would overflow\n const shift = startIdx + frame.samplesPerChannel - maxWindowSize;\n if (shift > 0) {\n outData.copyWithin(0, shift, startIdx);\n startIdx -= shift;\n }\n\n // Get the frame data as Int16Array\n const frameData = new Int16Array(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.samplesPerChannel * frame.channels,\n );\n\n if (frame.channels > 1) {\n // Mix down multiple channels to mono by averaging\n for (let i = 0; i < frame.samplesPerChannel; i++) {\n let sum = 0;\n for (let ch = 0; ch < frame.channels; ch++) {\n sum += frameData[i * frame.channels + ch] ?? 0;\n }\n outData[startIdx + i] = Math.floor(sum / frame.channels);\n }\n } else {\n // Single channel - copy directly\n outData.set(frameData, startIdx);\n }\n\n startIdx += frame.samplesPerChannel;\n return { startIdx, samplesWritten: frame.samplesPerChannel };\n}\n"],"mappings":"AAGA,SAAS,YAAY,sBAAsB;AAE3C,SAA8B,uBAAuB;AACrD,SAAS,WAAW;AAEpB,SAA6B,2BAA2B;AACxD,SAAS,kBAAkB;AAC3B,SAAS,mBAAmB,0BAA0B;AAEtD,SAAS,2BAA2B;AACpC,SAAS,8BAA8B;AAEvC;AAAA,OAUO;AACP,SAAS,oBAAoB;AAC7B,SAAS,yBAAyB;AAa3B,MAAM,2BAA2B;AAAA,EACtC,OAAO,qBAAyC;AAC9C,WAAO,EAAE,MAAM,uBAAuB;AAAA,EACxC;AAAA,EAEA,OAAO,mBAAqC;AAC1C,WAAO,EAAE,MAAM,qBAAqB;AAAA,EACtC;AAAA,EAEA,OAAO,qBACL,gBACA,WACA,kBACsB;AACtB,WAAO,EAAE,MAAM,0BAA0B,gBAAgB,WAAW,iBAAiB;AAAA,EACvF;AAAA,EAEA,OAAO,mBAAmB,SAAqC;AAC7D,WAAO,EAAE,MAAM,wBAAwB,QAAQ;AAAA,EACjD;AAAA,EAEA,OAAO,QAAe;AACpB,WAAO,EAAE,MAAM,QAAQ;AAAA,EACzB;AACF;AAEA,SAAS,uBAAuB,MAAY,OAA+B;AACzE,OAAK;AAAA,IACH,WAAW;AAAA,KACV,MAAM,kBAAkB,OAAO,SAAS,EAAE,YAAY;AAAA,EACzD;AACA,OAAK,aAAa,WAAW,+BAA+B,MAAM,WAAW;AAC7E,OAAK,aAAa,WAAW,kCAAkC,MAAM,gBAAgB;AACrF,OAAK,aAAa,WAAW,uCAAuC,MAAM,qBAAqB;AAC/F,OAAK,aAAa,WAAW,mCAAmC,MAAM,iBAAiB;AACzF;AAEO,MAAM,uBAAuB;AAAA,EAC1B;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,cAAc;AAAA,EAEd;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,SAAS,IAAI;AAAA;AAAA,EAGb;AAAA;AAAA,EAGA;AAAA,EAWR,YAAY,OAAqC,YAAwC;AACvF,SAAK,cAAc,oBAGjB;AAEF,SAAK,QAAQ;AACb,SAAK,UAAU,EAAE,GAAG,MAAM,QAAQ;AAClC,SAAK,aAAa,EAAE,GAAG,oBAAoB,GAAG,WAAW;AAGzD,SAAK,mBAAmB;AAAA,MACtB,SAAS,KAAK,QAAQ;AAAA,MACtB,QAAQ,KAAK,QAAQ;AAAA,MACrB,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,QAAQ;AAAA,MACzB,WAAW,KAAK,QAAQ;AAAA,MACxB,WAAW,KAAK,QAAQ;AAAA,MACxB,SAAS,KAAK,QAAQ;AAAA,MACtB,YAAY,KAAK,WAAW;AAAA,IAC9B;AAEA,SAAK,cAAc,KAAK,eAAe;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,cAAc,SAGF;AAChB,QAAI,QAAQ,cAAc,QAAW;AACnC,WAAK,QAAQ,YAAY,QAAQ;AACjC,WAAK,iBAAiB,YAAY,QAAQ;AAAA,IAC5C;AACA,QAAI,QAAQ,+BAA+B,QAAW;AACpD,WAAK,QAAQ,6BAA6B,QAAQ;AAClD,WAAK,QAAQ,YAAY,KAAK,KAAK,QAAQ,6BAA6B,iBAAiB;AACzF,WAAK,iBAAiB,YAAY,KAAK,QAAQ;AAAA,IACjD;AAEA,QAAI,KAAK,QAAQ,YAAY,KAAK,aAAa;AAC7C,YAAM,KAAK,YAAY;AAAA,IACzB;AAAA,EACF;AAAA,EAEQ,iBAAyD;AAC/D,QAAI,qBAAqB;AACzB,QAAI,WAAW;AACf,QAAI,qBAAqB;AACzB,QAAI,uBAAuB;AAC3B,QAAI,eAAe;AACnB,UAAM,QAAQ,IAAI,aAA6C,EAAE;AACjE,UAAM,mBAAmB,IAAI;AAAA,MAC3B,KAAK,KAAK,KAAK,QAAQ,sBAAsB,KAAK,QAAQ,UAAU;AAAA,IACtE,EAAE,KAAK,CAAC;AAGR,UAAM,WAAW,OAAO;AAAA,MACtB;AAAA,MACA,wBAAwB,KAAK;AAAA,MAC7B;AAAA,MACA;AAAA,IACF;AACA,UAAM,WAAW,CAAC,YAAgD;AAChE,UAAI,QAAQ,yBAAyB,QAAW;AAC9C,+BAAuB,QAAQ;AAAA,MACjC;AAAA,IACF;AACA,UAAM,mBAAmB,CAAC,UAAkC;AAC1D,UAAI,KAAK,kBAAkB;AACzB,+BAAuB,KAAK,kBAAkB,KAAK;AACnD,aAAK,mBAAmB;AAAA,MAC1B;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM;AAC1B,WAAK;AAAA,IACP;AAEA,UAAM,yBAAyB,MAAc;AAC3C,YAAM,IAAI,KAAK;AACf,WAAK,cAAc;AACnB,aAAO;AAAA,IACT;AAGA,UAAM,mBAAmB,IAAI;AAAA,MAI3B;AAAA,QACE,WAAW,CAAC,OAAO,eAAe;AAChC,cAAI,iBAAiB,YAAY;AAC/B,gBAAI,CAAC,oBAAoB;AACvB;AAAA,YACF;AACA,gBAAI,KAAK,QAAQ,eAAe,MAAM,YAAY;AAChD,yBAAW,MAAM,wDAAwD;AACzE,mBAAK,OAAO,MAAM,wDAAwD;AAC1E;AAAA,YACF;AACA,kBAAM,SAAS;AAAA,cACb;AAAA,cACA;AAAA,cACA;AAAA,cACA,KAAK,QAAQ;AAAA,YACf;AACA,uBAAW,OAAO;AAClB,kCAAsB,OAAO;AAE7B,gBACE,sBACE,KAAK,MAAM,KAAK,QAAQ,uBAAuB,KAAK,QAAQ,UAAU,KACxE,sBACA;AACA,oBAAM,aAAa,iBAAiB,MAAM,GAAG,QAAQ;AACrD,mCAAqB;AACrB,yBAAW,QAAQ,UAAU;AAAA,YAC/B;AAAA,UACF,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,sBAAsB;AAC9C,iBAAK,OAAO,MAAM,oBAAoB;AACtC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,4BAA4B,oBAAoB;AACxE,iBAAK,yBAAyB,MAAM;AACpC,iBAAK,mBAAmB,MAAM;AAC9B,iBAAK,OAAO,MAAM,yDAAyD;AAC3E,mCAAuB;AACvB,iCAAqB;AACrB,4BAAgB;AAChB,gBAAI,gBAAgB,GAAG;AACrB,oBAAM,WACJ,KAAK,MAAO,MAAM,iBAAiB,MAAQ,KAAK,QAAQ,UAAU,IAClE,KAAK,MAAM,KAAK,QAAQ,yBAAyB,KAAK,QAAQ,UAAU;AAC1E,oBAAM,aAAa,KAAK,IAAI,GAAG,WAAW,QAAQ;AAClD,+BAAiB,WAAW,GAAG,YAAY,QAAQ;AACnD,0BAAY;AAAA,YACd;AACA,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,gBAAI,sBAAsB;AACxB,mBAAK,mBAAmB;AACxB,kBAAI,cAAc,MAAM;AAAA,gBACtB,CAAC,UAAU,MAAM,qBAAqB,UAAa,MAAM,mBAAmB;AAAA,cAC9E;AACA,kBAAI,CAAC,aAAa;AAChB,qBAAK,OAAO,MAAM,oCAAoC;AACtD,8BAAc,uBAAuB,QAAQ;AAAA,cAC/C;AACA,oBAAM,IAAI,eAAe,uBAAuB,QAAQ;AACxD,oBAAM,QAAgC;AAAA,gBACpC,MAAM;AAAA,gBACN,YAAY,MAAM;AAAA,gBAClB,gBAAgB;AAAA,gBAChB,kBAAkB,KAAK;AAAA,gBACvB,aAAa,EAAE;AAAA,gBACf,eAAe,EAAE;AAAA,gBACjB,kBAAkB,EAAE;AAAA,gBACpB,mBAAmB,EAAE;AAAA,gBACrB,uBAAuB,EAAE;AAAA,gBACzB,aAAa,EAAE;AAAA,gBACf,aAAa,uBAAuB;AAAA,cACtC;AACA,yBAAW,QAAQ,KAAK;AACxB,qCAAuB;AACvB,mCAAqB;AAAA,YACvB;AACA,iBAAK,yBAAyB;AAAA,UAChC,WAAW,MAAM,SAAS,SAAS;AAAA,UAEnC;AAAA,QACF;AAAA,MACF;AAAA,MACA,EAAE,eAAe,GAAG;AAAA,MACpB,EAAE,eAAe,GAAG;AAAA,IACtB;AAGA,UAAM,mBAAmB,KAAK;AAE9B,QAAI;AACJ,QAAI,KAAK,QAAQ,UAAU;AACzB,YAAM,WAAW;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,kBAAY,SAAS;AACrB,WAAK,cAAc,SAAS;AAAA,IAC9B,OAAO;AACL,kBAAY;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,IAAI,gBAAgE;AAAA,MACvF,WAAW,CAAC,OAAO,eAAe;AAChC,aAAK,MAAM,KAAK,sBAAsB,KAAK;AAE3C,cAAM,UAA+B;AAAA,UACnC,MAAM;AAAA,UACN,WAAW,MAAM;AAAA,UACjB,eAAe,MAAM,mBAAmB;AAAA,UACxC,oBAAoB,MAAM,wBAAwB;AAAA,UAClD,gBAAgB,MAAM,oBAAoB;AAAA,UAC1C,kBAAkB,MAAM,iBAAiB,IAAI;AAAA,UAC7C,iBAAiB,MAAM,iBAAiB,IAAI;AAAA,UAC5C,aAAa,MAAM;AAAA,UACnB,UAAU;AAAA,YACR,eAAe,KAAK,MAAM;AAAA,YAC1B,WAAW,KAAK,MAAM;AAAA,UACxB;AAAA,QACF;AACA,aAAK,MAAM,KAAK,qBAAqB,OAAO;AAE5C,mBAAW,QAAQ,KAAK;AAAA,MAC1B;AAAA,IACF,CAAC;AAGD,WAAO,KAAK,YACT,OAAO,EACP,YAAY,gBAAgB,EAC5B,YAAY,SAAS,EACrB,YAAY,YAAY;AAAA,EAC7B;AAAA,EAEQ,sBAAsB;AAC5B,QAAI,KAAK,YAAY,QAAQ;AAC3B,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C;AAAA,EACF;AAAA,EAEQ,wBAAwB;AAC9B,SAAK,oBAAoB;AAAA,EAC3B;AAAA,EAEQ,gBAAgB,iBAAyC;AAC/D,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY,IAAI,eAAe,iBAAiB,KAAK,QAAQ,UAAU;AAAA,IAC9E;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,SAAiD;AAC/C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,UAAU,OAAyD;AACvE,SAAK,sBAAsB;AAC3B,QAAI,EAAE,iBAAiB,aAAa;AAClC,aAAO,KAAK,YAAY,MAAM,KAAK;AAAA,IACrC,WAAW,KAAK,QAAQ,eAAe,MAAM,YAAY;AACvD,YAAM,YAAY,KAAK,gBAAgB,MAAM,UAAU;AACvD,UAAI,UAAU,cAAc,MAAM,YAAY;AAC5C,cAAM,IAAI,MAAM,wDAAwD;AAAA,MAC1E;AACA,iBAAW,kBAAkB,UAAU,KAAK,KAAK,GAAG;AAClD,cAAM,KAAK,YAAY,MAAM,cAAc;AAAA,MAC7C;AAAA,IACF,OAAO;AACL,YAAM,KAAK,YAAY,MAAM,KAAK;AAAA,IACpC;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,sBAAsB;AAC3B,UAAM,KAAK,YAAY,MAAM,2BAA2B,MAAM,CAAC;AAAA,EACjE;AAAA,EAEA,MAAM,WAA0B;AAC9B,UAAM,KAAK,MAAM;AACjB,UAAM,KAAK,YAAY,MAAM;AAAA,EAC/B;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,CAAC,KAAK,YAAY,OAAQ,OAAM,KAAK,YAAY,MAAM;AAC3D,SAAK,MAAM,aAAa,IAAI;AAAA,EAC9B;AACF;AAMA,SAAS,wBACP,OACA,UACA,SACA,kBAC8C;AAC9C,QAAM,gBAAgB,KAAK,MAAM,mBAAmB,MAAM,UAAU;AAEpE,MAAI,MAAM,oBAAoB,QAAQ,QAAQ;AAC5C,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACtE;AAGA,QAAM,QAAQ,WAAW,MAAM,oBAAoB;AACnD,MAAI,QAAQ,GAAG;AACb,YAAQ,WAAW,GAAG,OAAO,QAAQ;AACrC,gBAAY;AAAA,EACd;AAGA,QAAM,YAAY,IAAI;AAAA,IACpB,MAAM,KAAK;AAAA,IACX,MAAM,KAAK;AAAA,IACX,MAAM,oBAAoB,MAAM;AAAA,EAClC;AAEA,MAAI,MAAM,WAAW,GAAG;AAEtB,aAAS,IAAI,GAAG,IAAI,MAAM,mBAAmB,KAAK;AAChD,UAAI,MAAM;AACV,eAAS,KAAK,GAAG,KAAK,MAAM,UAAU,MAAM;AAC1C,eAAO,UAAU,IAAI,MAAM,WAAW,EAAE,KAAK;AAAA,MAC/C;AACA,cAAQ,WAAW,CAAC,IAAI,KAAK,MAAM,MAAM,MAAM,QAAQ;AAAA,IACzD;AAAA,EACF,OAAO;AAEL,YAAQ,IAAI,WAAW,QAAQ;AAAA,EACjC;AAEA,cAAY,MAAM;AAClB,SAAO,EAAE,UAAU,gBAAgB,MAAM,kBAAkB;AAC7D;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/inference/interruption/types.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { Span } from '@opentelemetry/api';\n\nexport interface OverlappingSpeechEvent {\n type: '
|
|
1
|
+
{"version":3,"sources":["../../../src/inference/interruption/types.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { Span } from '@opentelemetry/api';\n\nexport interface OverlappingSpeechEvent {\n type: 'overlapping_speech';\n detectedAt: number;\n isInterruption: boolean;\n totalDurationInS: number;\n predictionDurationInS: number;\n detectionDelayInS: number;\n overlapStartedAt?: number;\n speechInput?: Int16Array;\n probabilities?: number[];\n probability: number;\n numRequests: number;\n}\n\n/**\n * Configuration options for interruption detection.\n */\nexport interface InterruptionOptions {\n sampleRate: number;\n threshold: number;\n minFrames: number;\n maxAudioDurationInS: number;\n audioPrefixDurationInS: number;\n detectionIntervalInS: number;\n inferenceTimeout: number;\n minInterruptionDurationInS: number;\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n useProxy: boolean;\n}\n\n/**\n * API connection options for transport layers.\n */\nexport interface ApiConnectOptions {\n maxRetries: number;\n retryInterval: number;\n timeout: number;\n}\n\n// Sentinel types for stream control signals\n\nexport interface AgentSpeechStarted {\n type: 'agent-speech-started';\n}\n\nexport interface AgentSpeechEnded {\n type: 'agent-speech-ended';\n}\n\nexport interface OverlapSpeechStarted {\n type: 'overlap-speech-started';\n /** Duration of the speech segment in milliseconds (matches VADEvent.speechDuration units). */\n speechDuration: number;\n /** Absolute timestamp (ms) when overlap speech started, computed at call-site. */\n startedAt: number;\n userSpeakingSpan?: Span;\n}\n\nexport interface OverlapSpeechEnded {\n type: 'overlap-speech-ended';\n /** Absolute timestamp (ms) when overlap speech ended, used as the non-interruption event timestamp. */\n endedAt: number;\n}\n\nexport interface Flush {\n type: 'flush';\n}\n\n/**\n * Union type for all stream control signals.\n */\nexport type InterruptionSentinel =\n | AgentSpeechStarted\n | AgentSpeechEnded\n | OverlapSpeechStarted\n | OverlapSpeechEnded\n | Flush;\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { Span } from '@opentelemetry/api';
|
|
2
2
|
export interface OverlappingSpeechEvent {
|
|
3
|
-
type: '
|
|
4
|
-
|
|
3
|
+
type: 'overlapping_speech';
|
|
4
|
+
detectedAt: number;
|
|
5
5
|
isInterruption: boolean;
|
|
6
6
|
totalDurationInS: number;
|
|
7
7
|
predictionDurationInS: number;
|