@livekit/agents 1.0.48 → 1.1.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.cjs +27 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +9 -0
- package/dist/constants.d.ts +9 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +18 -0
- package/dist/constants.js.map +1 -1
- package/dist/inference/api_protos.d.cts +71 -71
- package/dist/inference/api_protos.d.ts +71 -71
- package/dist/inference/interruption/defaults.cjs +81 -0
- package/dist/inference/interruption/defaults.cjs.map +1 -0
- package/dist/inference/interruption/defaults.d.cts +19 -0
- package/dist/inference/interruption/defaults.d.ts +19 -0
- package/dist/inference/interruption/defaults.d.ts.map +1 -0
- package/dist/inference/interruption/defaults.js +46 -0
- package/dist/inference/interruption/defaults.js.map +1 -0
- package/dist/inference/interruption/errors.cjs +44 -0
- package/dist/inference/interruption/errors.cjs.map +1 -0
- package/dist/inference/interruption/errors.d.cts +12 -0
- package/dist/inference/interruption/errors.d.ts +12 -0
- package/dist/inference/interruption/errors.d.ts.map +1 -0
- package/dist/inference/interruption/errors.js +20 -0
- package/dist/inference/interruption/errors.js.map +1 -0
- package/dist/inference/interruption/http_transport.cjs +147 -0
- package/dist/inference/interruption/http_transport.cjs.map +1 -0
- package/dist/inference/interruption/http_transport.d.cts +63 -0
- package/dist/inference/interruption/http_transport.d.ts +63 -0
- package/dist/inference/interruption/http_transport.d.ts.map +1 -0
- package/dist/inference/interruption/http_transport.js +121 -0
- package/dist/inference/interruption/http_transport.js.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.js +34 -0
- package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
- package/dist/inference/interruption/interruption_detector.cjs +181 -0
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
- package/dist/inference/interruption/interruption_detector.d.cts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_detector.js +147 -0
- package/dist/inference/interruption/interruption_detector.js.map +1 -0
- package/dist/inference/interruption/interruption_stream.cjs +368 -0
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
- package/dist/inference/interruption/interruption_stream.d.cts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_stream.js +344 -0
- package/dist/inference/interruption/interruption_stream.js.map +1 -0
- package/dist/inference/interruption/types.cjs +17 -0
- package/dist/inference/interruption/types.cjs.map +1 -0
- package/dist/inference/interruption/types.d.cts +66 -0
- package/dist/inference/interruption/types.d.ts +66 -0
- package/dist/inference/interruption/types.d.ts.map +1 -0
- package/dist/inference/interruption/types.js +1 -0
- package/dist/inference/interruption/types.js.map +1 -0
- package/dist/inference/interruption/utils.cjs +130 -0
- package/dist/inference/interruption/utils.cjs.map +1 -0
- package/dist/inference/interruption/utils.d.cts +41 -0
- package/dist/inference/interruption/utils.d.ts +41 -0
- package/dist/inference/interruption/utils.d.ts.map +1 -0
- package/dist/inference/interruption/utils.js +105 -0
- package/dist/inference/interruption/utils.js.map +1 -0
- package/dist/inference/interruption/utils.test.cjs +105 -0
- package/dist/inference/interruption/utils.test.cjs.map +1 -0
- package/dist/inference/interruption/utils.test.js +104 -0
- package/dist/inference/interruption/utils.test.js.map +1 -0
- package/dist/inference/interruption/ws_transport.cjs +329 -0
- package/dist/inference/interruption/ws_transport.cjs.map +1 -0
- package/dist/inference/interruption/ws_transport.d.cts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
- package/dist/inference/interruption/ws_transport.js +295 -0
- package/dist/inference/interruption/ws_transport.js.map +1 -0
- package/dist/inference/llm.cjs +14 -10
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +2 -1
- package/dist/inference/llm.d.ts +2 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +8 -10
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +7 -2
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +2 -0
- package/dist/inference/stt.d.ts +2 -0
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +8 -3
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs +7 -2
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -0
- package/dist/inference/tts.d.ts +2 -0
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +8 -3
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/utils.cjs +26 -7
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +13 -0
- package/dist/inference/utils.d.ts +13 -0
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +18 -2
- package/dist/inference/utils.js.map +1 -1
- package/dist/llm/chat_context.cjs +20 -2
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +19 -1
- package/dist/llm/chat_context.d.ts +19 -1
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +20 -2
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +16 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +9 -0
- package/dist/llm/llm.d.ts +9 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +16 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/realtime.cjs +3 -0
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +1 -0
- package/dist/llm/realtime.d.ts +1 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js +3 -0
- package/dist/llm/realtime.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +45 -1
- package/dist/metrics/base.d.ts +45 -1
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/index.cjs +5 -0
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -1
- package/dist/metrics/index.d.ts +2 -1
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +6 -0
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/model_usage.cjs +189 -0
- package/dist/metrics/model_usage.cjs.map +1 -0
- package/dist/metrics/model_usage.d.cts +92 -0
- package/dist/metrics/model_usage.d.ts +92 -0
- package/dist/metrics/model_usage.d.ts.map +1 -0
- package/dist/metrics/model_usage.js +164 -0
- package/dist/metrics/model_usage.js.map +1 -0
- package/dist/metrics/model_usage.test.cjs +474 -0
- package/dist/metrics/model_usage.test.cjs.map +1 -0
- package/dist/metrics/model_usage.test.js +476 -0
- package/dist/metrics/model_usage.test.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +3 -0
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +9 -0
- package/dist/metrics/usage_collector.d.ts +9 -0
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +3 -0
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +9 -0
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +9 -0
- package/dist/metrics/utils.js.map +1 -1
- package/dist/stream/multi_input_stream.test.cjs +4 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -1
- package/dist/stream/multi_input_stream.test.js +5 -1
- package/dist/stream/multi_input_stream.test.js.map +1 -1
- package/dist/stream/stream_channel.cjs +31 -0
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +4 -2
- package/dist/stream/stream_channel.d.ts +4 -2
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +31 -0
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/stt/stt.cjs +34 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +22 -0
- package/dist/stt/stt.d.ts +22 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +34 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/otel_http_exporter.cjs +24 -5
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -1
- package/dist/telemetry/otel_http_exporter.d.cts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -1
- package/dist/telemetry/otel_http_exporter.js +24 -5
- package/dist/telemetry/otel_http_exporter.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +5 -5
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +9 -5
- package/dist/telemetry/trace_types.d.ts +9 -5
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +5 -5
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/telemetry/traces.cjs +47 -8
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +47 -8
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +64 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +34 -0
- package/dist/tts/tts.d.ts +34 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +64 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +25 -4
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -2
- package/dist/voice/agent.d.ts +10 -2
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +25 -4
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +261 -36
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +20 -6
- package/dist/voice/agent_activity.d.ts +20 -6
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +262 -37
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +105 -48
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +90 -20
- package/dist/voice/agent_session.d.ts +90 -20
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +105 -46
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +287 -6
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +42 -3
- package/dist/voice/audio_recognition.d.ts +42 -3
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +289 -7
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/client_events.cjs +554 -0
- package/dist/voice/client_events.cjs.map +1 -0
- package/dist/voice/client_events.d.cts +195 -0
- package/dist/voice/client_events.d.ts +195 -0
- package/dist/voice/client_events.d.ts.map +1 -0
- package/dist/voice/client_events.js +548 -0
- package/dist/voice/client_events.js.map +1 -0
- package/dist/voice/events.cjs +1 -0
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +8 -5
- package/dist/voice/events.d.ts +8 -5
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -0
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +43 -8
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +3 -3
- package/dist/voice/generation.d.ts +3 -3
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -8
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +1 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/report.cjs +20 -8
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -0
- package/dist/voice/report.d.ts +5 -0
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +20 -8
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +106 -0
- package/dist/voice/report.test.cjs.map +1 -0
- package/dist/voice/report.test.js +105 -0
- package/dist/voice/report.test.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +5 -39
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +4 -9
- package/dist/voice/room_io/room_io.d.ts +4 -9
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +5 -40
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/endpointing.cjs +33 -0
- package/dist/voice/turn_config/endpointing.cjs.map +1 -0
- package/dist/voice/turn_config/endpointing.d.cts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
- package/dist/voice/turn_config/endpointing.js +9 -0
- package/dist/voice/turn_config/endpointing.js.map +1 -0
- package/dist/voice/turn_config/interruption.cjs +37 -0
- package/dist/voice/turn_config/interruption.cjs.map +1 -0
- package/dist/voice/turn_config/interruption.d.cts +53 -0
- package/dist/voice/turn_config/interruption.d.ts +53 -0
- package/dist/voice/turn_config/interruption.d.ts.map +1 -0
- package/dist/voice/turn_config/interruption.js +13 -0
- package/dist/voice/turn_config/interruption.js.map +1 -0
- package/dist/voice/turn_config/turn_handling.cjs +35 -0
- package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
- package/dist/voice/turn_config/turn_handling.d.cts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
- package/dist/voice/turn_config/turn_handling.js +11 -0
- package/dist/voice/turn_config/turn_handling.js.map +1 -0
- package/dist/voice/turn_config/utils.cjs +97 -0
- package/dist/voice/turn_config/utils.cjs.map +1 -0
- package/dist/voice/turn_config/utils.d.cts +25 -0
- package/dist/voice/turn_config/utils.d.ts +25 -0
- package/dist/voice/turn_config/utils.d.ts.map +1 -0
- package/dist/voice/turn_config/utils.js +73 -0
- package/dist/voice/turn_config/utils.js.map +1 -0
- package/dist/voice/turn_config/utils.test.cjs +86 -0
- package/dist/voice/turn_config/utils.test.cjs.map +1 -0
- package/dist/voice/turn_config/utils.test.js +85 -0
- package/dist/voice/turn_config/utils.test.js.map +1 -0
- package/dist/voice/wire_format.cjs +798 -0
- package/dist/voice/wire_format.cjs.map +1 -0
- package/dist/voice/wire_format.d.cts +5503 -0
- package/dist/voice/wire_format.d.ts +5503 -0
- package/dist/voice/wire_format.d.ts.map +1 -0
- package/dist/voice/wire_format.js +728 -0
- package/dist/voice/wire_format.js.map +1 -0
- package/package.json +2 -1
- package/src/constants.ts +13 -0
- package/src/inference/interruption/defaults.ts +51 -0
- package/src/inference/interruption/errors.ts +25 -0
- package/src/inference/interruption/http_transport.ts +187 -0
- package/src/inference/interruption/interruption_cache_entry.ts +50 -0
- package/src/inference/interruption/interruption_detector.ts +188 -0
- package/src/inference/interruption/interruption_stream.ts +467 -0
- package/src/inference/interruption/types.ts +84 -0
- package/src/inference/interruption/utils.test.ts +132 -0
- package/src/inference/interruption/utils.ts +137 -0
- package/src/inference/interruption/ws_transport.ts +402 -0
- package/src/inference/llm.ts +9 -12
- package/src/inference/stt.ts +10 -3
- package/src/inference/tts.ts +10 -3
- package/src/inference/utils.ts +29 -1
- package/src/llm/chat_context.ts +40 -2
- package/src/llm/index.ts +1 -0
- package/src/llm/llm.ts +16 -0
- package/src/llm/realtime.ts +4 -0
- package/src/metrics/base.ts +48 -1
- package/src/metrics/index.ts +11 -0
- package/src/metrics/model_usage.test.ts +545 -0
- package/src/metrics/model_usage.ts +262 -0
- package/src/metrics/usage_collector.ts +11 -0
- package/src/metrics/utils.ts +11 -0
- package/src/stream/multi_input_stream.test.ts +6 -1
- package/src/stream/stream_channel.ts +34 -2
- package/src/stt/stt.ts +38 -0
- package/src/telemetry/otel_http_exporter.ts +28 -5
- package/src/telemetry/trace_types.ts +11 -8
- package/src/telemetry/traces.ts +111 -54
- package/src/tts/tts.ts +69 -1
- package/src/voice/agent.ts +30 -3
- package/src/voice/agent_activity.ts +327 -28
- package/src/voice/agent_session.ts +207 -59
- package/src/voice/audio_recognition.ts +385 -9
- package/src/voice/client_events.ts +838 -0
- package/src/voice/events.ts +14 -4
- package/src/voice/generation.ts +52 -9
- package/src/voice/index.ts +1 -0
- package/src/voice/report.test.ts +117 -0
- package/src/voice/report.ts +29 -6
- package/src/voice/room_io/room_io.ts +7 -61
- package/src/voice/turn_config/endpointing.ts +33 -0
- package/src/voice/turn_config/interruption.ts +56 -0
- package/src/voice/turn_config/turn_handling.ts +45 -0
- package/src/voice/turn_config/utils.test.ts +100 -0
- package/src/voice/turn_config/utils.ts +103 -0
- package/src/voice/wire_format.ts +827 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { AudioFrame, AudioResampler } from '@livekit/rtc-node';
|
|
5
|
+
import type { Span } from '@opentelemetry/api';
|
|
6
|
+
import { type ReadableStream, TransformStream } from 'stream/web';
|
|
7
|
+
import { log } from '../../log.js';
|
|
8
|
+
import type { InterruptionMetrics } from '../../metrics/base.js';
|
|
9
|
+
import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';
|
|
10
|
+
import { traceTypes } from '../../telemetry/index.js';
|
|
11
|
+
import { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js';
|
|
12
|
+
import type { InterruptionDetectionError } from './errors.js';
|
|
13
|
+
import { createHttpTransport } from './http_transport.js';
|
|
14
|
+
import { InterruptionCacheEntry } from './interruption_cache_entry.js';
|
|
15
|
+
import type { AdaptiveInterruptionDetector } from './interruption_detector.js';
|
|
16
|
+
import {
|
|
17
|
+
type AgentSpeechEnded,
|
|
18
|
+
type AgentSpeechStarted,
|
|
19
|
+
type ApiConnectOptions,
|
|
20
|
+
type Flush,
|
|
21
|
+
type InterruptionOptions,
|
|
22
|
+
type InterruptionSentinel,
|
|
23
|
+
type OverlapSpeechEnded,
|
|
24
|
+
type OverlapSpeechStarted,
|
|
25
|
+
type OverlappingSpeechEvent,
|
|
26
|
+
} from './types.js';
|
|
27
|
+
import { BoundedCache } from './utils.js';
|
|
28
|
+
import { createWsTransport } from './ws_transport.js';
|
|
29
|
+
|
|
30
|
+
// Re-export sentinel types for backwards compatibility
|
|
31
|
+
export type {
|
|
32
|
+
AgentSpeechEnded,
|
|
33
|
+
AgentSpeechStarted,
|
|
34
|
+
ApiConnectOptions,
|
|
35
|
+
Flush,
|
|
36
|
+
InterruptionSentinel,
|
|
37
|
+
OverlapSpeechEnded,
|
|
38
|
+
OverlapSpeechStarted,
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
/**
 * Factory helpers for the control messages ("sentinels") that can be written
 * to an interruption stream in between audio frames.
 *
 * Every factory returns a plain object literal whose `type` field is the
 * discriminant the stream's transformer switches on.
 */
export class InterruptionStreamSentinel {
  /** Creates the `'agent-speech-started'` sentinel. */
  static agentSpeechStarted(): AgentSpeechStarted {
    return { type: 'agent-speech-started' };
  }

  /** Creates the `'agent-speech-ended'` sentinel. */
  static agentSpeechEnded(): AgentSpeechEnded {
    return { type: 'agent-speech-ended' };
  }

  /**
   * Creates the `'overlap-speech-started'` sentinel.
   *
   * @param speechDuration - Duration of the user speech observed so far. The
   *   stream's audio transformer divides this value by 1000 before converting
   *   it to a sample count, i.e. it is handled as milliseconds there.
   * @param startedAt - Start timestamp of the overlap; forwarded unchanged.
   * @param userSpeakingSpan - Optional span that is later annotated with the
   *   interruption attributes.
   */
  static overlapSpeechStarted(
    speechDuration: number,
    startedAt: number,
    userSpeakingSpan?: Span,
  ): OverlapSpeechStarted {
    return { type: 'overlap-speech-started', speechDuration, startedAt, userSpeakingSpan };
  }

  /**
   * Creates the `'overlap-speech-ended'` sentinel.
   *
   * @param endedAt - End timestamp of the overlap; forwarded unchanged.
   */
  static overlapSpeechEnded(endedAt: number): OverlapSpeechEnded {
    return { type: 'overlap-speech-ended', endedAt };
  }

  /** Creates the `'flush'` sentinel. */
  static flush(): Flush {
    return { type: 'flush' };
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Copies the interruption result held by `entry` onto `span` as attributes.
 *
 * The interruption flag is written as the string `'true'` or `'false'`
 * (a missing flag counts as `false`); the remaining values are passed to
 * `setAttribute` exactly as stored on the entry.
 *
 * @param span - Span to annotate.
 * @param entry - Cache entry holding the latest inference result.
 */
function updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) {
  const isInterruptionLabel = (entry.isInterruption ?? false).toString().toLowerCase();

  span.setAttribute(traceTypes.ATTR_IS_INTERRUPTION, isInterruptionLabel);
  span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability);
  span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS);
  span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS);
  span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS);
}
|
|
77
|
+
|
|
78
|
+
export class InterruptionStreamBase {
|
|
79
|
+
private inputStream: StreamChannel<InterruptionSentinel | AudioFrame, InterruptionDetectionError>;
|
|
80
|
+
|
|
81
|
+
private eventStream: ReadableStream<OverlappingSpeechEvent>;
|
|
82
|
+
|
|
83
|
+
private resampler?: AudioResampler;
|
|
84
|
+
|
|
85
|
+
private numRequests = 0;
|
|
86
|
+
|
|
87
|
+
private userSpeakingSpan: Span | undefined;
|
|
88
|
+
|
|
89
|
+
private overlapSpeechStartedAt: number | undefined;
|
|
90
|
+
|
|
91
|
+
private options: InterruptionOptions;
|
|
92
|
+
|
|
93
|
+
private apiOptions: ApiConnectOptions;
|
|
94
|
+
|
|
95
|
+
private model: AdaptiveInterruptionDetector;
|
|
96
|
+
|
|
97
|
+
private logger = log();
|
|
98
|
+
|
|
99
|
+
// Store reconnect function for WebSocket transport
|
|
100
|
+
private wsReconnect?: () => Promise<void>;
|
|
101
|
+
|
|
102
|
+
// Mutable transport options that can be updated via updateOptions()
|
|
103
|
+
private transportOptions: {
|
|
104
|
+
baseUrl: string;
|
|
105
|
+
apiKey: string;
|
|
106
|
+
apiSecret: string;
|
|
107
|
+
sampleRate: number;
|
|
108
|
+
threshold: number;
|
|
109
|
+
minFrames: number;
|
|
110
|
+
timeout: number;
|
|
111
|
+
maxRetries: number;
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
/**
 * Creates the stream and wires up its processing pipeline.
 *
 * @param model - Detector whose `options` seed this stream's options and on
 *   which overlap and metrics events are emitted.
 * @param apiOptions - Connection overrides, merged over `apiConnectDefaults`.
 */
constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial<ApiConnectOptions>) {
  this.inputStream = createStreamChannel<
    InterruptionSentinel | AudioFrame,
    InterruptionDetectionError
  >();

  this.model = model;
  // Spread into fresh objects so later updateOptions() calls only touch this
  // stream's top-level option fields, not the objects that were passed in.
  this.options = { ...model.options };
  this.apiOptions = { ...apiConnectDefaults, ...apiOptions };

  // Mutable transport options; the transport receives this exact object, so
  // updateOptions() changes it in place rather than replacing it.
  const { baseUrl, apiKey, apiSecret, sampleRate, threshold, minFrames, inferenceTimeout } =
    this.options;
  this.transportOptions = {
    baseUrl,
    apiKey,
    apiSecret,
    sampleRate,
    threshold,
    minFrames,
    timeout: inferenceTimeout,
    maxRetries: this.apiOptions.maxRetries,
  };

  // Must run last: setupTransform() reads options and transportOptions.
  this.eventStream = this.setupTransform();
}
|
|
138
|
+
|
|
139
|
+
/**
 * Update stream options. For WebSocket transport, this triggers a reconnection.
 *
 * Only the fields that are present (not `undefined`) are applied. Each one is
 * written to both the stream options and the mutable transport options.
 *
 * @param options - New values. `minInterruptionDurationInS` is additionally
 *   converted to a frame count (`ceil(seconds * FRAMES_PER_SECOND)`) and
 *   stored as `minFrames`.
 * @returns A promise that settles once the WebSocket reconnect (if any) has
 *   finished.
 */
async updateOptions(options: {
  threshold?: number;
  minInterruptionDurationInS?: number;
}): Promise<void> {
  const { threshold, minInterruptionDurationInS } = options;

  if (threshold !== undefined) {
    this.options.threshold = threshold;
    this.transportOptions.threshold = threshold;
  }

  if (minInterruptionDurationInS !== undefined) {
    const minFrames = Math.ceil(minInterruptionDurationInS * FRAMES_PER_SECOND);
    this.options.minInterruptionDurationInS = minInterruptionDurationInS;
    this.options.minFrames = minFrames;
    this.transportOptions.minFrames = minFrames;
  }

  // Trigger WebSocket reconnection if using proxy (WebSocket transport).
  // wsReconnect is only assigned when setupTransform() built a WebSocket transport.
  if (this.options.useProxy && this.wsReconnect) {
    await this.wsReconnect();
  }
}
|
|
160
|
+
|
|
161
|
+
/**
 * Builds the processing pipeline and returns its readable end.
 *
 * Pipeline: input -> audioTransformer -> transport -> eventEmitter.
 *
 * - `audioTransformer` consumes audio frames and sentinels. It buffers audio
 *   while the agent is speaking and, during overlapping user speech, emits
 *   audio slices for inference. It also emits the final overlap event when an
 *   overlap ends.
 * - `transport` is the WebSocket transport when `options.useProxy` is set and
 *   the HTTP transport otherwise.
 * - `eventEmitter` re-emits every event on the model, together with a derived
 *   metrics event, and passes the event through.
 *
 * @returns Stream of overlapping-speech events.
 */
private setupTransform(): ReadableStream<OverlappingSpeechEvent> {
  let agentSpeechStarted = false;
  let startIdx = 0;
  let accumulatedSamples = 0;
  let overlapSpeechStarted = false;
  let overlapCount = 0;
  const cache = new BoundedCache<number, InterruptionCacheEntry>(10);
  // Typed arrays are zero-initialised, so no explicit fill is needed.
  const inferenceS16Data = new Int16Array(
    Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate),
  );

  // State accessors handed to the transport.
  const getState = () => ({
    overlapSpeechStarted,
    overlapSpeechStartedAt: this.overlapSpeechStartedAt,
    cache,
    overlapCount,
  });
  const setState = (partial: { overlapSpeechStarted?: boolean }) => {
    if (partial.overlapSpeechStarted !== undefined) {
      overlapSpeechStarted = partial.overlapSpeechStarted;
    }
  };
  // Annotates the pending user-speaking span once, then drops the reference.
  const handleSpanUpdate = (entry: InterruptionCacheEntry) => {
    if (this.userSpeakingSpan) {
      updateUserSpeakingSpan(this.userSpeakingSpan, entry);
      this.userSpeakingSpan = undefined;
    }
  };

  const onRequestSent = () => {
    this.numRequests++;
  };

  const getAndResetNumRequests = (): number => {
    const n = this.numRequests;
    this.numRequests = 0;
    return n;
  };

  // Shared reset performed whenever agent speech starts or ends.
  const resetSpeechState = () => {
    overlapSpeechStarted = false;
    this.overlapSpeechStartedAt = undefined;
    accumulatedSamples = 0;
    overlapCount = 0;
    startIdx = 0;
    this.numRequests = 0;
    cache.clear();
  };

  // First transform: process input frames/sentinels and output audio slices or events.
  const audioTransformer = new TransformStream<
    InterruptionSentinel | AudioFrame,
    Int16Array | OverlappingSpeechEvent
  >(
    {
      transform: (chunk, controller) => {
        if (chunk instanceof AudioFrame) {
          // Audio is only buffered while the agent is speaking.
          if (!agentSpeechStarted) {
            return;
          }
          if (this.options.sampleRate !== chunk.sampleRate) {
            // NOTE(review): the stream is errored with a plain string rather than
            // an Error instance — confirm consumers do not rely on `instanceof Error`.
            controller.error('the sample rate of the input frames must be consistent');
            this.logger.error('the sample rate of the input frames must be consistent');
            return;
          }
          const result = writeToInferenceS16Data(
            chunk,
            startIdx,
            inferenceS16Data,
            this.options.maxAudioDurationInS,
          );
          startIdx = result.startIdx;
          accumulatedSamples += result.samplesWritten;

          // Emit a slice once a full detection interval of new audio has
          // arrived, but only while the user is overlapping the agent.
          const intervalSamples = Math.floor(
            this.options.detectionIntervalInS * this.options.sampleRate,
          );
          if (accumulatedSamples >= intervalSamples && overlapSpeechStarted) {
            const audioSlice = inferenceS16Data.slice(0, startIdx);
            accumulatedSamples = 0;
            controller.enqueue(audioSlice);
          }
        } else if (chunk.type === 'agent-speech-started') {
          this.logger.debug('agent speech started');
          agentSpeechStarted = true;
          resetSpeechState();
        } else if (chunk.type === 'agent-speech-ended') {
          this.logger.debug('agent speech ended');
          agentSpeechStarted = false;
          resetSpeechState();
        } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) {
          this.overlapSpeechStartedAt = chunk.startedAt;
          this.userSpeakingSpan = chunk.userSpeakingSpan;
          this.logger.debug('overlap speech started, starting interruption inference');
          overlapSpeechStarted = true;
          accumulatedSamples = 0;
          overlapCount += 1;
          if (overlapCount <= 1) {
            // First overlap since agent speech started: keep only the tail of
            // the buffer covering the user speech so far plus the configured
            // audio prefix, shifted to the front of the buffer.
            const keepSize =
              Math.round((chunk.speechDuration / 1000) * this.options.sampleRate) +
              Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate);
            const shiftCount = Math.max(0, startIdx - keepSize);
            inferenceS16Data.copyWithin(0, shiftCount, startIdx);
            startIdx -= shiftCount;
          }
          cache.clear();
        } else if (chunk.type === 'overlap-speech-ended') {
          this.logger.debug('overlap speech ended');
          if (overlapSpeechStarted) {
            this.userSpeakingSpan = undefined;
            const latestEntry = cache.pop(
              (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0,
            );
            if (!latestEntry) {
              this.logger.debug('no request made for overlap speech');
            }
            // Fall back to a default entry when no matching result was cached.
            const e = latestEntry ?? InterruptionCacheEntry.default();
            const event: OverlappingSpeechEvent = {
              type: 'user_overlapping_speech',
              timestamp: chunk.endedAt,
              isInterruption: false,
              overlapStartedAt: this.overlapSpeechStartedAt,
              speechInput: e.speechInput,
              probabilities: e.probabilities,
              totalDurationInS: e.totalDurationInS,
              detectionDelayInS: e.detectionDelayInS,
              predictionDurationInS: e.predictionDurationInS,
              probability: e.probability,
              numRequests: getAndResetNumRequests(),
            };
            controller.enqueue(event);
            overlapSpeechStarted = false;
            accumulatedSamples = 0;
          }
          this.overlapSpeechStartedAt = undefined;
        } else if (chunk.type === 'flush') {
          // no-op
        }
      },
    },
    { highWaterMark: 32 },
    { highWaterMark: 32 },
  );

  // Second transform: transport layer (HTTP or WebSocket based on useProxy).
  const transportOptions = this.transportOptions;

  let transport: TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>;
  if (this.options.useProxy) {
    const wsResult = createWsTransport(
      transportOptions,
      getState,
      setState,
      handleSpanUpdate,
      onRequestSent,
      getAndResetNumRequests,
    );
    transport = wsResult.transport;
    // Kept so updateOptions() can force a reconnect.
    this.wsReconnect = wsResult.reconnect;
  } else {
    transport = createHttpTransport(
      transportOptions,
      getState,
      setState,
      handleSpanUpdate,
      getAndResetNumRequests,
    );
  }

  // Third transform: publish each event (and its metrics) on the model, then pass it on.
  const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({
    transform: (chunk, controller) => {
      this.model.emit('user_overlapping_speech', chunk);

      // Durations on the event are in seconds; the metrics carry them multiplied by 1000.
      const metrics: InterruptionMetrics = {
        type: 'interruption_metrics',
        timestamp: chunk.timestamp,
        totalDuration: chunk.totalDurationInS * 1000,
        predictionDuration: chunk.predictionDurationInS * 1000,
        detectionDelay: chunk.detectionDelayInS * 1000,
        numInterruptions: chunk.isInterruption ? 1 : 0,
        numBackchannels: chunk.isInterruption ? 0 : 1,
        numRequests: chunk.numRequests,
        metadata: {
          modelProvider: this.model.provider,
          modelName: this.model.model,
        },
      };
      this.model.emit('metrics_collected', metrics);

      controller.enqueue(chunk);
    },
  });

  // Pipeline: input -> audioTransformer -> transport -> eventEmitter -> eventStream
  return this.inputStream
    .stream()
    .pipeThrough(audioTransformer)
    .pipeThrough(transport)
    .pipeThrough(eventEmitter);
}
|
|
366
|
+
|
|
367
|
+
/**
 * Guard used before writing to the input stream.
 *
 * @throws Error if the input stream has already been closed.
 */
private ensureInputNotEnded() {
  const { closed: inputClosed } = this.inputStream;
  if (!inputClosed) {
    return;
  }
  throw new Error('input stream is closed');
}
|
|
372
|
+
|
|
373
|
+
/**
 * Guard invoked by the public write paths before they touch the pipeline.
 * Only the input side is checked here.
 *
 * @throws Error if the input stream has already been closed.
 */
private ensureStreamsNotEnded(): void {
  this.ensureInputNotEnded();
}
|
|
376
|
+
|
|
377
|
+
/**
 * Returns the stream's resampler, creating it on first use.
 *
 * The resampler is built once, converting from `inputSampleRate` to
 * `this.options.sampleRate`; later calls return that same instance even if a
 * different `inputSampleRate` is passed.
 *
 * @param inputSampleRate - Sample rate of the incoming frames; only used when
 *   the resampler does not exist yet.
 * @returns The cached (or newly created) resampler.
 */
private getResamplerFor(inputSampleRate: number): AudioResampler {
  const existing = this.resampler;
  if (existing) {
    return existing;
  }

  const created = new AudioResampler(inputSampleRate, this.options.sampleRate);
  this.resampler = created;
  return created;
}
|
|
383
|
+
|
|
384
|
+
/**
 * Exposes the stream of overlapping-speech events produced by this instance.
 *
 * @returns The readable stream stored in `eventStream`.
 */
stream(): ReadableStream<OverlappingSpeechEvent> {
  const { eventStream } = this;
  return eventStream;
}
|
|
387
|
+
|
|
388
|
+
/**
 * Writes a control sentinel or an audio frame into the input stream.
 *
 * Sentinels are forwarded untouched. Audio frames are forwarded as-is when
 * their sample rate already equals `options.sampleRate`; otherwise they are
 * run through the stream's resampler and every resulting frame is written in
 * order.
 *
 * @param frame - Control sentinel or audio frame to enqueue.
 * @throws Error if the input stream is closed, or if a frame arrives with a
 *   sample rate different from the one the resampler was created for.
 */
async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise<void> {
  this.ensureStreamsNotEnded();

  // Anything that is not audio is a control signal and bypasses resampling.
  if (!(frame instanceof AudioFrame)) {
    return this.inputStream.write(frame);
  }

  // Fast path: the frame is already at the configured rate.
  if (this.options.sampleRate === frame.sampleRate) {
    await this.inputStream.write(frame);
    return;
  }

  const resampler = this.getResamplerFor(frame.sampleRate);
  // The resampler is created once, so a later change of input rate cannot be honoured.
  if (resampler.inputRate !== frame.sampleRate) {
    throw new Error('the sample rate of the input frames must be consistent');
  }

  for (const converted of resampler.push(frame)) {
    await this.inputStream.write(converted);
  }
}
|
|
404
|
+
|
|
405
|
+
/**
 * Writes a flush sentinel into the input stream.
 *
 * @throws Error if the input stream has already been closed.
 */
async flush(): Promise<void> {
  this.ensureStreamsNotEnded();

  const flushSentinel = InterruptionStreamSentinel.flush();
  await this.inputStream.write(flushSentinel);
}
|
|
409
|
+
|
|
410
|
+
/**
 * Signals that no more input will be pushed: flushes, then closes the input
 * stream.
 *
 * @throws Error if the input stream has already been closed (raised by
 *   `flush()`).
 */
async endInput(): Promise<void> {
  // Flush first so the sentinel is written before the stream is closed.
  await this.flush();
  await this.inputStream.close();
}
|
|
414
|
+
|
|
415
|
+
/**
 * Closes the input stream if it is still open and unregisters this stream
 * from its model. Unlike `endInput()`, this does not throw when the input
 * stream is already closed.
 */
async close(): Promise<void> {
  const alreadyClosed = this.inputStream.closed;
  if (!alreadyClosed) {
    await this.inputStream.close();
  }

  this.model.removeStream(this);
}
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
/**
 * Append one audio frame to the rolling inference window and return the new
 * write position.
 *
 * The window holds at most `floor(maxAudioDuration * frame.sampleRate)`
 * samples, and never more than `outData.length`. When appending the frame
 * would exceed that size, the oldest samples are discarded by shifting the
 * existing data to the left. Multi-channel frames are mixed down to mono by
 * averaging the channels of each sample (rounded down with `Math.floor`).
 *
 * @param frame - Interleaved 16-bit PCM frame to append.
 * @param startIdx - Index in `outData` at which valid data currently ends.
 * @param outData - Mono sample buffer backing the window; mutated in place.
 * @param maxAudioDuration - Maximum window length, multiplied by
 *   `frame.sampleRate` to obtain a sample count.
 * @returns The index just past the newly written samples, and the number of
 *   (mono) samples written.
 * @throws Error if the frame alone holds more samples per channel than the
 *   effective window size.
 */
function writeToInferenceS16Data(
  frame: AudioFrame,
  startIdx: number,
  outData: Int16Array,
  maxAudioDuration: number,
): { startIdx: number; samplesWritten: number } {
  // The usable window is bounded by both the configured duration and the
  // backing buffer; using only one of the two lets writes land out of bounds.
  const maxWindowSize = Math.min(Math.floor(maxAudioDuration * frame.sampleRate), outData.length);

  // Validate against the effective window: a larger frame would otherwise
  // push `startIdx` below zero after the shift.
  if (frame.samplesPerChannel > maxWindowSize) {
    throw new Error('frame samples are greater than the max window size');
  }

  // Shift the data to the left if the window would overflow
  const shift = startIdx + frame.samplesPerChannel - maxWindowSize;
  if (shift > 0) {
    outData.copyWithin(0, shift, startIdx);
    startIdx -= shift;
  }

  // View the frame's bytes as interleaved 16-bit samples without copying.
  const frameData = new Int16Array(
    frame.data.buffer,
    frame.data.byteOffset,
    frame.samplesPerChannel * frame.channels,
  );

  if (frame.channels > 1) {
    // Mix down multiple channels to mono by averaging
    for (let i = 0; i < frame.samplesPerChannel; i++) {
      let sum = 0;
      for (let ch = 0; ch < frame.channels; ch++) {
        sum += frameData[i * frame.channels + ch] ?? 0;
      }
      outData[startIdx + i] = Math.floor(sum / frame.channels);
    }
  } else {
    // Single channel - copy directly
    outData.set(frameData, startIdx);
  }

  startIdx += frame.samplesPerChannel;
  return { startIdx, samplesWritten: frame.samplesPerChannel };
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { Span } from '@opentelemetry/api';
|
|
5
|
+
|
|
6
|
+
/**
 * Event emitted by the interruption stream for a span of user speech that
 * overlapped with the agent's speech.
 */
export interface OverlappingSpeechEvent {
  /** Discriminant for this event type. */
  type: 'user_overlapping_speech';
  /** Event timestamp; for events emitted when overlap ends this is the sentinel's `endedAt`. */
  timestamp: number;
  /** Whether the overlap was classified as an interruption (otherwise it is counted as a backchannel in metrics). */
  isInterruption: boolean;

  /** Total duration in seconds (converted to milliseconds when reported as metrics). */
  totalDurationInS: number;
  /** Prediction duration in seconds (converted to milliseconds when reported as metrics). */
  predictionDurationInS: number;
  /** Detection delay in seconds (converted to milliseconds when reported as metrics). */
  detectionDelayInS: number;

  /** Timestamp at which the overlap started, when known. */
  overlapStartedAt?: number;
  /** 16-bit audio samples associated with the event, when available. */
  speechInput?: Int16Array;
  /** Per-step probabilities associated with the event, when available. */
  probabilities?: number[];
  /** Probability value associated with the event. */
  probability: number;
  /** Number of inference requests attributed to this event. */
  numRequests: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Settings that control interruption detection.
 *
 * Fields suffixed with `InS` are expressed in seconds.
 */
export interface InterruptionOptions {
  /** Sample rate the stream works at; frames pushed at another rate are resampled to it. */
  sampleRate: number;
  /** Detection threshold. */
  threshold: number;
  /** Minimum number of frames. */
  minFrames: number;
  /** Maximum audio duration, in seconds. */
  maxAudioDurationInS: number;
  /** Audio prefix duration, in seconds. */
  audioPrefixDurationInS: number;
  /** Detection interval, in seconds. */
  detectionIntervalInS: number;
  /** Inference timeout. NOTE(review): unit is not stated here — confirm against the transport code. */
  inferenceTimeout: number;
  /** Minimum interruption duration, in seconds. */
  minInterruptionDurationInS: number;

  /** Base URL of the remote service. */
  baseUrl: string;
  /** API key used for the remote service. */
  apiKey: string;
  /** API secret used for the remote service. */
  apiSecret: string;
  /** When true the WebSocket transport is used; otherwise the HTTP transport. */
  useProxy: boolean;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Connection settings consumed by the transport layers.
 */
export interface ApiConnectOptions {
  /** Maximum number of retries. */
  maxRetries: number;
  /** Interval between retries. NOTE(review): unit is not stated here — confirm. */
  retryInterval: number;
  /** Connection timeout. NOTE(review): unit is not stated here — confirm. */
  timeout: number;
}
|
|
46
|
+
|
|
47
|
+
// Control signals that travel through the interruption stream alongside audio.
// Each one is tagged with a literal `type` so consumers can switch on it.

/**
 * Any control signal accepted by the interruption stream.
 */
export type InterruptionSentinel =
  | AgentSpeechStarted
  | AgentSpeechEnded
  | OverlapSpeechStarted
  | OverlapSpeechEnded
  | Flush;

/** Control signal tagged `agent-speech-started`. */
export interface AgentSpeechStarted {
  type: 'agent-speech-started';
}

/** Control signal tagged `agent-speech-ended`. */
export interface AgentSpeechEnded {
  type: 'agent-speech-ended';
}

/** Control signal tagged `overlap-speech-started`. */
export interface OverlapSpeechStarted {
  type: 'overlap-speech-started';
  /** Length of the speech segment, in milliseconds (same units as VADEvent.speechDuration). */
  speechDuration: number;
  /** Absolute time (ms) at which the overlapping speech began; computed by the caller. */
  startedAt: number;
  /** Optional tracing span supplied by the caller. */
  userSpeakingSpan?: Span;
}

/** Control signal tagged `overlap-speech-ended`. */
export interface OverlapSpeechEnded {
  type: 'overlap-speech-ended';
  /** Absolute time (ms) at which the overlapping speech ended; becomes the timestamp of the non-interruption event. */
  endedAt: number;
}

/** Control signal tagged `flush`. */
export interface Flush {
  type: 'flush';
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
5
|
+
import { BoundedCache } from './utils.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Minimal value type stored in the cache under test: a creation marker plus a
 * few optional fields that the update helpers can fill in later.
 */
class Entry {
  /** Identifies the entry in assertions. */
  createdAt: number;
  /** Unset until a test or an update assigns it. */
  totalDurationInS: number | undefined = undefined;
  /** Unset until a test or an update assigns it. */
  predictionDurationInS: number | undefined = undefined;
  /** Free-form label; stays undefined when not passed to the constructor. */
  note: string | undefined = undefined;

  /**
   * @param createdAt - Value stored in `createdAt`.
   * @param note - Optional label stored in `note`.
   */
  constructor(createdAt: number, note?: string) {
    this.createdAt = createdAt;
    this.note = note;
  }
}
|
|
18
|
+
|
|
19
|
+
describe('BoundedCache', () => {
  /** Builds a cache with the given capacity holding `new Entry(id)` under each id, inserted in order. */
  const seededCache = (maxLen: number, ...ids: number[]): BoundedCache<number, Entry> => {
    const cache = new BoundedCache<number, Entry>(maxLen);
    for (const id of ids) {
      cache.set(id, new Entry(id));
    }
    return cache;
  };

  /** Creates an entry whose `totalDurationInS` is already set. */
  const timedEntry = (createdAt: number, totalDurationInS: number): Entry => {
    const entry = new Entry(createdAt);
    entry.totalDurationInS = totalDurationInS;
    return entry;
  };

  /** Keys of the cache in iteration order. */
  const keysOf = (cache: BoundedCache<number, Entry>): number[] => Array.from(cache.keys());

  it('evicts oldest entry when maxLen is exceeded', () => {
    // Capacity 2, three inserts: key 1 must be gone.
    const cache = seededCache(2, 1, 2, 3);

    expect(cache.size).toBe(2);
    expect(keysOf(cache)).toEqual([2, 3]);
    expect(cache.get(1)).toBeUndefined();
    expect(cache.get(2)!.createdAt).toBe(2);
    expect(cache.get(3)!.createdAt).toBe(3);
  });

  it('setOrUpdate creates a value via factory when key is missing', () => {
    const cache = seededCache(10);
    const factory = vi.fn(() => new Entry(100));

    const value = cache.setOrUpdate(1, factory, { predictionDurationInS: 0.42 });

    expect(factory).toHaveBeenCalledTimes(1);
    expect(value.createdAt).toBe(100);
    expect(value.predictionDurationInS).toBe(0.42);
    // The stored value reflects the update too.
    expect(cache.get(1)?.predictionDurationInS).toBe(0.42);
  });

  it('setOrUpdate updates existing value and does not call factory', () => {
    const cache = seededCache(10);
    cache.set(1, new Entry(1, 'before'));
    const factory = vi.fn(() => new Entry(999));

    const value = cache.setOrUpdate(1, factory, { note: 'after', totalDurationInS: 1.5 });

    expect(factory).not.toHaveBeenCalled();
    expect(value.createdAt).toBe(1);
    expect(value.note).toBe('after');
    expect(value.totalDurationInS).toBe(1.5);
  });

  it('updateValue returns undefined for missing key', () => {
    const cache = seededCache(10);

    const result = cache.updateValue(404, { note: 'missing' });

    expect(result).toBeUndefined();
  });

  it('updateValue ignores undefined fields', () => {
    const cache = seededCache(10);
    cache.set(1, new Entry(1, 'keep'));

    const result = cache.updateValue(1, {
      note: undefined,
      predictionDurationInS: 0.1,
    });

    expect(result?.createdAt).toBe(1);
    // An explicit `undefined` must not overwrite the existing note.
    expect(result?.note).toBe('keep');
    expect(result?.predictionDurationInS).toBe(0.1);
  });

  it('pop without predicate removes the oldest entry (python parity)', () => {
    const cache = seededCache(10, 1, 2, 3);

    const popped = cache.pop();

    expect(popped?.createdAt).toBe(1);
    expect(keysOf(cache)).toEqual([2, 3]);
  });

  it('pop with predicate removes the most recent matching entry', () => {
    const cache = seededCache(10);
    cache.set(1, timedEntry(1, 0));
    cache.set(2, timedEntry(2, 1));
    cache.set(3, timedEntry(3, 2));

    // Entries 2 and 3 both match; the newest one (3) is expected back.
    const popped = cache.pop((entry) => (entry.totalDurationInS ?? 0) > 0);

    expect(popped?.createdAt).toBe(3);
    expect(popped?.totalDurationInS).toBe(2);
    expect(keysOf(cache)).toEqual([1, 2]);
  });

  it('pop with predicate returns undefined when no match exists', () => {
    const cache = seededCache(10);
    cache.set(1, timedEntry(1, 0));

    const popped = cache.pop((entry) => (entry.totalDurationInS ?? 0) > 10);

    expect(popped).toBeUndefined();
    // Nothing matched, so nothing may have been removed.
    expect(cache.size).toBe(1);
  });

  it('clear removes all entries', () => {
    const cache = seededCache(10, 1, 2);

    cache.clear();

    expect(cache.size).toBe(0);
    expect(keysOf(cache)).toEqual([]);
  });
});
|