@livekit/agents 1.0.48 → 1.1.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.cjs +27 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +9 -0
- package/dist/constants.d.ts +9 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +18 -0
- package/dist/constants.js.map +1 -1
- package/dist/inference/api_protos.d.cts +71 -71
- package/dist/inference/api_protos.d.ts +71 -71
- package/dist/inference/interruption/defaults.cjs +81 -0
- package/dist/inference/interruption/defaults.cjs.map +1 -0
- package/dist/inference/interruption/defaults.d.cts +19 -0
- package/dist/inference/interruption/defaults.d.ts +19 -0
- package/dist/inference/interruption/defaults.d.ts.map +1 -0
- package/dist/inference/interruption/defaults.js +46 -0
- package/dist/inference/interruption/defaults.js.map +1 -0
- package/dist/inference/interruption/errors.cjs +44 -0
- package/dist/inference/interruption/errors.cjs.map +1 -0
- package/dist/inference/interruption/errors.d.cts +12 -0
- package/dist/inference/interruption/errors.d.ts +12 -0
- package/dist/inference/interruption/errors.d.ts.map +1 -0
- package/dist/inference/interruption/errors.js +20 -0
- package/dist/inference/interruption/errors.js.map +1 -0
- package/dist/inference/interruption/http_transport.cjs +147 -0
- package/dist/inference/interruption/http_transport.cjs.map +1 -0
- package/dist/inference/interruption/http_transport.d.cts +63 -0
- package/dist/inference/interruption/http_transport.d.ts +63 -0
- package/dist/inference/interruption/http_transport.d.ts.map +1 -0
- package/dist/inference/interruption/http_transport.js +121 -0
- package/dist/inference/interruption/http_transport.js.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.js +34 -0
- package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
- package/dist/inference/interruption/interruption_detector.cjs +181 -0
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
- package/dist/inference/interruption/interruption_detector.d.cts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_detector.js +147 -0
- package/dist/inference/interruption/interruption_detector.js.map +1 -0
- package/dist/inference/interruption/interruption_stream.cjs +368 -0
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
- package/dist/inference/interruption/interruption_stream.d.cts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_stream.js +344 -0
- package/dist/inference/interruption/interruption_stream.js.map +1 -0
- package/dist/inference/interruption/types.cjs +17 -0
- package/dist/inference/interruption/types.cjs.map +1 -0
- package/dist/inference/interruption/types.d.cts +66 -0
- package/dist/inference/interruption/types.d.ts +66 -0
- package/dist/inference/interruption/types.d.ts.map +1 -0
- package/dist/inference/interruption/types.js +1 -0
- package/dist/inference/interruption/types.js.map +1 -0
- package/dist/inference/interruption/utils.cjs +130 -0
- package/dist/inference/interruption/utils.cjs.map +1 -0
- package/dist/inference/interruption/utils.d.cts +41 -0
- package/dist/inference/interruption/utils.d.ts +41 -0
- package/dist/inference/interruption/utils.d.ts.map +1 -0
- package/dist/inference/interruption/utils.js +105 -0
- package/dist/inference/interruption/utils.js.map +1 -0
- package/dist/inference/interruption/utils.test.cjs +105 -0
- package/dist/inference/interruption/utils.test.cjs.map +1 -0
- package/dist/inference/interruption/utils.test.js +104 -0
- package/dist/inference/interruption/utils.test.js.map +1 -0
- package/dist/inference/interruption/ws_transport.cjs +329 -0
- package/dist/inference/interruption/ws_transport.cjs.map +1 -0
- package/dist/inference/interruption/ws_transport.d.cts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
- package/dist/inference/interruption/ws_transport.js +295 -0
- package/dist/inference/interruption/ws_transport.js.map +1 -0
- package/dist/inference/llm.cjs +14 -10
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +2 -1
- package/dist/inference/llm.d.ts +2 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +8 -10
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +7 -2
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +2 -0
- package/dist/inference/stt.d.ts +2 -0
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +8 -3
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs +7 -2
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -0
- package/dist/inference/tts.d.ts +2 -0
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +8 -3
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/utils.cjs +26 -7
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +13 -0
- package/dist/inference/utils.d.ts +13 -0
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +18 -2
- package/dist/inference/utils.js.map +1 -1
- package/dist/llm/chat_context.cjs +20 -2
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +19 -1
- package/dist/llm/chat_context.d.ts +19 -1
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +20 -2
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +16 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +9 -0
- package/dist/llm/llm.d.ts +9 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +16 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/realtime.cjs +3 -0
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +1 -0
- package/dist/llm/realtime.d.ts +1 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js +3 -0
- package/dist/llm/realtime.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +45 -1
- package/dist/metrics/base.d.ts +45 -1
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/index.cjs +5 -0
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -1
- package/dist/metrics/index.d.ts +2 -1
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +6 -0
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/model_usage.cjs +189 -0
- package/dist/metrics/model_usage.cjs.map +1 -0
- package/dist/metrics/model_usage.d.cts +92 -0
- package/dist/metrics/model_usage.d.ts +92 -0
- package/dist/metrics/model_usage.d.ts.map +1 -0
- package/dist/metrics/model_usage.js +164 -0
- package/dist/metrics/model_usage.js.map +1 -0
- package/dist/metrics/model_usage.test.cjs +474 -0
- package/dist/metrics/model_usage.test.cjs.map +1 -0
- package/dist/metrics/model_usage.test.js +476 -0
- package/dist/metrics/model_usage.test.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +3 -0
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +9 -0
- package/dist/metrics/usage_collector.d.ts +9 -0
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +3 -0
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +9 -0
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +9 -0
- package/dist/metrics/utils.js.map +1 -1
- package/dist/stream/multi_input_stream.test.cjs +4 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -1
- package/dist/stream/multi_input_stream.test.js +5 -1
- package/dist/stream/multi_input_stream.test.js.map +1 -1
- package/dist/stream/stream_channel.cjs +31 -0
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +4 -2
- package/dist/stream/stream_channel.d.ts +4 -2
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +31 -0
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/stt/stt.cjs +34 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +22 -0
- package/dist/stt/stt.d.ts +22 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +34 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/otel_http_exporter.cjs +24 -5
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -1
- package/dist/telemetry/otel_http_exporter.d.cts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -1
- package/dist/telemetry/otel_http_exporter.js +24 -5
- package/dist/telemetry/otel_http_exporter.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +5 -5
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +9 -5
- package/dist/telemetry/trace_types.d.ts +9 -5
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +5 -5
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/telemetry/traces.cjs +47 -8
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +47 -8
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +64 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +34 -0
- package/dist/tts/tts.d.ts +34 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +64 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +25 -4
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -2
- package/dist/voice/agent.d.ts +10 -2
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +25 -4
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +261 -36
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +20 -6
- package/dist/voice/agent_activity.d.ts +20 -6
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +262 -37
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +105 -48
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +90 -20
- package/dist/voice/agent_session.d.ts +90 -20
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +105 -46
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +287 -6
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +42 -3
- package/dist/voice/audio_recognition.d.ts +42 -3
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +289 -7
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/client_events.cjs +554 -0
- package/dist/voice/client_events.cjs.map +1 -0
- package/dist/voice/client_events.d.cts +195 -0
- package/dist/voice/client_events.d.ts +195 -0
- package/dist/voice/client_events.d.ts.map +1 -0
- package/dist/voice/client_events.js +548 -0
- package/dist/voice/client_events.js.map +1 -0
- package/dist/voice/events.cjs +1 -0
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +8 -5
- package/dist/voice/events.d.ts +8 -5
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -0
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +43 -8
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +3 -3
- package/dist/voice/generation.d.ts +3 -3
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -8
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +1 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/report.cjs +20 -8
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -0
- package/dist/voice/report.d.ts +5 -0
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +20 -8
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +106 -0
- package/dist/voice/report.test.cjs.map +1 -0
- package/dist/voice/report.test.js +105 -0
- package/dist/voice/report.test.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +5 -39
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +4 -9
- package/dist/voice/room_io/room_io.d.ts +4 -9
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +5 -40
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/endpointing.cjs +33 -0
- package/dist/voice/turn_config/endpointing.cjs.map +1 -0
- package/dist/voice/turn_config/endpointing.d.cts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
- package/dist/voice/turn_config/endpointing.js +9 -0
- package/dist/voice/turn_config/endpointing.js.map +1 -0
- package/dist/voice/turn_config/interruption.cjs +37 -0
- package/dist/voice/turn_config/interruption.cjs.map +1 -0
- package/dist/voice/turn_config/interruption.d.cts +53 -0
- package/dist/voice/turn_config/interruption.d.ts +53 -0
- package/dist/voice/turn_config/interruption.d.ts.map +1 -0
- package/dist/voice/turn_config/interruption.js +13 -0
- package/dist/voice/turn_config/interruption.js.map +1 -0
- package/dist/voice/turn_config/turn_handling.cjs +35 -0
- package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
- package/dist/voice/turn_config/turn_handling.d.cts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
- package/dist/voice/turn_config/turn_handling.js +11 -0
- package/dist/voice/turn_config/turn_handling.js.map +1 -0
- package/dist/voice/turn_config/utils.cjs +97 -0
- package/dist/voice/turn_config/utils.cjs.map +1 -0
- package/dist/voice/turn_config/utils.d.cts +25 -0
- package/dist/voice/turn_config/utils.d.ts +25 -0
- package/dist/voice/turn_config/utils.d.ts.map +1 -0
- package/dist/voice/turn_config/utils.js +73 -0
- package/dist/voice/turn_config/utils.js.map +1 -0
- package/dist/voice/turn_config/utils.test.cjs +86 -0
- package/dist/voice/turn_config/utils.test.cjs.map +1 -0
- package/dist/voice/turn_config/utils.test.js +85 -0
- package/dist/voice/turn_config/utils.test.js.map +1 -0
- package/dist/voice/wire_format.cjs +798 -0
- package/dist/voice/wire_format.cjs.map +1 -0
- package/dist/voice/wire_format.d.cts +5503 -0
- package/dist/voice/wire_format.d.ts +5503 -0
- package/dist/voice/wire_format.d.ts.map +1 -0
- package/dist/voice/wire_format.js +728 -0
- package/dist/voice/wire_format.js.map +1 -0
- package/package.json +2 -1
- package/src/constants.ts +13 -0
- package/src/inference/interruption/defaults.ts +51 -0
- package/src/inference/interruption/errors.ts +25 -0
- package/src/inference/interruption/http_transport.ts +187 -0
- package/src/inference/interruption/interruption_cache_entry.ts +50 -0
- package/src/inference/interruption/interruption_detector.ts +188 -0
- package/src/inference/interruption/interruption_stream.ts +467 -0
- package/src/inference/interruption/types.ts +84 -0
- package/src/inference/interruption/utils.test.ts +132 -0
- package/src/inference/interruption/utils.ts +137 -0
- package/src/inference/interruption/ws_transport.ts +402 -0
- package/src/inference/llm.ts +9 -12
- package/src/inference/stt.ts +10 -3
- package/src/inference/tts.ts +10 -3
- package/src/inference/utils.ts +29 -1
- package/src/llm/chat_context.ts +40 -2
- package/src/llm/index.ts +1 -0
- package/src/llm/llm.ts +16 -0
- package/src/llm/realtime.ts +4 -0
- package/src/metrics/base.ts +48 -1
- package/src/metrics/index.ts +11 -0
- package/src/metrics/model_usage.test.ts +545 -0
- package/src/metrics/model_usage.ts +262 -0
- package/src/metrics/usage_collector.ts +11 -0
- package/src/metrics/utils.ts +11 -0
- package/src/stream/multi_input_stream.test.ts +6 -1
- package/src/stream/stream_channel.ts +34 -2
- package/src/stt/stt.ts +38 -0
- package/src/telemetry/otel_http_exporter.ts +28 -5
- package/src/telemetry/trace_types.ts +11 -8
- package/src/telemetry/traces.ts +111 -54
- package/src/tts/tts.ts +69 -1
- package/src/voice/agent.ts +30 -3
- package/src/voice/agent_activity.ts +327 -28
- package/src/voice/agent_session.ts +207 -59
- package/src/voice/audio_recognition.ts +385 -9
- package/src/voice/client_events.ts +838 -0
- package/src/voice/events.ts +14 -4
- package/src/voice/generation.ts +52 -9
- package/src/voice/index.ts +1 -0
- package/src/voice/report.test.ts +117 -0
- package/src/voice/report.ts +29 -6
- package/src/voice/room_io/room_io.ts +7 -61
- package/src/voice/turn_config/endpointing.ts +33 -0
- package/src/voice/turn_config/interruption.ts +56 -0
- package/src/voice/turn_config/turn_handling.ts +45 -0
- package/src/voice/turn_config/utils.test.ts +100 -0
- package/src/voice/turn_config/utils.ts +103 -0
- package/src/voice/wire_format.ts +827 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { FRAME_DURATION_IN_S, MIN_INTERRUPTION_DURATION_IN_S } from './defaults.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Fixed-capacity key/value store built on an insertion-ordered `Map`.
 *
 * Whenever an insertion pushes the entry count above the configured maximum,
 * the entry that was inserted first is dropped (FIFO eviction).
 */
export class BoundedCache<K, V extends object> {
  private cache: Map<K, V> = new Map();
  private readonly maxLen: number;

  /**
   * @param maxLen - Maximum number of entries kept before eviction starts (default 10).
   */
  constructor(maxLen: number = 10) {
    this.maxLen = maxLen;
  }

  /**
   * Store `value` under `key`, then evict the first-inserted entry if the cache
   * has grown past its maximum size.
   */
  set(key: K, value: V): void {
    this.cache.set(key, value);
    if (this.cache.size > this.maxLen) {
      // Map iterates in insertion order, so the first key is the oldest one.
      for (const oldestKey of this.cache.keys()) {
        this.cache.delete(oldestKey);
        break;
      }
    }
  }

  /**
   * Patch the entry stored under `key` in place.
   *
   * Only fields whose new value is not `undefined` and whose name already exists
   * on the stored object are written (parity with python's hasattr + setattr,
   * as in BoundedDict.update_value).
   *
   * @returns The patched entry, or `undefined` when `key` is not cached.
   */
  updateValue(key: K, fields: Partial<V>): V | undefined {
    const current = this.cache.get(key);
    if (current) {
      const target = current as Record<string, unknown>;
      Object.entries(fields).forEach(([name, incoming]) => {
        if (incoming !== undefined && name in target) {
          target[name] = incoming;
        }
      });
    }
    return current;
  }

  /**
   * Ensure an entry exists for `key` (building it with `factory` when missing),
   * then apply `fields` to it. Mirrors python BoundedDict.set_or_update.
   *
   * @returns The entry after the update.
   * @throws Error when the entry is still absent after insertion.
   */
  setOrUpdate(key: K, factory: () => V, fields: Partial<V>): V {
    if (!this.cache.has(key)) this.set(key, factory());

    const updated = this.updateValue(key, fields);
    if (updated) return updated;

    throw new Error('setOrUpdate invariant failed: entry should exist after set');
  }

  /** Look up the entry stored under `key`. */
  get(key: K): V | undefined {
    return this.cache.get(key);
  }

  /** Whether an entry is stored under `key`. */
  has(key: K): boolean {
    return this.cache.has(key);
  }

  /** Remove the entry stored under `key`; returns whether anything was removed. */
  delete(key: K): boolean {
    return this.cache.delete(key);
  }

  /**
   * Remove and return one entry.
   * - Without a predicate: the oldest entry (FIFO).
   * - With a predicate: the newest entry for which the predicate returns true,
   *   scanning from most recently inserted to oldest.
   *
   * @returns The removed entry, or `undefined` when nothing qualifies.
   */
  pop(predicate?: (value: V) => boolean): V | undefined {
    if (predicate === undefined) {
      for (const [key, value] of this.cache) {
        this.cache.delete(key);
        return value;
      }
      return undefined;
    }

    // Snapshot the keys first so deleting the match cannot disturb the scan.
    const newestFirst = Array.from(this.cache.keys()).reverse();
    for (const key of newestFirst) {
      const candidate = this.cache.get(key)!;
      if (predicate(candidate)) {
        this.cache.delete(key);
        return candidate;
      }
    }
    return undefined;
  }

  /** Drop every entry. */
  clear(): void {
    this.cache.clear();
  }

  /** Number of entries currently stored. */
  get size(): number {
    return this.cache.size;
  }

  /** Iterate stored values in insertion order. */
  values(): IterableIterator<V> {
    return this.cache.values();
  }

  /** Iterate stored keys in insertion order. */
  keys(): IterableIterator<K> {
    return this.cache.keys();
  }

  /** Iterate `[key, value]` pairs in insertion order. */
  entries(): IterableIterator<[K, V]> {
    return this.cache.entries();
  }
}
|
|
118
|
+
|
|
119
|
+
/**
 * Estimate probability by finding the n-th maximum value in the probabilities array.
 *
 * The rank `n` is the number of frames needed to cover the window:
 * `ceil(windowSizeInS / FRAME_DURATION_IN_S)`. A window that yields a rank below 1
 * (zero, negative or NaN window) is treated as a single frame, so the function
 * always returns a number instead of indexing outside the array.
 *
 * @param probabilities - Per-frame probabilities; the array is not mutated.
 * @param windowSizeInS - Window length in seconds (defaults to MIN_INTERRUPTION_DURATION_IN_S).
 * @returns The n-th largest probability, or 0 if there are fewer than `n` probabilities.
 */
export function estimateProbability(
  probabilities: number[],
  windowSizeInS: number = MIN_INTERRUPTION_DURATION_IN_S,
): number {
  const frames = Math.ceil(windowSizeInS / FRAME_DURATION_IN_S);
  // `frames >= 1` is false for 0, negatives and NaN; without the clamp those would
  // skip the length guard below and read sorted[-1] (undefined).
  const nTh = frames >= 1 ? frames : 1;
  if (probabilities.length < nTh) {
    return 0;
  }

  // Sort a copy in descending order so the caller's array stays untouched.
  const sorted = [...probabilities].sort((a, b) => b - a);
  return sorted[nTh - 1]!;
}
|
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { TransformStream } from 'stream/web';
|
|
5
|
+
import WebSocket from 'ws';
|
|
6
|
+
import { z } from 'zod';
|
|
7
|
+
import { log } from '../../log.js';
|
|
8
|
+
import { createAccessToken } from '../utils.js';
|
|
9
|
+
import { intervalForRetry } from './defaults.js';
|
|
10
|
+
import { InterruptionCacheEntry } from './interruption_cache_entry.js';
|
|
11
|
+
import type { OverlappingSpeechEvent } from './types.js';
|
|
12
|
+
import type { BoundedCache } from './utils.js';
|
|
13
|
+
|
|
14
|
+
// WebSocket message type tags.

// Session control. `session.create` is sent by this client when a connection is
// established; `session.close` is presumably its client-side counterpart (its use
// is not visible in this part of the file).
const MSG_SESSION_CREATE = 'session.create';
const MSG_SESSION_CLOSE = 'session.close';

// Tags accepted by the incoming-message schema.
const MSG_SESSION_CREATED = 'session.created';
const MSG_SESSION_CLOSED = 'session.closed';
const MSG_INTERRUPTION_DETECTED = 'bargein_detected';
const MSG_INFERENCE_DONE = 'inference_done';
const MSG_ERROR = 'error';
|
|
22
|
+
|
|
23
|
+
/** Settings for the interruption-detection WebSocket transport. */
export interface WsTransportOptions {
  /** Service base URL; a leading `http` is rewritten to `ws` and `/bargein` is appended. */
  baseUrl: string;
  /** API key passed to `createAccessToken` to build the bearer token. */
  apiKey: string;
  /** API secret passed to `createAccessToken` to build the bearer token. */
  apiSecret: string;
  /** Sent as `settings.sample_rate` in the `session.create` message. */
  sampleRate: number;
  /** Sent as `settings.threshold` in the `session.create` message. */
  threshold: number;
  /** Sent as `settings.min_frames` in the `session.create` message. */
  minFrames: number;
  /** Delay handed to `setTimeout` while waiting for the socket to open (milliseconds). */
  timeout: number;
  /** Number of connection retries after the first attempt; 3 is used when omitted. */
  maxRetries?: number;
}
|
|
33
|
+
|
|
34
|
+
/** Mutable state the transport reads through `getState` and patches through `setState`. */
export interface WsTransportState {
  /**
   * Gate for incoming detection results: they are only processed while this is true,
   * and it is reset to false after an interruption event has been emitted.
   */
  overlapSpeechStarted: boolean;
  /** Timestamp compared against `Date.now()` to derive the detection delay, if set. */
  overlapSpeechStartedAt: number | undefined;
  /** Per-request entries, looked up by the `created_at` value of incoming messages. */
  cache: BoundedCache<number, InterruptionCacheEntry>;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Validation schema for messages received over the WebSocket, discriminated on `type`.
 * `probabilities` and `prediction_duration` fall back to `[]` and `0` when absent.
 */
const wsMessageSchema = z.discriminatedUnion('type', [
  // Session lifecycle acknowledgements carry no payload.
  z.object({ type: z.literal(MSG_SESSION_CREATED) }),
  z.object({ type: z.literal(MSG_SESSION_CLOSED) }),
  // An interruption (barge-in) was detected for the request identified by `created_at`.
  z.object({
    type: z.literal(MSG_INTERRUPTION_DETECTED),
    created_at: z.number(),
    probabilities: z.array(z.number()).default([]),
    prediction_duration: z.number().default(0),
  }),
  // Inference finished for the request identified by `created_at`.
  z.object({
    type: z.literal(MSG_INFERENCE_DONE),
    created_at: z.number(),
    probabilities: z.array(z.number()).default([]),
    prediction_duration: z.number().default(0),
    is_bargein: z.boolean().optional(),
  }),
  // Error report.
  z.object({
    type: z.literal(MSG_ERROR),
    message: z.string(),
    code: z.number().optional(),
    session_id: z.string().optional(),
  }),
]);

/** Parsed form of any message accepted by `wsMessageSchema`. */
type WsMessage = z.infer<typeof wsMessageSchema>;
|
|
69
|
+
|
|
70
|
+
/**
 * Open a WebSocket to the `/bargein` endpoint and resolve once it is open.
 *
 * The base URL's leading `http` is rewritten to `ws`, and a bearer token created
 * from the API key/secret is sent in the `Authorization` header.
 *
 * @param options - Transport options; `timeout` bounds the wait for the `open` event.
 * @returns The opened socket.
 * @throws Error `'WebSocket connection timeout'` when the socket does not open in time,
 *   or the socket's own error when it fails first. The socket is terminated in both cases.
 */
async function connectWebSocket(options: WsTransportOptions): Promise<WebSocket> {
  const wsBase = options.baseUrl.replace(/^http/, 'ws');
  const token = await createAccessToken(options.apiKey, options.apiSecret);

  const socket = new WebSocket(`${wsBase}/bargein`, {
    headers: { Authorization: `Bearer ${token}` },
  });

  await new Promise<void>((resolve, reject) => {
    // Give up and tear the socket down if it has not opened within the timeout.
    const timer = setTimeout(() => {
      socket.terminate();
      reject(new Error('WebSocket connection timeout'));
    }, options.timeout);

    const onOpen = (): void => {
      clearTimeout(timer);
      resolve();
    };
    const onError = (err: Error): void => {
      clearTimeout(timer);
      socket.terminate();
      reject(err);
    };

    socket.once('open', onOpen);
    socket.once('error', onError);
  });

  return socket;
}
|
|
100
|
+
|
|
101
|
+
/** What `createWsTransport` hands back to its caller. */
export interface WsTransportResult {
  /**
   * Stream stage that accepts `Int16Array` audio slices or `OverlappingSpeechEvent`s
   * and emits `OverlappingSpeechEvent`s.
   */
  transport: TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>;
  /** Re-establishes the connection; intended for use after option updates. */
  reconnect: () => Promise<void>;
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Creates a WebSocket transport TransformStream for interruption detection.
|
|
108
|
+
*
|
|
109
|
+
* This transport receives Int16Array audio slices and outputs InterruptionEvents.
|
|
110
|
+
* It maintains a persistent WebSocket connection with automatic retry on failure.
|
|
111
|
+
* Returns both the transport and a reconnect function for option updates.
|
|
112
|
+
*/
|
|
113
|
+
export function createWsTransport(
|
|
114
|
+
options: WsTransportOptions,
|
|
115
|
+
getState: () => WsTransportState,
|
|
116
|
+
setState: (partial: Partial<WsTransportState>) => void,
|
|
117
|
+
updateUserSpeakingSpan?: (entry: InterruptionCacheEntry) => void,
|
|
118
|
+
onRequestSent?: () => void,
|
|
119
|
+
getAndResetNumRequests?: () => number,
|
|
120
|
+
): WsTransportResult {
|
|
121
|
+
const logger = log();
|
|
122
|
+
let ws: WebSocket | null = null;
|
|
123
|
+
let outputController: TransformStreamDefaultController<OverlappingSpeechEvent> | null = null;
|
|
124
|
+
|
|
125
|
+
function setupMessageHandler(socket: WebSocket): void {
|
|
126
|
+
socket.on('message', (data: WebSocket.Data) => {
|
|
127
|
+
try {
|
|
128
|
+
const message = wsMessageSchema.parse(JSON.parse(data.toString()));
|
|
129
|
+
handleMessage(message);
|
|
130
|
+
} catch {
|
|
131
|
+
logger.warn({ data: data.toString() }, 'Failed to parse WebSocket message');
|
|
132
|
+
}
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
socket.on('error', (err: Error) => {
|
|
136
|
+
logger.error({ err }, 'WebSocket error');
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
socket.on('close', (code: number, reason: Buffer) => {
|
|
140
|
+
logger.debug({ code, reason: reason.toString() }, 'WebSocket closed');
|
|
141
|
+
});
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
async function ensureConnection(): Promise<void> {
|
|
145
|
+
if (ws && ws.readyState === WebSocket.OPEN) return;
|
|
146
|
+
|
|
147
|
+
const maxRetries = options.maxRetries ?? 3;
|
|
148
|
+
let lastError: Error | null = null;
|
|
149
|
+
|
|
150
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
151
|
+
try {
|
|
152
|
+
ws = await connectWebSocket(options);
|
|
153
|
+
setupMessageHandler(ws);
|
|
154
|
+
|
|
155
|
+
// Send session.create message
|
|
156
|
+
const sessionCreateMsg = JSON.stringify({
|
|
157
|
+
type: MSG_SESSION_CREATE,
|
|
158
|
+
settings: {
|
|
159
|
+
sample_rate: options.sampleRate,
|
|
160
|
+
num_channels: 1,
|
|
161
|
+
threshold: options.threshold,
|
|
162
|
+
min_frames: options.minFrames,
|
|
163
|
+
encoding: 's16le',
|
|
164
|
+
},
|
|
165
|
+
});
|
|
166
|
+
ws.send(sessionCreateMsg);
|
|
167
|
+
return;
|
|
168
|
+
} catch (err) {
|
|
169
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
170
|
+
if (attempt < maxRetries) {
|
|
171
|
+
const delay = intervalForRetry(attempt);
|
|
172
|
+
logger.debug(
|
|
173
|
+
{ attempt, delay, err: lastError.message },
|
|
174
|
+
'WebSocket connection failed, retrying',
|
|
175
|
+
);
|
|
176
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
throw lastError ?? new Error('Failed to connect to WebSocket after retries');
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
/**
 * Dispatches one decoded server message according to its `type`.
 *
 * - session created / session closed: a debug log line only.
 * - interruption detected: stores the result in the cache, invokes
 *   `updateUserSpeakingSpan` when one was provided, enqueues a
 *   `user_overlapping_speech` event and clears the `overlapSpeechStarted` flag.
 * - inference done: stores the result in the cache and logs the timings; no
 *   event is enqueued.
 * - error: forwards the server error to the output stream controller.
 *
 * Interruption and inference results are dropped unless overlap speech is
 * currently flagged as started and its start timestamp is known. Message types
 * without a matching case are ignored.
 */
function handleMessage(message: WsMessage): void {
  const current = getState();

  switch (message.type) {
    case MSG_SESSION_CREATED: {
      logger.debug('WebSocket session created');
      break;
    }

    case MSG_INTERRUPTION_DETECTED: {
      const requestKey = message.created_at;
      const overlapStart = current.overlapSpeechStartedAt;
      if (!current.overlapSpeechStarted || overlapStart === undefined) {
        break;
      }

      const cached = current.cache.get(requestKey);

      // Measure from the locally recorded send time when the request is still
      // cached; otherwise fall back to the created_at value echoed by the server.
      const measuredFrom =
        cached?.requestStartedAt !== undefined ? cached.requestStartedAt : requestKey;
      const totalDurationInS = (performance.now() - measuredFrom) / 1000;

      const entry = current.cache.setOrUpdate(
        requestKey,
        () => new InterruptionCacheEntry({ createdAt: requestKey }),
        {
          speechInput: cached?.speechInput,
          requestStartedAt: cached?.requestStartedAt,
          totalDurationInS,
          probabilities: message.probabilities,
          isInterruption: true,
          predictionDurationInS: message.prediction_duration,
          detectionDelayInS: (Date.now() - overlapStart) / 1000,
        },
      );

      updateUserSpeakingSpan?.(entry);

      logger.debug(
        {
          totalDuration: entry.totalDurationInS,
          predictionDuration: entry.predictionDurationInS,
          detectionDelay: entry.detectionDelayInS,
          probability: entry.probability,
        },
        'interruption detected',
      );

      const overlapEvent: OverlappingSpeechEvent = {
        type: 'user_overlapping_speech',
        timestamp: Date.now(),
        isInterruption: true,
        totalDurationInS: entry.totalDurationInS,
        predictionDurationInS: entry.predictionDurationInS,
        overlapStartedAt: overlapStart,
        speechInput: entry.speechInput,
        probabilities: entry.probabilities,
        detectionDelayInS: entry.detectionDelayInS,
        probability: entry.probability,
        numRequests: getAndResetNumRequests?.() ?? 0,
      };

      outputController?.enqueue(overlapEvent);
      // Once an interruption has been reported the overlap is no longer tracked as started.
      setState({ overlapSpeechStarted: false });
      break;
    }

    case MSG_INFERENCE_DONE: {
      const requestKey = message.created_at;
      const overlapStart = current.overlapSpeechStartedAt;
      if (!current.overlapSpeechStarted || overlapStart === undefined) {
        break;
      }

      const cached = current.cache.get(requestKey);
      const measuredFrom =
        cached?.requestStartedAt !== undefined ? cached.requestStartedAt : requestKey;
      const totalDurationInS = (performance.now() - measuredFrom) / 1000;

      const entry = current.cache.setOrUpdate(
        requestKey,
        () => new InterruptionCacheEntry({ createdAt: requestKey }),
        {
          speechInput: cached?.speechInput,
          requestStartedAt: cached?.requestStartedAt,
          totalDurationInS,
          predictionDurationInS: message.prediction_duration,
          probabilities: message.probabilities,
          isInterruption: message.is_bargein ?? false,
          detectionDelayInS: (Date.now() - overlapStart) / 1000,
        },
      );

      logger.debug(
        {
          totalDurationInS: entry.totalDurationInS,
          predictionDurationInS: entry.predictionDurationInS,
        },
        'interruption inference done',
      );
      break;
    }

    case MSG_SESSION_CLOSED: {
      logger.debug('WebSocket session closed');
      break;
    }

    case MSG_ERROR: {
      // The error code is optional; it is only rendered when the server sent one.
      const codeSuffix = message.code !== undefined ? ` (${message.code})` : '';
      outputController?.error(
        new Error(`LiveKit Adaptive Interruption error${codeSuffix}: ${message.message}`),
      );
      break;
    }
  }
}
|
|
300
|
+
|
|
301
|
+
/**
 * Sends one slice of 16-bit PCM audio to the server as a single binary frame.
 *
 * Frame layout: an 8-byte little-endian unsigned integer timestamp followed by
 * the raw sample bytes. The timestamp is `performance.now()` truncated to an
 * integer and doubles as the cache key under which the slice is stored, so a
 * response carrying the same value as `created_at` can be matched to it.
 *
 * A failure of the underlying `send` call is logged and not rethrown.
 *
 * @param audioSlice - audio samples to transmit
 * @throws Error when there is no socket or the socket is not in the OPEN state
 */
function sendAudioData(audioSlice: Int16Array): void {
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    throw new Error('WebSocket not connected');
  }

  const HEADER_BYTES = 8;
  const { cache } = getState();

  // One truncated timestamp is shared by the cache key and the frame header.
  const createdAt = Math.floor(performance.now());

  // Remember the slice so the matching response can be correlated with it.
  cache.set(
    createdAt,
    new InterruptionCacheEntry({
      createdAt,
      requestStartedAt: performance.now(),
      speechInput: audioSlice,
    }),
  );

  // Assemble header + payload directly in the outgoing buffer.
  const frame = new Uint8Array(HEADER_BYTES + audioSlice.byteLength);
  const headerView = new DataView(frame.buffer, 0, HEADER_BYTES);
  // 64-bit little-endian value written as two 32-bit halves: low word, then high word.
  headerView.setUint32(0, createdAt >>> 0, true);
  headerView.setUint32(4, Math.floor(createdAt / 0x100000000) >>> 0, true);
  frame.set(
    new Uint8Array(audioSlice.buffer, audioSlice.byteOffset, audioSlice.byteLength),
    HEADER_BYTES,
  );

  try {
    ws.send(frame);
    onRequestSent?.();
  } catch (e: unknown) {
    logger.error(e, `failed to send audio via websocket`);
  }
}
|
|
344
|
+
|
|
345
|
+
/**
 * Shuts down the current WebSocket, if there is one.
 *
 * While the socket is still open, a session-close message is sent first; a
 * failure to send it is logged and otherwise ignored. The socket is then closed
 * with code 1000 and the stored reference is cleared.
 */
function close(): void {
  const socket = ws;

  if (socket) {
    if (socket.readyState === WebSocket.OPEN) {
      const payload = JSON.stringify({ type: MSG_SESSION_CLOSE });
      try {
        socket.send(payload);
      } catch (e: unknown) {
        logger.error(e, 'failed to send close message');
      }
    }
    // 1000 = normal closure
    socket.close(1000);
  }

  ws = null;
}
|
|
357
|
+
|
|
358
|
+
/**
 * Reconnect the WebSocket with updated options.
 * This is called when options are updated via updateOptions().
 *
 * The current socket (if any) is closed and a new connection is then
 * established through ensureConnection(), which also sends a fresh
 * session.create message. Without re-establishing the connection here the
 * transport would be left without a socket and every subsequent audio send
 * would fail with "WebSocket not connected".
 *
 * @throws the last connection error when ensureConnection() exhausts its retries
 */
async function reconnect(): Promise<void> {
  close();
  await ensureConnection();
}
|
|
365
|
+
|
|
366
|
+
// Stream stage between the audio pipeline and the WebSocket:
//  - audio chunks (Int16Array) are sent to the server while overlap speech is
//    active and are never forwarded downstream;
//  - any other chunk is passed through unchanged.
// Events produced from server messages reach the readable side through
// `outputController`, which is captured in `start`.
const transport = new TransformStream<
  Int16Array | OverlappingSpeechEvent,
  OverlappingSpeechEvent
>(
  {
    async start(controller) {
      outputController = controller;
      await ensureConnection();
    },

    transform(chunk, controller) {
      if (chunk instanceof Int16Array) {
        // Audio is only sent while an overlap is flagged and has a start timestamp.
        const state = getState();
        const overlapActive = state.overlapSpeechStartedAt && state.overlapSpeechStarted;
        if (overlapActive) {
          try {
            sendAudioData(chunk);
          } catch (err) {
            logger.error({ err }, 'Failed to send audio data over WebSocket');
          }
        }
        return;
      }

      controller.enqueue(chunk);
    },

    flush() {
      close();
    },
  },
  { highWaterMark: 2 },
  { highWaterMark: 2 },
);
|
|
400
|
+
|
|
401
|
+
return { transport, reconnect };
|
|
402
|
+
}
|
package/src/inference/llm.ts
CHANGED
|
@@ -2,19 +2,12 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import OpenAI from 'openai';
|
|
5
|
-
import {
|
|
6
|
-
APIConnectionError,
|
|
7
|
-
APIStatusError,
|
|
8
|
-
APITimeoutError,
|
|
9
|
-
DEFAULT_API_CONNECT_OPTIONS,
|
|
10
|
-
type Expand,
|
|
11
|
-
toError,
|
|
12
|
-
} from '../index.js';
|
|
5
|
+
import { APIConnectionError, APIStatusError, APITimeoutError } from '../_exceptions.js';
|
|
13
6
|
import * as llm from '../llm/index.js';
|
|
7
|
+
import { DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
14
8
|
import type { APIConnectOptions } from '../types.js';
|
|
15
|
-
import { type
|
|
16
|
-
|
|
17
|
-
const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
|
|
9
|
+
import { type Expand, toError } from '../utils.js';
|
|
10
|
+
import { type AnyString, createAccessToken, getDefaultInferenceUrl } from './utils.js';
|
|
18
11
|
|
|
19
12
|
export type OpenAIModels =
|
|
20
13
|
| 'openai/gpt-5.2'
|
|
@@ -127,7 +120,7 @@ export class LLM extends llm.LLM {
|
|
|
127
120
|
strictToolSchema = false,
|
|
128
121
|
} = opts;
|
|
129
122
|
|
|
130
|
-
const lkBaseURL = baseURL ||
|
|
123
|
+
const lkBaseURL = baseURL || getDefaultInferenceUrl();
|
|
131
124
|
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
|
|
132
125
|
if (!lkApiKey) {
|
|
133
126
|
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
|
|
@@ -163,6 +156,10 @@ export class LLM extends llm.LLM {
|
|
|
163
156
|
return this.opts.model;
|
|
164
157
|
}
|
|
165
158
|
|
|
159
|
+
/** Provider name reported for this LLM; always `'livekit'`. */
get provider(): string {
  return 'livekit';
}
|
|
162
|
+
|
|
166
163
|
static fromModelString(modelString: string): LLM {
|
|
167
164
|
return new LLM({ model: modelString });
|
|
168
165
|
}
|
package/src/inference/stt.ts
CHANGED
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
type SttTranscriptEvent,
|
|
23
23
|
sttServerEventSchema,
|
|
24
24
|
} from './api_protos.js';
|
|
25
|
-
import { type AnyString, connectWs, createAccessToken } from './utils.js';
|
|
25
|
+
import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';
|
|
26
26
|
|
|
27
27
|
export type DeepgramModels =
|
|
28
28
|
| 'deepgram/flux-general'
|
|
@@ -151,7 +151,6 @@ export type STTEncoding = 'pcm_s16le';
|
|
|
151
151
|
|
|
152
152
|
const DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';
|
|
153
153
|
const DEFAULT_SAMPLE_RATE = 16000;
|
|
154
|
-
const DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';
|
|
155
154
|
const DEFAULT_CANCEL_TIMEOUT = 5000;
|
|
156
155
|
|
|
157
156
|
export interface InferenceSTTOptions<TModel extends STTModels> {
|
|
@@ -203,7 +202,7 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
203
202
|
connOptions,
|
|
204
203
|
} = opts || {};
|
|
205
204
|
|
|
206
|
-
const lkBaseURL = baseURL ||
|
|
205
|
+
const lkBaseURL = baseURL || getDefaultInferenceUrl();
|
|
207
206
|
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
|
|
208
207
|
if (!lkApiKey) {
|
|
209
208
|
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
|
|
@@ -253,6 +252,14 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
253
252
|
return 'inference.STT';
|
|
254
253
|
}
|
|
255
254
|
|
|
255
|
+
/** Model configured in the options, or `'auto'` when none was set. */
get model(): string {
  const configured = this.opts.model;
  return configured ?? 'auto';
}
|
|
258
|
+
|
|
259
|
+
/** Provider name reported for this STT; always `'livekit'`. */
get provider(): string {
  return 'livekit';
}
|
|
262
|
+
|
|
256
263
|
static fromModelString(modelString: string): STT<AnyString> {
|
|
257
264
|
const [model, language] = parseSTTModelString(modelString);
|
|
258
265
|
return new STT({ model, language });
|
package/src/inference/tts.ts
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
ttsClientEventSchema,
|
|
20
20
|
ttsServerEventSchema,
|
|
21
21
|
} from './api_protos.js';
|
|
22
|
-
import { type AnyString, connectWs, createAccessToken } from './utils.js';
|
|
22
|
+
import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';
|
|
23
23
|
|
|
24
24
|
export type CartesiaModels =
|
|
25
25
|
| 'cartesia/sonic-3'
|
|
@@ -136,7 +136,6 @@ type TTSEncoding = 'pcm_s16le';
|
|
|
136
136
|
|
|
137
137
|
const DEFAULT_ENCODING: TTSEncoding = 'pcm_s16le';
|
|
138
138
|
const DEFAULT_SAMPLE_RATE = 16000;
|
|
139
|
-
const DEFAULT_BASE_URL = 'https://agent-gateway.livekit.cloud/v1';
|
|
140
139
|
const NUM_CHANNELS = 1;
|
|
141
140
|
const DEFAULT_LANGUAGE = 'en';
|
|
142
141
|
|
|
@@ -193,7 +192,7 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
|
|
|
193
192
|
connOptions,
|
|
194
193
|
} = opts || {};
|
|
195
194
|
|
|
196
|
-
const lkBaseURL = baseURL ||
|
|
195
|
+
const lkBaseURL = baseURL || getDefaultInferenceUrl();
|
|
197
196
|
const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
|
|
198
197
|
if (!lkApiKey) {
|
|
199
198
|
throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
|
|
@@ -254,6 +253,14 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
|
|
|
254
253
|
return 'inference.TTS';
|
|
255
254
|
}
|
|
256
255
|
|
|
256
|
+
/** Model configured in the options, or `'unknown'` when none was set. */
get model(): string {
  const configured = this.opts.model;
  return configured ?? 'unknown';
}
|
|
259
|
+
|
|
260
|
+
/** Provider name reported for this TTS; always `'livekit'`. */
get provider(): string {
  return 'livekit';
}
|
|
263
|
+
|
|
257
264
|
static fromModelString(modelString: string): TTS<AnyString> {
|
|
258
265
|
const [model, voice] = parseTTSModelString(modelString);
|
|
259
266
|
return new TTS({ model, voice: voice || undefined });
|
package/src/inference/utils.ts
CHANGED
|
@@ -3,10 +3,38 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { AccessToken } from 'livekit-server-sdk';
|
|
5
5
|
import { WebSocket } from 'ws';
|
|
6
|
-
import { APIConnectionError, APIStatusError } from '../
|
|
6
|
+
import { APIConnectionError, APIStatusError } from '../_exceptions.js';
|
|
7
7
|
|
|
8
8
|
export type AnyString = string & NonNullable<unknown>;
|
|
9
9
|
|
|
10
|
+
/** Default production inference URL */
export const DEFAULT_INFERENCE_URL = 'https://agent-gateway.livekit.cloud/v1';

/** Staging inference URL */
export const STAGING_INFERENCE_URL = 'https://agent-gateway.staging.livekit.cloud/v1';

/**
 * Resolve the inference gateway URL to use when the caller did not supply one.
 *
 * Resolution order:
 * 1. `LIVEKIT_INFERENCE_URL`, when set to a non-empty value
 * 2. the staging gateway, when `LIVEKIT_URL` contains `.staging.livekit.cloud`
 * 3. the production gateway
 *
 * @returns the selected inference base URL
 */
export function getDefaultInferenceUrl(): string {
  const { LIVEKIT_INFERENCE_URL: override, LIVEKIT_URL: projectUrl } = process.env;

  if (override) {
    return override;
  }

  const pointsAtStaging = (projectUrl || '').includes('.staging.livekit.cloud');
  return pointsAtStaging ? STAGING_INFERENCE_URL : DEFAULT_INFERENCE_URL;
}
|
|
37
|
+
|
|
10
38
|
export async function createAccessToken(
|
|
11
39
|
apiKey: string,
|
|
12
40
|
apiSecret: string,
|