@livekit/agents 1.0.48 → 1.1.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.cjs +27 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +9 -0
- package/dist/constants.d.ts +9 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +18 -0
- package/dist/constants.js.map +1 -1
- package/dist/inference/api_protos.d.cts +71 -71
- package/dist/inference/api_protos.d.ts +71 -71
- package/dist/inference/interruption/defaults.cjs +81 -0
- package/dist/inference/interruption/defaults.cjs.map +1 -0
- package/dist/inference/interruption/defaults.d.cts +19 -0
- package/dist/inference/interruption/defaults.d.ts +19 -0
- package/dist/inference/interruption/defaults.d.ts.map +1 -0
- package/dist/inference/interruption/defaults.js +46 -0
- package/dist/inference/interruption/defaults.js.map +1 -0
- package/dist/inference/interruption/errors.cjs +44 -0
- package/dist/inference/interruption/errors.cjs.map +1 -0
- package/dist/inference/interruption/errors.d.cts +12 -0
- package/dist/inference/interruption/errors.d.ts +12 -0
- package/dist/inference/interruption/errors.d.ts.map +1 -0
- package/dist/inference/interruption/errors.js +20 -0
- package/dist/inference/interruption/errors.js.map +1 -0
- package/dist/inference/interruption/http_transport.cjs +147 -0
- package/dist/inference/interruption/http_transport.cjs.map +1 -0
- package/dist/inference/interruption/http_transport.d.cts +63 -0
- package/dist/inference/interruption/http_transport.d.ts +63 -0
- package/dist/inference/interruption/http_transport.d.ts.map +1 -0
- package/dist/inference/interruption/http_transport.js +121 -0
- package/dist/inference/interruption/http_transport.js.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.js +34 -0
- package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
- package/dist/inference/interruption/interruption_detector.cjs +181 -0
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
- package/dist/inference/interruption/interruption_detector.d.cts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_detector.js +147 -0
- package/dist/inference/interruption/interruption_detector.js.map +1 -0
- package/dist/inference/interruption/interruption_stream.cjs +368 -0
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
- package/dist/inference/interruption/interruption_stream.d.cts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_stream.js +344 -0
- package/dist/inference/interruption/interruption_stream.js.map +1 -0
- package/dist/inference/interruption/types.cjs +17 -0
- package/dist/inference/interruption/types.cjs.map +1 -0
- package/dist/inference/interruption/types.d.cts +66 -0
- package/dist/inference/interruption/types.d.ts +66 -0
- package/dist/inference/interruption/types.d.ts.map +1 -0
- package/dist/inference/interruption/types.js +1 -0
- package/dist/inference/interruption/types.js.map +1 -0
- package/dist/inference/interruption/utils.cjs +130 -0
- package/dist/inference/interruption/utils.cjs.map +1 -0
- package/dist/inference/interruption/utils.d.cts +41 -0
- package/dist/inference/interruption/utils.d.ts +41 -0
- package/dist/inference/interruption/utils.d.ts.map +1 -0
- package/dist/inference/interruption/utils.js +105 -0
- package/dist/inference/interruption/utils.js.map +1 -0
- package/dist/inference/interruption/utils.test.cjs +105 -0
- package/dist/inference/interruption/utils.test.cjs.map +1 -0
- package/dist/inference/interruption/utils.test.js +104 -0
- package/dist/inference/interruption/utils.test.js.map +1 -0
- package/dist/inference/interruption/ws_transport.cjs +329 -0
- package/dist/inference/interruption/ws_transport.cjs.map +1 -0
- package/dist/inference/interruption/ws_transport.d.cts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
- package/dist/inference/interruption/ws_transport.js +295 -0
- package/dist/inference/interruption/ws_transport.js.map +1 -0
- package/dist/inference/llm.cjs +14 -10
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +2 -1
- package/dist/inference/llm.d.ts +2 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +8 -10
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +7 -2
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +2 -0
- package/dist/inference/stt.d.ts +2 -0
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +8 -3
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs +7 -2
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -0
- package/dist/inference/tts.d.ts +2 -0
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +8 -3
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/utils.cjs +26 -7
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +13 -0
- package/dist/inference/utils.d.ts +13 -0
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +18 -2
- package/dist/inference/utils.js.map +1 -1
- package/dist/llm/chat_context.cjs +20 -2
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +19 -1
- package/dist/llm/chat_context.d.ts +19 -1
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +20 -2
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +16 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +9 -0
- package/dist/llm/llm.d.ts +9 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +16 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/realtime.cjs +3 -0
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +1 -0
- package/dist/llm/realtime.d.ts +1 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js +3 -0
- package/dist/llm/realtime.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +45 -1
- package/dist/metrics/base.d.ts +45 -1
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/index.cjs +5 -0
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -1
- package/dist/metrics/index.d.ts +2 -1
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +6 -0
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/model_usage.cjs +189 -0
- package/dist/metrics/model_usage.cjs.map +1 -0
- package/dist/metrics/model_usage.d.cts +92 -0
- package/dist/metrics/model_usage.d.ts +92 -0
- package/dist/metrics/model_usage.d.ts.map +1 -0
- package/dist/metrics/model_usage.js +164 -0
- package/dist/metrics/model_usage.js.map +1 -0
- package/dist/metrics/model_usage.test.cjs +474 -0
- package/dist/metrics/model_usage.test.cjs.map +1 -0
- package/dist/metrics/model_usage.test.js +476 -0
- package/dist/metrics/model_usage.test.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +3 -0
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +9 -0
- package/dist/metrics/usage_collector.d.ts +9 -0
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +3 -0
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +9 -0
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +9 -0
- package/dist/metrics/utils.js.map +1 -1
- package/dist/stream/multi_input_stream.test.cjs +4 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -1
- package/dist/stream/multi_input_stream.test.js +5 -1
- package/dist/stream/multi_input_stream.test.js.map +1 -1
- package/dist/stream/stream_channel.cjs +31 -0
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +4 -2
- package/dist/stream/stream_channel.d.ts +4 -2
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +31 -0
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/stt/stt.cjs +34 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +22 -0
- package/dist/stt/stt.d.ts +22 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +34 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/otel_http_exporter.cjs +24 -5
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -1
- package/dist/telemetry/otel_http_exporter.d.cts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -1
- package/dist/telemetry/otel_http_exporter.js +24 -5
- package/dist/telemetry/otel_http_exporter.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +5 -5
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +9 -5
- package/dist/telemetry/trace_types.d.ts +9 -5
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +5 -5
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/telemetry/traces.cjs +47 -8
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +47 -8
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +64 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +34 -0
- package/dist/tts/tts.d.ts +34 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +64 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +25 -4
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -2
- package/dist/voice/agent.d.ts +10 -2
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +25 -4
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +261 -36
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +20 -6
- package/dist/voice/agent_activity.d.ts +20 -6
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +262 -37
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +105 -48
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +90 -20
- package/dist/voice/agent_session.d.ts +90 -20
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +105 -46
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +287 -6
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +42 -3
- package/dist/voice/audio_recognition.d.ts +42 -3
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +289 -7
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/client_events.cjs +554 -0
- package/dist/voice/client_events.cjs.map +1 -0
- package/dist/voice/client_events.d.cts +195 -0
- package/dist/voice/client_events.d.ts +195 -0
- package/dist/voice/client_events.d.ts.map +1 -0
- package/dist/voice/client_events.js +548 -0
- package/dist/voice/client_events.js.map +1 -0
- package/dist/voice/events.cjs +1 -0
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +8 -5
- package/dist/voice/events.d.ts +8 -5
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -0
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +43 -8
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +3 -3
- package/dist/voice/generation.d.ts +3 -3
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -8
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +1 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/report.cjs +20 -8
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -0
- package/dist/voice/report.d.ts +5 -0
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +20 -8
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +106 -0
- package/dist/voice/report.test.cjs.map +1 -0
- package/dist/voice/report.test.js +105 -0
- package/dist/voice/report.test.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +5 -39
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +4 -9
- package/dist/voice/room_io/room_io.d.ts +4 -9
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +5 -40
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/endpointing.cjs +33 -0
- package/dist/voice/turn_config/endpointing.cjs.map +1 -0
- package/dist/voice/turn_config/endpointing.d.cts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
- package/dist/voice/turn_config/endpointing.js +9 -0
- package/dist/voice/turn_config/endpointing.js.map +1 -0
- package/dist/voice/turn_config/interruption.cjs +37 -0
- package/dist/voice/turn_config/interruption.cjs.map +1 -0
- package/dist/voice/turn_config/interruption.d.cts +53 -0
- package/dist/voice/turn_config/interruption.d.ts +53 -0
- package/dist/voice/turn_config/interruption.d.ts.map +1 -0
- package/dist/voice/turn_config/interruption.js +13 -0
- package/dist/voice/turn_config/interruption.js.map +1 -0
- package/dist/voice/turn_config/turn_handling.cjs +35 -0
- package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
- package/dist/voice/turn_config/turn_handling.d.cts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
- package/dist/voice/turn_config/turn_handling.js +11 -0
- package/dist/voice/turn_config/turn_handling.js.map +1 -0
- package/dist/voice/turn_config/utils.cjs +97 -0
- package/dist/voice/turn_config/utils.cjs.map +1 -0
- package/dist/voice/turn_config/utils.d.cts +25 -0
- package/dist/voice/turn_config/utils.d.ts +25 -0
- package/dist/voice/turn_config/utils.d.ts.map +1 -0
- package/dist/voice/turn_config/utils.js +73 -0
- package/dist/voice/turn_config/utils.js.map +1 -0
- package/dist/voice/turn_config/utils.test.cjs +86 -0
- package/dist/voice/turn_config/utils.test.cjs.map +1 -0
- package/dist/voice/turn_config/utils.test.js +85 -0
- package/dist/voice/turn_config/utils.test.js.map +1 -0
- package/dist/voice/wire_format.cjs +798 -0
- package/dist/voice/wire_format.cjs.map +1 -0
- package/dist/voice/wire_format.d.cts +5503 -0
- package/dist/voice/wire_format.d.ts +5503 -0
- package/dist/voice/wire_format.d.ts.map +1 -0
- package/dist/voice/wire_format.js +728 -0
- package/dist/voice/wire_format.js.map +1 -0
- package/package.json +2 -1
- package/src/constants.ts +13 -0
- package/src/inference/interruption/defaults.ts +51 -0
- package/src/inference/interruption/errors.ts +25 -0
- package/src/inference/interruption/http_transport.ts +187 -0
- package/src/inference/interruption/interruption_cache_entry.ts +50 -0
- package/src/inference/interruption/interruption_detector.ts +188 -0
- package/src/inference/interruption/interruption_stream.ts +467 -0
- package/src/inference/interruption/types.ts +84 -0
- package/src/inference/interruption/utils.test.ts +132 -0
- package/src/inference/interruption/utils.ts +137 -0
- package/src/inference/interruption/ws_transport.ts +402 -0
- package/src/inference/llm.ts +9 -12
- package/src/inference/stt.ts +10 -3
- package/src/inference/tts.ts +10 -3
- package/src/inference/utils.ts +29 -1
- package/src/llm/chat_context.ts +40 -2
- package/src/llm/index.ts +1 -0
- package/src/llm/llm.ts +16 -0
- package/src/llm/realtime.ts +4 -0
- package/src/metrics/base.ts +48 -1
- package/src/metrics/index.ts +11 -0
- package/src/metrics/model_usage.test.ts +545 -0
- package/src/metrics/model_usage.ts +262 -0
- package/src/metrics/usage_collector.ts +11 -0
- package/src/metrics/utils.ts +11 -0
- package/src/stream/multi_input_stream.test.ts +6 -1
- package/src/stream/stream_channel.ts +34 -2
- package/src/stt/stt.ts +38 -0
- package/src/telemetry/otel_http_exporter.ts +28 -5
- package/src/telemetry/trace_types.ts +11 -8
- package/src/telemetry/traces.ts +111 -54
- package/src/tts/tts.ts +69 -1
- package/src/voice/agent.ts +30 -3
- package/src/voice/agent_activity.ts +327 -28
- package/src/voice/agent_session.ts +207 -59
- package/src/voice/audio_recognition.ts +385 -9
- package/src/voice/client_events.ts +838 -0
- package/src/voice/events.ts +14 -4
- package/src/voice/generation.ts +52 -9
- package/src/voice/index.ts +1 -0
- package/src/voice/report.test.ts +117 -0
- package/src/voice/report.ts +29 -6
- package/src/voice/room_io/room_io.ts +7 -61
- package/src/voice/turn_config/endpointing.ts +33 -0
- package/src/voice/turn_config/interruption.ts +56 -0
- package/src/voice/turn_config/turn_handling.ts +45 -0
- package/src/voice/turn_config/utils.test.ts +100 -0
- package/src/voice/turn_config/utils.ts +103 -0
- package/src/voice/wire_format.ts +827 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
import { AudioFrame, AudioResampler } from "@livekit/rtc-node";
|
|
2
|
+
import { TransformStream } from "stream/web";
|
|
3
|
+
import { log } from "../../log.js";
|
|
4
|
+
import { createStreamChannel } from "../../stream/stream_channel.js";
|
|
5
|
+
import { traceTypes } from "../../telemetry/index.js";
|
|
6
|
+
import { FRAMES_PER_SECOND, apiConnectDefaults } from "./defaults.js";
|
|
7
|
+
import { createHttpTransport } from "./http_transport.js";
|
|
8
|
+
import { InterruptionCacheEntry } from "./interruption_cache_entry.js";
|
|
9
|
+
import {
|
|
10
|
+
} from "./types.js";
|
|
11
|
+
import { BoundedCache } from "./utils.js";
|
|
12
|
+
import { createWsTransport } from "./ws_transport.js";
|
|
13
|
+
/**
 * Factory for the control messages ("sentinels") that are written into an
 * interruption stream alongside audio frames. Every factory returns a fresh
 * plain object whose `type` field identifies the message.
 */
class InterruptionStreamSentinel {
  /**
   * Build a sentinel: the `type` tag first, followed by the payload fields.
   * @param {string} type - Sentinel discriminator.
   * @param {object} [payload] - Extra fields copied onto the sentinel.
   * @returns {object} A new sentinel object.
   */
  static #sentinel(type, payload = {}) {
    return { type, ...payload };
  }

  /** @returns {{type: "agent-speech-started"}} Marks the start of agent speech. */
  static agentSpeechStarted() {
    return InterruptionStreamSentinel.#sentinel("agent-speech-started");
  }

  /** @returns {{type: "agent-speech-ended"}} Marks the end of agent speech. */
  static agentSpeechEnded() {
    return InterruptionStreamSentinel.#sentinel("agent-speech-ended");
  }

  /**
   * Marks the start of user speech that overlaps agent speech.
   * @param {number} speechDuration - Duration of user speech detected so far.
   * @param {number} startedAt - Time at which the overlap started.
   * @param {object} [userSpeakingSpan] - Optional tracing span for the user turn.
   * @returns {object} The "overlap-speech-started" sentinel.
   */
  static overlapSpeechStarted(speechDuration, startedAt, userSpeakingSpan) {
    const payload = { speechDuration, startedAt, userSpeakingSpan };
    return InterruptionStreamSentinel.#sentinel("overlap-speech-started", payload);
  }

  /**
   * Marks the end of overlapping user speech.
   * @param {number} endedAt - Time at which the overlap ended.
   * @returns {object} The "overlap-speech-ended" sentinel.
   */
  static overlapSpeechEnded(endedAt) {
    return InterruptionStreamSentinel.#sentinel("overlap-speech-ended", { endedAt });
  }

  /** @returns {{type: "flush"}} The flush sentinel. */
  static flush() {
    return InterruptionStreamSentinel.#sentinel("flush");
  }
}
|
|
30
|
+
/**
 * Copy the interruption-detection results of a cache entry onto a tracing span.
 *
 * @param {{setAttribute: Function}} span - Span receiving the attributes.
 * @param {object} entry - Entry providing `isInterruption`, `probability`,
 *   `totalDurationInS`, `predictionDurationInS` and `detectionDelayInS`.
 */
function updateUserSpeakingSpan(span, entry) {
  // The interruption flag is recorded as the lowercase string "true"/"false";
  // a missing flag counts as false.
  const interruptionFlag = String(entry.isInterruption ?? false).toLowerCase();
  const attributes = [
    [traceTypes.ATTR_IS_INTERRUPTION, interruptionFlag],
    [traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability],
    [traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS],
    [traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS],
    [traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS],
  ];
  for (const [key, value] of attributes) {
    span.setAttribute(key, value);
  }
}
|
|
40
|
+
/**
 * Stream that turns pushed audio frames and control sentinels into
 * "user_overlapping_speech" events.
 *
 * Input written through `pushFrame()` / `flush()` goes into `inputStream` and
 * is piped through three stages built by `setupTransform()`:
 *   1. an audio transformer that buffers audio while the agent is speaking and
 *      emits buffer snapshots while overlapping user speech is active,
 *   2. a transport (WebSocket when `options.useProxy` is set, HTTP otherwise),
 *   3. an emitter that re-publishes each event and derived metrics on `model`.
 */
class InterruptionStreamBase {
  inputStream;
  eventStream;
  resampler;
  numRequests = 0;
  userSpeakingSpan;
  overlapSpeechStartedAt;
  options;
  apiOptions;
  model;
  logger = log();
  // Store reconnect function for WebSocket transport
  wsReconnect;
  // Mutable transport options that can be updated via updateOptions()
  transportOptions;

  /**
   * @param {object} model - Owning detector; provides `options`, `emit()`,
   *   `provider`, `model` and `removeStream()`.
   * @param {object} [apiOptions] - Overrides merged on top of `apiConnectDefaults`.
   */
  constructor(model, apiOptions) {
    this.inputStream = createStreamChannel();
    this.model = model;
    // Shallow copy so updateOptions() does not mutate the model's own options.
    this.options = { ...model.options };
    this.apiOptions = { ...apiConnectDefaults, ...apiOptions };
    this.transportOptions = {
      baseUrl: this.options.baseUrl,
      apiKey: this.options.apiKey,
      apiSecret: this.options.apiSecret,
      sampleRate: this.options.sampleRate,
      threshold: this.options.threshold,
      minFrames: this.options.minFrames,
      timeout: this.options.inferenceTimeout,
      maxRetries: this.apiOptions.maxRetries,
    };
    this.eventStream = this.setupTransform();
  }

  /**
   * Update stream options. For WebSocket transport, this triggers a reconnection.
   *
   * @param {{threshold?: number, minInterruptionDurationInS?: number}} options -
   *   Only the fields that are not `undefined` are applied.
   * @returns {Promise<void>}
   */
  async updateOptions(options) {
    if (options.threshold !== undefined) {
      this.options.threshold = options.threshold;
      this.transportOptions.threshold = options.threshold;
    }
    if (options.minInterruptionDurationInS !== undefined) {
      this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;
      // minFrames is derived from the duration, rounded up to whole frames.
      this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);
      this.transportOptions.minFrames = this.options.minFrames;
    }
    if (this.options.useProxy && this.wsReconnect) {
      await this.wsReconnect();
    }
  }

  /**
   * Build the processing pipeline
   * `inputStream -> audioTransformer -> transport -> eventEmitter`.
   *
   * @returns {ReadableStream} Readable side of the final pipeline stage.
   */
  setupTransform() {
    let agentSpeechStarted = false;
    let startIdx = 0;
    let accumulatedSamples = 0;
    let overlapSpeechStarted = false;
    let overlapCount = 0;
    const cache = new BoundedCache(10);
    // Rolling mono buffer sized for `maxAudioDurationInS` seconds of audio.
    const inferenceS16Data = new Int16Array(
      Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate),
    ).fill(0);

    // Accessors handed to the transport so it can observe / update this state.
    const getState = () => ({
      overlapSpeechStarted,
      overlapSpeechStartedAt: this.overlapSpeechStartedAt,
      cache,
      overlapCount,
    });
    const setState = (partial) => {
      if (partial.overlapSpeechStarted !== undefined) {
        overlapSpeechStarted = partial.overlapSpeechStarted;
      }
    };
    // Annotates the pending user-speaking span once, then drops the reference.
    const handleSpanUpdate = (entry) => {
      if (this.userSpeakingSpan) {
        updateUserSpeakingSpan(this.userSpeakingSpan, entry);
        this.userSpeakingSpan = undefined;
      }
    };
    const onRequestSent = () => {
      this.numRequests++;
    };
    const getAndResetNumRequests = () => {
      const n = this.numRequests;
      this.numRequests = 0;
      return n;
    };
    // Shared reset applied on both agent-speech boundaries.
    const resetDetectionState = () => {
      overlapSpeechStarted = false;
      this.overlapSpeechStartedAt = undefined;
      accumulatedSamples = 0;
      overlapCount = 0;
      startIdx = 0;
      this.numRequests = 0;
      cache.clear();
    };

    const audioTransformer = new TransformStream(
      {
        transform: (chunk, controller) => {
          if (chunk instanceof AudioFrame) {
            // Audio is only buffered while the agent is speaking.
            if (!agentSpeechStarted) {
              return;
            }
            if (this.options.sampleRate !== chunk.sampleRate) {
              const message = "the sample rate of the input frames must be consistent";
              // Error the stream with an Error object (not a bare string) so
              // consumers receive a stack trace and can use `instanceof Error`.
              controller.error(new Error(message));
              this.logger.error(message);
              return;
            }
            const result = writeToInferenceS16Data(
              chunk,
              startIdx,
              inferenceS16Data,
              this.options.maxAudioDurationInS,
            );
            startIdx = result.startIdx;
            accumulatedSamples += result.samplesWritten;
            const samplesPerDetection = Math.floor(
              this.options.detectionIntervalInS * this.options.sampleRate,
            );
            if (accumulatedSamples >= samplesPerDetection && overlapSpeechStarted) {
              // Emit a copy of everything buffered so far for inference.
              const audioSlice = inferenceS16Data.slice(0, startIdx);
              accumulatedSamples = 0;
              controller.enqueue(audioSlice);
            }
          } else if (chunk.type === "agent-speech-started") {
            this.logger.debug("agent speech started");
            agentSpeechStarted = true;
            resetDetectionState();
          } else if (chunk.type === "agent-speech-ended") {
            this.logger.debug("agent speech ended");
            agentSpeechStarted = false;
            resetDetectionState();
          } else if (chunk.type === "overlap-speech-started" && agentSpeechStarted) {
            this.overlapSpeechStartedAt = chunk.startedAt;
            this.userSpeakingSpan = chunk.userSpeakingSpan;
            this.logger.debug("overlap speech started, starting interruption inference");
            overlapSpeechStarted = true;
            accumulatedSamples = 0;
            overlapCount += 1;
            if (overlapCount <= 1) {
              // First overlap of this agent turn: keep only the tail of the
              // buffer covering the detected speech plus the configured prefix.
              // `speechDuration` is divided by 1e3 before scaling by the sample
              // rate — presumably milliseconds; confirm against the caller.
              const keepSize =
                Math.round((chunk.speechDuration / 1e3) * this.options.sampleRate) +
                Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate);
              const shiftCount = Math.max(0, startIdx - keepSize);
              inferenceS16Data.copyWithin(0, shiftCount, startIdx);
              startIdx -= shiftCount;
            }
            cache.clear();
          } else if (chunk.type === "overlap-speech-ended") {
            this.logger.debug("overlap speech ended");
            if (overlapSpeechStarted) {
              this.userSpeakingSpan = undefined;
              let latestEntry = cache.pop(
                (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0,
              );
              if (!latestEntry) {
                this.logger.debug("no request made for overlap speech");
                latestEntry = InterruptionCacheEntry.default();
              }
              // The overlap ended here, so it is reported with
              // `isInterruption: false`.
              const event = {
                type: "user_overlapping_speech",
                timestamp: chunk.endedAt,
                isInterruption: false,
                overlapStartedAt: this.overlapSpeechStartedAt,
                speechInput: latestEntry.speechInput,
                probabilities: latestEntry.probabilities,
                totalDurationInS: latestEntry.totalDurationInS,
                detectionDelayInS: latestEntry.detectionDelayInS,
                predictionDurationInS: latestEntry.predictionDurationInS,
                probability: latestEntry.probability,
                numRequests: getAndResetNumRequests(),
              };
              controller.enqueue(event);
              overlapSpeechStarted = false;
              accumulatedSamples = 0;
            }
            this.overlapSpeechStartedAt = undefined;
          }
          // Any other chunk (including the "flush" sentinel, and an
          // "overlap-speech-started" received while the agent is silent) is
          // intentionally dropped: this stage keeps no pending output.
        },
      },
      { highWaterMark: 32 },
      { highWaterMark: 32 },
    );

    const transportOptions = this.transportOptions;
    let transport;
    if (this.options.useProxy) {
      const wsResult = createWsTransport(
        transportOptions,
        getState,
        setState,
        handleSpanUpdate,
        onRequestSent,
        getAndResetNumRequests,
      );
      transport = wsResult.transport;
      this.wsReconnect = wsResult.reconnect;
    } else {
      transport = createHttpTransport(
        transportOptions,
        getState,
        setState,
        handleSpanUpdate,
        getAndResetNumRequests,
      );
    }

    // Publishes each event and its metrics on the model, then forwards the event.
    const eventEmitter = new TransformStream({
      transform: (chunk, controller) => {
        this.model.emit("user_overlapping_speech", chunk);
        const metrics = {
          type: "interruption_metrics",
          timestamp: chunk.timestamp,
          // Event durations are in seconds; metrics carry them multiplied by 1000.
          totalDuration: chunk.totalDurationInS * 1e3,
          predictionDuration: chunk.predictionDurationInS * 1e3,
          detectionDelay: chunk.detectionDelayInS * 1e3,
          numInterruptions: chunk.isInterruption ? 1 : 0,
          numBackchannels: chunk.isInterruption ? 0 : 1,
          numRequests: chunk.numRequests,
          metadata: {
            modelProvider: this.model.provider,
            modelName: this.model.model,
          },
        };
        this.model.emit("metrics_collected", metrics);
        controller.enqueue(chunk);
      },
    });

    return this.inputStream
      .stream()
      .pipeThrough(audioTransformer)
      .pipeThrough(transport)
      .pipeThrough(eventEmitter);
  }

  /** @throws {Error} If the input stream has already been closed. */
  ensureInputNotEnded() {
    if (this.inputStream.closed) {
      throw new Error("input stream is closed");
    }
  }

  /** @throws {Error} If the input stream has already been closed. */
  ensureStreamsNotEnded() {
    this.ensureInputNotEnded();
  }

  /**
   * Return the stream's resampler, creating it on first use.
   * The resampler is created once; later calls return the same instance
   * regardless of `inputSampleRate` (pushFrame() rejects rate changes).
   *
   * @param {number} inputSampleRate - Sample rate of the incoming frames.
   * @returns {AudioResampler}
   */
  getResamplerFor(inputSampleRate) {
    if (!this.resampler) {
      this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate);
    }
    return this.resampler;
  }

  /** @returns {ReadableStream} The output event stream. */
  stream() {
    return this.eventStream;
  }

  /**
   * Write an audio frame or a sentinel into the stream. Audio frames whose
   * sample rate differs from `options.sampleRate` are resampled first.
   *
   * @param {AudioFrame|object} frame - Audio frame or sentinel object.
   * @throws {Error} If the input is closed, or if a resampled frame's rate
   *   differs from the rate the resampler was created with.
   */
  async pushFrame(frame) {
    this.ensureStreamsNotEnded();
    if (!(frame instanceof AudioFrame)) {
      // Sentinels are forwarded untouched.
      return this.inputStream.write(frame);
    } else if (this.options.sampleRate !== frame.sampleRate) {
      const resampler = this.getResamplerFor(frame.sampleRate);
      if (resampler.inputRate !== frame.sampleRate) {
        throw new Error("the sample rate of the input frames must be consistent");
      }
      for (const resampledFrame of resampler.push(frame)) {
        await this.inputStream.write(resampledFrame);
      }
    } else {
      await this.inputStream.write(frame);
    }
  }

  /**
   * Write a flush sentinel into the stream.
   * @throws {Error} If the input stream is closed.
   */
  async flush() {
    this.ensureStreamsNotEnded();
    await this.inputStream.write(InterruptionStreamSentinel.flush());
  }

  /** Flush, then close the input side of the stream. */
  async endInput() {
    await this.flush();
    await this.inputStream.close();
  }

  /** Close the input (if still open) and unregister this stream from the model. */
  async close() {
    if (!this.inputStream.closed) await this.inputStream.close();
    this.model.removeStream(this);
  }
}
|
|
311
|
+
/**
 * Append one audio frame to a rolling mono 16-bit buffer.
 *
 * When the frame does not fit in the remaining window, the oldest samples are
 * discarded by shifting the buffered data towards index 0 before writing.
 * Multi-channel frames are down-mixed by averaging the channels of each sample
 * (rounded down).
 *
 * @param {{data: Int16Array, sampleRate: number, samplesPerChannel: number, channels: number}} frame
 *   Frame to append; `data` holds interleaved samples.
 * @param {number} startIdx - Index in `outData` at which buffered audio ends.
 * @param {Int16Array} outData - Destination buffer, modified in place.
 * @param {number} maxAudioDuration - Window length; multiplied by
 *   `frame.sampleRate` to obtain the window size in samples.
 * @returns {{startIdx: number, samplesWritten: number}} New end index and the
 *   number of samples appended.
 * @throws {Error} If the frame holds more samples per channel than the window.
 */
function writeToInferenceS16Data(frame, startIdx, outData, maxAudioDuration) {
  const { samplesPerChannel, channels } = frame;
  // The usable window is bounded by both the configured duration and the real
  // buffer length. Guarding and shifting against the same bound keeps the write
  // index non-negative and the write inside the buffer even when the two
  // disagree (e.g. a buffer allocated with ceil() vs. a window sized by floor()).
  const windowSize = Math.min(Math.floor(maxAudioDuration * frame.sampleRate), outData.length);
  if (samplesPerChannel > windowSize) {
    throw new Error("frame samples are greater than the max window size");
  }

  let writeIdx = startIdx;
  const overflow = writeIdx + samplesPerChannel - windowSize;
  if (overflow > 0) {
    // Drop the oldest `overflow` samples to make room for the new frame.
    outData.copyWithin(0, overflow, writeIdx);
    writeIdx -= overflow;
  }

  // View the frame's samples without copying, honouring the view's byte offset.
  const frameData = new Int16Array(
    frame.data.buffer,
    frame.data.byteOffset,
    samplesPerChannel * channels,
  );
  if (channels > 1) {
    for (let i = 0; i < samplesPerChannel; i++) {
      let sum = 0;
      for (let ch = 0; ch < channels; ch++) {
        sum += frameData[i * channels + ch] ?? 0;
      }
      outData[writeIdx + i] = Math.floor(sum / channels);
    }
  } else {
    outData.set(frameData, writeIdx);
  }

  return { startIdx: writeIdx + samplesPerChannel, samplesWritten: samplesPerChannel };
}
|
|
340
|
+
export {
|
|
341
|
+
InterruptionStreamBase,
|
|
342
|
+
InterruptionStreamSentinel
|
|
343
|
+
};
|
|
344
|
+
//# sourceMappingURL=interruption_stream.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/inference/interruption/interruption_stream.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { Span } from '@opentelemetry/api';\nimport { type ReadableStream, TransformStream } from 'stream/web';\nimport { log } from '../../log.js';\nimport type { InterruptionMetrics } from '../../metrics/base.js';\nimport { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';\nimport { traceTypes } from '../../telemetry/index.js';\nimport { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js';\nimport type { InterruptionDetectionError } from './errors.js';\nimport { createHttpTransport } from './http_transport.js';\nimport { InterruptionCacheEntry } from './interruption_cache_entry.js';\nimport type { AdaptiveInterruptionDetector } from './interruption_detector.js';\nimport {\n type AgentSpeechEnded,\n type AgentSpeechStarted,\n type ApiConnectOptions,\n type Flush,\n type InterruptionOptions,\n type InterruptionSentinel,\n type OverlapSpeechEnded,\n type OverlapSpeechStarted,\n type OverlappingSpeechEvent,\n} from './types.js';\nimport { BoundedCache } from './utils.js';\nimport { createWsTransport } from './ws_transport.js';\n\n// Re-export sentinel types for backwards compatibility\nexport type {\n AgentSpeechEnded,\n AgentSpeechStarted,\n ApiConnectOptions,\n Flush,\n InterruptionSentinel,\n OverlapSpeechEnded,\n OverlapSpeechStarted,\n};\n\nexport class InterruptionStreamSentinel {\n static agentSpeechStarted(): AgentSpeechStarted {\n return { type: 'agent-speech-started' };\n }\n\n static agentSpeechEnded(): AgentSpeechEnded {\n return { type: 'agent-speech-ended' };\n }\n\n static overlapSpeechStarted(\n speechDuration: number,\n startedAt: number,\n userSpeakingSpan?: Span,\n ): OverlapSpeechStarted {\n return { type: 'overlap-speech-started', speechDuration, 
startedAt, userSpeakingSpan };\n }\n\n static overlapSpeechEnded(endedAt: number): OverlapSpeechEnded {\n return { type: 'overlap-speech-ended', endedAt };\n }\n\n static flush(): Flush {\n return { type: 'flush' };\n }\n}\n\nfunction updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) {\n span.setAttribute(\n traceTypes.ATTR_IS_INTERRUPTION,\n (entry.isInterruption ?? false).toString().toLowerCase(),\n );\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS);\n span.setAttribute(traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS);\n}\n\nexport class InterruptionStreamBase {\n private inputStream: StreamChannel<InterruptionSentinel | AudioFrame, InterruptionDetectionError>;\n\n private eventStream: ReadableStream<OverlappingSpeechEvent>;\n\n private resampler?: AudioResampler;\n\n private numRequests = 0;\n\n private userSpeakingSpan: Span | undefined;\n\n private overlapSpeechStartedAt: number | undefined;\n\n private options: InterruptionOptions;\n\n private apiOptions: ApiConnectOptions;\n\n private model: AdaptiveInterruptionDetector;\n\n private logger = log();\n\n // Store reconnect function for WebSocket transport\n private wsReconnect?: () => Promise<void>;\n\n // Mutable transport options that can be updated via updateOptions()\n private transportOptions: {\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n sampleRate: number;\n threshold: number;\n minFrames: number;\n timeout: number;\n maxRetries: number;\n };\n\n constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial<ApiConnectOptions>) {\n this.inputStream = createStreamChannel<\n InterruptionSentinel | AudioFrame,\n InterruptionDetectionError\n >();\n\n this.model = model;\n this.options = { ...model.options };\n this.apiOptions = { 
...apiConnectDefaults, ...apiOptions };\n\n // Initialize mutable transport options\n this.transportOptions = {\n baseUrl: this.options.baseUrl,\n apiKey: this.options.apiKey,\n apiSecret: this.options.apiSecret,\n sampleRate: this.options.sampleRate,\n threshold: this.options.threshold,\n minFrames: this.options.minFrames,\n timeout: this.options.inferenceTimeout,\n maxRetries: this.apiOptions.maxRetries,\n };\n\n this.eventStream = this.setupTransform();\n }\n\n /**\n * Update stream options. For WebSocket transport, this triggers a reconnection.\n */\n async updateOptions(options: {\n threshold?: number;\n minInterruptionDurationInS?: number;\n }): Promise<void> {\n if (options.threshold !== undefined) {\n this.options.threshold = options.threshold;\n this.transportOptions.threshold = options.threshold;\n }\n if (options.minInterruptionDurationInS !== undefined) {\n this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;\n this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);\n this.transportOptions.minFrames = this.options.minFrames;\n }\n // Trigger WebSocket reconnection if using proxy (WebSocket transport)\n if (this.options.useProxy && this.wsReconnect) {\n await this.wsReconnect();\n }\n }\n\n private setupTransform(): ReadableStream<OverlappingSpeechEvent> {\n let agentSpeechStarted = false;\n let startIdx = 0;\n let accumulatedSamples = 0;\n let overlapSpeechStarted = false;\n let overlapCount = 0;\n const cache = new BoundedCache<number, InterruptionCacheEntry>(10);\n const inferenceS16Data = new Int16Array(\n Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate),\n ).fill(0);\n\n // State accessors for transport\n const getState = () => ({\n overlapSpeechStarted,\n overlapSpeechStartedAt: this.overlapSpeechStartedAt,\n cache,\n overlapCount,\n });\n const setState = (partial: { overlapSpeechStarted?: boolean }) => {\n if (partial.overlapSpeechStarted !== undefined) {\n 
overlapSpeechStarted = partial.overlapSpeechStarted;\n }\n };\n const handleSpanUpdate = (entry: InterruptionCacheEntry) => {\n if (this.userSpeakingSpan) {\n updateUserSpeakingSpan(this.userSpeakingSpan, entry);\n this.userSpeakingSpan = undefined;\n }\n };\n\n const onRequestSent = () => {\n this.numRequests++;\n };\n\n const getAndResetNumRequests = (): number => {\n const n = this.numRequests;\n this.numRequests = 0;\n return n;\n };\n\n // First transform: process input frames/sentinels and output audio slices or events\n const audioTransformer = new TransformStream<\n InterruptionSentinel | AudioFrame,\n Int16Array | OverlappingSpeechEvent\n >(\n {\n transform: (chunk, controller) => {\n if (chunk instanceof AudioFrame) {\n if (!agentSpeechStarted) {\n return;\n }\n if (this.options.sampleRate !== chunk.sampleRate) {\n controller.error('the sample rate of the input frames must be consistent');\n this.logger.error('the sample rate of the input frames must be consistent');\n return;\n }\n const result = writeToInferenceS16Data(\n chunk,\n startIdx,\n inferenceS16Data,\n this.options.maxAudioDurationInS,\n );\n startIdx = result.startIdx;\n accumulatedSamples += result.samplesWritten;\n\n if (\n accumulatedSamples >=\n Math.floor(this.options.detectionIntervalInS * this.options.sampleRate) &&\n overlapSpeechStarted\n ) {\n const audioSlice = inferenceS16Data.slice(0, startIdx);\n accumulatedSamples = 0;\n controller.enqueue(audioSlice);\n }\n } else if (chunk.type === 'agent-speech-started') {\n this.logger.debug('agent speech started');\n agentSpeechStarted = true;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'agent-speech-ended') {\n this.logger.debug('agent speech ended');\n agentSpeechStarted = false;\n overlapSpeechStarted = false;\n this.overlapSpeechStartedAt = undefined;\n accumulatedSamples = 
0;\n overlapCount = 0;\n startIdx = 0;\n this.numRequests = 0;\n cache.clear();\n } else if (chunk.type === 'overlap-speech-started' && agentSpeechStarted) {\n this.overlapSpeechStartedAt = chunk.startedAt;\n this.userSpeakingSpan = chunk.userSpeakingSpan;\n this.logger.debug('overlap speech started, starting interruption inference');\n overlapSpeechStarted = true;\n accumulatedSamples = 0;\n overlapCount += 1;\n if (overlapCount <= 1) {\n const keepSize =\n Math.round((chunk.speechDuration / 1000) * this.options.sampleRate) +\n Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate);\n const shiftCount = Math.max(0, startIdx - keepSize);\n inferenceS16Data.copyWithin(0, shiftCount, startIdx);\n startIdx -= shiftCount;\n }\n cache.clear();\n } else if (chunk.type === 'overlap-speech-ended') {\n this.logger.debug('overlap speech ended');\n if (overlapSpeechStarted) {\n this.userSpeakingSpan = undefined;\n let latestEntry = cache.pop(\n (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0,\n );\n if (!latestEntry) {\n this.logger.debug('no request made for overlap speech');\n latestEntry = InterruptionCacheEntry.default();\n }\n const e = latestEntry ?? 
InterruptionCacheEntry.default();\n const event: OverlappingSpeechEvent = {\n type: 'user_overlapping_speech',\n timestamp: chunk.endedAt,\n isInterruption: false,\n overlapStartedAt: this.overlapSpeechStartedAt,\n speechInput: e.speechInput,\n probabilities: e.probabilities,\n totalDurationInS: e.totalDurationInS,\n detectionDelayInS: e.detectionDelayInS,\n predictionDurationInS: e.predictionDurationInS,\n probability: e.probability,\n numRequests: getAndResetNumRequests(),\n };\n controller.enqueue(event);\n overlapSpeechStarted = false;\n accumulatedSamples = 0;\n }\n this.overlapSpeechStartedAt = undefined;\n } else if (chunk.type === 'flush') {\n // no-op\n }\n },\n },\n { highWaterMark: 32 },\n { highWaterMark: 32 },\n );\n\n // Second transform: transport layer (HTTP or WebSocket based on useProxy)\n const transportOptions = this.transportOptions;\n\n let transport: TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>;\n if (this.options.useProxy) {\n const wsResult = createWsTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n onRequestSent,\n getAndResetNumRequests,\n );\n transport = wsResult.transport;\n this.wsReconnect = wsResult.reconnect;\n } else {\n transport = createHttpTransport(\n transportOptions,\n getState,\n setState,\n handleSpanUpdate,\n getAndResetNumRequests,\n );\n }\n\n const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({\n transform: (chunk, controller) => {\n this.model.emit('user_overlapping_speech', chunk);\n\n const metrics: InterruptionMetrics = {\n type: 'interruption_metrics',\n timestamp: chunk.timestamp,\n totalDuration: chunk.totalDurationInS * 1000,\n predictionDuration: chunk.predictionDurationInS * 1000,\n detectionDelay: chunk.detectionDelayInS * 1000,\n numInterruptions: chunk.isInterruption ? 1 : 0,\n numBackchannels: chunk.isInterruption ? 
0 : 1,\n numRequests: chunk.numRequests,\n metadata: {\n modelProvider: this.model.provider,\n modelName: this.model.model,\n },\n };\n this.model.emit('metrics_collected', metrics);\n\n controller.enqueue(chunk);\n },\n });\n\n // Pipeline: input -> audioTransformer -> transport -> eventEmitter -> eventStream\n return this.inputStream\n .stream()\n .pipeThrough(audioTransformer)\n .pipeThrough(transport)\n .pipeThrough(eventEmitter);\n }\n\n private ensureInputNotEnded() {\n if (this.inputStream.closed) {\n throw new Error('input stream is closed');\n }\n }\n\n private ensureStreamsNotEnded() {\n this.ensureInputNotEnded();\n }\n\n private getResamplerFor(inputSampleRate: number): AudioResampler {\n if (!this.resampler) {\n this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate);\n }\n return this.resampler;\n }\n\n stream(): ReadableStream<OverlappingSpeechEvent> {\n return this.eventStream;\n }\n\n async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise<void> {\n this.ensureStreamsNotEnded();\n if (!(frame instanceof AudioFrame)) {\n return this.inputStream.write(frame);\n } else if (this.options.sampleRate !== frame.sampleRate) {\n const resampler = this.getResamplerFor(frame.sampleRate);\n if (resampler.inputRate !== frame.sampleRate) {\n throw new Error('the sample rate of the input frames must be consistent');\n }\n for (const resampledFrame of resampler.push(frame)) {\n await this.inputStream.write(resampledFrame);\n }\n } else {\n await this.inputStream.write(frame);\n }\n }\n\n async flush(): Promise<void> {\n this.ensureStreamsNotEnded();\n await this.inputStream.write(InterruptionStreamSentinel.flush());\n }\n\n async endInput(): Promise<void> {\n await this.flush();\n await this.inputStream.close();\n }\n\n async close(): Promise<void> {\n if (!this.inputStream.closed) await this.inputStream.close();\n this.model.removeStream(this);\n }\n}\n\n/**\n * Write the audio frame to the output data array and return the new 
start index\n * and the number of samples written.\n */\nfunction writeToInferenceS16Data(\n frame: AudioFrame,\n startIdx: number,\n outData: Int16Array,\n maxAudioDuration: number,\n): { startIdx: number; samplesWritten: number } {\n const maxWindowSize = Math.floor(maxAudioDuration * frame.sampleRate);\n\n if (frame.samplesPerChannel > outData.length) {\n throw new Error('frame samples are greater than the max window size');\n }\n\n // Shift the data to the left if the window would overflow\n const shift = startIdx + frame.samplesPerChannel - maxWindowSize;\n if (shift > 0) {\n outData.copyWithin(0, shift, startIdx);\n startIdx -= shift;\n }\n\n // Get the frame data as Int16Array\n const frameData = new Int16Array(\n frame.data.buffer,\n frame.data.byteOffset,\n frame.samplesPerChannel * frame.channels,\n );\n\n if (frame.channels > 1) {\n // Mix down multiple channels to mono by averaging\n for (let i = 0; i < frame.samplesPerChannel; i++) {\n let sum = 0;\n for (let ch = 0; ch < frame.channels; ch++) {\n sum += frameData[i * frame.channels + ch] ?? 
0;\n }\n outData[startIdx + i] = Math.floor(sum / frame.channels);\n }\n } else {\n // Single channel - copy directly\n outData.set(frameData, startIdx);\n }\n\n startIdx += frame.samplesPerChannel;\n return { startIdx, samplesWritten: frame.samplesPerChannel };\n}\n"],"mappings":"AAGA,SAAS,YAAY,sBAAsB;AAE3C,SAA8B,uBAAuB;AACrD,SAAS,WAAW;AAEpB,SAA6B,2BAA2B;AACxD,SAAS,kBAAkB;AAC3B,SAAS,mBAAmB,0BAA0B;AAEtD,SAAS,2BAA2B;AACpC,SAAS,8BAA8B;AAEvC;AAAA,OAUO;AACP,SAAS,oBAAoB;AAC7B,SAAS,yBAAyB;AAa3B,MAAM,2BAA2B;AAAA,EACtC,OAAO,qBAAyC;AAC9C,WAAO,EAAE,MAAM,uBAAuB;AAAA,EACxC;AAAA,EAEA,OAAO,mBAAqC;AAC1C,WAAO,EAAE,MAAM,qBAAqB;AAAA,EACtC;AAAA,EAEA,OAAO,qBACL,gBACA,WACA,kBACsB;AACtB,WAAO,EAAE,MAAM,0BAA0B,gBAAgB,WAAW,iBAAiB;AAAA,EACvF;AAAA,EAEA,OAAO,mBAAmB,SAAqC;AAC7D,WAAO,EAAE,MAAM,wBAAwB,QAAQ;AAAA,EACjD;AAAA,EAEA,OAAO,QAAe;AACpB,WAAO,EAAE,MAAM,QAAQ;AAAA,EACzB;AACF;AAEA,SAAS,uBAAuB,MAAY,OAA+B;AACzE,OAAK;AAAA,IACH,WAAW;AAAA,KACV,MAAM,kBAAkB,OAAO,SAAS,EAAE,YAAY;AAAA,EACzD;AACA,OAAK,aAAa,WAAW,+BAA+B,MAAM,WAAW;AAC7E,OAAK,aAAa,WAAW,kCAAkC,MAAM,gBAAgB;AACrF,OAAK,aAAa,WAAW,uCAAuC,MAAM,qBAAqB;AAC/F,OAAK,aAAa,WAAW,mCAAmC,MAAM,iBAAiB;AACzF;AAEO,MAAM,uBAAuB;AAAA,EAC1B;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,cAAc;AAAA,EAEd;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA;AAAA,EAEA,SAAS,IAAI;AAAA;AAAA,EAGb;AAAA;AAAA,EAGA;AAAA,EAWR,YAAY,OAAqC,YAAwC;AACvF,SAAK,cAAc,oBAGjB;AAEF,SAAK,QAAQ;AACb,SAAK,UAAU,EAAE,GAAG,MAAM,QAAQ;AAClC,SAAK,aAAa,EAAE,GAAG,oBAAoB,GAAG,WAAW;AAGzD,SAAK,mBAAmB;AAAA,MACtB,SAAS,KAAK,QAAQ;AAAA,MACtB,QAAQ,KAAK,QAAQ;AAAA,MACrB,WAAW,KAAK,QAAQ;AAAA,MACxB,YAAY,KAAK,QAAQ;AAAA,MACzB,WAAW,KAAK,QAAQ;AAAA,MACxB,WAAW,KAAK,QAAQ;AAAA,MACxB,SAAS,KAAK,QAAQ;AAAA,MACtB,YAAY,KAAK,WAAW;AAAA,IAC9B;AAEA,SAAK,cAAc,KAAK,eAAe;AAAA,EACzC;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,cAAc,SAGF;AAChB,QAAI,QAAQ,cAAc,QAAW;AACnC,WAAK,QAAQ,YAAY,QAAQ;AACjC,WAAK,iBAAiB,YAAY,QAAQ;AAAA,IAC5C;AACA,QAAI,QAAQ,+BAA+B,QAAW;AACpD,WAAK,QAAQ,6BAA6B,QAAQ;AAClD,WAAK,QAAQ,YAAY,KAAK,KAAK,QAAQ,6BAA6B,iBAAiB;AACzF,WAAK,iBAAiB,YAAY,KAAK,QAAQ;AAAA,IACjD;AAEA,QAAI,K
AAK,QAAQ,YAAY,KAAK,aAAa;AAC7C,YAAM,KAAK,YAAY;AAAA,IACzB;AAAA,EACF;AAAA,EAEQ,iBAAyD;AAC/D,QAAI,qBAAqB;AACzB,QAAI,WAAW;AACf,QAAI,qBAAqB;AACzB,QAAI,uBAAuB;AAC3B,QAAI,eAAe;AACnB,UAAM,QAAQ,IAAI,aAA6C,EAAE;AACjE,UAAM,mBAAmB,IAAI;AAAA,MAC3B,KAAK,KAAK,KAAK,QAAQ,sBAAsB,KAAK,QAAQ,UAAU;AAAA,IACtE,EAAE,KAAK,CAAC;AAGR,UAAM,WAAW,OAAO;AAAA,MACtB;AAAA,MACA,wBAAwB,KAAK;AAAA,MAC7B;AAAA,MACA;AAAA,IACF;AACA,UAAM,WAAW,CAAC,YAAgD;AAChE,UAAI,QAAQ,yBAAyB,QAAW;AAC9C,+BAAuB,QAAQ;AAAA,MACjC;AAAA,IACF;AACA,UAAM,mBAAmB,CAAC,UAAkC;AAC1D,UAAI,KAAK,kBAAkB;AACzB,+BAAuB,KAAK,kBAAkB,KAAK;AACnD,aAAK,mBAAmB;AAAA,MAC1B;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM;AAC1B,WAAK;AAAA,IACP;AAEA,UAAM,yBAAyB,MAAc;AAC3C,YAAM,IAAI,KAAK;AACf,WAAK,cAAc;AACnB,aAAO;AAAA,IACT;AAGA,UAAM,mBAAmB,IAAI;AAAA,MAI3B;AAAA,QACE,WAAW,CAAC,OAAO,eAAe;AAChC,cAAI,iBAAiB,YAAY;AAC/B,gBAAI,CAAC,oBAAoB;AACvB;AAAA,YACF;AACA,gBAAI,KAAK,QAAQ,eAAe,MAAM,YAAY;AAChD,yBAAW,MAAM,wDAAwD;AACzE,mBAAK,OAAO,MAAM,wDAAwD;AAC1E;AAAA,YACF;AACA,kBAAM,SAAS;AAAA,cACb;AAAA,cACA;AAAA,cACA;AAAA,cACA,KAAK,QAAQ;AAAA,YACf;AACA,uBAAW,OAAO;AAClB,kCAAsB,OAAO;AAE7B,gBACE,sBACE,KAAK,MAAM,KAAK,QAAQ,uBAAuB,KAAK,QAAQ,UAAU,KACxE,sBACA;AACA,oBAAM,aAAa,iBAAiB,MAAM,GAAG,QAAQ;AACrD,mCAAqB;AACrB,yBAAW,QAAQ,UAAU;AAAA,YAC/B;AAAA,UACF,WAAW,MAAM,SAAS,wBAAwB;AAChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,sBAAsB;AAC9C,iBAAK,OAAO,MAAM,oBAAoB;AACtC,iCAAqB;AACrB,mCAAuB;AACvB,iBAAK,yBAAyB;AAC9B,iCAAqB;AACrB,2BAAe;AACf,uBAAW;AACX,iBAAK,cAAc;AACnB,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,4BAA4B,oBAAoB;AACxE,iBAAK,yBAAyB,MAAM;AACpC,iBAAK,mBAAmB,MAAM;AAC9B,iBAAK,OAAO,MAAM,yDAAyD;AAC3E,mCAAuB;AACvB,iCAAqB;AACrB,4BAAgB;AAChB,gBAAI,gBAAgB,GAAG;AACrB,oBAAM,WACJ,KAAK,MAAO,MAAM,iBAAiB,MAAQ,KAAK,QAAQ,UAAU,IAClE,KAAK,MAAM,KAAK,QAAQ,yBAAyB,KAAK,QAAQ,UAAU;AAC1E,oBAAM,aAAa,KAAK,IAAI,GAAG,WAAW,QAAQ;AAClD,+BAAiB,WAAW,GAAG,YAAY,QAAQ;AACnD,0BAAY;AAAA,YACd;AACA,kBAAM,MAAM;AAAA,UACd,WAAW,MAAM,SAAS,wBAAwB;A
AChD,iBAAK,OAAO,MAAM,sBAAsB;AACxC,gBAAI,sBAAsB;AACxB,mBAAK,mBAAmB;AACxB,kBAAI,cAAc,MAAM;AAAA,gBACtB,CAAC,UAAU,MAAM,qBAAqB,UAAa,MAAM,mBAAmB;AAAA,cAC9E;AACA,kBAAI,CAAC,aAAa;AAChB,qBAAK,OAAO,MAAM,oCAAoC;AACtD,8BAAc,uBAAuB,QAAQ;AAAA,cAC/C;AACA,oBAAM,IAAI,eAAe,uBAAuB,QAAQ;AACxD,oBAAM,QAAgC;AAAA,gBACpC,MAAM;AAAA,gBACN,WAAW,MAAM;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,kBAAkB,KAAK;AAAA,gBACvB,aAAa,EAAE;AAAA,gBACf,eAAe,EAAE;AAAA,gBACjB,kBAAkB,EAAE;AAAA,gBACpB,mBAAmB,EAAE;AAAA,gBACrB,uBAAuB,EAAE;AAAA,gBACzB,aAAa,EAAE;AAAA,gBACf,aAAa,uBAAuB;AAAA,cACtC;AACA,yBAAW,QAAQ,KAAK;AACxB,qCAAuB;AACvB,mCAAqB;AAAA,YACvB;AACA,iBAAK,yBAAyB;AAAA,UAChC,WAAW,MAAM,SAAS,SAAS;AAAA,UAEnC;AAAA,QACF;AAAA,MACF;AAAA,MACA,EAAE,eAAe,GAAG;AAAA,MACpB,EAAE,eAAe,GAAG;AAAA,IACtB;AAGA,UAAM,mBAAmB,KAAK;AAE9B,QAAI;AACJ,QAAI,KAAK,QAAQ,UAAU;AACzB,YAAM,WAAW;AAAA,QACf;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,kBAAY,SAAS;AACrB,WAAK,cAAc,SAAS;AAAA,IAC9B,OAAO;AACL,kBAAY;AAAA,QACV;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,IAAI,gBAAgE;AAAA,MACvF,WAAW,CAAC,OAAO,eAAe;AAChC,aAAK,MAAM,KAAK,2BAA2B,KAAK;AAEhD,cAAM,UAA+B;AAAA,UACnC,MAAM;AAAA,UACN,WAAW,MAAM;AAAA,UACjB,eAAe,MAAM,mBAAmB;AAAA,UACxC,oBAAoB,MAAM,wBAAwB;AAAA,UAClD,gBAAgB,MAAM,oBAAoB;AAAA,UAC1C,kBAAkB,MAAM,iBAAiB,IAAI;AAAA,UAC7C,iBAAiB,MAAM,iBAAiB,IAAI;AAAA,UAC5C,aAAa,MAAM;AAAA,UACnB,UAAU;AAAA,YACR,eAAe,KAAK,MAAM;AAAA,YAC1B,WAAW,KAAK,MAAM;AAAA,UACxB;AAAA,QACF;AACA,aAAK,MAAM,KAAK,qBAAqB,OAAO;AAE5C,mBAAW,QAAQ,KAAK;AAAA,MAC1B;AAAA,IACF,CAAC;AAGD,WAAO,KAAK,YACT,OAAO,EACP,YAAY,gBAAgB,EAC5B,YAAY,SAAS,EACrB,YAAY,YAAY;AAAA,EAC7B;AAAA,EAEQ,sBAAsB;AAC5B,QAAI,KAAK,YAAY,QAAQ;AAC3B,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C;AAAA,EACF;AAAA,EAEQ,wBAAwB;AAC9B,SAAK,oBAAoB;AAAA,EAC3B;AAAA,EAEQ,gBAAgB,iBAAyC;AAC/D,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY,IAAI,eAAe,iBAAiB,KAAK,QAAQ,UAAU;AAAA,IAC9E;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,SAAiD;AAC/C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,UAAU,OAAyD;AACvE,SAAK,sBAAsB;AAC3B,QAAI,EAAE,iBAAiB,aAAa;AAClC,aAAO,KAAK,YAAY,M
AAM,KAAK;AAAA,IACrC,WAAW,KAAK,QAAQ,eAAe,MAAM,YAAY;AACvD,YAAM,YAAY,KAAK,gBAAgB,MAAM,UAAU;AACvD,UAAI,UAAU,cAAc,MAAM,YAAY;AAC5C,cAAM,IAAI,MAAM,wDAAwD;AAAA,MAC1E;AACA,iBAAW,kBAAkB,UAAU,KAAK,KAAK,GAAG;AAClD,cAAM,KAAK,YAAY,MAAM,cAAc;AAAA,MAC7C;AAAA,IACF,OAAO;AACL,YAAM,KAAK,YAAY,MAAM,KAAK;AAAA,IACpC;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,sBAAsB;AAC3B,UAAM,KAAK,YAAY,MAAM,2BAA2B,MAAM,CAAC;AAAA,EACjE;AAAA,EAEA,MAAM,WAA0B;AAC9B,UAAM,KAAK,MAAM;AACjB,UAAM,KAAK,YAAY,MAAM;AAAA,EAC/B;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,CAAC,KAAK,YAAY,OAAQ,OAAM,KAAK,YAAY,MAAM;AAC3D,SAAK,MAAM,aAAa,IAAI;AAAA,EAC9B;AACF;AAMA,SAAS,wBACP,OACA,UACA,SACA,kBAC8C;AAC9C,QAAM,gBAAgB,KAAK,MAAM,mBAAmB,MAAM,UAAU;AAEpE,MAAI,MAAM,oBAAoB,QAAQ,QAAQ;AAC5C,UAAM,IAAI,MAAM,oDAAoD;AAAA,EACtE;AAGA,QAAM,QAAQ,WAAW,MAAM,oBAAoB;AACnD,MAAI,QAAQ,GAAG;AACb,YAAQ,WAAW,GAAG,OAAO,QAAQ;AACrC,gBAAY;AAAA,EACd;AAGA,QAAM,YAAY,IAAI;AAAA,IACpB,MAAM,KAAK;AAAA,IACX,MAAM,KAAK;AAAA,IACX,MAAM,oBAAoB,MAAM;AAAA,EAClC;AAEA,MAAI,MAAM,WAAW,GAAG;AAEtB,aAAS,IAAI,GAAG,IAAI,MAAM,mBAAmB,KAAK;AAChD,UAAI,MAAM;AACV,eAAS,KAAK,GAAG,KAAK,MAAM,UAAU,MAAM;AAC1C,eAAO,UAAU,IAAI,MAAM,WAAW,EAAE,KAAK;AAAA,MAC/C;AACA,cAAQ,WAAW,CAAC,IAAI,KAAK,MAAM,MAAM,MAAM,QAAQ;AAAA,IACzD;AAAA,EACF,OAAO;AAEL,YAAQ,IAAI,WAAW,QAAQ;AAAA,EACjC;AAEA,cAAY,MAAM;AAClB,SAAO,EAAE,UAAU,gBAAgB,MAAM,kBAAkB;AAC7D;","names":[]}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
// CommonJS interop prelude for a module with no runtime exports: the final
// `module.exports` is an empty object that only carries the `__esModule` marker.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

// Define a live getter on `to` for every own property of `from` that `to`
// does not already have, skipping the key named by `except`.
const __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Preserve the source property's enumerability when a descriptor exists.
      enumerable: !desc || desc.enumerable,
    });
  }
  return to;
};

// Wrap a namespace object as a CommonJS export flagged with `__esModule: true`.
const __toCommonJS = (mod) => {
  const target = __defProp({}, "__esModule", { value: true });
  return __copyProps(target, mod);
};

const types_exports = {};
module.exports = __toCommonJS(types_exports);
|
|
17
|
+
//# sourceMappingURL=types.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../src/inference/interruption/types.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { Span } from '@opentelemetry/api';\n\nexport interface OverlappingSpeechEvent {\n type: 'user_overlapping_speech';\n timestamp: number;\n isInterruption: boolean;\n totalDurationInS: number;\n predictionDurationInS: number;\n detectionDelayInS: number;\n overlapStartedAt?: number;\n speechInput?: Int16Array;\n probabilities?: number[];\n probability: number;\n numRequests: number;\n}\n\n/**\n * Configuration options for interruption detection.\n */\nexport interface InterruptionOptions {\n sampleRate: number;\n threshold: number;\n minFrames: number;\n maxAudioDurationInS: number;\n audioPrefixDurationInS: number;\n detectionIntervalInS: number;\n inferenceTimeout: number;\n minInterruptionDurationInS: number;\n baseUrl: string;\n apiKey: string;\n apiSecret: string;\n useProxy: boolean;\n}\n\n/**\n * API connection options for transport layers.\n */\nexport interface ApiConnectOptions {\n maxRetries: number;\n retryInterval: number;\n timeout: number;\n}\n\n// Sentinel types for stream control signals\n\nexport interface AgentSpeechStarted {\n type: 'agent-speech-started';\n}\n\nexport interface AgentSpeechEnded {\n type: 'agent-speech-ended';\n}\n\nexport interface OverlapSpeechStarted {\n type: 'overlap-speech-started';\n /** Duration of the speech segment in milliseconds (matches VADEvent.speechDuration units). */\n speechDuration: number;\n /** Absolute timestamp (ms) when overlap speech started, computed at call-site. */\n startedAt: number;\n userSpeakingSpan?: Span;\n}\n\nexport interface OverlapSpeechEnded {\n type: 'overlap-speech-ended';\n /** Absolute timestamp (ms) when overlap speech ended, used as the non-interruption event timestamp. 
*/\n endedAt: number;\n}\n\nexport interface Flush {\n type: 'flush';\n}\n\n/**\n * Union type for all stream control signals.\n */\nexport type InterruptionSentinel =\n | AgentSpeechStarted\n | AgentSpeechEnded\n | OverlapSpeechStarted\n | OverlapSpeechEnded\n | Flush;\n"],"mappings":";;;;;;;;;;;;;;AAAA;AAAA;","names":[]}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import type { Span } from '@opentelemetry/api';
|
|
2
|
+
/**
 * Event emitted by the interruption stream for a span of user speech that
 * overlapped agent speech.
 */
export interface OverlappingSpeechEvent {
    /** Discriminant for this event type. */
    type: 'user_overlapping_speech';
    /** Event timestamp; forwarded unchanged as the metrics timestamp. */
    timestamp: number;
    /** Whether the overlap was classified as an interruption (counted as a backchannel in metrics otherwise). */
    isInterruption: boolean;
    /** Total duration in seconds (converted to ms when reported as metrics). */
    totalDurationInS: number;
    /** Prediction duration in seconds (converted to ms when reported as metrics). */
    predictionDurationInS: number;
    /** Detection delay in seconds (converted to ms when reported as metrics). */
    detectionDelayInS: number;
    /** Timestamp at which the overlap started, when known. */
    overlapStartedAt?: number;
    /** Audio samples associated with the event, when available. */
    speechInput?: Int16Array;
    /** Per-prediction probabilities, when available. */
    probabilities?: number[];
    /** Interruption probability for this event. */
    probability: number;
    /** Number of inference requests counted for this overlap. */
    numRequests: number;
}
|
|
15
|
+
/**
 * Configuration options for interruption detection.
 */
export interface InterruptionOptions {
    /** Sample rate the stream works at; frames at other rates are resampled to it. */
    sampleRate: number;
    /** Detection threshold forwarded to the transport. */
    threshold: number;
    /** Minimum frame count; recomputed from `minInterruptionDurationInS` when that option is updated. */
    minFrames: number;
    /** Length of the audio window kept for inference, in seconds. */
    maxAudioDurationInS: number;
    /** Audio kept before the start of overlapping speech, in seconds. */
    audioPrefixDurationInS: number;
    /** Amount of new audio, in seconds, accumulated between inference requests. */
    detectionIntervalInS: number;
    /** Timeout forwarded to the transport as `timeout`. */
    inferenceTimeout: number;
    /** Minimum interruption duration in seconds. */
    minInterruptionDurationInS: number;
    /** Base URL of the inference service. */
    baseUrl: string;
    /** API key forwarded to the transport. */
    apiKey: string;
    /** API secret forwarded to the transport. */
    apiSecret: string;
    /** When true the WebSocket transport is used; otherwise the HTTP transport. */
    useProxy: boolean;
}
|
|
32
|
+
/**
 * API connection options for transport layers.
 */
export interface ApiConnectOptions {
    /** Maximum number of retries; forwarded to the transport options. */
    maxRetries: number;
    /** Interval between retries (units not visible here — TODO confirm). */
    retryInterval: number;
    /** Connection timeout (units not visible here — TODO confirm). */
    timeout: number;
}
|
|
40
|
+
/** Control signal: the agent started speaking. */
export interface AgentSpeechStarted {
    type: 'agent-speech-started';
}
|
|
43
|
+
/** Control signal: the agent stopped speaking. */
export interface AgentSpeechEnded {
    type: 'agent-speech-ended';
}
|
|
46
|
+
/** Control signal: user speech started while the agent was speaking. */
export interface OverlapSpeechStarted {
    type: 'overlap-speech-started';
    /** Duration of the speech segment in milliseconds (matches VADEvent.speechDuration units). */
    speechDuration: number;
    /** Absolute timestamp (ms) when overlap speech started, computed at call-site. */
    startedAt: number;
    /** Optional tracing span for the user's speech; the stream records interruption attributes on it. */
    userSpeakingSpan?: Span;
}
|
|
54
|
+
/** Control signal: the overlapping user speech ended. */
export interface OverlapSpeechEnded {
    type: 'overlap-speech-ended';
    /** Absolute timestamp (ms) when overlap speech ended, used as the non-interruption event timestamp. */
    endedAt: number;
}
|
|
59
|
+
/** Control signal written by the stream's `flush()`; the audio transformer treats it as a no-op. */
export interface Flush {
    type: 'flush';
}
|
|
62
|
+
/**
 * Union type for all stream control signals.
 */
export type InterruptionSentinel =
    | AgentSpeechStarted
    | AgentSpeechEnded
    | OverlapSpeechStarted
    | OverlapSpeechEnded
    | Flush;
|
|
66
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import type { Span } from '@opentelemetry/api';
|
|
2
|
+
/**
 * Event emitted by the interruption stream for a span of user speech that
 * overlapped agent speech.
 */
export interface OverlappingSpeechEvent {
    /** Discriminant for this event type. */
    type: 'user_overlapping_speech';
    /** Event timestamp; forwarded unchanged as the metrics timestamp. */
    timestamp: number;
    /** Whether the overlap was classified as an interruption (counted as a backchannel in metrics otherwise). */
    isInterruption: boolean;
    /** Total duration in seconds (converted to ms when reported as metrics). */
    totalDurationInS: number;
    /** Prediction duration in seconds (converted to ms when reported as metrics). */
    predictionDurationInS: number;
    /** Detection delay in seconds (converted to ms when reported as metrics). */
    detectionDelayInS: number;
    /** Timestamp at which the overlap started, when known. */
    overlapStartedAt?: number;
    /** Audio samples associated with the event, when available. */
    speechInput?: Int16Array;
    /** Per-prediction probabilities, when available. */
    probabilities?: number[];
    /** Interruption probability for this event. */
    probability: number;
    /** Number of inference requests counted for this overlap. */
    numRequests: number;
}
|
|
15
|
+
/**
 * Configuration options for interruption detection.
 */
export interface InterruptionOptions {
    /** Sample rate the stream works at; frames at other rates are resampled to it. */
    sampleRate: number;
    /** Detection threshold forwarded to the transport. */
    threshold: number;
    /** Minimum frame count; recomputed from `minInterruptionDurationInS` when that option is updated. */
    minFrames: number;
    /** Length of the audio window kept for inference, in seconds. */
    maxAudioDurationInS: number;
    /** Audio kept before the start of overlapping speech, in seconds. */
    audioPrefixDurationInS: number;
    /** Amount of new audio, in seconds, accumulated between inference requests. */
    detectionIntervalInS: number;
    /** Timeout forwarded to the transport as `timeout`. */
    inferenceTimeout: number;
    /** Minimum interruption duration in seconds. */
    minInterruptionDurationInS: number;
    /** Base URL of the inference service. */
    baseUrl: string;
    /** API key forwarded to the transport. */
    apiKey: string;
    /** API secret forwarded to the transport. */
    apiSecret: string;
    /** When true the WebSocket transport is used; otherwise the HTTP transport. */
    useProxy: boolean;
}
|
|
32
|
+
/**
 * API connection options for transport layers.
 */
export interface ApiConnectOptions {
    /** Maximum number of retries; forwarded to the transport options. */
    maxRetries: number;
    /** Interval between retries (units not visible here — TODO confirm). */
    retryInterval: number;
    /** Connection timeout (units not visible here — TODO confirm). */
    timeout: number;
}
|
|
40
|
+
/** Control signal: the agent started speaking. */
export interface AgentSpeechStarted {
    type: 'agent-speech-started';
}
|
|
43
|
+
/** Control signal: the agent stopped speaking. */
export interface AgentSpeechEnded {
    type: 'agent-speech-ended';
}
|
|
46
|
+
/** Control signal: user speech started while the agent was speaking. */
export interface OverlapSpeechStarted {
    type: 'overlap-speech-started';
    /** Duration of the speech segment in milliseconds (matches VADEvent.speechDuration units). */
    speechDuration: number;
    /** Absolute timestamp (ms) when overlap speech started, computed at call-site. */
    startedAt: number;
    /** Optional tracing span for the user's speech; the stream records interruption attributes on it. */
    userSpeakingSpan?: Span;
}
|
|
54
|
+
/** Control signal: the overlapping user speech ended. */
export interface OverlapSpeechEnded {
    type: 'overlap-speech-ended';
    /** Absolute timestamp (ms) when overlap speech ended, used as the non-interruption event timestamp. */
    endedAt: number;
}
|
|
59
|
+
/** Control signal written by the stream's `flush()`; the audio transformer treats it as a no-op. */
export interface Flush {
    type: 'flush';
}
|
|
62
|
+
/**
 * Union type for all stream control signals.
 */
export type InterruptionSentinel =
    | AgentSpeechStarted
    | AgentSpeechEnded
    | OverlapSpeechStarted
    | OverlapSpeechEnded
    | Flush;
|
|
66
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/inference/interruption/types.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oBAAoB,CAAC;AAE/C,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,yBAAyB,CAAC;IAChC,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,OAAO,CAAC;IACxB,gBAAgB,EAAE,MAAM,CAAC;IACzB,qBAAqB,EAAE,MAAM,CAAC;IAC9B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,WAAW,CAAC,EAAE,UAAU,CAAC;IACzB,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,sBAAsB,EAAE,MAAM,CAAC;IAC/B,oBAAoB,EAAE,MAAM,CAAC;IAC7B,gBAAgB,EAAE,MAAM,CAAC;IACzB,0BAA0B,EAAE,MAAM,CAAC;IACnC,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;CACjB;AAID,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,sBAAsB,CAAC;CAC9B;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,oBAAoB,CAAC;CAC5B;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,wBAAwB,CAAC;IAC/B,8FAA8F;IAC9F,cAAc,EAAE,MAAM,CAAC;IACvB,kFAAkF;IAClF,SAAS,EAAE,MAAM,CAAC;IAClB,gBAAgB,CAAC,EAAE,IAAI,CAAC;CACzB;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,sBAAsB,CAAC;IAC7B,uGAAuG;IACvG,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,KAAK;IACpB,IAAI,EAAE,OAAO,CAAC;CACf;AAED;;GAEG;AACH,MAAM,MAAM,oBAAoB,GAC5B,kBAAkB,GAClB,gBAAgB,GAChB,oBAAoB,GACpB,kBAAkB,GAClB,KAAK,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|