@livekit/agents 1.0.48 → 1.1.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.cjs +27 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +9 -0
- package/dist/constants.d.ts +9 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +18 -0
- package/dist/constants.js.map +1 -1
- package/dist/inference/api_protos.d.cts +71 -71
- package/dist/inference/api_protos.d.ts +71 -71
- package/dist/inference/interruption/defaults.cjs +81 -0
- package/dist/inference/interruption/defaults.cjs.map +1 -0
- package/dist/inference/interruption/defaults.d.cts +19 -0
- package/dist/inference/interruption/defaults.d.ts +19 -0
- package/dist/inference/interruption/defaults.d.ts.map +1 -0
- package/dist/inference/interruption/defaults.js +46 -0
- package/dist/inference/interruption/defaults.js.map +1 -0
- package/dist/inference/interruption/errors.cjs +44 -0
- package/dist/inference/interruption/errors.cjs.map +1 -0
- package/dist/inference/interruption/errors.d.cts +12 -0
- package/dist/inference/interruption/errors.d.ts +12 -0
- package/dist/inference/interruption/errors.d.ts.map +1 -0
- package/dist/inference/interruption/errors.js +20 -0
- package/dist/inference/interruption/errors.js.map +1 -0
- package/dist/inference/interruption/http_transport.cjs +147 -0
- package/dist/inference/interruption/http_transport.cjs.map +1 -0
- package/dist/inference/interruption/http_transport.d.cts +63 -0
- package/dist/inference/interruption/http_transport.d.ts +63 -0
- package/dist/inference/interruption/http_transport.d.ts.map +1 -0
- package/dist/inference/interruption/http_transport.js +121 -0
- package/dist/inference/interruption/http_transport.js.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.js +34 -0
- package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
- package/dist/inference/interruption/interruption_detector.cjs +181 -0
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
- package/dist/inference/interruption/interruption_detector.d.cts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_detector.js +147 -0
- package/dist/inference/interruption/interruption_detector.js.map +1 -0
- package/dist/inference/interruption/interruption_stream.cjs +368 -0
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
- package/dist/inference/interruption/interruption_stream.d.cts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_stream.js +344 -0
- package/dist/inference/interruption/interruption_stream.js.map +1 -0
- package/dist/inference/interruption/types.cjs +17 -0
- package/dist/inference/interruption/types.cjs.map +1 -0
- package/dist/inference/interruption/types.d.cts +66 -0
- package/dist/inference/interruption/types.d.ts +66 -0
- package/dist/inference/interruption/types.d.ts.map +1 -0
- package/dist/inference/interruption/types.js +1 -0
- package/dist/inference/interruption/types.js.map +1 -0
- package/dist/inference/interruption/utils.cjs +130 -0
- package/dist/inference/interruption/utils.cjs.map +1 -0
- package/dist/inference/interruption/utils.d.cts +41 -0
- package/dist/inference/interruption/utils.d.ts +41 -0
- package/dist/inference/interruption/utils.d.ts.map +1 -0
- package/dist/inference/interruption/utils.js +105 -0
- package/dist/inference/interruption/utils.js.map +1 -0
- package/dist/inference/interruption/utils.test.cjs +105 -0
- package/dist/inference/interruption/utils.test.cjs.map +1 -0
- package/dist/inference/interruption/utils.test.js +104 -0
- package/dist/inference/interruption/utils.test.js.map +1 -0
- package/dist/inference/interruption/ws_transport.cjs +329 -0
- package/dist/inference/interruption/ws_transport.cjs.map +1 -0
- package/dist/inference/interruption/ws_transport.d.cts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
- package/dist/inference/interruption/ws_transport.js +295 -0
- package/dist/inference/interruption/ws_transport.js.map +1 -0
- package/dist/inference/llm.cjs +14 -10
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +2 -1
- package/dist/inference/llm.d.ts +2 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +8 -10
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +7 -2
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +2 -0
- package/dist/inference/stt.d.ts +2 -0
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +8 -3
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs +7 -2
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -0
- package/dist/inference/tts.d.ts +2 -0
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +8 -3
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/utils.cjs +26 -7
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +13 -0
- package/dist/inference/utils.d.ts +13 -0
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +18 -2
- package/dist/inference/utils.js.map +1 -1
- package/dist/llm/chat_context.cjs +20 -2
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +19 -1
- package/dist/llm/chat_context.d.ts +19 -1
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +20 -2
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +16 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +9 -0
- package/dist/llm/llm.d.ts +9 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +16 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/realtime.cjs +3 -0
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +1 -0
- package/dist/llm/realtime.d.ts +1 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js +3 -0
- package/dist/llm/realtime.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +45 -1
- package/dist/metrics/base.d.ts +45 -1
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/index.cjs +5 -0
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -1
- package/dist/metrics/index.d.ts +2 -1
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +6 -0
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/model_usage.cjs +189 -0
- package/dist/metrics/model_usage.cjs.map +1 -0
- package/dist/metrics/model_usage.d.cts +92 -0
- package/dist/metrics/model_usage.d.ts +92 -0
- package/dist/metrics/model_usage.d.ts.map +1 -0
- package/dist/metrics/model_usage.js +164 -0
- package/dist/metrics/model_usage.js.map +1 -0
- package/dist/metrics/model_usage.test.cjs +474 -0
- package/dist/metrics/model_usage.test.cjs.map +1 -0
- package/dist/metrics/model_usage.test.js +476 -0
- package/dist/metrics/model_usage.test.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +3 -0
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +9 -0
- package/dist/metrics/usage_collector.d.ts +9 -0
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +3 -0
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +9 -0
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +9 -0
- package/dist/metrics/utils.js.map +1 -1
- package/dist/stream/multi_input_stream.test.cjs +4 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -1
- package/dist/stream/multi_input_stream.test.js +5 -1
- package/dist/stream/multi_input_stream.test.js.map +1 -1
- package/dist/stream/stream_channel.cjs +31 -0
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +4 -2
- package/dist/stream/stream_channel.d.ts +4 -2
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +31 -0
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/stt/stt.cjs +34 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +22 -0
- package/dist/stt/stt.d.ts +22 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +34 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/otel_http_exporter.cjs +24 -5
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -1
- package/dist/telemetry/otel_http_exporter.d.cts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -1
- package/dist/telemetry/otel_http_exporter.js +24 -5
- package/dist/telemetry/otel_http_exporter.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +5 -5
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +9 -5
- package/dist/telemetry/trace_types.d.ts +9 -5
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +5 -5
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/telemetry/traces.cjs +47 -8
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +47 -8
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +64 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +34 -0
- package/dist/tts/tts.d.ts +34 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +64 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +25 -4
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -2
- package/dist/voice/agent.d.ts +10 -2
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +25 -4
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +261 -36
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +20 -6
- package/dist/voice/agent_activity.d.ts +20 -6
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +262 -37
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +105 -48
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +90 -20
- package/dist/voice/agent_session.d.ts +90 -20
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +105 -46
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +287 -6
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +42 -3
- package/dist/voice/audio_recognition.d.ts +42 -3
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +289 -7
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/client_events.cjs +554 -0
- package/dist/voice/client_events.cjs.map +1 -0
- package/dist/voice/client_events.d.cts +195 -0
- package/dist/voice/client_events.d.ts +195 -0
- package/dist/voice/client_events.d.ts.map +1 -0
- package/dist/voice/client_events.js +548 -0
- package/dist/voice/client_events.js.map +1 -0
- package/dist/voice/events.cjs +1 -0
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +8 -5
- package/dist/voice/events.d.ts +8 -5
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -0
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +43 -8
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +3 -3
- package/dist/voice/generation.d.ts +3 -3
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -8
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +1 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/report.cjs +20 -8
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -0
- package/dist/voice/report.d.ts +5 -0
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +20 -8
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +106 -0
- package/dist/voice/report.test.cjs.map +1 -0
- package/dist/voice/report.test.js +105 -0
- package/dist/voice/report.test.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +5 -39
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +4 -9
- package/dist/voice/room_io/room_io.d.ts +4 -9
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +5 -40
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/endpointing.cjs +33 -0
- package/dist/voice/turn_config/endpointing.cjs.map +1 -0
- package/dist/voice/turn_config/endpointing.d.cts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
- package/dist/voice/turn_config/endpointing.js +9 -0
- package/dist/voice/turn_config/endpointing.js.map +1 -0
- package/dist/voice/turn_config/interruption.cjs +37 -0
- package/dist/voice/turn_config/interruption.cjs.map +1 -0
- package/dist/voice/turn_config/interruption.d.cts +53 -0
- package/dist/voice/turn_config/interruption.d.ts +53 -0
- package/dist/voice/turn_config/interruption.d.ts.map +1 -0
- package/dist/voice/turn_config/interruption.js +13 -0
- package/dist/voice/turn_config/interruption.js.map +1 -0
- package/dist/voice/turn_config/turn_handling.cjs +35 -0
- package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
- package/dist/voice/turn_config/turn_handling.d.cts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
- package/dist/voice/turn_config/turn_handling.js +11 -0
- package/dist/voice/turn_config/turn_handling.js.map +1 -0
- package/dist/voice/turn_config/utils.cjs +97 -0
- package/dist/voice/turn_config/utils.cjs.map +1 -0
- package/dist/voice/turn_config/utils.d.cts +25 -0
- package/dist/voice/turn_config/utils.d.ts +25 -0
- package/dist/voice/turn_config/utils.d.ts.map +1 -0
- package/dist/voice/turn_config/utils.js +73 -0
- package/dist/voice/turn_config/utils.js.map +1 -0
- package/dist/voice/turn_config/utils.test.cjs +86 -0
- package/dist/voice/turn_config/utils.test.cjs.map +1 -0
- package/dist/voice/turn_config/utils.test.js +85 -0
- package/dist/voice/turn_config/utils.test.js.map +1 -0
- package/dist/voice/wire_format.cjs +798 -0
- package/dist/voice/wire_format.cjs.map +1 -0
- package/dist/voice/wire_format.d.cts +5503 -0
- package/dist/voice/wire_format.d.ts +5503 -0
- package/dist/voice/wire_format.d.ts.map +1 -0
- package/dist/voice/wire_format.js +728 -0
- package/dist/voice/wire_format.js.map +1 -0
- package/package.json +2 -1
- package/src/constants.ts +13 -0
- package/src/inference/interruption/defaults.ts +51 -0
- package/src/inference/interruption/errors.ts +25 -0
- package/src/inference/interruption/http_transport.ts +187 -0
- package/src/inference/interruption/interruption_cache_entry.ts +50 -0
- package/src/inference/interruption/interruption_detector.ts +188 -0
- package/src/inference/interruption/interruption_stream.ts +467 -0
- package/src/inference/interruption/types.ts +84 -0
- package/src/inference/interruption/utils.test.ts +132 -0
- package/src/inference/interruption/utils.ts +137 -0
- package/src/inference/interruption/ws_transport.ts +402 -0
- package/src/inference/llm.ts +9 -12
- package/src/inference/stt.ts +10 -3
- package/src/inference/tts.ts +10 -3
- package/src/inference/utils.ts +29 -1
- package/src/llm/chat_context.ts +40 -2
- package/src/llm/index.ts +1 -0
- package/src/llm/llm.ts +16 -0
- package/src/llm/realtime.ts +4 -0
- package/src/metrics/base.ts +48 -1
- package/src/metrics/index.ts +11 -0
- package/src/metrics/model_usage.test.ts +545 -0
- package/src/metrics/model_usage.ts +262 -0
- package/src/metrics/usage_collector.ts +11 -0
- package/src/metrics/utils.ts +11 -0
- package/src/stream/multi_input_stream.test.ts +6 -1
- package/src/stream/stream_channel.ts +34 -2
- package/src/stt/stt.ts +38 -0
- package/src/telemetry/otel_http_exporter.ts +28 -5
- package/src/telemetry/trace_types.ts +11 -8
- package/src/telemetry/traces.ts +111 -54
- package/src/tts/tts.ts +69 -1
- package/src/voice/agent.ts +30 -3
- package/src/voice/agent_activity.ts +327 -28
- package/src/voice/agent_session.ts +207 -59
- package/src/voice/audio_recognition.ts +385 -9
- package/src/voice/client_events.ts +838 -0
- package/src/voice/events.ts +14 -4
- package/src/voice/generation.ts +52 -9
- package/src/voice/index.ts +1 -0
- package/src/voice/report.test.ts +117 -0
- package/src/voice/report.ts +29 -6
- package/src/voice/room_io/room_io.ts +7 -61
- package/src/voice/turn_config/endpointing.ts +33 -0
- package/src/voice/turn_config/interruption.ts +56 -0
- package/src/voice/turn_config/turn_handling.ts +45 -0
- package/src/voice/turn_config/utils.test.ts +100 -0
- package/src/voice/turn_config/utils.ts +103 -0
- package/src/voice/wire_format.ts +827 -0
package/src/voice/events.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { InterruptionDetectionError } from '../inference/interruption/errors.js';
|
|
5
|
+
import type { OverlappingSpeechEvent } from '../inference/interruption/types.js';
|
|
4
6
|
import type {
|
|
5
7
|
ChatMessage,
|
|
6
8
|
FunctionCall,
|
|
@@ -25,6 +27,7 @@ export enum AgentSessionEventTypes {
|
|
|
25
27
|
FunctionToolsExecuted = 'function_tools_executed',
|
|
26
28
|
MetricsCollected = 'metrics_collected',
|
|
27
29
|
SpeechCreated = 'speech_created',
|
|
30
|
+
UserOverlappingSpeech = 'user_overlapping_speech',
|
|
28
31
|
Error = 'error',
|
|
29
32
|
Close = 'close',
|
|
30
33
|
}
|
|
@@ -215,13 +218,13 @@ export const createSpeechCreatedEvent = ({
|
|
|
215
218
|
|
|
216
219
|
export type ErrorEvent = {
|
|
217
220
|
type: 'error';
|
|
218
|
-
error: RealtimeModelError | STTError | TTSError | LLMError | unknown;
|
|
221
|
+
error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError | unknown;
|
|
219
222
|
source: LLM | STT | TTS | RealtimeModel | unknown;
|
|
220
223
|
createdAt: number;
|
|
221
224
|
};
|
|
222
225
|
|
|
223
226
|
export const createErrorEvent = (
|
|
224
|
-
error: RealtimeModelError | STTError | TTSError | LLMError | unknown,
|
|
227
|
+
error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError | unknown,
|
|
225
228
|
source: LLM | STT | TTS | RealtimeModel | unknown,
|
|
226
229
|
createdAt: number = Date.now(),
|
|
227
230
|
): ErrorEvent => ({
|
|
@@ -233,14 +236,20 @@ export const createErrorEvent = (
|
|
|
233
236
|
|
|
234
237
|
export type CloseEvent = {
|
|
235
238
|
type: 'close';
|
|
236
|
-
error: RealtimeModelError | STTError | TTSError | LLMError | null;
|
|
239
|
+
error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError | null;
|
|
237
240
|
reason: ShutdownReason;
|
|
238
241
|
createdAt: number;
|
|
239
242
|
};
|
|
240
243
|
|
|
241
244
|
export const createCloseEvent = (
|
|
242
245
|
reason: ShutdownReason,
|
|
243
|
-
error:
|
|
246
|
+
error:
|
|
247
|
+
| RealtimeModelError
|
|
248
|
+
| STTError
|
|
249
|
+
| TTSError
|
|
250
|
+
| LLMError
|
|
251
|
+
| InterruptionDetectionError
|
|
252
|
+
| null = null,
|
|
244
253
|
createdAt: number = Date.now(),
|
|
245
254
|
): CloseEvent => ({
|
|
246
255
|
type: 'close',
|
|
@@ -257,5 +266,6 @@ export type AgentEvent =
|
|
|
257
266
|
| ConversationItemAddedEvent
|
|
258
267
|
| FunctionToolsExecutedEvent
|
|
259
268
|
| SpeechCreatedEvent
|
|
269
|
+
| OverlappingSpeechEvent
|
|
260
270
|
| ErrorEvent
|
|
261
271
|
| CloseEvent;
|
package/src/voice/generation.ts
CHANGED
|
@@ -51,6 +51,7 @@ export class _LLMGenerationData {
|
|
|
51
51
|
generatedText: string = '';
|
|
52
52
|
generatedToolCalls: FunctionCall[];
|
|
53
53
|
id: string;
|
|
54
|
+
ttft?: number;
|
|
54
55
|
|
|
55
56
|
constructor(
|
|
56
57
|
public readonly textStream: ReadableStream<string>,
|
|
@@ -416,6 +417,8 @@ export function performLLMInference(
|
|
|
416
417
|
toolCtx: ToolContext,
|
|
417
418
|
modelSettings: ModelSettings,
|
|
418
419
|
controller: AbortController,
|
|
420
|
+
model?: string,
|
|
421
|
+
provider?: string,
|
|
419
422
|
): [Task<void>, _LLMGenerationData] {
|
|
420
423
|
const textStream = new IdentityTransform<string>();
|
|
421
424
|
const toolCallStream = new IdentityTransform<FunctionCall>();
|
|
@@ -431,8 +434,17 @@ export function performLLMInference(
|
|
|
431
434
|
);
|
|
432
435
|
span.setAttribute(traceTypes.ATTR_FUNCTION_TOOLS, JSON.stringify(Object.keys(toolCtx)));
|
|
433
436
|
|
|
437
|
+
if (model) {
|
|
438
|
+
span.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, model);
|
|
439
|
+
}
|
|
440
|
+
if (provider) {
|
|
441
|
+
span.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, provider);
|
|
442
|
+
}
|
|
443
|
+
|
|
434
444
|
let llmStreamReader: ReadableStreamDefaultReader<string | ChatChunk> | null = null;
|
|
435
445
|
let llmStream: ReadableStream<string | ChatChunk> | null = null;
|
|
446
|
+
const startTime = performance.now() / 1000; // Convert to seconds
|
|
447
|
+
let firstTokenReceived = false;
|
|
436
448
|
|
|
437
449
|
try {
|
|
438
450
|
llmStream = await node(chatCtx, toolCtx, modelSettings);
|
|
@@ -455,6 +467,11 @@ export function performLLMInference(
|
|
|
455
467
|
const { done, value: chunk } = result;
|
|
456
468
|
if (done) break;
|
|
457
469
|
|
|
470
|
+
if (!firstTokenReceived) {
|
|
471
|
+
firstTokenReceived = true;
|
|
472
|
+
data.ttft = performance.now() / 1000 - startTime;
|
|
473
|
+
}
|
|
474
|
+
|
|
458
475
|
if (typeof chunk === 'string') {
|
|
459
476
|
data.generatedText += chunk;
|
|
460
477
|
await textWriter.write(chunk);
|
|
@@ -493,6 +510,9 @@ export function performLLMInference(
|
|
|
493
510
|
}
|
|
494
511
|
|
|
495
512
|
span.setAttribute(traceTypes.ATTR_RESPONSE_TEXT, data.generatedText);
|
|
513
|
+
if (data.ttft !== undefined) {
|
|
514
|
+
span.setAttribute(traceTypes.ATTR_RESPONSE_TTFT, data.ttft);
|
|
515
|
+
}
|
|
496
516
|
} catch (error) {
|
|
497
517
|
if (error instanceof DOMException && error.name === 'AbortError') {
|
|
498
518
|
// Abort signal was triggered, handle gracefully
|
|
@@ -527,6 +547,8 @@ export function performTTSInference(
|
|
|
527
547
|
text: ReadableStream<string | TimedString>,
|
|
528
548
|
modelSettings: ModelSettings,
|
|
529
549
|
controller: AbortController,
|
|
550
|
+
model?: string,
|
|
551
|
+
provider?: string,
|
|
530
552
|
): [Task<void>, _TTSGenerationData] {
|
|
531
553
|
const audioStream = new IdentityTransform<AudioFrame>();
|
|
532
554
|
const outputWriter = audioStream.writable.getWriter();
|
|
@@ -558,10 +580,27 @@ export function performTTSInference(
|
|
|
558
580
|
}
|
|
559
581
|
})();
|
|
560
582
|
|
|
561
|
-
|
|
583
|
+
let ttfb: number | undefined;
|
|
584
|
+
|
|
585
|
+
const genData: _TTSGenerationData = {
|
|
586
|
+
audioStream: audioOutputStream,
|
|
587
|
+
timedTextsFut,
|
|
588
|
+
ttfb: undefined,
|
|
589
|
+
};
|
|
590
|
+
|
|
591
|
+
const _performTTSInferenceImpl = async (signal: AbortSignal, span: Span) => {
|
|
592
|
+
if (model) {
|
|
593
|
+
span.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, model);
|
|
594
|
+
}
|
|
595
|
+
if (provider) {
|
|
596
|
+
span.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, provider);
|
|
597
|
+
}
|
|
598
|
+
|
|
562
599
|
let ttsStreamReader: ReadableStreamDefaultReader<AudioFrame> | null = null;
|
|
563
600
|
let ttsStream: ReadableStream<AudioFrame> | null = null;
|
|
564
601
|
let pushedDuration = 0;
|
|
602
|
+
const startTime = performance.now() / 1000; // Convert to seconds
|
|
603
|
+
let firstByteReceived = false;
|
|
565
604
|
|
|
566
605
|
try {
|
|
567
606
|
ttsStream = await node(textOnlyStream.readable, modelSettings);
|
|
@@ -595,6 +634,13 @@ export function performTTSInference(
|
|
|
595
634
|
break;
|
|
596
635
|
}
|
|
597
636
|
|
|
637
|
+
if (!firstByteReceived) {
|
|
638
|
+
firstByteReceived = true;
|
|
639
|
+
ttfb = performance.now() / 1000 - startTime;
|
|
640
|
+
genData.ttfb = ttfb;
|
|
641
|
+
span.setAttribute(traceTypes.ATTR_RESPONSE_TTFB, ttfb);
|
|
642
|
+
}
|
|
643
|
+
|
|
598
644
|
// Write the audio frame to the output stream
|
|
599
645
|
await outputWriter.write(frame);
|
|
600
646
|
|
|
@@ -631,6 +677,10 @@ export function performTTSInference(
|
|
|
631
677
|
}
|
|
632
678
|
throw error;
|
|
633
679
|
} finally {
|
|
680
|
+
if (!timedTextsFut.done) {
|
|
681
|
+
// Ensure downstream consumers don't hang on errors.
|
|
682
|
+
timedTextsFut.resolve(null);
|
|
683
|
+
}
|
|
634
684
|
ttsStreamReader?.releaseLock();
|
|
635
685
|
await ttsStream?.cancel();
|
|
636
686
|
await outputWriter.close();
|
|
@@ -642,16 +692,11 @@ export function performTTSInference(
|
|
|
642
692
|
const currentContext = otelContext.active();
|
|
643
693
|
|
|
644
694
|
const inferenceTask = async (signal: AbortSignal) =>
|
|
645
|
-
tracer.startActiveSpan(async () => _performTTSInferenceImpl(signal), {
|
|
695
|
+
tracer.startActiveSpan(async (span) => _performTTSInferenceImpl(signal, span), {
|
|
646
696
|
name: 'tts_node',
|
|
647
697
|
context: currentContext,
|
|
648
698
|
});
|
|
649
699
|
|
|
650
|
-
const genData: _TTSGenerationData = {
|
|
651
|
-
audioStream: audioOutputStream,
|
|
652
|
-
timedTextsFut,
|
|
653
|
-
};
|
|
654
|
-
|
|
655
700
|
return [
|
|
656
701
|
Task.from((controller) => inferenceTask(controller.signal), controller, 'performTTSInference'),
|
|
657
702
|
genData,
|
|
@@ -719,7 +764,6 @@ export function performTextForwarding(
|
|
|
719
764
|
|
|
720
765
|
export interface _AudioOut {
|
|
721
766
|
audio: Array<AudioFrame>;
|
|
722
|
-
/** Future that will be set with the timestamp of the first frame's capture */
|
|
723
767
|
firstFrameFut: Future<number>;
|
|
724
768
|
}
|
|
725
769
|
|
|
@@ -807,7 +851,6 @@ export function performAudioForwarding(
|
|
|
807
851
|
];
|
|
808
852
|
}
|
|
809
853
|
|
|
810
|
-
// function_tool span is already implemented in tracableToolExecution below (line ~796)
|
|
811
854
|
export function performToolExecutions({
|
|
812
855
|
session,
|
|
813
856
|
speechHandle,
|
package/src/voice/index.ts
CHANGED
|
@@ -5,6 +5,7 @@ export { Agent, AgentTask, StopResponse, type AgentOptions, type ModelSettings }
|
|
|
5
5
|
export { AgentSession, type AgentSessionOptions, type VoiceOptions } from './agent_session.js';
|
|
6
6
|
export * from './avatar/index.js';
|
|
7
7
|
export * from './background_audio.js';
|
|
8
|
+
export { type TextInputCallback, type TextInputEvent } from './client_events.js';
|
|
8
9
|
export * from './events.js';
|
|
9
10
|
export { type TimedString } from './io.js';
|
|
10
11
|
export * from './report.js';
|
package/src/voice/report.test.ts
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { describe, expect, it } from 'vitest';
|
|
5
|
+
import { ChatContext } from '../llm/chat_context.js';
|
|
6
|
+
import type { VoiceOptions } from './agent_session.js';
|
|
7
|
+
import { createSessionReport, sessionReportToJSON } from './report.js';
|
|
8
|
+
|
|
9
|
+
function baseOptions(): VoiceOptions {
|
|
10
|
+
return {
|
|
11
|
+
maxToolSteps: 3,
|
|
12
|
+
preemptiveGeneration: false,
|
|
13
|
+
userAwayTimeout: 15,
|
|
14
|
+
useTtsAlignedTranscript: true,
|
|
15
|
+
turnHandling: {},
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function serializeOptions(options: VoiceOptions) {
|
|
20
|
+
const report = createSessionReport({
|
|
21
|
+
jobId: 'job',
|
|
22
|
+
roomId: 'room-id',
|
|
23
|
+
room: 'room',
|
|
24
|
+
options,
|
|
25
|
+
events: [],
|
|
26
|
+
chatHistory: ChatContext.empty(),
|
|
27
|
+
enableRecording: false,
|
|
28
|
+
timestamp: 0,
|
|
29
|
+
startedAt: 0,
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
const payload = sessionReportToJSON(report);
|
|
33
|
+
return payload.options as Record<string, unknown>;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
describe('sessionReportToJSON', () => {
|
|
37
|
+
it('serializes interruption and endpointing values from turnHandling', () => {
|
|
38
|
+
const options = baseOptions();
|
|
39
|
+
options.turnHandling = {
|
|
40
|
+
interruption: {
|
|
41
|
+
mode: 'adaptive',
|
|
42
|
+
discardAudioIfUninterruptible: false,
|
|
43
|
+
minDuration: 1200,
|
|
44
|
+
minWords: 2,
|
|
45
|
+
},
|
|
46
|
+
endpointing: {
|
|
47
|
+
minDelay: 900,
|
|
48
|
+
maxDelay: 4500,
|
|
49
|
+
},
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
const serialized = serializeOptions(options);
|
|
53
|
+
expect(serialized).toMatchObject({
|
|
54
|
+
allow_interruptions: true,
|
|
55
|
+
discard_audio_if_uninterruptible: false,
|
|
56
|
+
min_interruption_duration: 1200,
|
|
57
|
+
min_interruption_words: 2,
|
|
58
|
+
min_endpointing_delay: 900,
|
|
59
|
+
max_endpointing_delay: 4500,
|
|
60
|
+
max_tool_steps: 3,
|
|
61
|
+
});
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it('prefers turnHandling values over deprecated flat fields', () => {
|
|
65
|
+
const options = baseOptions();
|
|
66
|
+
options.allowInterruptions = false;
|
|
67
|
+
options.discardAudioIfUninterruptible = true;
|
|
68
|
+
options.minInterruptionDuration = 400;
|
|
69
|
+
options.minInterruptionWords = 1;
|
|
70
|
+
options.minEndpointingDelay = 500;
|
|
71
|
+
options.maxEndpointingDelay = 2500;
|
|
72
|
+
options.turnHandling = {
|
|
73
|
+
interruption: {
|
|
74
|
+
mode: 'vad',
|
|
75
|
+
discardAudioIfUninterruptible: false,
|
|
76
|
+
minDuration: 1400,
|
|
77
|
+
minWords: 4,
|
|
78
|
+
},
|
|
79
|
+
endpointing: {
|
|
80
|
+
minDelay: 700,
|
|
81
|
+
maxDelay: 3900,
|
|
82
|
+
},
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
const serialized = serializeOptions(options);
|
|
86
|
+
expect(serialized).toMatchObject({
|
|
87
|
+
allow_interruptions: true,
|
|
88
|
+
discard_audio_if_uninterruptible: false,
|
|
89
|
+
min_interruption_duration: 1400,
|
|
90
|
+
min_interruption_words: 4,
|
|
91
|
+
min_endpointing_delay: 700,
|
|
92
|
+
max_endpointing_delay: 3900,
|
|
93
|
+
max_tool_steps: 3,
|
|
94
|
+
});
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
it('falls back to deprecated flat fields when turnHandling values are absent', () => {
|
|
98
|
+
const options = baseOptions();
|
|
99
|
+
options.allowInterruptions = false;
|
|
100
|
+
options.discardAudioIfUninterruptible = false;
|
|
101
|
+
options.minInterruptionDuration = 600;
|
|
102
|
+
options.minInterruptionWords = 3;
|
|
103
|
+
options.minEndpointingDelay = 1000;
|
|
104
|
+
options.maxEndpointingDelay = 5000;
|
|
105
|
+
|
|
106
|
+
const serialized = serializeOptions(options);
|
|
107
|
+
expect(serialized).toMatchObject({
|
|
108
|
+
allow_interruptions: false,
|
|
109
|
+
discard_audio_if_uninterruptible: false,
|
|
110
|
+
min_interruption_duration: 600,
|
|
111
|
+
min_interruption_words: 3,
|
|
112
|
+
min_endpointing_delay: 1000,
|
|
113
|
+
max_endpointing_delay: 5000,
|
|
114
|
+
max_tool_steps: 3,
|
|
115
|
+
});
|
|
116
|
+
});
|
|
117
|
+
});
|
package/src/voice/report.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { ChatContext } from '../llm/chat_context.js';
|
|
5
|
+
import { type ModelUsage, filterZeroValues } from '../metrics/model_usage.js';
|
|
5
6
|
import type { VoiceOptions } from './agent_session.js';
|
|
6
7
|
import type { AgentEvent } from './events.js';
|
|
7
8
|
|
|
@@ -23,6 +24,8 @@ export interface SessionReport {
|
|
|
23
24
|
audioRecordingStartedAt?: number;
|
|
24
25
|
/** Duration of the session in milliseconds */
|
|
25
26
|
duration?: number;
|
|
27
|
+
/** Usage summaries for the session, one per model/provider combination */
|
|
28
|
+
modelUsage?: ModelUsage[];
|
|
26
29
|
}
|
|
27
30
|
|
|
28
31
|
export interface SessionReportOptions {
|
|
@@ -41,6 +44,8 @@ export interface SessionReportOptions {
|
|
|
41
44
|
audioRecordingPath?: string;
|
|
42
45
|
/** Timestamp when the audio recording started (milliseconds) */
|
|
43
46
|
audioRecordingStartedAt?: number;
|
|
47
|
+
/** Usage summaries for the session, one per model/provider combination */
|
|
48
|
+
modelUsage?: ModelUsage[];
|
|
44
49
|
}
|
|
45
50
|
|
|
46
51
|
export function createSessionReport(opts: SessionReportOptions): SessionReport {
|
|
@@ -61,6 +66,7 @@ export function createSessionReport(opts: SessionReportOptions): SessionReport {
|
|
|
61
66
|
audioRecordingStartedAt,
|
|
62
67
|
duration:
|
|
63
68
|
audioRecordingStartedAt !== undefined ? timestamp - audioRecordingStartedAt : undefined,
|
|
69
|
+
modelUsage: opts.modelUsage,
|
|
64
70
|
};
|
|
65
71
|
}
|
|
66
72
|
|
|
@@ -70,6 +76,22 @@ export function createSessionReport(opts: SessionReportOptions): SessionReport {
|
|
|
70
76
|
// - Uploads to LiveKit Cloud observability endpoint with JWT auth
|
|
71
77
|
export function sessionReportToJSON(report: SessionReport): Record<string, unknown> {
|
|
72
78
|
const events: Record<string, unknown>[] = [];
|
|
79
|
+
const interruptionConfig = report.options.turnHandling?.interruption;
|
|
80
|
+
const endpointingConfig = report.options.turnHandling?.endpointing;
|
|
81
|
+
|
|
82
|
+
// Keep backwards compatibility with deprecated fields
|
|
83
|
+
const allowInterruptions =
|
|
84
|
+
interruptionConfig?.mode !== undefined
|
|
85
|
+
? interruptionConfig.mode !== false
|
|
86
|
+
: report.options.allowInterruptions;
|
|
87
|
+
const discardAudioIfUninterruptible =
|
|
88
|
+
interruptionConfig?.discardAudioIfUninterruptible ??
|
|
89
|
+
report.options.discardAudioIfUninterruptible;
|
|
90
|
+
const minInterruptionDuration =
|
|
91
|
+
interruptionConfig?.minDuration ?? report.options.minInterruptionDuration;
|
|
92
|
+
const minInterruptionWords = interruptionConfig?.minWords ?? report.options.minInterruptionWords;
|
|
93
|
+
const minEndpointingDelay = endpointingConfig?.minDelay ?? report.options.minEndpointingDelay;
|
|
94
|
+
const maxEndpointingDelay = endpointingConfig?.maxDelay ?? report.options.maxEndpointingDelay;
|
|
73
95
|
|
|
74
96
|
for (const event of report.events) {
|
|
75
97
|
if (event.type === 'metrics_collected') {
|
|
@@ -85,16 +107,17 @@ export function sessionReportToJSON(report: SessionReport): Record<string, unkno
|
|
|
85
107
|
room: report.room,
|
|
86
108
|
events,
|
|
87
109
|
options: {
|
|
88
|
-
allow_interruptions:
|
|
89
|
-
discard_audio_if_uninterruptible:
|
|
90
|
-
min_interruption_duration:
|
|
91
|
-
min_interruption_words:
|
|
92
|
-
min_endpointing_delay:
|
|
93
|
-
max_endpointing_delay:
|
|
110
|
+
allow_interruptions: allowInterruptions,
|
|
111
|
+
discard_audio_if_uninterruptible: discardAudioIfUninterruptible,
|
|
112
|
+
min_interruption_duration: minInterruptionDuration,
|
|
113
|
+
min_interruption_words: minInterruptionWords,
|
|
114
|
+
min_endpointing_delay: minEndpointingDelay,
|
|
115
|
+
max_endpointing_delay: maxEndpointingDelay,
|
|
94
116
|
max_tool_steps: report.options.maxToolSteps,
|
|
95
117
|
},
|
|
96
118
|
chat_history: report.chatHistory.toJSON({ excludeTimestamp: false }),
|
|
97
119
|
enable_user_data_training: report.enableRecording,
|
|
98
120
|
timestamp: report.timestamp,
|
|
121
|
+
usage: report.modelUsage ? report.modelUsage.map(filterZeroValues) : null,
|
|
99
122
|
};
|
|
100
123
|
}
|
|
@@ -12,17 +12,16 @@ import {
|
|
|
12
12
|
type RemoteParticipant,
|
|
13
13
|
type Room,
|
|
14
14
|
RoomEvent,
|
|
15
|
-
type TextStreamInfo,
|
|
16
|
-
type TextStreamReader,
|
|
17
15
|
TrackPublishOptions,
|
|
18
16
|
TrackSource,
|
|
19
17
|
} from '@livekit/rtc-node';
|
|
20
18
|
import type { WritableStreamDefaultWriter } from 'node:stream/web';
|
|
21
|
-
import { ATTRIBUTE_PUBLISH_ON_BEHALF
|
|
19
|
+
import { ATTRIBUTE_PUBLISH_ON_BEHALF } from '../../constants.js';
|
|
22
20
|
import { log } from '../../log.js';
|
|
23
21
|
import { IdentityTransform } from '../../stream/identity_transform.js';
|
|
24
22
|
import { Future, Task, waitForAbort } from '../../utils.js';
|
|
25
23
|
import { type AgentSession } from '../agent_session.js';
|
|
24
|
+
import type { TextInputCallback } from '../client_events.js';
|
|
26
25
|
import {
|
|
27
26
|
AgentSessionEventTypes,
|
|
28
27
|
type AgentStateChangedEvent,
|
|
@@ -39,15 +38,7 @@ import {
|
|
|
39
38
|
ParticipantTranscriptionOutput,
|
|
40
39
|
} from './_output.js';
|
|
41
40
|
|
|
42
|
-
export
|
|
43
|
-
text: string;
|
|
44
|
-
info: TextStreamInfo;
|
|
45
|
-
participant: RemoteParticipant;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
export type TextInputCallback = (sess: AgentSession, ev: TextInputEvent) => void | Promise<void>;
|
|
49
|
-
|
|
50
|
-
const DEFAULT_TEXT_INPUT_CALLBACK: TextInputCallback = (sess: AgentSession, ev: TextInputEvent) => {
|
|
41
|
+
export const DEFAULT_TEXT_INPUT_CALLBACK: TextInputCallback = (sess, ev) => {
|
|
51
42
|
sess.interrupt();
|
|
52
43
|
sess.generateReply({ userInput: ev.text });
|
|
53
44
|
};
|
|
@@ -146,8 +137,6 @@ export class RoomIO {
|
|
|
146
137
|
private forwardUserTranscriptTask?: Task<void>;
|
|
147
138
|
private initTask?: Task<void>;
|
|
148
139
|
|
|
149
|
-
private textStreamHandlerRegistered = false;
|
|
150
|
-
|
|
151
140
|
private logger = log();
|
|
152
141
|
|
|
153
142
|
constructor({
|
|
@@ -282,37 +271,6 @@ export class RoomIO {
|
|
|
282
271
|
}
|
|
283
272
|
};
|
|
284
273
|
|
|
285
|
-
private onUserTextInput = (reader: TextStreamReader, participantInfo: { identity: string }) => {
|
|
286
|
-
if (participantInfo.identity !== this.participantIdentity) {
|
|
287
|
-
return;
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
const participant = this.room.remoteParticipants.get(participantInfo.identity);
|
|
291
|
-
if (!participant) {
|
|
292
|
-
this.logger.warn('participant not found, ignoring text input');
|
|
293
|
-
return;
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
const readText = async () => {
|
|
297
|
-
const text = await reader.readAll();
|
|
298
|
-
|
|
299
|
-
const textInputResult = this.inputOptions.textInputCallback!(this.agentSession, {
|
|
300
|
-
text,
|
|
301
|
-
info: reader.info,
|
|
302
|
-
participant,
|
|
303
|
-
});
|
|
304
|
-
|
|
305
|
-
// check if callback is a Promise
|
|
306
|
-
if (textInputResult instanceof Promise) {
|
|
307
|
-
await textInputResult;
|
|
308
|
-
}
|
|
309
|
-
};
|
|
310
|
-
|
|
311
|
-
readText().catch((error) => {
|
|
312
|
-
this.logger.error({ error }, 'Error reading text input');
|
|
313
|
-
});
|
|
314
|
-
};
|
|
315
|
-
|
|
316
274
|
private async forwardUserTranscript(signal: AbortSignal): Promise<void> {
|
|
317
275
|
const reader = this.userTranscriptStream.readable.getReader();
|
|
318
276
|
try {
|
|
@@ -387,6 +345,10 @@ export class RoomIO {
|
|
|
387
345
|
return this.participantAvailableFuture.done;
|
|
388
346
|
}
|
|
389
347
|
|
|
348
|
+
get rtcRoom(): Room {
|
|
349
|
+
return this.room;
|
|
350
|
+
}
|
|
351
|
+
|
|
390
352
|
get linkedParticipant(): RemoteParticipant | undefined {
|
|
391
353
|
if (!this.isParticipantAvailable) {
|
|
392
354
|
return undefined;
|
|
@@ -439,17 +401,6 @@ export class RoomIO {
|
|
|
439
401
|
}
|
|
440
402
|
|
|
441
403
|
start() {
|
|
442
|
-
if (this.inputOptions.textEnabled) {
|
|
443
|
-
try {
|
|
444
|
-
this.room.registerTextStreamHandler(TOPIC_CHAT, this.onUserTextInput);
|
|
445
|
-
this.textStreamHandlerRegistered = true;
|
|
446
|
-
} catch (error) {
|
|
447
|
-
if (this.inputOptions.textEnabled) {
|
|
448
|
-
this.logger.warn(`text stream handler for topic "${TOPIC_CHAT}" already set, ignoring`);
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
}
|
|
452
|
-
|
|
453
404
|
// -- create inputs --
|
|
454
405
|
if (this.inputOptions.audioEnabled) {
|
|
455
406
|
this.audioInput = new ParticipantAudioInputStream({
|
|
@@ -525,11 +476,6 @@ export class RoomIO {
|
|
|
525
476
|
this.agentSession.off(AgentSessionEventTypes.UserInputTranscribed, this.onUserInputTranscribed);
|
|
526
477
|
this.agentSession.off(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged);
|
|
527
478
|
|
|
528
|
-
if (this.textStreamHandlerRegistered) {
|
|
529
|
-
this.room.unregisterTextStreamHandler(TOPIC_CHAT);
|
|
530
|
-
this.textStreamHandlerRegistered = false;
|
|
531
|
-
}
|
|
532
|
-
|
|
533
479
|
await this.initTask?.cancelAndWait();
|
|
534
480
|
|
|
535
481
|
// Close stream FIRST so reader.read() in forwardUserTranscript can exit.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
/**
|
|
5
|
+
* Configuration for endpointing, which determines when the user's turn is complete.
|
|
6
|
+
*/
|
|
7
|
+
export interface EndpointingOptions {
|
|
8
|
+
/**
|
|
9
|
+
* Endpointing mode. `"fixed"` uses a fixed delay, `"dynamic"` adjusts delay based on
|
|
10
|
+
* end-of-utterance prediction.
|
|
11
|
+
* @defaultValue "fixed"
|
|
12
|
+
*/
|
|
13
|
+
mode: 'fixed' | 'dynamic';
|
|
14
|
+
/**
|
|
15
|
+
* Minimum time in milliseconds since the last detected speech before the agent declares the user's
|
|
16
|
+
* turn complete. In VAD mode this effectively behaves like `max(VAD silence, minDelay)`;
|
|
17
|
+
* in STT mode it is applied after the STT end-of-speech signal, so it can be additive with
|
|
18
|
+
* the STT provider's endpointing delay.
|
|
19
|
+
* @defaultValue 500
|
|
20
|
+
*/
|
|
21
|
+
minDelay: number;
|
|
22
|
+
/**
|
|
23
|
+
* Maximum time in milliseconds the agent will wait before terminating the turn.
|
|
24
|
+
* @defaultValue 3000
|
|
25
|
+
*/
|
|
26
|
+
maxDelay: number;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
export const defaultEndpointingOptions = {
|
|
30
|
+
mode: 'fixed',
|
|
31
|
+
minDelay: 500,
|
|
32
|
+
maxDelay: 3000,
|
|
33
|
+
} as const satisfies EndpointingOptions;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
/**
|
|
5
|
+
* Configuration for interruption handling.
|
|
6
|
+
*/
|
|
7
|
+
export interface InterruptionOptions {
|
|
8
|
+
/**
|
|
9
|
+
* Whether interruptions are enabled.
|
|
10
|
+
* @defaultValue true
|
|
11
|
+
*/
|
|
12
|
+
enabled: boolean;
|
|
13
|
+
/**
|
|
14
|
+
* Interruption handling strategy. `"adaptive"` for ML-based detection, `"vad"` for simple
|
|
15
|
+
* voice-activity detection. `undefined` means auto-detect.
|
|
16
|
+
* @defaultValue undefined
|
|
17
|
+
*/
|
|
18
|
+
mode: 'adaptive' | 'vad' | false | undefined;
|
|
19
|
+
/**
|
|
20
|
+
* When `true`, buffered audio is dropped while the agent is speaking and cannot be interrupted.
|
|
21
|
+
* @defaultValue true
|
|
22
|
+
*/
|
|
23
|
+
discardAudioIfUninterruptible: boolean;
|
|
24
|
+
/**
|
|
25
|
+
* Minimum speech length in milliseconds to register as an interruption.
|
|
26
|
+
* @defaultValue 500
|
|
27
|
+
*/
|
|
28
|
+
minDuration: number;
|
|
29
|
+
/**
|
|
30
|
+
* Minimum number of words to consider an interruption, only used if STT is enabled.
|
|
31
|
+
* @defaultValue 0
|
|
32
|
+
*/
|
|
33
|
+
minWords: number;
|
|
34
|
+
/**
|
|
35
|
+
* If set, emit an `agentFalseInterruption` event after this amount of time if the user is
|
|
36
|
+
* silent and no user transcript is detected after the interruption. Set to `undefined` to
|
|
37
|
+
* disable. The value is in milliseconds.
|
|
38
|
+
* @defaultValue 2000
|
|
39
|
+
*/
|
|
40
|
+
falseInterruptionTimeout: number;
|
|
41
|
+
/**
|
|
42
|
+
* Whether to resume the false interruption after the `falseInterruptionTimeout`.
|
|
43
|
+
* @defaultValue true
|
|
44
|
+
*/
|
|
45
|
+
resumeFalseInterruption: boolean;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
export const defaultInterruptionOptions = {
|
|
49
|
+
enabled: true,
|
|
50
|
+
mode: undefined,
|
|
51
|
+
discardAudioIfUninterruptible: true,
|
|
52
|
+
minDuration: 500,
|
|
53
|
+
minWords: 0,
|
|
54
|
+
falseInterruptionTimeout: 2000,
|
|
55
|
+
resumeFalseInterruption: true,
|
|
56
|
+
} as const satisfies InterruptionOptions;
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { TurnDetectionMode } from '../agent_session.js';
|
|
5
|
+
import { type EndpointingOptions, defaultEndpointingOptions } from './endpointing.js';
|
|
6
|
+
import { type InterruptionOptions, defaultInterruptionOptions } from './interruption.js';
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Configuration for the turn handling system. Used to configure the turn taking behavior of the
|
|
10
|
+
* session.
|
|
11
|
+
*/
|
|
12
|
+
export interface TurnHandlingOptions {
|
|
13
|
+
/**
|
|
14
|
+
* Strategy for deciding when the user has finished speaking.
|
|
15
|
+
*
|
|
16
|
+
* - `"stt"` – rely on speech-to-text end-of-utterance cues
|
|
17
|
+
* - `"vad"` – rely on Voice Activity Detection start/stop cues
|
|
18
|
+
* - `"realtime_llm"` – use server-side detection from a realtime LLM
|
|
19
|
+
* - `"manual"` – caller controls turn boundaries explicitly
|
|
20
|
+
*
|
|
21
|
+
* If not set, the session chooses the best available mode in priority order
|
|
22
|
+
* `realtime_llm → vad → stt → manual`; it automatically falls back if the necessary model
|
|
23
|
+
* is missing.
|
|
24
|
+
*/
|
|
25
|
+
turnDetection: TurnDetectionMode | undefined;
|
|
26
|
+
/**
|
|
27
|
+
* Configuration for endpointing.
|
|
28
|
+
*/
|
|
29
|
+
endpointing: Partial<EndpointingOptions>;
|
|
30
|
+
/**
|
|
31
|
+
* Configuration for interruption handling.
|
|
32
|
+
*/
|
|
33
|
+
interruption: Partial<InterruptionOptions>;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export interface InternalTurnHandlingOptions extends TurnHandlingOptions {
|
|
37
|
+
endpointing: EndpointingOptions;
|
|
38
|
+
interruption: InterruptionOptions;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export const defaultTurnHandlingOptions: InternalTurnHandlingOptions = {
|
|
42
|
+
turnDetection: undefined,
|
|
43
|
+
interruption: defaultInterruptionOptions,
|
|
44
|
+
endpointing: defaultEndpointingOptions,
|
|
45
|
+
};
|