@livekit/agents 1.0.48 → 1.1.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.cjs +27 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +9 -0
- package/dist/constants.d.ts +9 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +18 -0
- package/dist/constants.js.map +1 -1
- package/dist/inference/api_protos.d.cts +71 -71
- package/dist/inference/api_protos.d.ts +71 -71
- package/dist/inference/interruption/defaults.cjs +81 -0
- package/dist/inference/interruption/defaults.cjs.map +1 -0
- package/dist/inference/interruption/defaults.d.cts +19 -0
- package/dist/inference/interruption/defaults.d.ts +19 -0
- package/dist/inference/interruption/defaults.d.ts.map +1 -0
- package/dist/inference/interruption/defaults.js +46 -0
- package/dist/inference/interruption/defaults.js.map +1 -0
- package/dist/inference/interruption/errors.cjs +44 -0
- package/dist/inference/interruption/errors.cjs.map +1 -0
- package/dist/inference/interruption/errors.d.cts +12 -0
- package/dist/inference/interruption/errors.d.ts +12 -0
- package/dist/inference/interruption/errors.d.ts.map +1 -0
- package/dist/inference/interruption/errors.js +20 -0
- package/dist/inference/interruption/errors.js.map +1 -0
- package/dist/inference/interruption/http_transport.cjs +147 -0
- package/dist/inference/interruption/http_transport.cjs.map +1 -0
- package/dist/inference/interruption/http_transport.d.cts +63 -0
- package/dist/inference/interruption/http_transport.d.ts +63 -0
- package/dist/inference/interruption/http_transport.d.ts.map +1 -0
- package/dist/inference/interruption/http_transport.js +121 -0
- package/dist/inference/interruption/http_transport.js.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.js +34 -0
- package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
- package/dist/inference/interruption/interruption_detector.cjs +181 -0
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
- package/dist/inference/interruption/interruption_detector.d.cts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_detector.js +147 -0
- package/dist/inference/interruption/interruption_detector.js.map +1 -0
- package/dist/inference/interruption/interruption_stream.cjs +368 -0
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
- package/dist/inference/interruption/interruption_stream.d.cts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_stream.js +344 -0
- package/dist/inference/interruption/interruption_stream.js.map +1 -0
- package/dist/inference/interruption/types.cjs +17 -0
- package/dist/inference/interruption/types.cjs.map +1 -0
- package/dist/inference/interruption/types.d.cts +66 -0
- package/dist/inference/interruption/types.d.ts +66 -0
- package/dist/inference/interruption/types.d.ts.map +1 -0
- package/dist/inference/interruption/types.js +1 -0
- package/dist/inference/interruption/types.js.map +1 -0
- package/dist/inference/interruption/utils.cjs +130 -0
- package/dist/inference/interruption/utils.cjs.map +1 -0
- package/dist/inference/interruption/utils.d.cts +41 -0
- package/dist/inference/interruption/utils.d.ts +41 -0
- package/dist/inference/interruption/utils.d.ts.map +1 -0
- package/dist/inference/interruption/utils.js +105 -0
- package/dist/inference/interruption/utils.js.map +1 -0
- package/dist/inference/interruption/utils.test.cjs +105 -0
- package/dist/inference/interruption/utils.test.cjs.map +1 -0
- package/dist/inference/interruption/utils.test.js +104 -0
- package/dist/inference/interruption/utils.test.js.map +1 -0
- package/dist/inference/interruption/ws_transport.cjs +329 -0
- package/dist/inference/interruption/ws_transport.cjs.map +1 -0
- package/dist/inference/interruption/ws_transport.d.cts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
- package/dist/inference/interruption/ws_transport.js +295 -0
- package/dist/inference/interruption/ws_transport.js.map +1 -0
- package/dist/inference/llm.cjs +14 -10
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +2 -1
- package/dist/inference/llm.d.ts +2 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +8 -10
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +7 -2
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +2 -0
- package/dist/inference/stt.d.ts +2 -0
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +8 -3
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs +7 -2
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -0
- package/dist/inference/tts.d.ts +2 -0
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +8 -3
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/utils.cjs +26 -7
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +13 -0
- package/dist/inference/utils.d.ts +13 -0
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +18 -2
- package/dist/inference/utils.js.map +1 -1
- package/dist/llm/chat_context.cjs +20 -2
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +19 -1
- package/dist/llm/chat_context.d.ts +19 -1
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +20 -2
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +16 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +9 -0
- package/dist/llm/llm.d.ts +9 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +16 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/realtime.cjs +3 -0
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +1 -0
- package/dist/llm/realtime.d.ts +1 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js +3 -0
- package/dist/llm/realtime.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +45 -1
- package/dist/metrics/base.d.ts +45 -1
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/index.cjs +5 -0
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -1
- package/dist/metrics/index.d.ts +2 -1
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +6 -0
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/model_usage.cjs +189 -0
- package/dist/metrics/model_usage.cjs.map +1 -0
- package/dist/metrics/model_usage.d.cts +92 -0
- package/dist/metrics/model_usage.d.ts +92 -0
- package/dist/metrics/model_usage.d.ts.map +1 -0
- package/dist/metrics/model_usage.js +164 -0
- package/dist/metrics/model_usage.js.map +1 -0
- package/dist/metrics/model_usage.test.cjs +474 -0
- package/dist/metrics/model_usage.test.cjs.map +1 -0
- package/dist/metrics/model_usage.test.js +476 -0
- package/dist/metrics/model_usage.test.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +3 -0
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +9 -0
- package/dist/metrics/usage_collector.d.ts +9 -0
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +3 -0
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +9 -0
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +9 -0
- package/dist/metrics/utils.js.map +1 -1
- package/dist/stream/multi_input_stream.test.cjs +4 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -1
- package/dist/stream/multi_input_stream.test.js +5 -1
- package/dist/stream/multi_input_stream.test.js.map +1 -1
- package/dist/stream/stream_channel.cjs +31 -0
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +4 -2
- package/dist/stream/stream_channel.d.ts +4 -2
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +31 -0
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/stt/stt.cjs +34 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +22 -0
- package/dist/stt/stt.d.ts +22 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +34 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/otel_http_exporter.cjs +24 -5
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -1
- package/dist/telemetry/otel_http_exporter.d.cts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts +1 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -1
- package/dist/telemetry/otel_http_exporter.js +24 -5
- package/dist/telemetry/otel_http_exporter.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +5 -5
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +9 -5
- package/dist/telemetry/trace_types.d.ts +9 -5
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +5 -5
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/telemetry/traces.cjs +47 -8
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +47 -8
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +64 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +34 -0
- package/dist/tts/tts.d.ts +34 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +64 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +25 -4
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -2
- package/dist/voice/agent.d.ts +10 -2
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +25 -4
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +261 -36
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +20 -6
- package/dist/voice/agent_activity.d.ts +20 -6
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +262 -37
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +105 -48
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +90 -20
- package/dist/voice/agent_session.d.ts +90 -20
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +105 -46
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +287 -6
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +42 -3
- package/dist/voice/audio_recognition.d.ts +42 -3
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +289 -7
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/client_events.cjs +554 -0
- package/dist/voice/client_events.cjs.map +1 -0
- package/dist/voice/client_events.d.cts +195 -0
- package/dist/voice/client_events.d.ts +195 -0
- package/dist/voice/client_events.d.ts.map +1 -0
- package/dist/voice/client_events.js +548 -0
- package/dist/voice/client_events.js.map +1 -0
- package/dist/voice/events.cjs +1 -0
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +8 -5
- package/dist/voice/events.d.ts +8 -5
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -0
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +43 -8
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +3 -3
- package/dist/voice/generation.d.ts +3 -3
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -8
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +1 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/report.cjs +20 -8
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -0
- package/dist/voice/report.d.ts +5 -0
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +20 -8
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +106 -0
- package/dist/voice/report.test.cjs.map +1 -0
- package/dist/voice/report.test.js +105 -0
- package/dist/voice/report.test.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +5 -39
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +4 -9
- package/dist/voice/room_io/room_io.d.ts +4 -9
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +5 -40
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/endpointing.cjs +33 -0
- package/dist/voice/turn_config/endpointing.cjs.map +1 -0
- package/dist/voice/turn_config/endpointing.d.cts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
- package/dist/voice/turn_config/endpointing.js +9 -0
- package/dist/voice/turn_config/endpointing.js.map +1 -0
- package/dist/voice/turn_config/interruption.cjs +37 -0
- package/dist/voice/turn_config/interruption.cjs.map +1 -0
- package/dist/voice/turn_config/interruption.d.cts +53 -0
- package/dist/voice/turn_config/interruption.d.ts +53 -0
- package/dist/voice/turn_config/interruption.d.ts.map +1 -0
- package/dist/voice/turn_config/interruption.js +13 -0
- package/dist/voice/turn_config/interruption.js.map +1 -0
- package/dist/voice/turn_config/turn_handling.cjs +35 -0
- package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
- package/dist/voice/turn_config/turn_handling.d.cts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
- package/dist/voice/turn_config/turn_handling.js +11 -0
- package/dist/voice/turn_config/turn_handling.js.map +1 -0
- package/dist/voice/turn_config/utils.cjs +97 -0
- package/dist/voice/turn_config/utils.cjs.map +1 -0
- package/dist/voice/turn_config/utils.d.cts +25 -0
- package/dist/voice/turn_config/utils.d.ts +25 -0
- package/dist/voice/turn_config/utils.d.ts.map +1 -0
- package/dist/voice/turn_config/utils.js +73 -0
- package/dist/voice/turn_config/utils.js.map +1 -0
- package/dist/voice/turn_config/utils.test.cjs +86 -0
- package/dist/voice/turn_config/utils.test.cjs.map +1 -0
- package/dist/voice/turn_config/utils.test.js +85 -0
- package/dist/voice/turn_config/utils.test.js.map +1 -0
- package/dist/voice/wire_format.cjs +798 -0
- package/dist/voice/wire_format.cjs.map +1 -0
- package/dist/voice/wire_format.d.cts +5503 -0
- package/dist/voice/wire_format.d.ts +5503 -0
- package/dist/voice/wire_format.d.ts.map +1 -0
- package/dist/voice/wire_format.js +728 -0
- package/dist/voice/wire_format.js.map +1 -0
- package/package.json +2 -1
- package/src/constants.ts +13 -0
- package/src/inference/interruption/defaults.ts +51 -0
- package/src/inference/interruption/errors.ts +25 -0
- package/src/inference/interruption/http_transport.ts +187 -0
- package/src/inference/interruption/interruption_cache_entry.ts +50 -0
- package/src/inference/interruption/interruption_detector.ts +188 -0
- package/src/inference/interruption/interruption_stream.ts +467 -0
- package/src/inference/interruption/types.ts +84 -0
- package/src/inference/interruption/utils.test.ts +132 -0
- package/src/inference/interruption/utils.ts +137 -0
- package/src/inference/interruption/ws_transport.ts +402 -0
- package/src/inference/llm.ts +9 -12
- package/src/inference/stt.ts +10 -3
- package/src/inference/tts.ts +10 -3
- package/src/inference/utils.ts +29 -1
- package/src/llm/chat_context.ts +40 -2
- package/src/llm/index.ts +1 -0
- package/src/llm/llm.ts +16 -0
- package/src/llm/realtime.ts +4 -0
- package/src/metrics/base.ts +48 -1
- package/src/metrics/index.ts +11 -0
- package/src/metrics/model_usage.test.ts +545 -0
- package/src/metrics/model_usage.ts +262 -0
- package/src/metrics/usage_collector.ts +11 -0
- package/src/metrics/utils.ts +11 -0
- package/src/stream/multi_input_stream.test.ts +6 -1
- package/src/stream/stream_channel.ts +34 -2
- package/src/stt/stt.ts +38 -0
- package/src/telemetry/otel_http_exporter.ts +28 -5
- package/src/telemetry/trace_types.ts +11 -8
- package/src/telemetry/traces.ts +111 -54
- package/src/tts/tts.ts +69 -1
- package/src/voice/agent.ts +30 -3
- package/src/voice/agent_activity.ts +327 -28
- package/src/voice/agent_session.ts +207 -59
- package/src/voice/audio_recognition.ts +385 -9
- package/src/voice/client_events.ts +838 -0
- package/src/voice/events.ts +14 -4
- package/src/voice/generation.ts +52 -9
- package/src/voice/index.ts +1 -0
- package/src/voice/report.test.ts +117 -0
- package/src/voice/report.ts +29 -6
- package/src/voice/room_io/room_io.ts +7 -61
- package/src/voice/turn_config/endpointing.ts +33 -0
- package/src/voice/turn_config/interruption.ts +56 -0
- package/src/voice/turn_config/turn_handling.ts +45 -0
- package/src/voice/turn_config/utils.test.ts +100 -0
- package/src/voice/turn_config/utils.ts +103 -0
- package/src/voice/wire_format.ts +827 -0
package/src/telemetry/traces.ts
CHANGED
|
@@ -22,8 +22,9 @@ import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions';
|
|
|
22
22
|
import FormData from 'form-data';
|
|
23
23
|
import { AccessToken } from 'livekit-server-sdk';
|
|
24
24
|
import fs from 'node:fs/promises';
|
|
25
|
-
import type { ChatContent, ChatItem } from '../llm/index.js';
|
|
25
|
+
import type { ChatContent, ChatItem, ChatRole } from '../llm/index.js';
|
|
26
26
|
import { enableOtelLogging } from '../log.js';
|
|
27
|
+
import { filterZeroValues } from '../metrics/model_usage.js';
|
|
27
28
|
import type { SessionReport } from '../voice/report.js';
|
|
28
29
|
import { type SimpleLogRecord, SimpleOTLPHttpLogExporter } from './otel_http_exporter.js';
|
|
29
30
|
import { flushPinoLogs, initPinoCloudExporter } from './pino_otel_transport.js';
|
|
@@ -285,24 +286,80 @@ export async function flushOtelLogs(): Promise<void> {
|
|
|
285
286
|
await flushPinoLogs();
|
|
286
287
|
}
|
|
287
288
|
|
|
289
|
+
/** Proto-compatible role enum values. */
|
|
290
|
+
type ProtoRole = 'DEVELOPER' | 'SYSTEM' | 'USER' | 'ASSISTANT';
|
|
291
|
+
|
|
292
|
+
const ROLE_MAP: Record<ChatRole, ProtoRole> = {
|
|
293
|
+
developer: 'DEVELOPER',
|
|
294
|
+
system: 'SYSTEM',
|
|
295
|
+
user: 'USER',
|
|
296
|
+
assistant: 'ASSISTANT',
|
|
297
|
+
};
|
|
298
|
+
|
|
299
|
+
interface ProtoMetricsReport {
|
|
300
|
+
startedSpeakingAt?: string;
|
|
301
|
+
stoppedSpeakingAt?: string;
|
|
302
|
+
transcriptionDelay?: number;
|
|
303
|
+
endOfTurnDelay?: number;
|
|
304
|
+
onUserTurnCompletedDelay?: number;
|
|
305
|
+
llmNodeTtft?: number;
|
|
306
|
+
ttsNodeTtfb?: number;
|
|
307
|
+
e2eLatency?: number;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
interface ProtoMessage {
|
|
311
|
+
id: string;
|
|
312
|
+
role: ProtoRole;
|
|
313
|
+
content: { text: ChatContent }[];
|
|
314
|
+
createdAt: string;
|
|
315
|
+
interrupted?: boolean;
|
|
316
|
+
extra?: Record<string, unknown>;
|
|
317
|
+
transcriptConfidence?: number;
|
|
318
|
+
metrics?: ProtoMetricsReport;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
interface ProtoFunctionCall {
|
|
322
|
+
id: string;
|
|
323
|
+
callId: string;
|
|
324
|
+
arguments: string | Record<string, unknown>;
|
|
325
|
+
name: string;
|
|
326
|
+
createdAt: string;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
interface ProtoFunctionCallOutput {
|
|
330
|
+
id: string;
|
|
331
|
+
name: string;
|
|
332
|
+
callId: string;
|
|
333
|
+
output: string;
|
|
334
|
+
isError: boolean;
|
|
335
|
+
createdAt: string;
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
interface ProtoAgentHandoff {
|
|
339
|
+
id: string;
|
|
340
|
+
newAgentId: string;
|
|
341
|
+
createdAt: string;
|
|
342
|
+
oldAgentId?: string;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
interface ProtoChatItem {
|
|
346
|
+
message?: ProtoMessage;
|
|
347
|
+
functionCall?: ProtoFunctionCall;
|
|
348
|
+
functionCallOutput?: ProtoFunctionCallOutput;
|
|
349
|
+
agentHandoff?: ProtoAgentHandoff;
|
|
350
|
+
}
|
|
351
|
+
|
|
288
352
|
/**
|
|
289
353
|
* Convert ChatItem to proto-compatible dictionary format.
|
|
290
354
|
* TODO: Use actual agent_session proto types once @livekit/protocol v1.43.1+ is published
|
|
291
355
|
*/
|
|
292
|
-
function chatItemToProto(item: ChatItem): Record<string, any> {
|
|
293
|
-
const itemDict: Record<string, any> = {};
|
|
356
|
+
function chatItemToProto(item: ChatItem): ProtoChatItem {
|
|
357
|
+
const itemDict: ProtoChatItem = {};
|
|
294
358
|
|
|
295
359
|
if (item.type === 'message') {
|
|
296
|
-
const
|
|
297
|
-
developer: 'DEVELOPER',
|
|
298
|
-
system: 'SYSTEM',
|
|
299
|
-
user: 'USER',
|
|
300
|
-
assistant: 'ASSISTANT',
|
|
301
|
-
};
|
|
302
|
-
|
|
303
|
-
const msg: Record<string, any> = {
|
|
360
|
+
const msg: ProtoMessage = {
|
|
304
361
|
id: item.id,
|
|
305
|
-
role:
|
|
362
|
+
role: ROLE_MAP[item.role] ?? (item.role.toUpperCase() as ProtoRole),
|
|
306
363
|
content: item.content.map((c: ChatContent) => ({ text: c })),
|
|
307
364
|
createdAt: toRFC3339(item.createdAt),
|
|
308
365
|
};
|
|
@@ -311,44 +368,43 @@ function chatItemToProto(item: ChatItem): Record<string, any> {
|
|
|
311
368
|
msg.interrupted = item.interrupted;
|
|
312
369
|
}
|
|
313
370
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
// }
|
|
371
|
+
if (item.extra && Object.keys(item.extra).length > 0) {
|
|
372
|
+
msg.extra = item.extra;
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
if (item.transcriptConfidence !== undefined) {
|
|
376
|
+
msg.transcriptConfidence = item.transcriptConfidence;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
const metrics = item.metrics;
|
|
380
|
+
if (metrics && Object.keys(metrics).length > 0) {
|
|
381
|
+
const protoMetrics: ProtoMetricsReport = {};
|
|
382
|
+
if (metrics.startedSpeakingAt !== undefined) {
|
|
383
|
+
protoMetrics.startedSpeakingAt = toRFC3339(metrics.startedSpeakingAt * 1000);
|
|
384
|
+
}
|
|
385
|
+
if (metrics.stoppedSpeakingAt !== undefined) {
|
|
386
|
+
protoMetrics.stoppedSpeakingAt = toRFC3339(metrics.stoppedSpeakingAt * 1000);
|
|
387
|
+
}
|
|
388
|
+
if (metrics.transcriptionDelay !== undefined) {
|
|
389
|
+
protoMetrics.transcriptionDelay = metrics.transcriptionDelay;
|
|
390
|
+
}
|
|
391
|
+
if (metrics.endOfTurnDelay !== undefined) {
|
|
392
|
+
protoMetrics.endOfTurnDelay = metrics.endOfTurnDelay;
|
|
393
|
+
}
|
|
394
|
+
if (metrics.onUserTurnCompletedDelay !== undefined) {
|
|
395
|
+
protoMetrics.onUserTurnCompletedDelay = metrics.onUserTurnCompletedDelay;
|
|
396
|
+
}
|
|
397
|
+
if (metrics.llmNodeTtft !== undefined) {
|
|
398
|
+
protoMetrics.llmNodeTtft = metrics.llmNodeTtft;
|
|
399
|
+
}
|
|
400
|
+
if (metrics.ttsNodeTtfb !== undefined) {
|
|
401
|
+
protoMetrics.ttsNodeTtfb = metrics.ttsNodeTtfb;
|
|
402
|
+
}
|
|
403
|
+
if (metrics.e2eLatency !== undefined) {
|
|
404
|
+
protoMetrics.e2eLatency = metrics.e2eLatency;
|
|
405
|
+
}
|
|
406
|
+
msg.metrics = protoMetrics;
|
|
407
|
+
}
|
|
352
408
|
|
|
353
409
|
itemDict.message = msg;
|
|
354
410
|
} else if (item.type === 'function_call') {
|
|
@@ -369,7 +425,7 @@ function chatItemToProto(item: ChatItem): Record<string, any> {
|
|
|
369
425
|
createdAt: toRFC3339(item.createdAt),
|
|
370
426
|
};
|
|
371
427
|
} else if (item.type === 'agent_handoff') {
|
|
372
|
-
const handoff: Record<string, any> = {
|
|
428
|
+
const handoff: ProtoAgentHandoff = {
|
|
373
429
|
id: item.id,
|
|
374
430
|
newAgentId: item.newAgentId,
|
|
375
431
|
createdAt: toRFC3339(item.createdAt),
|
|
@@ -397,9 +453,7 @@ function chatItemToProto(item: ChatItem): Record<string, any> {
|
|
|
397
453
|
}
|
|
398
454
|
|
|
399
455
|
/**
|
|
400
|
-
* Convert timestamp to RFC3339 format
|
|
401
|
-
* Note: TypeScript createdAt is in milliseconds (Date.now()), not seconds like Python.
|
|
402
|
-
* @internal
|
|
456
|
+
* Convert timestamp to RFC3339 format
|
|
403
457
|
*/
|
|
404
458
|
function toRFC3339(valueMs: number | Date): string {
|
|
405
459
|
// valueMs is already in milliseconds (from Date.now())
|
|
@@ -445,6 +499,8 @@ export async function uploadSessionReport(options: {
|
|
|
445
499
|
'logger.name': 'chat_history',
|
|
446
500
|
};
|
|
447
501
|
|
|
502
|
+
const usage = report.modelUsage?.map(filterZeroValues) || null;
|
|
503
|
+
|
|
448
504
|
logRecords.push({
|
|
449
505
|
body: 'session report',
|
|
450
506
|
timestampMs: report.startedAt || report.timestamp || 0,
|
|
@@ -453,6 +509,7 @@ export async function uploadSessionReport(options: {
|
|
|
453
509
|
'session.options': report.options || {},
|
|
454
510
|
'session.report_timestamp': report.timestamp,
|
|
455
511
|
agent_name: agentName,
|
|
512
|
+
usage,
|
|
456
513
|
},
|
|
457
514
|
});
|
|
458
515
|
|
package/src/tts/tts.ts
CHANGED
|
@@ -96,6 +96,30 @@ export abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCal
|
|
|
96
96
|
return this.#numChannels;
|
|
97
97
|
}
|
|
98
98
|
|
|
99
|
+
/**
|
|
100
|
+
* Get the model name/identifier for this TTS instance.
|
|
101
|
+
*
|
|
102
|
+
* @returns The model name if available, "unknown" otherwise.
|
|
103
|
+
*
|
|
104
|
+
* @remarks
|
|
105
|
+
* Plugins should override this property to provide their model information.
|
|
106
|
+
*/
|
|
107
|
+
get model(): string {
|
|
108
|
+
return 'unknown';
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Get the provider name for this TTS instance.
|
|
113
|
+
*
|
|
114
|
+
* @returns The provider name if available, "unknown" otherwise.
|
|
115
|
+
*
|
|
116
|
+
* @remarks
|
|
117
|
+
* Plugins should override this property to provide their provider information.
|
|
118
|
+
*/
|
|
119
|
+
get provider(): string {
|
|
120
|
+
return 'unknown';
|
|
121
|
+
}
|
|
122
|
+
|
|
99
123
|
/**
|
|
100
124
|
* Receives text and returns synthesis in the form of a {@link ChunkedStream}
|
|
101
125
|
*/
|
|
@@ -159,6 +183,8 @@ export abstract class SynthesizeStream
|
|
|
159
183
|
#metricsText = '';
|
|
160
184
|
#monitorMetricsTask?: Promise<void>;
|
|
161
185
|
#ttsRequestSpan?: Span;
|
|
186
|
+
#inputTokens = 0;
|
|
187
|
+
#outputTokens = 0;
|
|
162
188
|
|
|
163
189
|
constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) {
|
|
164
190
|
this.#tts = tts;
|
|
@@ -284,6 +310,18 @@ export abstract class SynthesizeStream
|
|
|
284
310
|
}
|
|
285
311
|
}
|
|
286
312
|
|
|
313
|
+
/**
|
|
314
|
+
* Set token usage for token-based TTS billing (e.g., OpenAI TTS).
|
|
315
|
+
* Plugins should call this method to report token usage.
|
|
316
|
+
*/
|
|
317
|
+
protected setTokenUsage({
|
|
318
|
+
inputTokens = 0,
|
|
319
|
+
outputTokens = 0,
|
|
320
|
+
}: { inputTokens?: number; outputTokens?: number } = {}): void {
|
|
321
|
+
this.#inputTokens = inputTokens;
|
|
322
|
+
this.#outputTokens = outputTokens;
|
|
323
|
+
}
|
|
324
|
+
|
|
287
325
|
protected async monitorMetrics() {
|
|
288
326
|
const startTime = process.hrtime.bigint();
|
|
289
327
|
let audioDurationMs = 0;
|
|
@@ -305,12 +343,22 @@ export abstract class SynthesizeStream
|
|
|
305
343
|
audioDurationMs: roundedAudioDurationMs,
|
|
306
344
|
cancelled: this.abortController.signal.aborted,
|
|
307
345
|
label: this.#tts.label,
|
|
308
|
-
|
|
346
|
+
inputTokens: this.#inputTokens,
|
|
347
|
+
outputTokens: this.#outputTokens,
|
|
348
|
+
streamed: true,
|
|
349
|
+
metadata: {
|
|
350
|
+
modelProvider: this.#tts.provider,
|
|
351
|
+
modelName: this.#tts.model,
|
|
352
|
+
},
|
|
309
353
|
};
|
|
310
354
|
if (this.#ttsRequestSpan) {
|
|
311
355
|
this.#ttsRequestSpan.setAttribute(traceTypes.ATTR_TTS_METRICS, JSON.stringify(metrics));
|
|
312
356
|
}
|
|
313
357
|
this.#tts.emit('metrics_collected', metrics);
|
|
358
|
+
|
|
359
|
+
// Reset token usage after emitting metrics for the next segment
|
|
360
|
+
this.#inputTokens = 0;
|
|
361
|
+
this.#outputTokens = 0;
|
|
314
362
|
}
|
|
315
363
|
};
|
|
316
364
|
|
|
@@ -434,6 +482,8 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
|
|
|
434
482
|
#ttsRequestSpan?: Span;
|
|
435
483
|
private _connOptions: APIConnectOptions;
|
|
436
484
|
private logger = log();
|
|
485
|
+
#inputTokens = 0;
|
|
486
|
+
#outputTokens = 0;
|
|
437
487
|
|
|
438
488
|
protected abortController = new AbortController();
|
|
439
489
|
|
|
@@ -541,6 +591,18 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
|
|
|
541
591
|
return this.abortController.signal;
|
|
542
592
|
}
|
|
543
593
|
|
|
594
|
+
/**
|
|
595
|
+
* Set token usage for token-based TTS billing (e.g., OpenAI TTS).
|
|
596
|
+
* Plugins should call this method to report token usage.
|
|
597
|
+
*/
|
|
598
|
+
protected setTokenUsage({
|
|
599
|
+
inputTokens = 0,
|
|
600
|
+
outputTokens = 0,
|
|
601
|
+
}: { inputTokens?: number; outputTokens?: number } = {}): void {
|
|
602
|
+
this.#inputTokens = inputTokens;
|
|
603
|
+
this.#outputTokens = outputTokens;
|
|
604
|
+
}
|
|
605
|
+
|
|
544
606
|
protected async monitorMetrics() {
|
|
545
607
|
const startTime = process.hrtime.bigint();
|
|
546
608
|
let audioDurationMs = 0;
|
|
@@ -568,7 +630,13 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
|
|
|
568
630
|
audioDurationMs: Math.round(audioDurationMs),
|
|
569
631
|
cancelled: false, // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here
|
|
570
632
|
label: this.#tts.label,
|
|
633
|
+
inputTokens: this.#inputTokens,
|
|
634
|
+
outputTokens: this.#outputTokens,
|
|
571
635
|
streamed: false,
|
|
636
|
+
metadata: {
|
|
637
|
+
modelProvider: this.#tts.provider,
|
|
638
|
+
modelName: this.#tts.model,
|
|
639
|
+
},
|
|
572
640
|
};
|
|
573
641
|
|
|
574
642
|
if (this.#ttsRequestSpan) {
|
package/src/voice/agent.ts
CHANGED
|
@@ -35,6 +35,9 @@ import { type AgentActivity, agentActivityStorage } from './agent_activity.js';
|
|
|
35
35
|
import type { AgentSession, TurnDetectionMode } from './agent_session.js';
|
|
36
36
|
import type { TimedString } from './io.js';
|
|
37
37
|
import type { SpeechHandle } from './speech_handle.js';
|
|
38
|
+
import type { InterruptionOptions } from './turn_config/interruption.js';
|
|
39
|
+
import type { TurnHandlingOptions } from './turn_config/turn_handling.js';
|
|
40
|
+
import { migrateLegacyOptions } from './turn_config/utils.js';
|
|
38
41
|
|
|
39
42
|
export const functionCallStorage = new AsyncLocalStorage<{ functionCall?: FunctionCall }>();
|
|
40
43
|
export const speechHandleStorage = new AsyncLocalStorage<SpeechHandle>();
|
|
@@ -110,6 +113,7 @@ export interface AgentOptions<UserData> {
|
|
|
110
113
|
instructions: string;
|
|
111
114
|
chatCtx?: ChatContext;
|
|
112
115
|
tools?: ToolContext<UserData>;
|
|
116
|
+
/** @deprecated use turnHandling instead */
|
|
113
117
|
turnDetection?: TurnDetectionMode;
|
|
114
118
|
stt?: STT | STTModelString;
|
|
115
119
|
vad?: VAD;
|
|
@@ -117,16 +121,19 @@ export interface AgentOptions<UserData> {
|
|
|
117
121
|
tts?: TTS | TTSModelString;
|
|
118
122
|
allowInterruptions?: boolean;
|
|
119
123
|
minConsecutiveSpeechDelay?: number;
|
|
124
|
+
turnHandling?: TurnHandlingOptions;
|
|
120
125
|
useTtsAlignedTranscript?: boolean;
|
|
121
126
|
}
|
|
122
127
|
|
|
123
128
|
export class Agent<UserData = any> {
|
|
124
129
|
private _id: string;
|
|
125
|
-
private turnDetection?: TurnDetectionMode;
|
|
126
130
|
private _stt?: STT;
|
|
127
131
|
private _vad?: VAD;
|
|
128
132
|
private _llm?: LLM | RealtimeModel;
|
|
129
133
|
private _tts?: TTS;
|
|
134
|
+
private turnHandling?: TurnHandlingOptions;
|
|
135
|
+
private _interruptionDetection: InterruptionOptions['mode'];
|
|
136
|
+
private _allowInterruptions?: boolean;
|
|
130
137
|
private _useTtsAlignedTranscript?: boolean;
|
|
131
138
|
|
|
132
139
|
/** @internal */
|
|
@@ -151,7 +158,9 @@ export class Agent<UserData = any> {
|
|
|
151
158
|
vad,
|
|
152
159
|
llm,
|
|
153
160
|
tts,
|
|
161
|
+
turnHandling,
|
|
154
162
|
useTtsAlignedTranscript,
|
|
163
|
+
allowInterruptions,
|
|
155
164
|
}: AgentOptions<UserData>) {
|
|
156
165
|
if (id) {
|
|
157
166
|
this._id = id;
|
|
@@ -176,7 +185,12 @@ export class Agent<UserData = any> {
|
|
|
176
185
|
})
|
|
177
186
|
: ChatContext.empty();
|
|
178
187
|
|
|
179
|
-
|
|
188
|
+
const migratedOptions = migrateLegacyOptions({
|
|
189
|
+
turnDetection,
|
|
190
|
+
options: { turnHandling, allowInterruptions },
|
|
191
|
+
});
|
|
192
|
+
this.turnHandling = migratedOptions.options.turnHandling;
|
|
193
|
+
|
|
180
194
|
this._vad = vad;
|
|
181
195
|
|
|
182
196
|
if (typeof stt === 'string') {
|
|
@@ -197,6 +211,10 @@ export class Agent<UserData = any> {
|
|
|
197
211
|
this._tts = tts;
|
|
198
212
|
}
|
|
199
213
|
|
|
214
|
+
this._interruptionDetection = this.turnHandling?.interruption.mode;
|
|
215
|
+
if (this.turnHandling?.interruption.mode !== undefined) {
|
|
216
|
+
this._allowInterruptions = !!this.turnHandling.interruption.mode;
|
|
217
|
+
}
|
|
200
218
|
this._useTtsAlignedTranscript = useTtsAlignedTranscript;
|
|
201
219
|
|
|
202
220
|
this._agentActivity = undefined;
|
|
@@ -242,6 +260,14 @@ export class Agent<UserData = any> {
|
|
|
242
260
|
return this.getActivityOrThrow().agentSession as AgentSession<UserData>;
|
|
243
261
|
}
|
|
244
262
|
|
|
263
|
+
get interruptionDetection(): InterruptionOptions['mode'] {
|
|
264
|
+
return this._interruptionDetection;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
get allowInterruptions(): boolean | undefined {
|
|
268
|
+
return this._allowInterruptions;
|
|
269
|
+
}
|
|
270
|
+
|
|
245
271
|
async onEnter(): Promise<void> {}
|
|
246
272
|
|
|
247
273
|
async onExit(): Promise<void> {}
|
|
@@ -341,7 +367,8 @@ export class Agent<UserData = any> {
|
|
|
341
367
|
|
|
342
368
|
// Set startTimeOffset to provide linear timestamps across reconnections
|
|
343
369
|
const audioInputStartedAt =
|
|
344
|
-
activity.
|
|
370
|
+
activity.inputStartedAt ?? // Use input started at proxied from AudioRecognition if available
|
|
371
|
+
activity.agentSession._recorderIO?.recordingStartedAt ?? // Fallback to recording start time if available
|
|
345
372
|
activity.agentSession._startedAt ?? // Fallback to session start time
|
|
346
373
|
Date.now(); // Fallback to current time
|
|
347
374
|
|