@livekit/agents 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_exceptions.cjs.map +1 -1
- package/dist/_exceptions.d.ts.map +1 -1
- package/dist/_exceptions.js.map +1 -1
- package/dist/audio.cjs +89 -3
- package/dist/audio.cjs.map +1 -1
- package/dist/audio.d.cts +36 -1
- package/dist/audio.d.ts +36 -1
- package/dist/audio.d.ts.map +1 -1
- package/dist/audio.js +76 -2
- package/dist/audio.js.map +1 -1
- package/dist/beta/index.cjs +29 -0
- package/dist/beta/index.cjs.map +1 -0
- package/dist/beta/index.d.cts +2 -0
- package/dist/beta/index.d.ts +2 -0
- package/dist/beta/index.d.ts.map +1 -0
- package/dist/beta/index.js +7 -0
- package/dist/beta/index.js.map +1 -0
- package/dist/beta/workflows/index.cjs +29 -0
- package/dist/beta/workflows/index.cjs.map +1 -0
- package/dist/beta/workflows/index.d.cts +2 -0
- package/dist/beta/workflows/index.d.ts +2 -0
- package/dist/beta/workflows/index.d.ts.map +1 -0
- package/dist/beta/workflows/index.js +7 -0
- package/dist/beta/workflows/index.js.map +1 -0
- package/dist/beta/workflows/task_group.cjs +165 -0
- package/dist/beta/workflows/task_group.cjs.map +1 -0
- package/dist/beta/workflows/task_group.d.cts +32 -0
- package/dist/beta/workflows/task_group.d.ts +32 -0
- package/dist/beta/workflows/task_group.d.ts.map +1 -0
- package/dist/beta/workflows/task_group.js +141 -0
- package/dist/beta/workflows/task_group.js.map +1 -0
- package/dist/cli.cjs +44 -46
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.cts +3 -3
- package/dist/cli.d.ts +3 -3
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +45 -47
- package/dist/cli.js.map +1 -1
- package/dist/connection_pool.cjs +242 -0
- package/dist/connection_pool.cjs.map +1 -0
- package/dist/connection_pool.d.cts +123 -0
- package/dist/connection_pool.d.ts +123 -0
- package/dist/connection_pool.d.ts.map +1 -0
- package/dist/connection_pool.js +218 -0
- package/dist/connection_pool.js.map +1 -0
- package/dist/connection_pool.test.cjs +256 -0
- package/dist/connection_pool.test.cjs.map +1 -0
- package/dist/connection_pool.test.js +255 -0
- package/dist/connection_pool.test.js.map +1 -0
- package/dist/constants.cjs +30 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +10 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +20 -0
- package/dist/constants.js.map +1 -1
- package/dist/cpu.cjs +189 -0
- package/dist/cpu.cjs.map +1 -0
- package/dist/cpu.d.cts +24 -0
- package/dist/cpu.d.ts +24 -0
- package/dist/cpu.d.ts.map +1 -0
- package/dist/cpu.js +152 -0
- package/dist/cpu.js.map +1 -0
- package/dist/cpu.test.cjs +227 -0
- package/dist/cpu.test.cjs.map +1 -0
- package/dist/cpu.test.js +204 -0
- package/dist/cpu.test.js.map +1 -0
- package/dist/http_server.cjs +9 -6
- package/dist/http_server.cjs.map +1 -1
- package/dist/http_server.d.cts +5 -1
- package/dist/http_server.d.ts +5 -1
- package/dist/http_server.d.ts.map +1 -1
- package/dist/http_server.js +9 -6
- package/dist/http_server.js.map +1 -1
- package/dist/index.cjs +24 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -11
- package/dist/index.d.ts +15 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +18 -9
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.cjs +70 -2
- package/dist/inference/api_protos.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +373 -32
- package/dist/inference/api_protos.d.ts +373 -32
- package/dist/inference/api_protos.d.ts.map +1 -1
- package/dist/inference/api_protos.js +62 -2
- package/dist/inference/api_protos.js.map +1 -1
- package/dist/inference/index.cjs +8 -0
- package/dist/inference/index.cjs.map +1 -1
- package/dist/inference/index.d.cts +3 -4
- package/dist/inference/index.d.ts +3 -4
- package/dist/inference/index.d.ts.map +1 -1
- package/dist/inference/index.js +18 -3
- package/dist/inference/index.js.map +1 -1
- package/dist/inference/interruption/defaults.cjs +81 -0
- package/dist/inference/interruption/defaults.cjs.map +1 -0
- package/dist/inference/interruption/defaults.d.cts +19 -0
- package/dist/inference/interruption/defaults.d.ts +19 -0
- package/dist/inference/interruption/defaults.d.ts.map +1 -0
- package/dist/inference/interruption/defaults.js +46 -0
- package/dist/inference/interruption/defaults.js.map +1 -0
- package/dist/inference/interruption/errors.cjs +44 -0
- package/dist/inference/interruption/errors.cjs.map +1 -0
- package/dist/inference/interruption/errors.d.cts +12 -0
- package/dist/inference/interruption/errors.d.ts +12 -0
- package/dist/inference/interruption/errors.d.ts.map +1 -0
- package/dist/inference/interruption/errors.js +20 -0
- package/dist/inference/interruption/errors.js.map +1 -0
- package/dist/inference/interruption/http_transport.cjs +163 -0
- package/dist/inference/interruption/http_transport.cjs.map +1 -0
- package/dist/inference/interruption/http_transport.d.cts +65 -0
- package/dist/inference/interruption/http_transport.d.ts +65 -0
- package/dist/inference/interruption/http_transport.d.ts.map +1 -0
- package/dist/inference/interruption/http_transport.js +137 -0
- package/dist/inference/interruption/http_transport.js.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.js +34 -0
- package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
- package/dist/inference/interruption/interruption_detector.cjs +198 -0
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
- package/dist/inference/interruption/interruption_detector.d.cts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_detector.js +164 -0
- package/dist/inference/interruption/interruption_detector.js.map +1 -0
- package/dist/inference/interruption/interruption_stream.cjs +368 -0
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
- package/dist/inference/interruption/interruption_stream.d.cts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_stream.js +344 -0
- package/dist/inference/interruption/interruption_stream.js.map +1 -0
- package/dist/inference/interruption/types.cjs +17 -0
- package/dist/inference/interruption/types.cjs.map +1 -0
- package/dist/inference/interruption/types.d.cts +66 -0
- package/dist/inference/interruption/types.d.ts +66 -0
- package/dist/inference/interruption/types.d.ts.map +1 -0
- package/dist/inference/interruption/types.js +1 -0
- package/dist/inference/interruption/types.js.map +1 -0
- package/dist/inference/interruption/utils.cjs +130 -0
- package/dist/inference/interruption/utils.cjs.map +1 -0
- package/dist/inference/interruption/utils.d.cts +41 -0
- package/dist/inference/interruption/utils.d.ts +41 -0
- package/dist/inference/interruption/utils.d.ts.map +1 -0
- package/dist/inference/interruption/utils.js +105 -0
- package/dist/inference/interruption/utils.js.map +1 -0
- package/dist/inference/interruption/utils.test.cjs +105 -0
- package/dist/inference/interruption/utils.test.cjs.map +1 -0
- package/dist/inference/interruption/utils.test.js +104 -0
- package/dist/inference/interruption/utils.test.js.map +1 -0
- package/dist/inference/interruption/ws_transport.cjs +347 -0
- package/dist/inference/interruption/ws_transport.cjs.map +1 -0
- package/dist/inference/interruption/ws_transport.d.cts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
- package/dist/inference/interruption/ws_transport.js +313 -0
- package/dist/inference/interruption/ws_transport.js.map +1 -0
- package/dist/inference/llm.cjs +106 -66
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +65 -43
- package/dist/inference/llm.d.ts +65 -43
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +100 -66
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +319 -170
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +64 -15
- package/dist/inference/stt.d.ts +64 -15
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +319 -170
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +218 -0
- package/dist/inference/stt.test.cjs.map +1 -0
- package/dist/inference/stt.test.js +217 -0
- package/dist/inference/stt.test.js.map +1 -0
- package/dist/inference/tts.cjs +249 -71
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +94 -17
- package/dist/inference/tts.d.ts +94 -17
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +249 -77
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +305 -0
- package/dist/inference/tts.test.cjs.map +1 -0
- package/dist/inference/tts.test.js +304 -0
- package/dist/inference/tts.test.js.map +1 -0
- package/dist/inference/utils.cjs +26 -7
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +14 -1
- package/dist/inference/utils.d.ts +14 -1
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +18 -2
- package/dist/inference/utils.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs +6 -3
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/inference_proc_executor.d.ts.map +1 -1
- package/dist/ipc/inference_proc_executor.js +6 -3
- package/dist/ipc/inference_proc_executor.js.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.cjs +13 -1
- package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.js +13 -1
- package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
- package/dist/ipc/job_proc_executor.cjs +6 -1
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.d.ts.map +1 -1
- package/dist/ipc/job_proc_executor.js +6 -1
- package/dist/ipc/job_proc_executor.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +89 -17
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +68 -18
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.cjs +34 -8
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.d.cts +8 -0
- package/dist/ipc/supervised_proc.d.ts +8 -0
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/ipc/supervised_proc.js +34 -8
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/ipc/supervised_proc.test.cjs +145 -0
- package/dist/ipc/supervised_proc.test.cjs.map +1 -0
- package/dist/ipc/supervised_proc.test.js +122 -0
- package/dist/ipc/supervised_proc.test.js.map +1 -0
- package/dist/job.cjs +109 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +14 -0
- package/dist/job.d.ts +14 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +99 -1
- package/dist/job.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/llm/chat_context.cjs +345 -3
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +86 -2
- package/dist/llm/chat_context.d.ts +86 -2
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +344 -3
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/chat_context.test.cjs +692 -0
- package/dist/llm/chat_context.test.cjs.map +1 -1
- package/dist/llm/chat_context.test.js +692 -0
- package/dist/llm/chat_context.test.js.map +1 -1
- package/dist/llm/fallback_adapter.cjs +280 -0
- package/dist/llm/fallback_adapter.cjs.map +1 -0
- package/dist/llm/fallback_adapter.d.cts +73 -0
- package/dist/llm/fallback_adapter.d.ts +73 -0
- package/dist/llm/fallback_adapter.d.ts.map +1 -0
- package/dist/llm/fallback_adapter.js +256 -0
- package/dist/llm/fallback_adapter.js.map +1 -0
- package/dist/llm/fallback_adapter.test.cjs +176 -0
- package/dist/llm/fallback_adapter.test.cjs.map +1 -0
- package/dist/llm/fallback_adapter.test.js +175 -0
- package/dist/llm/fallback_adapter.test.js.map +1 -0
- package/dist/llm/index.cjs +11 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +4 -3
- package/dist/llm/index.d.ts +4 -3
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +13 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +65 -11
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +13 -2
- package/dist/llm/llm.d.ts +13 -2
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +65 -11
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/google.cjs +6 -2
- package/dist/llm/provider_format/google.cjs.map +1 -1
- package/dist/llm/provider_format/google.d.cts +1 -1
- package/dist/llm/provider_format/google.d.ts +1 -1
- package/dist/llm/provider_format/google.d.ts.map +1 -1
- package/dist/llm/provider_format/google.js +6 -2
- package/dist/llm/provider_format/google.js.map +1 -1
- package/dist/llm/provider_format/google.test.cjs +48 -0
- package/dist/llm/provider_format/google.test.cjs.map +1 -1
- package/dist/llm/provider_format/google.test.js +54 -1
- package/dist/llm/provider_format/google.test.js.map +1 -1
- package/dist/llm/provider_format/index.cjs +2 -0
- package/dist/llm/provider_format/index.cjs.map +1 -1
- package/dist/llm/provider_format/index.d.cts +2 -2
- package/dist/llm/provider_format/index.d.ts +2 -2
- package/dist/llm/provider_format/index.d.ts.map +1 -1
- package/dist/llm/provider_format/index.js +6 -1
- package/dist/llm/provider_format/index.js.map +1 -1
- package/dist/llm/provider_format/openai.cjs +126 -24
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.d.cts +1 -0
- package/dist/llm/provider_format/openai.d.ts +1 -0
- package/dist/llm/provider_format/openai.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.js +124 -23
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +393 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +400 -2
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/provider_format/utils.cjs +5 -4
- package/dist/llm/provider_format/utils.cjs.map +1 -1
- package/dist/llm/provider_format/utils.d.ts.map +1 -1
- package/dist/llm/provider_format/utils.js +5 -4
- package/dist/llm/provider_format/utils.js.map +1 -1
- package/dist/llm/realtime.cjs +3 -0
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +15 -1
- package/dist/llm/realtime.d.ts +15 -1
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js +3 -0
- package/dist/llm/realtime.js.map +1 -1
- package/dist/llm/remote_chat_context.cjs.map +1 -1
- package/dist/llm/remote_chat_context.d.cts +2 -0
- package/dist/llm/remote_chat_context.d.ts +2 -0
- package/dist/llm/remote_chat_context.d.ts.map +1 -1
- package/dist/llm/remote_chat_context.js.map +1 -1
- package/dist/llm/tool_context.cjs +50 -2
- package/dist/llm/tool_context.cjs.map +1 -1
- package/dist/llm/tool_context.d.cts +47 -11
- package/dist/llm/tool_context.d.ts +47 -11
- package/dist/llm/tool_context.d.ts.map +1 -1
- package/dist/llm/tool_context.js +48 -3
- package/dist/llm/tool_context.js.map +1 -1
- package/dist/llm/tool_context.test.cjs +197 -0
- package/dist/llm/tool_context.test.cjs.map +1 -1
- package/dist/llm/tool_context.test.js +175 -0
- package/dist/llm/tool_context.test.js.map +1 -1
- package/dist/llm/utils.cjs +107 -12
- package/dist/llm/utils.cjs.map +1 -1
- package/dist/llm/utils.d.cts +10 -3
- package/dist/llm/utils.d.ts +10 -3
- package/dist/llm/utils.d.ts.map +1 -1
- package/dist/llm/utils.js +106 -12
- package/dist/llm/utils.js.map +1 -1
- package/dist/llm/utils.test.cjs +90 -0
- package/dist/llm/utils.test.cjs.map +1 -1
- package/dist/llm/utils.test.js +98 -2
- package/dist/llm/utils.test.js.map +1 -1
- package/dist/llm/zod-utils.cjs +102 -0
- package/dist/llm/zod-utils.cjs.map +1 -0
- package/dist/llm/zod-utils.d.cts +65 -0
- package/dist/llm/zod-utils.d.ts +65 -0
- package/dist/llm/zod-utils.d.ts.map +1 -0
- package/dist/llm/zod-utils.js +64 -0
- package/dist/llm/zod-utils.js.map +1 -0
- package/dist/llm/zod-utils.test.cjs +472 -0
- package/dist/llm/zod-utils.test.cjs.map +1 -0
- package/dist/llm/zod-utils.test.js +455 -0
- package/dist/llm/zod-utils.test.js.map +1 -0
- package/dist/log.cjs +45 -14
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +8 -1
- package/dist/log.d.ts +8 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +45 -15
- package/dist/log.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +75 -19
- package/dist/metrics/base.d.ts +75 -19
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/index.cjs +5 -0
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -1
- package/dist/metrics/index.d.ts +2 -1
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +6 -0
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/model_usage.cjs +189 -0
- package/dist/metrics/model_usage.cjs.map +1 -0
- package/dist/metrics/model_usage.d.cts +92 -0
- package/dist/metrics/model_usage.d.ts +92 -0
- package/dist/metrics/model_usage.d.ts.map +1 -0
- package/dist/metrics/model_usage.js +164 -0
- package/dist/metrics/model_usage.js.map +1 -0
- package/dist/metrics/model_usage.test.cjs +474 -0
- package/dist/metrics/model_usage.test.cjs.map +1 -0
- package/dist/metrics/model_usage.test.js +476 -0
- package/dist/metrics/model_usage.test.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +5 -2
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +10 -1
- package/dist/metrics/usage_collector.d.ts +10 -1
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +5 -2
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +23 -7
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +23 -7
- package/dist/metrics/utils.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +31 -10
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.cts +6 -1
- package/dist/stream/deferred_stream.d.ts +6 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +31 -10
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stream/deferred_stream.test.cjs +2 -2
- package/dist/stream/deferred_stream.test.cjs.map +1 -1
- package/dist/stream/deferred_stream.test.js +2 -2
- package/dist/stream/deferred_stream.test.js.map +1 -1
- package/dist/stream/index.cjs +3 -0
- package/dist/stream/index.cjs.map +1 -1
- package/dist/stream/index.d.cts +1 -0
- package/dist/stream/index.d.ts +1 -0
- package/dist/stream/index.d.ts.map +1 -1
- package/dist/stream/index.js +2 -0
- package/dist/stream/index.js.map +1 -1
- package/dist/stream/multi_input_stream.cjs +139 -0
- package/dist/stream/multi_input_stream.cjs.map +1 -0
- package/dist/stream/multi_input_stream.d.cts +55 -0
- package/dist/stream/multi_input_stream.d.ts +55 -0
- package/dist/stream/multi_input_stream.d.ts.map +1 -0
- package/dist/stream/multi_input_stream.js +115 -0
- package/dist/stream/multi_input_stream.js.map +1 -0
- package/dist/stream/multi_input_stream.test.cjs +344 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -0
- package/dist/stream/multi_input_stream.test.js +343 -0
- package/dist/stream/multi_input_stream.test.js.map +1 -0
- package/dist/stream/stream_channel.cjs +39 -1
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +5 -2
- package/dist/stream/stream_channel.d.ts +5 -2
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +39 -1
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/stream/stream_channel.test.cjs +27 -0
- package/dist/stream/stream_channel.test.cjs.map +1 -1
- package/dist/stream/stream_channel.test.js +27 -0
- package/dist/stream/stream_channel.test.js.map +1 -1
- package/dist/stt/stream_adapter.cjs +24 -9
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.cts +7 -3
- package/dist/stt/stream_adapter.d.ts +7 -3
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +24 -9
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/stt/stt.cjs +94 -19
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +68 -5
- package/dist/stt/stt.d.ts +68 -5
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +96 -21
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/index.cjs +72 -0
- package/dist/telemetry/index.cjs.map +1 -0
- package/dist/telemetry/index.d.cts +7 -0
- package/dist/telemetry/index.d.ts +7 -0
- package/dist/telemetry/index.d.ts.map +1 -0
- package/dist/telemetry/index.js +37 -0
- package/dist/telemetry/index.js.map +1 -0
- package/dist/telemetry/logging.cjs +65 -0
- package/dist/telemetry/logging.cjs.map +1 -0
- package/dist/telemetry/logging.d.cts +21 -0
- package/dist/telemetry/logging.d.ts +21 -0
- package/dist/telemetry/logging.d.ts.map +1 -0
- package/dist/telemetry/logging.js +40 -0
- package/dist/telemetry/logging.js.map +1 -0
- package/dist/telemetry/otel_http_exporter.cjs +166 -0
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
- package/dist/telemetry/otel_http_exporter.d.cts +63 -0
- package/dist/telemetry/otel_http_exporter.d.ts +63 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
- package/dist/telemetry/otel_http_exporter.js +142 -0
- package/dist/telemetry/otel_http_exporter.js.map +1 -0
- package/dist/telemetry/pino_otel_transport.cjs +217 -0
- package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
- package/dist/telemetry/pino_otel_transport.d.cts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
- package/dist/telemetry/pino_otel_transport.js +189 -0
- package/dist/telemetry/pino_otel_transport.js.map +1 -0
- package/dist/telemetry/trace_types.cjs +233 -0
- package/dist/telemetry/trace_types.cjs.map +1 -0
- package/dist/telemetry/trace_types.d.cts +74 -0
- package/dist/telemetry/trace_types.d.ts +74 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -0
- package/dist/telemetry/trace_types.js +141 -0
- package/dist/telemetry/trace_types.js.map +1 -0
- package/dist/telemetry/traces.cjs +484 -0
- package/dist/telemetry/traces.cjs.map +1 -0
- package/dist/telemetry/traces.d.cts +116 -0
- package/dist/telemetry/traces.d.ts +116 -0
- package/dist/telemetry/traces.d.ts.map +1 -0
- package/dist/telemetry/traces.js +449 -0
- package/dist/telemetry/traces.js.map +1 -0
- package/dist/telemetry/utils.cjs +86 -0
- package/dist/telemetry/utils.cjs.map +1 -0
- package/dist/telemetry/utils.d.cts +5 -0
- package/dist/telemetry/utils.d.ts +5 -0
- package/dist/telemetry/utils.d.ts.map +1 -0
- package/dist/telemetry/utils.js +51 -0
- package/dist/telemetry/utils.js.map +1 -0
- package/dist/tokenize/basic/sentence.cjs +3 -3
- package/dist/tokenize/basic/sentence.cjs.map +1 -1
- package/dist/tokenize/basic/sentence.js +3 -3
- package/dist/tokenize/basic/sentence.js.map +1 -1
- package/dist/tokenize/tokenizer.test.cjs +3 -1
- package/dist/tokenize/tokenizer.test.cjs.map +1 -1
- package/dist/tokenize/tokenizer.test.js +3 -1
- package/dist/tokenize/tokenizer.test.js.map +1 -1
- package/dist/transcription.cjs.map +1 -1
- package/dist/transcription.d.cts +6 -0
- package/dist/transcription.d.ts +6 -0
- package/dist/transcription.d.ts.map +1 -1
- package/dist/transcription.js.map +1 -1
- package/dist/tts/fallback_adapter.cjs +472 -0
- package/dist/tts/fallback_adapter.cjs.map +1 -0
- package/dist/tts/fallback_adapter.d.cts +110 -0
- package/dist/tts/fallback_adapter.d.ts +110 -0
- package/dist/tts/fallback_adapter.d.ts.map +1 -0
- package/dist/tts/fallback_adapter.js +448 -0
- package/dist/tts/fallback_adapter.js.map +1 -0
- package/dist/tts/index.cjs +3 -0
- package/dist/tts/index.cjs.map +1 -1
- package/dist/tts/index.d.cts +1 -0
- package/dist/tts/index.d.ts +1 -0
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +2 -0
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +25 -8
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.cts +6 -3
- package/dist/tts/stream_adapter.d.ts +6 -3
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +25 -8
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +189 -57
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +58 -6
- package/dist/tts/tts.d.ts +58 -6
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +191 -59
- package/dist/tts/tts.js.map +1 -1
- package/dist/typed_promise.cjs +48 -0
- package/dist/typed_promise.cjs.map +1 -0
- package/dist/typed_promise.d.cts +24 -0
- package/dist/typed_promise.d.ts +24 -0
- package/dist/typed_promise.d.ts.map +1 -0
- package/dist/typed_promise.js +28 -0
- package/dist/typed_promise.js.map +1 -0
- package/dist/types.cjs +24 -32
- package/dist/types.cjs.map +1 -1
- package/dist/types.d.cts +45 -10
- package/dist/types.d.ts +45 -10
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +20 -30
- package/dist/types.js.map +1 -1
- package/dist/utils.cjs +124 -28
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +41 -1
- package/dist/utils.d.ts +41 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +119 -27
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +73 -1
- package/dist/utils.test.cjs.map +1 -1
- package/dist/utils.test.js +74 -10
- package/dist/utils.test.js.map +1 -1
- package/dist/vad.cjs +35 -15
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +15 -5
- package/dist/vad.d.ts +15 -5
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +35 -15
- package/dist/vad.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.cjs.map +1 -1
- package/dist/version.d.cts +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.d.ts.map +1 -1
- package/dist/version.js +1 -1
- package/dist/version.js.map +1 -1
- package/dist/voice/agent.cjs +258 -35
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +54 -13
- package/dist/voice/agent.d.ts +54 -13
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +254 -34
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +314 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +316 -2
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +1116 -385
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +72 -11
- package/dist/voice/agent_activity.d.ts +72 -11
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +1119 -383
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/agent_session.cjs +550 -90
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +185 -25
- package/dist/voice/agent_session.d.ts +185 -25
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +556 -91
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +605 -46
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +96 -4
- package/dist/voice/audio_recognition.d.ts +96 -4
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +611 -47
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +295 -0
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
- package/dist/voice/audio_recognition_span.test.js +299 -0
- package/dist/voice/audio_recognition_span.test.js.map +1 -0
- package/dist/voice/avatar/datastream_io.cjs +7 -1
- package/dist/voice/avatar/datastream_io.cjs.map +1 -1
- package/dist/voice/avatar/datastream_io.d.cts +1 -0
- package/dist/voice/avatar/datastream_io.d.ts +1 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -1
- package/dist/voice/avatar/datastream_io.js +7 -1
- package/dist/voice/avatar/datastream_io.js.map +1 -1
- package/dist/voice/background_audio.cjs +367 -0
- package/dist/voice/background_audio.cjs.map +1 -0
- package/dist/voice/background_audio.d.cts +123 -0
- package/dist/voice/background_audio.d.ts +123 -0
- package/dist/voice/background_audio.d.ts.map +1 -0
- package/dist/voice/background_audio.js +343 -0
- package/dist/voice/background_audio.js.map +1 -0
- package/dist/voice/events.cjs +3 -0
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +16 -9
- package/dist/voice/events.d.ts +16 -9
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +3 -0
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +205 -41
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +21 -5
- package/dist/voice/generation.d.ts +21 -5
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +215 -43
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/generation_tools.test.cjs +236 -0
- package/dist/voice/generation_tools.test.cjs.map +1 -0
- package/dist/voice/generation_tools.test.js +235 -0
- package/dist/voice/generation_tools.test.js.map +1 -0
- package/dist/voice/index.cjs +33 -2
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +8 -2
- package/dist/voice/index.d.ts +8 -2
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +19 -2
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/interruption_detection.test.cjs +114 -0
- package/dist/voice/interruption_detection.test.cjs.map +1 -0
- package/dist/voice/interruption_detection.test.js +113 -0
- package/dist/voice/interruption_detection.test.js.map +1 -0
- package/dist/voice/io.cjs +66 -6
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +67 -7
- package/dist/voice/io.d.ts +67 -7
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +62 -5
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/index.cjs +23 -0
- package/dist/voice/recorder_io/index.cjs.map +1 -0
- package/dist/voice/recorder_io/index.d.cts +2 -0
- package/dist/voice/recorder_io/index.d.ts +2 -0
- package/dist/voice/recorder_io/index.d.ts.map +1 -0
- package/dist/voice/recorder_io/index.js +2 -0
- package/dist/voice/recorder_io/index.js.map +1 -0
- package/dist/voice/recorder_io/recorder_io.cjs +607 -0
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
- package/dist/voice/recorder_io/recorder_io.d.cts +106 -0
- package/dist/voice/recorder_io/recorder_io.d.ts +106 -0
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
- package/dist/voice/recorder_io/recorder_io.js +573 -0
- package/dist/voice/recorder_io/recorder_io.js.map +1 -0
- package/dist/voice/remote_session.cjs +922 -0
- package/dist/voice/remote_session.cjs.map +1 -0
- package/dist/voice/remote_session.d.cts +108 -0
- package/dist/voice/remote_session.d.ts +108 -0
- package/dist/voice/remote_session.d.ts.map +1 -0
- package/dist/voice/remote_session.js +887 -0
- package/dist/voice/remote_session.js.map +1 -0
- package/dist/voice/report.cjs +88 -0
- package/dist/voice/report.cjs.map +1 -0
- package/dist/voice/report.d.cts +49 -0
- package/dist/voice/report.d.ts +49 -0
- package/dist/voice/report.d.ts.map +1 -0
- package/dist/voice/report.js +63 -0
- package/dist/voice/report.js.map +1 -0
- package/dist/voice/report.test.cjs +121 -0
- package/dist/voice/report.test.cjs.map +1 -0
- package/dist/voice/report.test.js +120 -0
- package/dist/voice/report.test.js.map +1 -0
- package/dist/voice/room_io/_input.cjs +40 -7
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.cts +5 -2
- package/dist/voice/room_io/_input.d.ts +5 -2
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +41 -8
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/_output.cjs +19 -11
- package/dist/voice/room_io/_output.cjs.map +1 -1
- package/dist/voice/room_io/_output.d.cts +7 -4
- package/dist/voice/room_io/_output.d.ts +7 -4
- package/dist/voice/room_io/_output.d.ts.map +1 -1
- package/dist/voice/room_io/_output.js +20 -12
- package/dist/voice/room_io/_output.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +33 -6
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +29 -9
- package/dist/voice/room_io/room_io.d.ts +29 -9
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +33 -7
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/speech_handle.cjs +22 -4
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +17 -2
- package/dist/voice/speech_handle.d.ts +17 -2
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +21 -4
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/fake_llm.cjs +127 -0
- package/dist/voice/testing/fake_llm.cjs.map +1 -0
- package/dist/voice/testing/fake_llm.d.cts +30 -0
- package/dist/voice/testing/fake_llm.d.ts +30 -0
- package/dist/voice/testing/fake_llm.d.ts.map +1 -0
- package/dist/voice/testing/fake_llm.js +103 -0
- package/dist/voice/testing/fake_llm.js.map +1 -0
- package/dist/voice/testing/index.cjs +57 -0
- package/dist/voice/testing/index.cjs.map +1 -0
- package/dist/voice/testing/index.d.cts +21 -0
- package/dist/voice/testing/index.d.ts +21 -0
- package/dist/voice/testing/index.d.ts.map +1 -0
- package/dist/voice/testing/index.js +35 -0
- package/dist/voice/testing/index.js.map +1 -0
- package/dist/voice/testing/run_result.cjs +817 -0
- package/dist/voice/testing/run_result.cjs.map +1 -0
- package/dist/voice/testing/run_result.d.cts +385 -0
- package/dist/voice/testing/run_result.d.ts +385 -0
- package/dist/voice/testing/run_result.d.ts.map +1 -0
- package/dist/voice/testing/run_result.js +790 -0
- package/dist/voice/testing/run_result.js.map +1 -0
- package/dist/voice/testing/types.cjs +46 -0
- package/dist/voice/testing/types.cjs.map +1 -0
- package/dist/voice/testing/types.d.cts +83 -0
- package/dist/voice/testing/types.d.ts +83 -0
- package/dist/voice/testing/types.d.ts.map +1 -0
- package/dist/voice/testing/types.js +19 -0
- package/dist/voice/testing/types.js.map +1 -0
- package/dist/voice/transcription/synchronizer.cjs +139 -15
- package/dist/voice/transcription/synchronizer.cjs.map +1 -1
- package/dist/voice/transcription/synchronizer.d.cts +35 -4
- package/dist/voice/transcription/synchronizer.d.ts +35 -4
- package/dist/voice/transcription/synchronizer.d.ts.map +1 -1
- package/dist/voice/transcription/synchronizer.js +143 -16
- package/dist/voice/transcription/synchronizer.js.map +1 -1
- package/dist/voice/transcription/synchronizer.test.cjs +151 -0
- package/dist/voice/transcription/synchronizer.test.cjs.map +1 -0
- package/dist/voice/transcription/synchronizer.test.js +150 -0
- package/dist/voice/transcription/synchronizer.test.js.map +1 -0
- package/dist/voice/turn_config/endpointing.cjs +33 -0
- package/dist/voice/turn_config/endpointing.cjs.map +1 -0
- package/dist/voice/turn_config/endpointing.d.cts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
- package/dist/voice/turn_config/endpointing.js +9 -0
- package/dist/voice/turn_config/endpointing.js.map +1 -0
- package/dist/voice/turn_config/interruption.cjs +37 -0
- package/dist/voice/turn_config/interruption.cjs.map +1 -0
- package/dist/voice/turn_config/interruption.d.cts +53 -0
- package/dist/voice/turn_config/interruption.d.ts +53 -0
- package/dist/voice/turn_config/interruption.d.ts.map +1 -0
- package/dist/voice/turn_config/interruption.js +13 -0
- package/dist/voice/turn_config/interruption.js.map +1 -0
- package/dist/voice/turn_config/turn_handling.cjs +35 -0
- package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
- package/dist/voice/turn_config/turn_handling.d.cts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
- package/dist/voice/turn_config/turn_handling.js +11 -0
- package/dist/voice/turn_config/turn_handling.js.map +1 -0
- package/dist/voice/turn_config/utils.cjs +157 -0
- package/dist/voice/turn_config/utils.cjs.map +1 -0
- package/dist/voice/turn_config/utils.d.cts +37 -0
- package/dist/voice/turn_config/utils.d.ts +37 -0
- package/dist/voice/turn_config/utils.d.ts.map +1 -0
- package/dist/voice/turn_config/utils.js +131 -0
- package/dist/voice/turn_config/utils.js.map +1 -0
- package/dist/voice/turn_config/utils.test.cjs +128 -0
- package/dist/voice/turn_config/utils.test.cjs.map +1 -0
- package/dist/voice/turn_config/utils.test.js +127 -0
- package/dist/voice/turn_config/utils.test.js.map +1 -0
- package/dist/voice/utils.cjs +47 -0
- package/dist/voice/utils.cjs.map +1 -0
- package/dist/voice/utils.d.cts +4 -0
- package/dist/voice/utils.d.ts +4 -0
- package/dist/voice/utils.d.ts.map +1 -0
- package/dist/voice/utils.js +23 -0
- package/dist/voice/utils.js.map +1 -0
- package/dist/worker.cjs +44 -52
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.cts +18 -8
- package/dist/worker.d.ts +18 -8
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +43 -43
- package/dist/worker.js.map +1 -1
- package/package.json +35 -13
- package/resources/NOTICE +2 -0
- package/resources/keyboard-typing.ogg +0 -0
- package/resources/keyboard-typing2.ogg +0 -0
- package/resources/office-ambience.ogg +0 -0
- package/src/_exceptions.ts +5 -0
- package/src/audio.ts +132 -1
- package/src/beta/index.ts +9 -0
- package/src/beta/workflows/index.ts +9 -0
- package/src/beta/workflows/task_group.ts +203 -0
- package/src/cli.ts +57 -66
- package/src/connection_pool.test.ts +346 -0
- package/src/connection_pool.ts +307 -0
- package/src/constants.ts +14 -0
- package/src/cpu.test.ts +239 -0
- package/src/cpu.ts +173 -0
- package/src/http_server.ts +18 -6
- package/src/index.ts +15 -13
- package/src/inference/api_protos.ts +85 -2
- package/src/inference/index.ts +32 -4
- package/src/inference/interruption/defaults.ts +51 -0
- package/src/inference/interruption/errors.ts +25 -0
- package/src/inference/interruption/http_transport.ts +207 -0
- package/src/inference/interruption/interruption_cache_entry.ts +50 -0
- package/src/inference/interruption/interruption_detector.ts +204 -0
- package/src/inference/interruption/interruption_stream.ts +467 -0
- package/src/inference/interruption/types.ts +84 -0
- package/src/inference/interruption/utils.test.ts +132 -0
- package/src/inference/interruption/utils.ts +137 -0
- package/src/inference/interruption/ws_transport.ts +416 -0
- package/src/inference/llm.ts +214 -163
- package/src/inference/stt.test.ts +253 -0
- package/src/inference/stt.ts +449 -208
- package/src/inference/tts.test.ts +354 -0
- package/src/inference/tts.ts +417 -115
- package/src/inference/utils.ts +30 -2
- package/src/ipc/inference_proc_executor.ts +11 -3
- package/src/ipc/inference_proc_lazy_main.ts +13 -1
- package/src/ipc/job_proc_executor.ts +11 -1
- package/src/ipc/job_proc_lazy_main.ts +86 -20
- package/src/ipc/supervised_proc.test.ts +153 -0
- package/src/ipc/supervised_proc.ts +39 -10
- package/src/job.ts +120 -1
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/llm/__snapshots__/zod-utils.test.ts.snap +559 -0
- package/src/llm/chat_context.test.ts +787 -0
- package/src/llm/chat_context.ts +493 -2
- package/src/llm/fallback_adapter.test.ts +238 -0
- package/src/llm/fallback_adapter.ts +394 -0
- package/src/llm/index.ts +13 -0
- package/src/llm/llm.ts +77 -12
- package/src/llm/provider_format/google.test.ts +72 -1
- package/src/llm/provider_format/google.ts +10 -6
- package/src/llm/provider_format/index.ts +7 -2
- package/src/llm/provider_format/openai.test.ts +480 -2
- package/src/llm/provider_format/openai.ts +152 -21
- package/src/llm/provider_format/utils.ts +11 -5
- package/src/llm/realtime.ts +23 -2
- package/src/llm/remote_chat_context.ts +2 -2
- package/src/llm/tool_context.test.ts +210 -1
- package/src/llm/tool_context.ts +115 -17
- package/src/llm/utils.test.ts +103 -2
- package/src/llm/utils.ts +152 -16
- package/src/llm/zod-utils.test.ts +577 -0
- package/src/llm/zod-utils.ts +153 -0
- package/src/log.ts +71 -19
- package/src/metrics/base.ts +78 -19
- package/src/metrics/index.ts +12 -0
- package/src/metrics/model_usage.test.ts +545 -0
- package/src/metrics/model_usage.ts +262 -0
- package/src/metrics/usage_collector.ts +14 -3
- package/src/metrics/utils.ts +27 -7
- package/src/stream/deferred_stream.test.ts +3 -3
- package/src/stream/deferred_stream.ts +43 -11
- package/src/stream/index.ts +1 -0
- package/src/stream/multi_input_stream.test.ts +545 -0
- package/src/stream/multi_input_stream.ts +172 -0
- package/src/stream/stream_channel.test.ts +37 -0
- package/src/stream/stream_channel.ts +43 -3
- package/src/stt/stream_adapter.ts +30 -9
- package/src/stt/stt.ts +140 -23
- package/src/telemetry/index.ts +28 -0
- package/src/telemetry/logging.ts +55 -0
- package/src/telemetry/otel_http_exporter.ts +218 -0
- package/src/telemetry/pino_otel_transport.ts +265 -0
- package/src/telemetry/trace_types.ts +109 -0
- package/src/telemetry/traces.ts +673 -0
- package/src/telemetry/utils.ts +61 -0
- package/src/tokenize/basic/sentence.ts +3 -3
- package/src/tokenize/tokenizer.test.ts +4 -0
- package/src/transcription.ts +6 -0
- package/src/tts/fallback_adapter.ts +586 -0
- package/src/tts/index.ts +1 -0
- package/src/tts/stream_adapter.ts +38 -8
- package/src/tts/tts.ts +245 -62
- package/src/typed_promise.ts +67 -0
- package/src/types.ts +62 -33
- package/src/utils.test.ts +90 -10
- package/src/utils.ts +178 -33
- package/src/vad.ts +42 -18
- package/src/version.ts +1 -1
- package/src/voice/agent.test.ts +347 -2
- package/src/voice/agent.ts +346 -44
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +1457 -388
- package/src/voice/agent_session.ts +817 -112
- package/src/voice/audio_recognition.ts +845 -70
- package/src/voice/audio_recognition_span.test.ts +341 -0
- package/src/voice/avatar/datastream_io.ts +9 -1
- package/src/voice/background_audio.ts +494 -0
- package/src/voice/events.ts +27 -7
- package/src/voice/generation.ts +310 -56
- package/src/voice/generation_tools.test.ts +268 -0
- package/src/voice/index.ts +17 -3
- package/src/voice/interruption_detection.test.ts +151 -0
- package/src/voice/io.ts +115 -12
- package/src/voice/recorder_io/index.ts +4 -0
- package/src/voice/recorder_io/recorder_io.ts +783 -0
- package/src/voice/remote_session.ts +1083 -0
- package/src/voice/report.test.ts +136 -0
- package/src/voice/report.ts +140 -0
- package/src/voice/room_io/_input.ts +45 -10
- package/src/voice/room_io/_output.ts +26 -14
- package/src/voice/room_io/room_io.ts +67 -22
- package/src/voice/speech_handle.ts +38 -6
- package/src/voice/testing/fake_llm.ts +138 -0
- package/src/voice/testing/index.ts +52 -0
- package/src/voice/testing/run_result.ts +995 -0
- package/src/voice/testing/types.ts +118 -0
- package/src/voice/transcription/synchronizer.test.ts +206 -0
- package/src/voice/transcription/synchronizer.ts +204 -19
- package/src/voice/turn_config/endpointing.ts +33 -0
- package/src/voice/turn_config/interruption.ts +56 -0
- package/src/voice/turn_config/turn_handling.ts +45 -0
- package/src/voice/turn_config/utils.test.ts +148 -0
- package/src/voice/turn_config/utils.ts +167 -0
- package/src/voice/utils.ts +29 -0
- package/src/worker.ts +92 -78
- package/src/llm/__snapshots__/utils.test.ts.snap +0 -65
|
@@ -1,31 +1,48 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { Mutex } from '@livekit/mutex';
|
|
4
5
|
import type { AudioFrame, Room } from '@livekit/rtc-node';
|
|
5
6
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
7
|
+
import type { Context, Span } from '@opentelemetry/api';
|
|
8
|
+
import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
|
|
6
9
|
import { EventEmitter } from 'node:events';
|
|
7
10
|
import type { ReadableStream } from 'node:stream/web';
|
|
11
|
+
import type { z } from 'zod';
|
|
8
12
|
import {
|
|
9
13
|
LLM as InferenceLLM,
|
|
10
14
|
STT as InferenceSTT,
|
|
11
15
|
TTS as InferenceTTS,
|
|
12
16
|
type LLMModels,
|
|
13
|
-
type
|
|
14
|
-
type
|
|
17
|
+
type STTModelString,
|
|
18
|
+
type TTSModelString,
|
|
15
19
|
} from '../inference/index.js';
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
20
|
+
import type { InterruptionDetectionError } from '../inference/interruption/errors.js';
|
|
21
|
+
import type { OverlappingSpeechEvent } from '../inference/interruption/types.js';
|
|
22
|
+
import { type JobContext, getJobContext } from '../job.js';
|
|
23
|
+
import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
|
|
24
|
+
import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
18
25
|
import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
|
|
19
26
|
import type { LLMError } from '../llm/llm.js';
|
|
20
27
|
import { log } from '../log.js';
|
|
28
|
+
import { type ModelUsage, ModelUsageCollector, filterZeroValues } from '../metrics/model_usage.js';
|
|
21
29
|
import type { STT } from '../stt/index.js';
|
|
22
30
|
import type { STTError } from '../stt/stt.js';
|
|
31
|
+
import { traceTypes, tracer } from '../telemetry/index.js';
|
|
23
32
|
import type { TTS, TTSError } from '../tts/tts.js';
|
|
33
|
+
import {
|
|
34
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
|
35
|
+
DEFAULT_SESSION_CONNECT_OPTIONS,
|
|
36
|
+
type ResolvedSessionConnectOptions,
|
|
37
|
+
type SessionConnectOptions,
|
|
38
|
+
} from '../types.js';
|
|
39
|
+
import { Task } from '../utils.js';
|
|
24
40
|
import type { VAD } from '../vad.js';
|
|
25
41
|
import type { Agent } from './agent.js';
|
|
26
42
|
import { AgentActivity } from './agent_activity.js';
|
|
27
43
|
import type { _TurnDetector } from './audio_recognition.js';
|
|
28
44
|
import {
|
|
45
|
+
type AgentEvent,
|
|
29
46
|
AgentSessionEventTypes,
|
|
30
47
|
type AgentState,
|
|
31
48
|
type AgentStateChangedEvent,
|
|
@@ -35,6 +52,7 @@ import {
|
|
|
35
52
|
type ErrorEvent,
|
|
36
53
|
type FunctionToolsExecutedEvent,
|
|
37
54
|
type MetricsCollectedEvent,
|
|
55
|
+
type ShutdownReason,
|
|
38
56
|
type SpeechCreatedEvent,
|
|
39
57
|
type UserInputTranscribedEvent,
|
|
40
58
|
type UserState,
|
|
@@ -45,29 +63,64 @@ import {
|
|
|
45
63
|
createUserStateChangedEvent,
|
|
46
64
|
} from './events.js';
|
|
47
65
|
import { AgentInput, AgentOutput } from './io.js';
|
|
48
|
-
import {
|
|
66
|
+
import { RecorderIO } from './recorder_io/index.js';
|
|
67
|
+
import { RoomSessionTransport, SessionHost } from './remote_session.js';
|
|
68
|
+
import {
|
|
69
|
+
DEFAULT_TEXT_INPUT_CALLBACK,
|
|
70
|
+
RoomIO,
|
|
71
|
+
type RoomInputOptions,
|
|
72
|
+
type RoomOutputOptions,
|
|
73
|
+
} from './room_io/index.js';
|
|
49
74
|
import type { UnknownUserData } from './run_context.js';
|
|
50
75
|
import type { SpeechHandle } from './speech_handle.js';
|
|
76
|
+
import { RunResult } from './testing/run_result.js';
|
|
77
|
+
import type { InterruptionOptions } from './turn_config/interruption.js';
|
|
78
|
+
import type {
|
|
79
|
+
InternalTurnHandlingOptions,
|
|
80
|
+
TurnHandlingOptions,
|
|
81
|
+
} from './turn_config/turn_handling.js';
|
|
82
|
+
import { migrateLegacyOptions } from './turn_config/utils.js';
|
|
83
|
+
import { setParticipantSpanAttributes } from './utils.js';
|
|
84
|
+
|
|
85
|
+
export interface AgentSessionUsage {
|
|
86
|
+
/** List of usage summaries, one per model/provider combination. */
|
|
87
|
+
modelUsage: Array<Partial<ModelUsage>>;
|
|
88
|
+
}
|
|
51
89
|
|
|
52
|
-
export interface
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
minInterruptionDuration: number;
|
|
56
|
-
minInterruptionWords: number;
|
|
57
|
-
minEndpointingDelay: number;
|
|
58
|
-
maxEndpointingDelay: number;
|
|
90
|
+
export interface InternalSessionOptions<UserData> extends AgentSessionOptions<UserData> {
|
|
91
|
+
turnHandling: InternalTurnHandlingOptions;
|
|
92
|
+
useTtsAlignedTranscript: boolean;
|
|
59
93
|
maxToolSteps: number;
|
|
94
|
+
userAwayTimeout: number | null;
|
|
60
95
|
}
|
|
61
96
|
|
|
62
|
-
const
|
|
63
|
-
allowInterruptions: true,
|
|
64
|
-
discardAudioIfUninterruptible: true,
|
|
65
|
-
minInterruptionDuration: 500,
|
|
66
|
-
minInterruptionWords: 0,
|
|
67
|
-
minEndpointingDelay: 500,
|
|
68
|
-
maxEndpointingDelay: 6000,
|
|
97
|
+
export const defaultAgentSessionOptions = {
|
|
69
98
|
maxToolSteps: 3,
|
|
70
|
-
|
|
99
|
+
preemptiveGeneration: true,
|
|
100
|
+
userAwayTimeout: 15.0,
|
|
101
|
+
aecWarmupDuration: 3000,
|
|
102
|
+
turnHandling: {},
|
|
103
|
+
useTtsAlignedTranscript: true,
|
|
104
|
+
} as const satisfies AgentSessionOptions;
|
|
105
|
+
|
|
106
|
+
/** @deprecated {@link VoiceOptions} has been flattened onto {@link AgentSessionOptions} */
|
|
107
|
+
export type VoiceOptions = {
|
|
108
|
+
maxToolSteps: number;
|
|
109
|
+
preemptiveGeneration: boolean;
|
|
110
|
+
userAwayTimeout?: number | null;
|
|
111
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.mode instead. */
|
|
112
|
+
allowInterruptions?: boolean;
|
|
113
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.discardAudioIfUninterruptible instead. */
|
|
114
|
+
discardAudioIfUninterruptible?: boolean;
|
|
115
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.minDuration instead. */
|
|
116
|
+
minInterruptionDuration?: number;
|
|
117
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.minWords instead. */
|
|
118
|
+
minInterruptionWords?: number;
|
|
119
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.endpointing.minDelay instead. */
|
|
120
|
+
minEndpointingDelay?: number;
|
|
121
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.endpointing.maxDelay instead. */
|
|
122
|
+
maxEndpointingDelay?: number;
|
|
123
|
+
};
|
|
71
124
|
|
|
72
125
|
export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;
|
|
73
126
|
|
|
@@ -81,16 +134,60 @@ export type AgentSessionCallbacks = {
|
|
|
81
134
|
[AgentSessionEventTypes.SpeechCreated]: (ev: SpeechCreatedEvent) => void;
|
|
82
135
|
[AgentSessionEventTypes.Error]: (ev: ErrorEvent) => void;
|
|
83
136
|
[AgentSessionEventTypes.Close]: (ev: CloseEvent) => void;
|
|
137
|
+
[AgentSessionEventTypes.OverlappingSpeech]: (ev: OverlappingSpeechEvent) => void;
|
|
84
138
|
};
|
|
85
139
|
|
|
86
140
|
export type AgentSessionOptions<UserData = UnknownUserData> = {
|
|
87
|
-
|
|
88
|
-
stt?: STT | STTModels;
|
|
141
|
+
stt?: STT | STTModelString;
|
|
89
142
|
vad?: VAD;
|
|
90
143
|
llm?: LLM | RealtimeModel | LLMModels;
|
|
91
|
-
tts?: TTS |
|
|
144
|
+
tts?: TTS | TTSModelString;
|
|
92
145
|
userData?: UserData;
|
|
146
|
+
connOptions?: SessionConnectOptions;
|
|
147
|
+
|
|
148
|
+
/** @deprecated use turnHandling.turnDetection instead */
|
|
149
|
+
turnDetection?: TurnDetectionMode;
|
|
150
|
+
/** @deprecated use the top-level {@link AgentSessionOptions} fields instead */
|
|
93
151
|
voiceOptions?: Partial<VoiceOptions>;
|
|
152
|
+
|
|
153
|
+
maxToolSteps?: number;
|
|
154
|
+
/**
|
|
155
|
+
* Whether to speculatively begin LLM and TTS requests before an end-of-turn is detected.
|
|
156
|
+
* When `true`, the agent sends inference calls as soon as a user transcript is received rather
|
|
157
|
+
* than waiting for a definitive turn boundary. This can reduce response latency by overlapping
|
|
158
|
+
* model inference with user audio, but may incur extra compute if the user interrupts or
|
|
159
|
+
* revises mid-utterance.
|
|
160
|
+
* @defaultValue true
|
|
161
|
+
*/
|
|
162
|
+
preemptiveGeneration?: boolean;
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* If set, set the user state as "away" after this amount of time after user and agent are
|
|
166
|
+
* silent. Set to `null` to disable.
|
|
167
|
+
* @defaultValue 15.0
|
|
168
|
+
*/
|
|
169
|
+
userAwayTimeout?: number | null;
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Duration in milliseconds for AEC (Acoustic Echo Cancellation) warmup, during which
|
|
173
|
+
* interruptions from audio activity are suppressed. Set to `null` to disable.
|
|
174
|
+
* @defaultValue 3000
|
|
175
|
+
*/
|
|
176
|
+
aecWarmupDuration?: number | null;
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Configuration for turn handling.
|
|
180
|
+
*/
|
|
181
|
+
turnHandling?: Partial<TurnHandlingOptions>;
|
|
182
|
+
|
|
183
|
+
useTtsAlignedTranscript?: boolean;
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
type ActivityTransitionOptions = {
|
|
187
|
+
previousActivity?: 'close' | 'pause';
|
|
188
|
+
newActivity?: 'start' | 'resume';
|
|
189
|
+
blockedTasks?: Task<any>[];
|
|
190
|
+
waitOnEnter?: boolean;
|
|
94
191
|
};
|
|
95
192
|
|
|
96
193
|
export class AgentSession<
|
|
@@ -102,59 +199,116 @@ export class AgentSession<
|
|
|
102
199
|
tts?: TTS;
|
|
103
200
|
turnDetection?: TurnDetectionMode;
|
|
104
201
|
|
|
202
|
+
/** @deprecated use {@link sessionOptions} instead */
|
|
105
203
|
readonly options: VoiceOptions;
|
|
106
204
|
|
|
205
|
+
readonly sessionOptions: InternalSessionOptions<UserData>;
|
|
206
|
+
|
|
207
|
+
private readonly activityLock = new Mutex();
|
|
208
|
+
|
|
107
209
|
private agent?: Agent;
|
|
108
210
|
private activity?: AgentActivity;
|
|
109
211
|
private nextActivity?: AgentActivity;
|
|
212
|
+
private updateActivityTask?: Task<void>;
|
|
110
213
|
private started = false;
|
|
111
|
-
private
|
|
112
|
-
|
|
113
|
-
private roomIO?: RoomIO;
|
|
114
|
-
private logger = log();
|
|
214
|
+
private sessionHost?: SessionHost;
|
|
115
215
|
|
|
116
216
|
private _chatCtx: ChatContext;
|
|
117
217
|
private _userData: UserData | undefined;
|
|
218
|
+
private _userState: UserState = 'listening';
|
|
118
219
|
private _agentState: AgentState = 'initializing';
|
|
119
220
|
|
|
120
221
|
private _input: AgentInput;
|
|
121
222
|
private _output: AgentOutput;
|
|
122
223
|
|
|
123
224
|
private closingTask: Promise<void> | null = null;
|
|
225
|
+
private userAwayTimer: NodeJS.Timeout | null = null;
|
|
226
|
+
|
|
227
|
+
private _aecWarmupTimer: NodeJS.Timeout | null = null;
|
|
228
|
+
|
|
229
|
+
// Connection options for STT, LLM, and TTS
|
|
230
|
+
private _connOptions: ResolvedSessionConnectOptions;
|
|
231
|
+
|
|
232
|
+
// Unrecoverable error counts, reset after agent speaking
|
|
233
|
+
private llmErrorCounts = 0;
|
|
234
|
+
private ttsErrorCounts = 0;
|
|
235
|
+
|
|
236
|
+
private sessionSpan?: Span;
|
|
237
|
+
private agentSpeakingSpan?: Span;
|
|
238
|
+
|
|
239
|
+
private _interruptionDetection?: InterruptionOptions['mode'];
|
|
240
|
+
|
|
241
|
+
/** @internal */
|
|
242
|
+
_usageCollector: ModelUsageCollector = new ModelUsageCollector();
|
|
243
|
+
|
|
244
|
+
/** @internal */
|
|
245
|
+
_roomIO?: RoomIO;
|
|
246
|
+
|
|
247
|
+
/** @internal */
|
|
248
|
+
_aecWarmupRemaining = 0;
|
|
249
|
+
|
|
250
|
+
/** @internal */
|
|
251
|
+
_recorderIO?: RecorderIO;
|
|
252
|
+
|
|
253
|
+
/** @internal */
|
|
254
|
+
rootSpanContext?: Context;
|
|
255
|
+
|
|
256
|
+
/** @internal */
|
|
257
|
+
_recordedEvents: AgentEvent[] = [];
|
|
258
|
+
|
|
259
|
+
/** @internal */
|
|
260
|
+
_enableRecording = false;
|
|
261
|
+
|
|
262
|
+
/** @internal - Timestamp when the session started (milliseconds) */
|
|
263
|
+
_startedAt?: number;
|
|
264
|
+
|
|
265
|
+
/** @internal - Current run state for testing */
|
|
266
|
+
_globalRunState?: RunResult;
|
|
267
|
+
|
|
268
|
+
/** @internal */
|
|
269
|
+
_userSpeakingSpan?: Span;
|
|
270
|
+
|
|
271
|
+
private logger = log();
|
|
124
272
|
|
|
125
|
-
constructor(
|
|
273
|
+
constructor(options: AgentSessionOptions<UserData>) {
|
|
126
274
|
super();
|
|
127
275
|
|
|
128
|
-
const {
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
276
|
+
const { agentSessionOptions: opts, legacyVoiceOptions } =
|
|
277
|
+
migrateLegacyOptions<UserData>(options);
|
|
278
|
+
|
|
279
|
+
const { vad, stt, llm, tts, userData, connOptions, ...resolvedSessionOptions } = opts;
|
|
280
|
+
// Merge user-provided connOptions with defaults
|
|
281
|
+
this._connOptions = {
|
|
282
|
+
sttConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.sttConnOptions },
|
|
283
|
+
llmConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.llmConnOptions },
|
|
284
|
+
ttsConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.ttsConnOptions },
|
|
285
|
+
maxUnrecoverableErrors:
|
|
286
|
+
connOptions?.maxUnrecoverableErrors ??
|
|
287
|
+
DEFAULT_SESSION_CONNECT_OPTIONS.maxUnrecoverableErrors,
|
|
288
|
+
};
|
|
137
289
|
|
|
138
290
|
this.vad = vad;
|
|
139
291
|
|
|
140
292
|
if (typeof stt === 'string') {
|
|
141
|
-
this.stt =
|
|
293
|
+
this.stt = InferenceSTT.fromModelString(stt);
|
|
142
294
|
} else {
|
|
143
295
|
this.stt = stt;
|
|
144
296
|
}
|
|
145
297
|
|
|
146
298
|
if (typeof llm === 'string') {
|
|
147
|
-
this.llm =
|
|
299
|
+
this.llm = InferenceLLM.fromModelString(llm);
|
|
148
300
|
} else {
|
|
149
301
|
this.llm = llm;
|
|
150
302
|
}
|
|
151
303
|
|
|
152
304
|
if (typeof tts === 'string') {
|
|
153
|
-
this.tts =
|
|
305
|
+
this.tts = InferenceTTS.fromModelString(tts);
|
|
154
306
|
} else {
|
|
155
307
|
this.tts = tts;
|
|
156
308
|
}
|
|
157
|
-
|
|
309
|
+
|
|
310
|
+
this.turnDetection = resolvedSessionOptions.turnHandling.turnDetection;
|
|
311
|
+
this._interruptionDetection = resolvedSessionOptions.turnHandling.interruption?.mode;
|
|
158
312
|
this._userData = userData;
|
|
159
313
|
|
|
160
314
|
// configurable IO
|
|
@@ -163,7 +317,21 @@ export class AgentSession<
|
|
|
163
317
|
|
|
164
318
|
// This is the "global" chat context, it holds the entire conversation history
|
|
165
319
|
this._chatCtx = ChatContext.empty();
|
|
166
|
-
this.
|
|
320
|
+
this.sessionOptions = resolvedSessionOptions;
|
|
321
|
+
this.options = legacyVoiceOptions;
|
|
322
|
+
this._aecWarmupRemaining = this.sessionOptions.aecWarmupDuration ?? 0;
|
|
323
|
+
|
|
324
|
+
this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);
|
|
325
|
+
this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
emit<K extends keyof AgentSessionCallbacks>(
|
|
329
|
+
event: K,
|
|
330
|
+
...args: Parameters<AgentSessionCallbacks[K]>
|
|
331
|
+
): boolean {
|
|
332
|
+
const eventData = args[0] as AgentEvent;
|
|
333
|
+
this._recordedEvents.push(eventData);
|
|
334
|
+
return super.emit(event, ...args);
|
|
167
335
|
}
|
|
168
336
|
|
|
169
337
|
get input(): AgentInput {
|
|
@@ -186,64 +354,134 @@ export class AgentSession<
|
|
|
186
354
|
return this._chatCtx;
|
|
187
355
|
}
|
|
188
356
|
|
|
357
|
+
/** Connection options for STT, LLM, and TTS. */
|
|
358
|
+
get connOptions(): ResolvedSessionConnectOptions {
|
|
359
|
+
return this._connOptions;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
get interruptionDetection() {
|
|
363
|
+
return this._interruptionDetection;
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
/**
|
|
367
|
+
* Returns usage summaries for this session, one per model/provider combination.
|
|
368
|
+
*/
|
|
369
|
+
get usage(): AgentSessionUsage {
|
|
370
|
+
// Skip zero fields for more concise usage display (matches python behavior).
|
|
371
|
+
return { modelUsage: this._usageCollector.flatten().map(filterZeroValues) };
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
get useTtsAlignedTranscript(): boolean {
|
|
375
|
+
return this.sessionOptions.useTtsAlignedTranscript;
|
|
376
|
+
}
|
|
377
|
+
|
|
189
378
|
set userData(value: UserData) {
|
|
190
379
|
this._userData = value;
|
|
191
380
|
}
|
|
192
381
|
|
|
193
|
-
async
|
|
382
|
+
private async _startImpl({
|
|
194
383
|
agent,
|
|
195
384
|
room,
|
|
196
385
|
inputOptions,
|
|
197
386
|
outputOptions,
|
|
387
|
+
span,
|
|
198
388
|
}: {
|
|
199
389
|
agent: Agent;
|
|
200
|
-
room
|
|
390
|
+
room?: Room;
|
|
201
391
|
inputOptions?: Partial<RoomInputOptions>;
|
|
202
392
|
outputOptions?: Partial<RoomOutputOptions>;
|
|
393
|
+
span: Span;
|
|
203
394
|
}): Promise<void> {
|
|
204
|
-
|
|
205
|
-
return;
|
|
206
|
-
}
|
|
395
|
+
span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);
|
|
207
396
|
|
|
208
397
|
this.agent = agent;
|
|
209
398
|
this._updateAgentState('initializing');
|
|
210
399
|
|
|
211
400
|
const tasks: Promise<void>[] = [];
|
|
212
|
-
// Check for existing input/output configuration and warn if needed
|
|
213
|
-
if (this.input.audio && inputOptions?.audioEnabled !== false) {
|
|
214
|
-
this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');
|
|
215
|
-
}
|
|
216
401
|
|
|
217
|
-
if (
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
402
|
+
if (room && !this._roomIO) {
|
|
403
|
+
// Check for existing input/output configuration and warn if needed
|
|
404
|
+
if (this.input.audio && inputOptions?.audioEnabled !== false) {
|
|
405
|
+
this.logger.warn(
|
|
406
|
+
'RoomIO audio input is enabled but input.audio is already set, ignoring..',
|
|
407
|
+
);
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
if (this.output.audio && outputOptions?.audioEnabled !== false) {
|
|
411
|
+
this.logger.warn(
|
|
412
|
+
'RoomIO audio output is enabled but output.audio is already set, ignoring..',
|
|
413
|
+
);
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
|
|
417
|
+
this.logger.warn(
|
|
418
|
+
'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
|
|
419
|
+
);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
this._roomIO = new RoomIO({
|
|
423
|
+
agentSession: this,
|
|
424
|
+
room,
|
|
425
|
+
inputOptions,
|
|
426
|
+
outputOptions,
|
|
427
|
+
});
|
|
428
|
+
|
|
429
|
+
this._roomIO.start();
|
|
430
|
+
|
|
431
|
+
const transport = new RoomSessionTransport(room, this._roomIO);
|
|
432
|
+
this.sessionHost = new SessionHost(transport);
|
|
433
|
+
this.sessionHost.registerSession(this);
|
|
434
|
+
if (inputOptions?.textEnabled !== false) {
|
|
435
|
+
this.sessionHost.registerTextInput(
|
|
436
|
+
inputOptions?.textInputCallback ?? DEFAULT_TEXT_INPUT_CALLBACK,
|
|
437
|
+
);
|
|
438
|
+
}
|
|
221
439
|
}
|
|
222
440
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
441
|
+
let ctx: JobContext | undefined = undefined;
|
|
442
|
+
try {
|
|
443
|
+
ctx = getJobContext();
|
|
444
|
+
} catch {
|
|
445
|
+
// JobContext is not available in evals
|
|
227
446
|
}
|
|
228
447
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
448
|
+
if (ctx) {
|
|
449
|
+
if (room && ctx.room === room && !room.isConnected) {
|
|
450
|
+
this.logger.debug('Auto-connecting to room via job context');
|
|
451
|
+
tasks.push(ctx.connect());
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
if (ctx._primaryAgentSession === undefined) {
|
|
455
|
+
ctx._primaryAgentSession = this;
|
|
456
|
+
} else if (this._enableRecording) {
|
|
457
|
+
throw new Error(
|
|
458
|
+
'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use `session.start({ record: false })`.',
|
|
459
|
+
);
|
|
460
|
+
}
|
|
236
461
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
462
|
+
if (this.input.audio && this.output.audio && this._enableRecording) {
|
|
463
|
+
this._recorderIO = new RecorderIO({ agentSession: this });
|
|
464
|
+
this.input.audio = this._recorderIO.recordInput(this.input.audio);
|
|
465
|
+
this.output.audio = this._recorderIO.recordOutput(this.output.audio);
|
|
466
|
+
|
|
467
|
+
// Start recording to session directory
|
|
468
|
+
const sessionDir = ctx.sessionDirectory;
|
|
469
|
+
if (sessionDir) {
|
|
470
|
+
tasks.push(this._recorderIO.start(`${sessionDir}/audio.ogg`));
|
|
471
|
+
}
|
|
472
|
+
}
|
|
241
473
|
}
|
|
474
|
+
|
|
242
475
|
// TODO(AJS-265): add shutdown callback to job context
|
|
243
|
-
|
|
476
|
+
// Initial start does not wait on onEnter
|
|
477
|
+
tasks.push(this._updateActivity(this.agent, { waitOnEnter: false }));
|
|
244
478
|
|
|
245
479
|
await Promise.allSettled(tasks);
|
|
246
480
|
|
|
481
|
+
if (this.sessionHost) {
|
|
482
|
+
await this.sessionHost.start();
|
|
483
|
+
}
|
|
484
|
+
|
|
247
485
|
// Log used IO configuration
|
|
248
486
|
this.logger.debug(
|
|
249
487
|
`using audio io: ${this.input.audio ? '`' + this.input.audio.constructor.name + '`' : '(none)'} -> \`AgentSession\` -> ${this.output.audio ? '`' + this.output.audio.constructor.name + '`' : '(none)'}`,
|
|
@@ -254,14 +492,94 @@ export class AgentSession<
|
|
|
254
492
|
);
|
|
255
493
|
|
|
256
494
|
this.started = true;
|
|
495
|
+
this._startedAt = Date.now();
|
|
257
496
|
this._updateAgentState('listening');
|
|
258
497
|
}
|
|
259
498
|
|
|
499
|
+
async start({
|
|
500
|
+
agent,
|
|
501
|
+
room,
|
|
502
|
+
inputOptions,
|
|
503
|
+
outputOptions,
|
|
504
|
+
record,
|
|
505
|
+
}: {
|
|
506
|
+
agent: Agent;
|
|
507
|
+
room?: Room;
|
|
508
|
+
inputOptions?: Partial<RoomInputOptions>;
|
|
509
|
+
outputOptions?: Partial<RoomOutputOptions>;
|
|
510
|
+
record?: boolean;
|
|
511
|
+
}): Promise<void> {
|
|
512
|
+
if (this.started) {
|
|
513
|
+
return;
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
this._usageCollector = new ModelUsageCollector();
|
|
517
|
+
|
|
518
|
+
let ctx: JobContext | undefined = undefined;
|
|
519
|
+
try {
|
|
520
|
+
ctx = getJobContext();
|
|
521
|
+
|
|
522
|
+
if (record === undefined) {
|
|
523
|
+
record = ctx.job.enableRecording;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
this._enableRecording = record;
|
|
527
|
+
|
|
528
|
+
if (this._enableRecording) {
|
|
529
|
+
ctx.initRecording();
|
|
530
|
+
}
|
|
531
|
+
} catch (error) {
|
|
532
|
+
// JobContext is not available in evals
|
|
533
|
+
this.logger.warn('JobContext is not available');
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
this.sessionSpan = tracer.startSpan({
|
|
537
|
+
name: 'agent_session',
|
|
538
|
+
context: ROOT_CONTEXT,
|
|
539
|
+
});
|
|
540
|
+
|
|
541
|
+
this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);
|
|
542
|
+
|
|
543
|
+
await this._startImpl({
|
|
544
|
+
agent,
|
|
545
|
+
room,
|
|
546
|
+
inputOptions,
|
|
547
|
+
outputOptions,
|
|
548
|
+
span: this.sessionSpan,
|
|
549
|
+
});
|
|
550
|
+
}
|
|
551
|
+
|
|
260
552
|
updateAgent(agent: Agent): void {
|
|
261
553
|
this.agent = agent;
|
|
262
554
|
|
|
263
|
-
if (this.started) {
|
|
264
|
-
|
|
555
|
+
if (!this.started) {
|
|
556
|
+
return;
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
const _updateActivityTask = async (oldTask: Task<void> | undefined, agent: Agent) => {
|
|
560
|
+
if (oldTask) {
|
|
561
|
+
try {
|
|
562
|
+
await oldTask.result;
|
|
563
|
+
} catch (error) {
|
|
564
|
+
this.logger.error(error, 'previous updateAgent transition failed');
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
await this._updateActivity(agent);
|
|
569
|
+
};
|
|
570
|
+
|
|
571
|
+
const oldTask = this.updateActivityTask;
|
|
572
|
+
this.updateActivityTask = Task.from(
|
|
573
|
+
async () => _updateActivityTask(oldTask, agent),
|
|
574
|
+
undefined,
|
|
575
|
+
'AgentSession_updateActivityTask',
|
|
576
|
+
);
|
|
577
|
+
|
|
578
|
+
const runState = this._globalRunState;
|
|
579
|
+
if (runState) {
|
|
580
|
+
// Don't mark the RunResult as done, if there is currently an agent transition happening.
|
|
581
|
+
// (used to make sure we're correctly adding the AgentHandoffResult before completion)
|
|
582
|
+
runState._watchHandle(this.updateActivityTask);
|
|
265
583
|
}
|
|
266
584
|
}
|
|
267
585
|
|
|
@@ -292,14 +610,42 @@ export class AgentSession<
|
|
|
292
610
|
throw new Error('AgentSession is not running');
|
|
293
611
|
}
|
|
294
612
|
|
|
295
|
-
|
|
613
|
+
const doSay = (activity: AgentActivity, nextActivity?: AgentActivity) => {
|
|
614
|
+
if (activity.schedulingPaused) {
|
|
615
|
+
if (!nextActivity) {
|
|
616
|
+
throw new Error('AgentSession is closing, cannot use say()');
|
|
617
|
+
}
|
|
618
|
+
return nextActivity.say(text, options);
|
|
619
|
+
}
|
|
620
|
+
return activity.say(text, options);
|
|
621
|
+
};
|
|
622
|
+
|
|
623
|
+
const runState = this._globalRunState;
|
|
624
|
+
let handle: SpeechHandle;
|
|
625
|
+
|
|
626
|
+
// attach to the session span if called outside of the AgentSession
|
|
627
|
+
const activeSpan = trace.getActiveSpan();
|
|
628
|
+
if (!activeSpan && this.rootSpanContext) {
|
|
629
|
+
handle = otelContext.with(this.rootSpanContext, () =>
|
|
630
|
+
doSay(this.activity!, this.nextActivity),
|
|
631
|
+
);
|
|
632
|
+
} else {
|
|
633
|
+
handle = doSay(this.activity, this.nextActivity);
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
if (runState) {
|
|
637
|
+
runState._watchHandle(handle);
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
return handle;
|
|
296
641
|
}
|
|
297
642
|
|
|
298
|
-
interrupt() {
|
|
643
|
+
interrupt(options?: { force?: boolean }) {
|
|
299
644
|
if (!this.activity) {
|
|
300
645
|
throw new Error('AgentSession is not running');
|
|
301
646
|
}
|
|
302
|
-
|
|
647
|
+
|
|
648
|
+
return this.activity.interrupt(options);
|
|
303
649
|
}
|
|
304
650
|
|
|
305
651
|
generateReply(options?: {
|
|
@@ -319,33 +665,178 @@ export class AgentSession<
|
|
|
319
665
|
})
|
|
320
666
|
: undefined;
|
|
321
667
|
|
|
322
|
-
|
|
323
|
-
if (
|
|
324
|
-
|
|
668
|
+
const doGenerateReply = (activity: AgentActivity, nextActivity?: AgentActivity) => {
|
|
669
|
+
if (activity.schedulingPaused) {
|
|
670
|
+
if (!nextActivity) {
|
|
671
|
+
throw new Error('AgentSession is closing, cannot use generateReply()');
|
|
672
|
+
}
|
|
673
|
+
return nextActivity.generateReply({ userMessage, ...options });
|
|
325
674
|
}
|
|
326
|
-
return
|
|
675
|
+
return activity.generateReply({ userMessage, ...options });
|
|
676
|
+
};
|
|
677
|
+
|
|
678
|
+
// attach to the session span if called outside of the AgentSession
|
|
679
|
+
const activeSpan = trace.getActiveSpan();
|
|
680
|
+
let handle: SpeechHandle;
|
|
681
|
+
if (!activeSpan && this.rootSpanContext) {
|
|
682
|
+
handle = otelContext.with(this.rootSpanContext, () =>
|
|
683
|
+
doGenerateReply(this.activity!, this.nextActivity),
|
|
684
|
+
);
|
|
685
|
+
} else {
|
|
686
|
+
handle = doGenerateReply(this.activity!, this.nextActivity);
|
|
327
687
|
}
|
|
328
688
|
|
|
329
|
-
|
|
330
|
-
|
|
689
|
+
if (this._globalRunState) {
|
|
690
|
+
this._globalRunState._watchHandle(handle);
|
|
691
|
+
}
|
|
331
692
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
this.nextActivity = new AgentActivity(agent, this);
|
|
693
|
+
return handle;
|
|
694
|
+
}
|
|
335
695
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
696
|
+
/**
|
|
697
|
+
* Run a test with user input and return a result for assertions.
|
|
698
|
+
*
|
|
699
|
+
* This method is primarily used for testing agent behavior without
|
|
700
|
+
* requiring a real room connection.
|
|
701
|
+
*
|
|
702
|
+
* @example
|
|
703
|
+
* ```typescript
|
|
704
|
+
* const result = await session.run({ userInput: 'Hello' });
|
|
705
|
+
* result.expect.nextEvent().isMessage({ role: 'assistant' });
|
|
706
|
+
* result.expect.noMoreEvents();
|
|
707
|
+
* ```
|
|
708
|
+
*
|
|
709
|
+
* @param options - Run options including user input and optional output type
|
|
710
|
+
* @returns A RunResult that resolves when the agent finishes responding
|
|
711
|
+
*/
|
|
712
|
+
run<T = unknown>({
|
|
713
|
+
userInput,
|
|
714
|
+
outputType,
|
|
715
|
+
}: {
|
|
716
|
+
userInput: string;
|
|
717
|
+
outputType?: z.ZodType<T>;
|
|
718
|
+
}): RunResult<T> {
|
|
719
|
+
if (this._globalRunState && !this._globalRunState.done()) {
|
|
720
|
+
throw new Error('nested runs are not supported');
|
|
339
721
|
}
|
|
340
722
|
|
|
341
|
-
|
|
342
|
-
|
|
723
|
+
const runState = new RunResult<T>({
|
|
724
|
+
userInput,
|
|
725
|
+
outputType,
|
|
726
|
+
});
|
|
343
727
|
|
|
344
|
-
|
|
728
|
+
this._globalRunState = runState;
|
|
729
|
+
|
|
730
|
+
// Defer generateReply through the activityLock to ensure any in-progress
|
|
731
|
+
// activity transition (e.g. AgentTask started from onEnter) completes first.
|
|
732
|
+
// TS Task.from starts onEnter synchronously, so the transition may already be
|
|
733
|
+
// mid-flight by the time run() is called after session.start() resolves.
|
|
734
|
+
// Acquiring and immediately releasing the lock guarantees FIFO ordering:
|
|
735
|
+
// the transition's lock section finishes before we route generateReply.
|
|
736
|
+
(async () => {
|
|
737
|
+
try {
|
|
738
|
+
const unlock = await this.activityLock.lock();
|
|
739
|
+
unlock();
|
|
740
|
+
this.generateReply({ userInput });
|
|
741
|
+
} catch (e) {
|
|
742
|
+
runState._reject(e instanceof Error ? e : new Error(String(e)));
|
|
743
|
+
}
|
|
744
|
+
})();
|
|
345
745
|
|
|
346
|
-
|
|
347
|
-
|
|
746
|
+
return runState;
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
/** @internal */
|
|
750
|
+
async _updateActivity(agent: Agent, options: ActivityTransitionOptions = {}): Promise<void> {
|
|
751
|
+
const { previousActivity = 'close', newActivity = 'start', blockedTasks = [] } = options;
|
|
752
|
+
const waitOnEnter = options.waitOnEnter ?? newActivity === 'start';
|
|
753
|
+
|
|
754
|
+
const runWithContext = async () => {
|
|
755
|
+
const unlock = await this.activityLock.lock();
|
|
756
|
+
let onEnterTask: Task<void> | undefined;
|
|
757
|
+
|
|
758
|
+
try {
|
|
759
|
+
this.agent = agent;
|
|
760
|
+
const prevActivityObj = this.activity;
|
|
761
|
+
|
|
762
|
+
if (newActivity === 'start') {
|
|
763
|
+
const prevAgent = prevActivityObj?.agent;
|
|
764
|
+
if (
|
|
765
|
+
agent._agentActivity &&
|
|
766
|
+
// allow updating the same agent that is running
|
|
767
|
+
(agent !== prevAgent || previousActivity !== 'close')
|
|
768
|
+
) {
|
|
769
|
+
throw new Error('Cannot start agent: an activity is already running');
|
|
770
|
+
}
|
|
771
|
+
this.nextActivity = new AgentActivity(agent, this);
|
|
772
|
+
} else if (newActivity === 'resume') {
|
|
773
|
+
if (!agent._agentActivity) {
|
|
774
|
+
throw new Error('Cannot resume agent: no existing activity to resume');
|
|
775
|
+
}
|
|
776
|
+
this.nextActivity = agent._agentActivity;
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
if (prevActivityObj && prevActivityObj !== this.nextActivity) {
|
|
780
|
+
if (previousActivity === 'pause') {
|
|
781
|
+
await prevActivityObj.pause({ blockedTasks });
|
|
782
|
+
} else {
|
|
783
|
+
await prevActivityObj.drain();
|
|
784
|
+
await prevActivityObj.close();
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
this.activity = this.nextActivity;
|
|
789
|
+
this.nextActivity = undefined;
|
|
790
|
+
|
|
791
|
+
const runState = this._globalRunState;
|
|
792
|
+
const handoffItem = new AgentHandoffItem({
|
|
793
|
+
oldAgentId: prevActivityObj?.agent.id,
|
|
794
|
+
newAgentId: agent.id,
|
|
795
|
+
});
|
|
796
|
+
|
|
797
|
+
if (runState) {
|
|
798
|
+
runState._agentHandoff({
|
|
799
|
+
item: handoffItem,
|
|
800
|
+
oldAgent: prevActivityObj?.agent,
|
|
801
|
+
newAgent: this.activity!.agent,
|
|
802
|
+
});
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
this._chatCtx.insert(handoffItem);
|
|
806
|
+
this.logger.debug(
|
|
807
|
+
{ previousAgentId: prevActivityObj?.agent.id, newAgentId: agent.id },
|
|
808
|
+
'Agent handoff inserted into chat context',
|
|
809
|
+
);
|
|
810
|
+
|
|
811
|
+
if (newActivity === 'start') {
|
|
812
|
+
await this.activity!.start();
|
|
813
|
+
} else {
|
|
814
|
+
await this.activity!.resume();
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
onEnterTask = this.activity!._onEnterTask;
|
|
818
|
+
|
|
819
|
+
if (this._input.audio) {
|
|
820
|
+
this.activity!.attachAudioInput(this._input.audio.stream);
|
|
821
|
+
}
|
|
822
|
+
} finally {
|
|
823
|
+
unlock();
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
if (waitOnEnter) {
|
|
827
|
+
if (!onEnterTask) {
|
|
828
|
+
throw new Error('expected onEnter task to be available while waitOnEnter=true');
|
|
829
|
+
}
|
|
830
|
+
await onEnterTask.result;
|
|
831
|
+
}
|
|
832
|
+
};
|
|
833
|
+
|
|
834
|
+
// Run within session span context if available
|
|
835
|
+
if (this.rootSpanContext) {
|
|
836
|
+
return otelContext.with(this.rootSpanContext, runWithContext);
|
|
348
837
|
}
|
|
838
|
+
|
|
839
|
+
return runWithContext();
|
|
349
840
|
}
|
|
350
841
|
|
|
351
842
|
get chatCtx(): ChatContext {
|
|
@@ -356,6 +847,10 @@ export class AgentSession<
|
|
|
356
847
|
return this._agentState;
|
|
357
848
|
}
|
|
358
849
|
|
|
850
|
+
get userState(): UserState {
|
|
851
|
+
return this._userState;
|
|
852
|
+
}
|
|
853
|
+
|
|
359
854
|
get currentAgent(): Agent {
|
|
360
855
|
if (!this.agent) {
|
|
361
856
|
throw new Error('AgentSession is not running');
|
|
@@ -368,29 +863,58 @@ export class AgentSession<
|
|
|
368
863
|
await this.closeImpl(CloseReason.USER_INITIATED);
|
|
369
864
|
}
|
|
370
865
|
|
|
866
|
+
shutdown(options?: { drain?: boolean; reason?: ShutdownReason }): void {
|
|
867
|
+
const { drain = true, reason = CloseReason.USER_INITIATED } = options ?? {};
|
|
868
|
+
|
|
869
|
+
this._closeSoon({
|
|
870
|
+
reason,
|
|
871
|
+
drain,
|
|
872
|
+
});
|
|
873
|
+
}
|
|
874
|
+
|
|
371
875
|
/** @internal */
|
|
372
876
|
_closeSoon({
|
|
373
877
|
reason,
|
|
374
878
|
drain = false,
|
|
375
879
|
error = null,
|
|
376
880
|
}: {
|
|
377
|
-
reason:
|
|
881
|
+
reason: ShutdownReason;
|
|
378
882
|
drain?: boolean;
|
|
379
883
|
error?: RealtimeModelError | STTError | TTSError | LLMError | null;
|
|
380
884
|
}): void {
|
|
381
885
|
if (this.closingTask) {
|
|
382
886
|
return;
|
|
383
887
|
}
|
|
384
|
-
this.closeImpl(reason, error, drain)
|
|
888
|
+
this.closingTask = this.closeImpl(reason, error, drain).finally(() => {
|
|
889
|
+
this.closingTask = null;
|
|
890
|
+
});
|
|
385
891
|
}
|
|
386
892
|
|
|
387
893
|
/** @internal */
|
|
388
|
-
_onError(
|
|
894
|
+
_onError(
|
|
895
|
+
error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError,
|
|
896
|
+
): void {
|
|
389
897
|
if (this.closingTask || error.recoverable) {
|
|
390
898
|
return;
|
|
391
899
|
}
|
|
392
900
|
|
|
393
|
-
|
|
901
|
+
// Track error counts per type to implement max_unrecoverable_errors logic
|
|
902
|
+
if (error.type === 'llm_error') {
|
|
903
|
+
this.llmErrorCounts += 1;
|
|
904
|
+
if (this.llmErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
|
|
905
|
+
return;
|
|
906
|
+
}
|
|
907
|
+
} else if (error.type === 'tts_error') {
|
|
908
|
+
this.ttsErrorCounts += 1;
|
|
909
|
+
if (this.ttsErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
|
|
910
|
+
return;
|
|
911
|
+
}
|
|
912
|
+
} else if (error.type === 'interruption_detection_error') {
|
|
913
|
+
this.logger.error(error.toString());
|
|
914
|
+
return;
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
this.logger.error(error, 'AgentSession is closing due to an unrecoverable error');
|
|
394
918
|
|
|
395
919
|
this.closingTask = (async () => {
|
|
396
920
|
await this.closeImpl(CloseReason.ERROR, error);
|
|
@@ -406,13 +930,56 @@ export class AgentSession<
|
|
|
406
930
|
}
|
|
407
931
|
|
|
408
932
|
/** @internal */
|
|
409
|
-
|
|
933
|
+
_toolItemsAdded(items: (FunctionCall | FunctionCallOutput)[]): void {
|
|
934
|
+
this._chatCtx.insert(items);
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
/** @internal */
|
|
938
|
+
_updateAgentState(state: AgentState, options?: { startTime?: number; otelContext?: Context }) {
|
|
410
939
|
if (this._agentState === state) {
|
|
411
940
|
return;
|
|
412
941
|
}
|
|
413
942
|
|
|
943
|
+
if (state === 'speaking') {
|
|
944
|
+
this.llmErrorCounts = 0;
|
|
945
|
+
this.ttsErrorCounts = 0;
|
|
946
|
+
|
|
947
|
+
if (this.agentSpeakingSpan === undefined) {
|
|
948
|
+
this.agentSpeakingSpan = tracer.startSpan({
|
|
949
|
+
name: 'agent_speaking',
|
|
950
|
+
context: options?.otelContext ?? this.rootSpanContext,
|
|
951
|
+
startTime: options?.startTime,
|
|
952
|
+
});
|
|
953
|
+
|
|
954
|
+
const localParticipant = this._roomIO?.localParticipant;
|
|
955
|
+
if (localParticipant) {
|
|
956
|
+
setParticipantSpanAttributes(this.agentSpeakingSpan, localParticipant);
|
|
957
|
+
}
|
|
958
|
+
}
|
|
959
|
+
} else if (this.agentSpeakingSpan !== undefined) {
|
|
960
|
+
// TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
|
|
961
|
+
this.agentSpeakingSpan.end();
|
|
962
|
+
this.agentSpeakingSpan = undefined;
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
if (state === 'speaking' && this._aecWarmupRemaining > 0 && this._aecWarmupTimer === null) {
|
|
966
|
+
this._aecWarmupTimer = setTimeout(() => this._onAecWarmupExpired(), this._aecWarmupRemaining);
|
|
967
|
+
this.logger.debug(
|
|
968
|
+
{ warmupDurationMs: this._aecWarmupRemaining },
|
|
969
|
+
'aec warmup active, disabling interruptions',
|
|
970
|
+
);
|
|
971
|
+
}
|
|
972
|
+
|
|
414
973
|
const oldState = this._agentState;
|
|
415
974
|
this._agentState = state;
|
|
975
|
+
|
|
976
|
+
// Handle user away timer based on state changes
|
|
977
|
+
if (state === 'listening' && this._userState === 'listening') {
|
|
978
|
+
this._setUserAwayTimer();
|
|
979
|
+
} else {
|
|
980
|
+
this._cancelUserAwayTimer();
|
|
981
|
+
}
|
|
982
|
+
|
|
416
983
|
this.emit(
|
|
417
984
|
AgentSessionEventTypes.AgentStateChanged,
|
|
418
985
|
createAgentStateChangedEvent(oldState, state),
|
|
@@ -420,13 +987,40 @@ export class AgentSession<
|
|
|
420
987
|
}
|
|
421
988
|
|
|
422
989
|
/** @internal */
|
|
423
|
-
_updateUserState(
|
|
424
|
-
|
|
990
|
+
_updateUserState(
|
|
991
|
+
state: UserState,
|
|
992
|
+
options?: { lastSpeakingTime?: number; otelContext?: Context },
|
|
993
|
+
) {
|
|
994
|
+
if (this._userState === state) {
|
|
425
995
|
return;
|
|
426
996
|
}
|
|
427
997
|
|
|
428
|
-
|
|
429
|
-
|
|
998
|
+
if (state === 'speaking' && this._userSpeakingSpan === undefined) {
|
|
999
|
+
this._userSpeakingSpan = tracer.startSpan({
|
|
1000
|
+
name: 'user_speaking',
|
|
1001
|
+
context: options?.otelContext ?? this.rootSpanContext,
|
|
1002
|
+
startTime: options?.lastSpeakingTime,
|
|
1003
|
+
});
|
|
1004
|
+
|
|
1005
|
+
const linked = this._roomIO?.linkedParticipant;
|
|
1006
|
+
if (linked) {
|
|
1007
|
+
setParticipantSpanAttributes(this._userSpeakingSpan, linked);
|
|
1008
|
+
}
|
|
1009
|
+
} else if (this._userSpeakingSpan !== undefined) {
|
|
1010
|
+
this._userSpeakingSpan.end(options?.lastSpeakingTime);
|
|
1011
|
+
this._userSpeakingSpan = undefined;
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
const oldState = this._userState;
|
|
1015
|
+
this._userState = state;
|
|
1016
|
+
|
|
1017
|
+
// Handle user away timer based on state changes
|
|
1018
|
+
if (state === 'listening' && this._agentState === 'listening') {
|
|
1019
|
+
this._setUserAwayTimer();
|
|
1020
|
+
} else {
|
|
1021
|
+
this._cancelUserAwayTimer();
|
|
1022
|
+
}
|
|
1023
|
+
|
|
430
1024
|
this.emit(
|
|
431
1025
|
AgentSessionEventTypes.UserStateChanged,
|
|
432
1026
|
createUserStateChangedEvent(oldState, state),
|
|
@@ -448,29 +1042,119 @@ export class AgentSession<
|
|
|
448
1042
|
|
|
449
1043
|
private onTextOutputChanged(): void {}
|
|
450
1044
|
|
|
1045
|
+
private _setUserAwayTimer(): void {
|
|
1046
|
+
this._cancelUserAwayTimer();
|
|
1047
|
+
|
|
1048
|
+
if (
|
|
1049
|
+
this.sessionOptions.userAwayTimeout === null ||
|
|
1050
|
+
this.sessionOptions.userAwayTimeout === undefined
|
|
1051
|
+
) {
|
|
1052
|
+
return;
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
if (this._roomIO && !this._roomIO.isParticipantAvailable) {
|
|
1056
|
+
return;
|
|
1057
|
+
}
|
|
1058
|
+
|
|
1059
|
+
this.userAwayTimer = setTimeout(() => {
|
|
1060
|
+
this.logger.debug('User away timeout triggered');
|
|
1061
|
+
this._updateUserState('away');
|
|
1062
|
+
}, this.sessionOptions.userAwayTimeout * 1000);
|
|
1063
|
+
}
|
|
1064
|
+
|
|
1065
|
+
private _cancelUserAwayTimer(): void {
|
|
1066
|
+
if (this.userAwayTimer !== null) {
|
|
1067
|
+
clearTimeout(this.userAwayTimer);
|
|
1068
|
+
this.userAwayTimer = null;
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
|
|
1072
|
+
/** @internal */
|
|
1073
|
+
_onAecWarmupExpired(): void {
|
|
1074
|
+
if (this._aecWarmupRemaining > 0) {
|
|
1075
|
+
this.logger.debug('aec warmup expired, re-enabling interruptions');
|
|
1076
|
+
}
|
|
1077
|
+
|
|
1078
|
+
this._aecWarmupRemaining = 0;
|
|
1079
|
+
if (this._aecWarmupTimer !== null) {
|
|
1080
|
+
clearTimeout(this._aecWarmupTimer);
|
|
1081
|
+
this._aecWarmupTimer = null;
|
|
1082
|
+
}
|
|
1083
|
+
}
|
|
1084
|
+
|
|
1085
|
+
private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
|
|
1086
|
+
if (this._userState === 'away' && ev.isFinal) {
|
|
1087
|
+
this.logger.debug('User returned from away state due to speech input');
|
|
1088
|
+
this._updateUserState('listening');
|
|
1089
|
+
}
|
|
1090
|
+
}
|
|
1091
|
+
|
|
451
1092
|
private async closeImpl(
|
|
452
|
-
reason:
|
|
453
|
-
error:
|
|
1093
|
+
reason: ShutdownReason,
|
|
1094
|
+
error:
|
|
1095
|
+
| RealtimeModelError
|
|
1096
|
+
| LLMError
|
|
1097
|
+
| TTSError
|
|
1098
|
+
| STTError
|
|
1099
|
+
| InterruptionDetectionError
|
|
1100
|
+
| null = null,
|
|
1101
|
+
drain: boolean = false,
|
|
1102
|
+
): Promise<void> {
|
|
1103
|
+
if (this.rootSpanContext) {
|
|
1104
|
+
return otelContext.with(this.rootSpanContext, async () => {
|
|
1105
|
+
await this.closeImplInner(reason, error, drain);
|
|
1106
|
+
});
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
return this.closeImplInner(reason, error, drain);
|
|
1110
|
+
}
|
|
1111
|
+
|
|
1112
|
+
private async closeImplInner(
|
|
1113
|
+
reason: ShutdownReason,
|
|
1114
|
+
error:
|
|
1115
|
+
| RealtimeModelError
|
|
1116
|
+
| LLMError
|
|
1117
|
+
| TTSError
|
|
1118
|
+
| STTError
|
|
1119
|
+
| InterruptionDetectionError
|
|
1120
|
+
| null = null,
|
|
454
1121
|
drain: boolean = false,
|
|
455
1122
|
): Promise<void> {
|
|
456
1123
|
if (!this.started) {
|
|
457
1124
|
return;
|
|
458
1125
|
}
|
|
459
1126
|
|
|
1127
|
+
this._cancelUserAwayTimer();
|
|
1128
|
+
this._onAecWarmupExpired();
|
|
1129
|
+
this.off(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
1130
|
+
|
|
460
1131
|
if (this.activity) {
|
|
461
1132
|
if (!drain) {
|
|
462
1133
|
try {
|
|
463
|
-
this.activity.interrupt();
|
|
1134
|
+
await this.activity.interrupt({ force: true }).await;
|
|
464
1135
|
} catch (error) {
|
|
465
|
-
|
|
466
|
-
// TODO(shubhra): force interrupt or wait for it to finish?
|
|
467
|
-
// it might be an audio played from the error callback
|
|
1136
|
+
this.logger.warn({ error }, 'Error interrupting activity');
|
|
468
1137
|
}
|
|
469
1138
|
}
|
|
1139
|
+
|
|
470
1140
|
await this.activity.drain();
|
|
471
1141
|
// wait any uninterruptible speech to finish
|
|
472
1142
|
await this.activity.currentSpeech?.waitForPlayout();
|
|
473
|
-
|
|
1143
|
+
|
|
1144
|
+
if (reason !== CloseReason.ERROR) {
|
|
1145
|
+
this.activity.commitUserTurn({ audioDetached: true, throwIfNotReady: false });
|
|
1146
|
+
}
|
|
1147
|
+
|
|
1148
|
+
try {
|
|
1149
|
+
this.activity.detachAudioInput();
|
|
1150
|
+
} catch (error) {
|
|
1151
|
+
// Ignore detach errors during cleanup - source may not have been set
|
|
1152
|
+
}
|
|
1153
|
+
}
|
|
1154
|
+
|
|
1155
|
+
// Close recorder before detaching inputs/outputs (keep reference for session report)
|
|
1156
|
+
if (this._recorderIO) {
|
|
1157
|
+
await this._recorderIO.close();
|
|
474
1158
|
}
|
|
475
1159
|
|
|
476
1160
|
// detach the inputs and outputs
|
|
@@ -478,18 +1162,39 @@ export class AgentSession<
|
|
|
478
1162
|
this.output.audio = null;
|
|
479
1163
|
this.output.transcription = null;
|
|
480
1164
|
|
|
481
|
-
await this.
|
|
482
|
-
this.
|
|
1165
|
+
await this.sessionHost?.close();
|
|
1166
|
+
this.sessionHost = undefined;
|
|
1167
|
+
|
|
1168
|
+
await this._roomIO?.close();
|
|
1169
|
+
this._roomIO = undefined;
|
|
483
1170
|
|
|
484
1171
|
await this.activity?.close();
|
|
485
1172
|
this.activity = undefined;
|
|
486
1173
|
|
|
1174
|
+
if (this.sessionSpan) {
|
|
1175
|
+
this.sessionSpan.end();
|
|
1176
|
+
this.sessionSpan = undefined;
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1179
|
+
if (this._userSpeakingSpan) {
|
|
1180
|
+
this._userSpeakingSpan.end();
|
|
1181
|
+
this._userSpeakingSpan = undefined;
|
|
1182
|
+
}
|
|
1183
|
+
|
|
1184
|
+
if (this.agentSpeakingSpan) {
|
|
1185
|
+
this.agentSpeakingSpan.end();
|
|
1186
|
+
this.agentSpeakingSpan = undefined;
|
|
1187
|
+
}
|
|
1188
|
+
|
|
487
1189
|
this.started = false;
|
|
488
1190
|
|
|
489
1191
|
this.emit(AgentSessionEventTypes.Close, createCloseEvent(reason, error));
|
|
490
1192
|
|
|
491
|
-
this.
|
|
1193
|
+
this._userState = 'listening';
|
|
492
1194
|
this._agentState = 'initializing';
|
|
1195
|
+
this.rootSpanContext = undefined;
|
|
1196
|
+
this.llmErrorCounts = 0;
|
|
1197
|
+
this.ttsErrorCounts = 0;
|
|
493
1198
|
|
|
494
1199
|
this.logger.info({ reason, error }, 'AgentSession closed');
|
|
495
1200
|
}
|