@livekit/agents 0.7.9 → 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_exceptions.cjs +109 -0
- package/dist/_exceptions.cjs.map +1 -0
- package/dist/_exceptions.d.cts +64 -0
- package/dist/_exceptions.d.ts +64 -0
- package/dist/_exceptions.d.ts.map +1 -0
- package/dist/_exceptions.js +80 -0
- package/dist/_exceptions.js.map +1 -0
- package/dist/audio.cjs +10 -3
- package/dist/audio.cjs.map +1 -1
- package/dist/audio.d.cts +2 -0
- package/dist/audio.d.ts +2 -0
- package/dist/audio.d.ts.map +1 -1
- package/dist/audio.js +8 -2
- package/dist/audio.js.map +1 -1
- package/dist/cli.cjs +25 -0
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +25 -0
- package/dist/cli.js.map +1 -1
- package/dist/constants.cjs +6 -3
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +2 -1
- package/dist/constants.d.ts +2 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +4 -2
- package/dist/constants.js.map +1 -1
- package/dist/http_server.cjs.map +1 -1
- package/dist/http_server.d.cts +1 -0
- package/dist/http_server.d.ts +1 -0
- package/dist/http_server.d.ts.map +1 -1
- package/dist/http_server.js.map +1 -1
- package/dist/index.cjs +27 -20
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -10
- package/dist/index.d.ts +13 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +15 -11
- package/dist/index.js.map +1 -1
- package/dist/inference_runner.cjs +0 -1
- package/dist/inference_runner.cjs.map +1 -1
- package/dist/inference_runner.d.cts +2 -3
- package/dist/inference_runner.d.ts +2 -3
- package/dist/inference_runner.d.ts.map +1 -1
- package/dist/inference_runner.js +0 -1
- package/dist/inference_runner.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs +2 -2
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/inference_proc_executor.js +2 -2
- package/dist/ipc/inference_proc_executor.js.map +1 -1
- package/dist/ipc/job_executor.cjs.map +1 -1
- package/dist/ipc/job_executor.js.map +1 -1
- package/dist/ipc/job_proc_executor.cjs +1 -0
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.js +1 -0
- package/dist/ipc/job_proc_executor.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +1 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.d.cts +1 -1
- package/dist/ipc/supervised_proc.d.ts +1 -1
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/job.cjs +14 -2
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +8 -0
- package/dist/job.d.ts +8 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +12 -1
- package/dist/job.js.map +1 -1
- package/dist/llm/chat_context.cjs +332 -82
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +152 -48
- package/dist/llm/chat_context.d.ts +152 -48
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +327 -81
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/chat_context.test.cjs +380 -0
- package/dist/llm/chat_context.test.cjs.map +1 -0
- package/dist/llm/chat_context.test.js +385 -0
- package/dist/llm/chat_context.test.js.map +1 -0
- package/dist/llm/index.cjs +37 -8
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +7 -3
- package/dist/llm/index.d.ts +7 -3
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +39 -9
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +98 -33
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +50 -24
- package/dist/llm/llm.d.ts +50 -24
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +99 -33
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/google.cjs +128 -0
- package/dist/llm/provider_format/google.cjs.map +1 -0
- package/dist/llm/provider_format/google.d.cts +6 -0
- package/dist/llm/provider_format/google.d.ts +6 -0
- package/dist/llm/provider_format/google.d.ts.map +1 -0
- package/dist/llm/provider_format/google.js +104 -0
- package/dist/llm/provider_format/google.js.map +1 -0
- package/dist/llm/provider_format/google.test.cjs +676 -0
- package/dist/llm/provider_format/google.test.cjs.map +1 -0
- package/dist/llm/provider_format/google.test.js +675 -0
- package/dist/llm/provider_format/google.test.js.map +1 -0
- package/dist/llm/provider_format/index.cjs +40 -0
- package/dist/llm/provider_format/index.cjs.map +1 -0
- package/dist/llm/provider_format/index.d.cts +4 -0
- package/dist/llm/provider_format/index.d.ts +4 -0
- package/dist/llm/provider_format/index.d.ts.map +1 -0
- package/dist/llm/provider_format/index.js +16 -0
- package/dist/llm/provider_format/index.js.map +1 -0
- package/dist/llm/provider_format/openai.cjs +116 -0
- package/dist/llm/provider_format/openai.cjs.map +1 -0
- package/dist/llm/provider_format/openai.d.cts +3 -0
- package/dist/llm/provider_format/openai.d.ts +3 -0
- package/dist/llm/provider_format/openai.d.ts.map +1 -0
- package/dist/llm/provider_format/openai.js +92 -0
- package/dist/llm/provider_format/openai.js.map +1 -0
- package/dist/llm/provider_format/openai.test.cjs +490 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -0
- package/dist/llm/provider_format/openai.test.js +489 -0
- package/dist/llm/provider_format/openai.test.js.map +1 -0
- package/dist/llm/provider_format/utils.cjs +146 -0
- package/dist/llm/provider_format/utils.cjs.map +1 -0
- package/dist/llm/provider_format/utils.d.cts +38 -0
- package/dist/llm/provider_format/utils.d.ts +38 -0
- package/dist/llm/provider_format/utils.d.ts.map +1 -0
- package/dist/llm/provider_format/utils.js +122 -0
- package/dist/llm/provider_format/utils.js.map +1 -0
- package/dist/llm/realtime.cjs +77 -0
- package/dist/llm/realtime.cjs.map +1 -0
- package/dist/llm/realtime.d.cts +98 -0
- package/dist/llm/realtime.d.ts +98 -0
- package/dist/llm/realtime.d.ts.map +1 -0
- package/dist/llm/realtime.js +52 -0
- package/dist/llm/realtime.js.map +1 -0
- package/dist/llm/remote_chat_context.cjs +112 -0
- package/dist/llm/remote_chat_context.cjs.map +1 -0
- package/dist/llm/remote_chat_context.d.cts +23 -0
- package/dist/llm/remote_chat_context.d.ts +23 -0
- package/dist/llm/remote_chat_context.d.ts.map +1 -0
- package/dist/llm/remote_chat_context.js +88 -0
- package/dist/llm/remote_chat_context.js.map +1 -0
- package/dist/llm/remote_chat_context.test.cjs +225 -0
- package/dist/llm/remote_chat_context.test.cjs.map +1 -0
- package/dist/llm/remote_chat_context.test.js +224 -0
- package/dist/llm/remote_chat_context.test.js.map +1 -0
- package/dist/llm/tool_context.cjs +111 -0
- package/dist/llm/tool_context.cjs.map +1 -0
- package/dist/llm/tool_context.d.cts +125 -0
- package/dist/llm/tool_context.d.ts +125 -0
- package/dist/llm/tool_context.d.ts.map +1 -0
- package/dist/llm/tool_context.js +80 -0
- package/dist/llm/tool_context.js.map +1 -0
- package/dist/llm/tool_context.test.cjs +162 -0
- package/dist/llm/tool_context.test.cjs.map +1 -0
- package/dist/llm/tool_context.test.js +161 -0
- package/dist/llm/tool_context.test.js.map +1 -0
- package/dist/llm/tool_context.type.test.cjs +92 -0
- package/dist/llm/tool_context.type.test.cjs.map +1 -0
- package/dist/llm/tool_context.type.test.js +91 -0
- package/dist/llm/tool_context.type.test.js.map +1 -0
- package/dist/llm/utils.cjs +260 -0
- package/dist/llm/utils.cjs.map +1 -0
- package/dist/llm/utils.d.cts +42 -0
- package/dist/llm/utils.d.ts +42 -0
- package/dist/llm/utils.d.ts.map +1 -0
- package/dist/llm/utils.js +223 -0
- package/dist/llm/utils.js.map +1 -0
- package/dist/llm/utils.test.cjs +513 -0
- package/dist/llm/utils.test.cjs.map +1 -0
- package/dist/llm/utils.test.js +490 -0
- package/dist/llm/utils.test.js.map +1 -0
- package/dist/metrics/base.cjs +0 -27
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +105 -63
- package/dist/metrics/base.d.ts +105 -63
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/base.js +0 -19
- package/dist/metrics/base.js.map +1 -1
- package/dist/metrics/index.cjs +0 -3
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -3
- package/dist/metrics/index.d.ts +2 -3
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +0 -2
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/usage_collector.cjs +17 -12
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +3 -2
- package/dist/metrics/usage_collector.d.ts +3 -2
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +17 -12
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +22 -59
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.cts +1 -8
- package/dist/metrics/utils.d.ts +1 -8
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +22 -52
- package/dist/metrics/utils.js.map +1 -1
- package/dist/multimodal/index.cjs +0 -2
- package/dist/multimodal/index.cjs.map +1 -1
- package/dist/multimodal/index.d.cts +0 -1
- package/dist/multimodal/index.d.ts +0 -1
- package/dist/multimodal/index.d.ts.map +1 -1
- package/dist/multimodal/index.js +0 -1
- package/dist/multimodal/index.js.map +1 -1
- package/dist/plugin.cjs +24 -8
- package/dist/plugin.cjs.map +1 -1
- package/dist/plugin.d.cts +18 -4
- package/dist/plugin.d.ts +18 -4
- package/dist/plugin.d.ts.map +1 -1
- package/dist/plugin.js +22 -7
- package/dist/plugin.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +98 -0
- package/dist/stream/deferred_stream.cjs.map +1 -0
- package/dist/stream/deferred_stream.d.cts +27 -0
- package/dist/stream/deferred_stream.d.ts +27 -0
- package/dist/stream/deferred_stream.d.ts.map +1 -0
- package/dist/stream/deferred_stream.js +73 -0
- package/dist/stream/deferred_stream.js.map +1 -0
- package/dist/stream/deferred_stream.test.cjs +527 -0
- package/dist/stream/deferred_stream.test.cjs.map +1 -0
- package/dist/stream/deferred_stream.test.js +526 -0
- package/dist/stream/deferred_stream.test.js.map +1 -0
- package/dist/stream/identity_transform.cjs +42 -0
- package/dist/stream/identity_transform.cjs.map +1 -0
- package/dist/stream/identity_transform.d.cts +6 -0
- package/dist/stream/identity_transform.d.ts +6 -0
- package/dist/stream/identity_transform.d.ts.map +1 -0
- package/dist/stream/identity_transform.js +18 -0
- package/dist/stream/identity_transform.js.map +1 -0
- package/dist/stream/identity_transform.test.cjs +125 -0
- package/dist/stream/identity_transform.test.cjs.map +1 -0
- package/dist/stream/identity_transform.test.js +124 -0
- package/dist/stream/identity_transform.test.js.map +1 -0
- package/dist/stream/index.cjs +38 -0
- package/dist/stream/index.cjs.map +1 -0
- package/dist/stream/index.d.cts +5 -0
- package/dist/stream/index.d.ts +5 -0
- package/dist/stream/index.d.ts.map +1 -0
- package/dist/stream/index.js +11 -0
- package/dist/stream/index.js.map +1 -0
- package/dist/stream/merge_readable_streams.cjs +59 -0
- package/dist/stream/merge_readable_streams.cjs.map +1 -0
- package/dist/stream/merge_readable_streams.d.cts +4 -0
- package/dist/stream/merge_readable_streams.d.ts +4 -0
- package/dist/stream/merge_readable_streams.d.ts.map +1 -0
- package/dist/stream/merge_readable_streams.js +35 -0
- package/dist/stream/merge_readable_streams.js.map +1 -0
- package/dist/stream/stream_channel.cjs +47 -0
- package/dist/stream/stream_channel.cjs.map +1 -0
- package/dist/stream/stream_channel.d.cts +9 -0
- package/dist/stream/stream_channel.d.ts +9 -0
- package/dist/stream/stream_channel.d.ts.map +1 -0
- package/dist/stream/stream_channel.js +23 -0
- package/dist/stream/stream_channel.js.map +1 -0
- package/dist/stream/stream_channel.test.cjs +97 -0
- package/dist/stream/stream_channel.test.cjs.map +1 -0
- package/dist/stream/stream_channel.test.js +96 -0
- package/dist/stream/stream_channel.test.js.map +1 -0
- package/dist/stt/stream_adapter.cjs +3 -4
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.cts +1 -0
- package/dist/stt/stream_adapter.d.ts +1 -0
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +3 -4
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/stt/stt.cjs +101 -10
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +26 -5
- package/dist/stt/stt.d.ts +26 -5
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +102 -11
- package/dist/stt/stt.js.map +1 -1
- package/dist/tokenize/basic/basic.cjs +10 -5
- package/dist/tokenize/basic/basic.cjs.map +1 -1
- package/dist/tokenize/basic/basic.d.cts +7 -1
- package/dist/tokenize/basic/basic.d.ts +7 -1
- package/dist/tokenize/basic/basic.d.ts.map +1 -1
- package/dist/tokenize/basic/basic.js +10 -5
- package/dist/tokenize/basic/basic.js.map +1 -1
- package/dist/tokenize/basic/sentence.cjs +14 -6
- package/dist/tokenize/basic/sentence.cjs.map +1 -1
- package/dist/tokenize/basic/sentence.d.cts +1 -1
- package/dist/tokenize/basic/sentence.d.ts +1 -1
- package/dist/tokenize/basic/sentence.d.ts.map +1 -1
- package/dist/tokenize/basic/sentence.js +14 -6
- package/dist/tokenize/basic/sentence.js.map +1 -1
- package/dist/tokenize/token_stream.cjs +5 -3
- package/dist/tokenize/token_stream.cjs.map +1 -1
- package/dist/tokenize/token_stream.d.cts +1 -0
- package/dist/tokenize/token_stream.d.ts +1 -0
- package/dist/tokenize/token_stream.d.ts.map +1 -1
- package/dist/tokenize/token_stream.js +6 -4
- package/dist/tokenize/token_stream.js.map +1 -1
- package/dist/transcription.cjs +1 -2
- package/dist/transcription.cjs.map +1 -1
- package/dist/transcription.d.ts.map +1 -1
- package/dist/transcription.js +2 -3
- package/dist/transcription.js.map +1 -1
- package/dist/tts/index.cjs +2 -4
- package/dist/tts/index.cjs.map +1 -1
- package/dist/tts/index.d.cts +1 -1
- package/dist/tts/index.d.ts +1 -1
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +1 -3
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +26 -13
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.cts +1 -1
- package/dist/tts/stream_adapter.d.ts +1 -1
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +27 -14
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +157 -25
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +29 -5
- package/dist/tts/tts.d.ts +29 -5
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +157 -24
- package/dist/tts/tts.js.map +1 -1
- package/dist/types.cjs +60 -0
- package/dist/types.cjs.map +1 -0
- package/dist/types.d.cts +13 -0
- package/dist/types.d.ts +13 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +35 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.cjs +281 -27
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +134 -9
- package/dist/utils.d.ts +134 -9
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +265 -26
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +492 -0
- package/dist/utils.test.cjs.map +1 -0
- package/dist/utils.test.js +498 -0
- package/dist/utils.test.js.map +1 -0
- package/dist/vad.cjs +76 -20
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +25 -5
- package/dist/vad.d.ts +25 -5
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +76 -20
- package/dist/vad.js.map +1 -1
- package/dist/voice/agent.cjs +245 -0
- package/dist/voice/agent.cjs.map +1 -0
- package/dist/voice/agent.d.cts +78 -0
- package/dist/voice/agent.d.ts +78 -0
- package/dist/voice/agent.d.ts.map +1 -0
- package/dist/voice/agent.js +220 -0
- package/dist/voice/agent.js.map +1 -0
- package/dist/voice/agent.test.cjs +61 -0
- package/dist/voice/agent.test.cjs.map +1 -0
- package/dist/voice/agent.test.js +60 -0
- package/dist/voice/agent.test.js.map +1 -0
- package/dist/voice/agent_activity.cjs +1453 -0
- package/dist/voice/agent_activity.cjs.map +1 -0
- package/dist/voice/agent_activity.d.cts +94 -0
- package/dist/voice/agent_activity.d.ts +94 -0
- package/dist/voice/agent_activity.d.ts.map +1 -0
- package/dist/voice/agent_activity.js +1449 -0
- package/dist/voice/agent_activity.js.map +1 -0
- package/dist/voice/agent_session.cjs +312 -0
- package/dist/voice/agent_session.cjs.map +1 -0
- package/dist/voice/agent_session.d.cts +121 -0
- package/dist/voice/agent_session.d.ts +121 -0
- package/dist/voice/agent_session.d.ts.map +1 -0
- package/dist/voice/agent_session.js +295 -0
- package/dist/voice/agent_session.js.map +1 -0
- package/dist/voice/audio_recognition.cjs +375 -0
- package/dist/voice/audio_recognition.cjs.map +1 -0
- package/dist/voice/audio_recognition.d.cts +80 -0
- package/dist/voice/audio_recognition.d.ts +80 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -0
- package/dist/voice/audio_recognition.js +351 -0
- package/dist/voice/audio_recognition.js.map +1 -0
- package/dist/voice/events.cjs +145 -0
- package/dist/voice/events.cjs.map +1 -0
- package/dist/voice/events.d.cts +124 -0
- package/dist/voice/events.d.ts +124 -0
- package/dist/voice/events.d.ts.map +1 -0
- package/dist/voice/events.js +110 -0
- package/dist/voice/events.js.map +1 -0
- package/dist/voice/generation.cjs +700 -0
- package/dist/voice/generation.cjs.map +1 -0
- package/dist/voice/generation.d.cts +115 -0
- package/dist/voice/generation.d.ts +115 -0
- package/dist/voice/generation.d.ts.map +1 -0
- package/dist/voice/generation.js +672 -0
- package/dist/voice/generation.js.map +1 -0
- package/dist/voice/index.cjs +40 -0
- package/dist/voice/index.cjs.map +1 -0
- package/dist/voice/index.d.cts +5 -0
- package/dist/voice/index.d.ts +5 -0
- package/dist/voice/index.d.ts.map +1 -0
- package/dist/voice/index.js +11 -0
- package/dist/voice/index.js.map +1 -0
- package/dist/voice/io.cjs +245 -0
- package/dist/voice/io.cjs.map +1 -0
- package/dist/voice/io.d.cts +101 -0
- package/dist/voice/io.d.ts +101 -0
- package/dist/voice/io.d.ts.map +1 -0
- package/dist/voice/io.js +217 -0
- package/dist/voice/io.js.map +1 -0
- package/dist/voice/room_io/_input.cjs +121 -0
- package/dist/voice/room_io/_input.cjs.map +1 -0
- package/dist/voice/room_io/_input.d.cts +24 -0
- package/dist/voice/room_io/_input.d.ts +24 -0
- package/dist/voice/room_io/_input.d.ts.map +1 -0
- package/dist/voice/room_io/_input.js +102 -0
- package/dist/voice/room_io/_input.js.map +1 -0
- package/dist/voice/room_io/_output.cjs +358 -0
- package/dist/voice/room_io/_output.cjs.map +1 -0
- package/dist/voice/room_io/_output.d.cts +75 -0
- package/dist/voice/room_io/_output.d.ts +75 -0
- package/dist/voice/room_io/_output.d.ts.map +1 -0
- package/dist/voice/room_io/_output.js +342 -0
- package/dist/voice/room_io/_output.js.map +1 -0
- package/dist/voice/room_io/index.cjs +25 -0
- package/dist/voice/room_io/index.cjs.map +1 -0
- package/dist/voice/room_io/index.d.cts +3 -0
- package/dist/voice/room_io/index.d.ts +3 -0
- package/dist/voice/room_io/index.d.ts.map +1 -0
- package/dist/voice/room_io/index.js +3 -0
- package/dist/voice/room_io/index.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +370 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -0
- package/dist/voice/room_io/room_io.d.cts +73 -0
- package/dist/voice/room_io/room_io.d.ts +73 -0
- package/dist/voice/room_io/room_io.d.ts.map +1 -0
- package/dist/voice/room_io/room_io.js +361 -0
- package/dist/voice/room_io/room_io.js.map +1 -0
- package/dist/{pipeline/index.cjs → voice/run_context.cjs} +16 -11
- package/dist/voice/run_context.cjs.map +1 -0
- package/dist/voice/run_context.d.cts +12 -0
- package/dist/voice/run_context.d.ts +12 -0
- package/dist/voice/run_context.d.ts.map +1 -0
- package/dist/voice/run_context.js +14 -0
- package/dist/voice/run_context.js.map +1 -0
- package/dist/voice/speech_handle.cjs +105 -0
- package/dist/voice/speech_handle.cjs.map +1 -0
- package/dist/voice/speech_handle.d.cts +46 -0
- package/dist/voice/speech_handle.d.ts +46 -0
- package/dist/voice/speech_handle.d.ts.map +1 -0
- package/dist/voice/speech_handle.js +81 -0
- package/dist/voice/speech_handle.js.map +1 -0
- package/dist/voice/transcription/_utils.cjs +45 -0
- package/dist/voice/transcription/_utils.cjs.map +1 -0
- package/dist/voice/transcription/_utils.d.cts +3 -0
- package/dist/voice/transcription/_utils.d.ts +3 -0
- package/dist/voice/transcription/_utils.d.ts.map +1 -0
- package/dist/voice/transcription/_utils.js +21 -0
- package/dist/voice/transcription/_utils.js.map +1 -0
- package/dist/voice/transcription/index.cjs +23 -0
- package/dist/voice/transcription/index.cjs.map +1 -0
- package/dist/voice/transcription/index.d.cts +2 -0
- package/dist/voice/transcription/index.d.ts +2 -0
- package/dist/voice/transcription/index.d.ts.map +1 -0
- package/dist/voice/transcription/index.js +2 -0
- package/dist/voice/transcription/index.js.map +1 -0
- package/dist/voice/transcription/synchronizer.cjs +380 -0
- package/dist/voice/transcription/synchronizer.cjs.map +1 -0
- package/dist/voice/transcription/synchronizer.d.cts +86 -0
- package/dist/voice/transcription/synchronizer.d.ts +86 -0
- package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
- package/dist/voice/transcription/synchronizer.js +355 -0
- package/dist/voice/transcription/synchronizer.js.map +1 -0
- package/dist/worker.cjs +22 -4
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.cts +1 -1
- package/dist/worker.d.ts +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +22 -4
- package/dist/worker.js.map +1 -1
- package/package.json +9 -2
- package/src/_exceptions.ts +137 -0
- package/src/audio.ts +12 -1
- package/src/cli.ts +37 -0
- package/src/constants.ts +2 -1
- package/src/http_server.ts +1 -0
- package/src/index.ts +13 -10
- package/src/inference_runner.ts +2 -3
- package/src/ipc/inference_proc_executor.ts +2 -2
- package/src/ipc/job_executor.ts +1 -1
- package/src/ipc/job_proc_executor.ts +1 -1
- package/src/ipc/job_proc_lazy_main.ts +1 -1
- package/src/job.ts +18 -0
- package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
- package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
- package/src/llm/__snapshots__/utils.test.ts.snap +65 -0
- package/src/llm/chat_context.test.ts +450 -0
- package/src/llm/chat_context.ts +501 -103
- package/src/llm/index.ts +53 -18
- package/src/llm/llm.ts +149 -50
- package/src/llm/provider_format/google.test.ts +772 -0
- package/src/llm/provider_format/google.ts +130 -0
- package/src/llm/provider_format/index.ts +23 -0
- package/src/llm/provider_format/openai.test.ts +581 -0
- package/src/llm/provider_format/openai.ts +118 -0
- package/src/llm/provider_format/utils.ts +183 -0
- package/src/llm/realtime.ts +151 -0
- package/src/llm/remote_chat_context.test.ts +290 -0
- package/src/llm/remote_chat_context.ts +114 -0
- package/src/llm/tool_context.test.ts +198 -0
- package/src/llm/tool_context.ts +259 -0
- package/src/llm/tool_context.type.test.ts +115 -0
- package/src/llm/utils.test.ts +670 -0
- package/src/llm/utils.ts +324 -0
- package/src/metrics/base.ts +110 -78
- package/src/metrics/index.ts +3 -9
- package/src/metrics/usage_collector.ts +19 -13
- package/src/metrics/utils.ts +24 -69
- package/src/multimodal/index.ts +0 -1
- package/src/plugin.ts +26 -8
- package/src/stream/deferred_stream.test.ts +755 -0
- package/src/stream/deferred_stream.ts +110 -0
- package/src/stream/identity_transform.test.ts +179 -0
- package/src/stream/identity_transform.ts +18 -0
- package/src/stream/index.ts +7 -0
- package/src/stream/merge_readable_streams.ts +40 -0
- package/src/stream/stream_channel.test.ts +129 -0
- package/src/stream/stream_channel.ts +32 -0
- package/src/stt/stream_adapter.ts +3 -5
- package/src/stt/stt.ts +135 -17
- package/src/tokenize/basic/basic.ts +13 -5
- package/src/tokenize/basic/sentence.ts +20 -6
- package/src/tokenize/token_stream.ts +7 -4
- package/src/transcription.ts +2 -3
- package/src/tts/index.ts +0 -1
- package/src/tts/stream_adapter.ts +42 -16
- package/src/tts/tts.ts +203 -21
- package/src/types.ts +42 -0
- package/src/utils.test.ts +658 -0
- package/src/utils.ts +375 -44
- package/src/vad.ts +90 -22
- package/src/voice/agent.test.ts +80 -0
- package/src/voice/agent.ts +332 -0
- package/src/voice/agent_activity.ts +1913 -0
- package/src/voice/agent_session.ts +460 -0
- package/src/voice/audio_recognition.ts +474 -0
- package/src/voice/events.ts +252 -0
- package/src/voice/generation.ts +881 -0
- package/src/voice/index.ts +7 -0
- package/src/voice/io.ts +304 -0
- package/src/voice/room_io/_input.ts +144 -0
- package/src/voice/room_io/_output.ts +436 -0
- package/src/voice/room_io/index.ts +5 -0
- package/src/voice/room_io/room_io.ts +495 -0
- package/src/voice/run_context.ts +20 -0
- package/src/voice/speech_handle.ts +104 -0
- package/src/voice/transcription/_utils.ts +25 -0
- package/src/voice/transcription/index.ts +4 -0
- package/src/voice/transcription/synchronizer.ts +478 -0
- package/src/worker.ts +22 -2
- package/dist/llm/function_context.cjs +0 -103
- package/dist/llm/function_context.cjs.map +0 -1
- package/dist/llm/function_context.d.cts +0 -47
- package/dist/llm/function_context.d.ts +0 -47
- package/dist/llm/function_context.d.ts.map +0 -1
- package/dist/llm/function_context.js +0 -78
- package/dist/llm/function_context.js.map +0 -1
- package/dist/llm/function_context.test.cjs +0 -218
- package/dist/llm/function_context.test.cjs.map +0 -1
- package/dist/llm/function_context.test.js +0 -217
- package/dist/llm/function_context.test.js.map +0 -1
- package/dist/multimodal/multimodal_agent.cjs +0 -486
- package/dist/multimodal/multimodal_agent.cjs.map +0 -1
- package/dist/multimodal/multimodal_agent.d.cts +0 -48
- package/dist/multimodal/multimodal_agent.d.ts +0 -48
- package/dist/multimodal/multimodal_agent.d.ts.map +0 -1
- package/dist/multimodal/multimodal_agent.js +0 -461
- package/dist/multimodal/multimodal_agent.js.map +0 -1
- package/dist/pipeline/agent_output.cjs +0 -197
- package/dist/pipeline/agent_output.cjs.map +0 -1
- package/dist/pipeline/agent_output.d.cts +0 -33
- package/dist/pipeline/agent_output.d.ts +0 -33
- package/dist/pipeline/agent_output.d.ts.map +0 -1
- package/dist/pipeline/agent_output.js +0 -172
- package/dist/pipeline/agent_output.js.map +0 -1
- package/dist/pipeline/agent_playout.cjs +0 -175
- package/dist/pipeline/agent_playout.cjs.map +0 -1
- package/dist/pipeline/agent_playout.d.cts +0 -40
- package/dist/pipeline/agent_playout.d.ts +0 -40
- package/dist/pipeline/agent_playout.d.ts.map +0 -1
- package/dist/pipeline/agent_playout.js +0 -139
- package/dist/pipeline/agent_playout.js.map +0 -1
- package/dist/pipeline/human_input.cjs +0 -171
- package/dist/pipeline/human_input.cjs.map +0 -1
- package/dist/pipeline/human_input.d.cts +0 -30
- package/dist/pipeline/human_input.d.ts +0 -30
- package/dist/pipeline/human_input.d.ts.map +0 -1
- package/dist/pipeline/human_input.js +0 -146
- package/dist/pipeline/human_input.js.map +0 -1
- package/dist/pipeline/index.cjs.map +0 -1
- package/dist/pipeline/index.d.cts +0 -2
- package/dist/pipeline/index.d.ts +0 -2
- package/dist/pipeline/index.d.ts.map +0 -1
- package/dist/pipeline/index.js +0 -11
- package/dist/pipeline/index.js.map +0 -1
- package/dist/pipeline/pipeline_agent.cjs +0 -859
- package/dist/pipeline/pipeline_agent.cjs.map +0 -1
- package/dist/pipeline/pipeline_agent.d.cts +0 -150
- package/dist/pipeline/pipeline_agent.d.ts +0 -150
- package/dist/pipeline/pipeline_agent.d.ts.map +0 -1
- package/dist/pipeline/pipeline_agent.js +0 -837
- package/dist/pipeline/pipeline_agent.js.map +0 -1
- package/dist/pipeline/speech_handle.cjs +0 -176
- package/dist/pipeline/speech_handle.cjs.map +0 -1
- package/dist/pipeline/speech_handle.d.cts +0 -37
- package/dist/pipeline/speech_handle.d.ts +0 -37
- package/dist/pipeline/speech_handle.d.ts.map +0 -1
- package/dist/pipeline/speech_handle.js +0 -152
- package/dist/pipeline/speech_handle.js.map +0 -1
- package/src/llm/function_context.test.ts +0 -248
- package/src/llm/function_context.ts +0 -142
- package/src/multimodal/multimodal_agent.ts +0 -592
- package/src/pipeline/agent_output.ts +0 -219
- package/src/pipeline/agent_playout.ts +0 -192
- package/src/pipeline/human_input.ts +0 -188
- package/src/pipeline/index.ts +0 -15
- package/src/pipeline/pipeline_agent.ts +0 -1197
- package/src/pipeline/speech_handle.ts +0 -201
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import type { AudioFrame, AudioSource } from '@livekit/rtc-node';
|
|
2
|
-
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
3
|
-
import type { TextAudioSynchronizer } from '../transcription.js';
|
|
4
|
-
import { Future } from '../utils.js';
|
|
5
|
-
import { SynthesisHandle } from './agent_output.js';
|
|
6
|
-
export declare enum AgentPlayoutEvent {
|
|
7
|
-
PLAYOUT_STARTED = 0,
|
|
8
|
-
PLAYOUT_STOPPED = 1
|
|
9
|
-
}
|
|
10
|
-
export type AgentPlayoutCallbacks = {
|
|
11
|
-
[AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;
|
|
12
|
-
[AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;
|
|
13
|
-
};
|
|
14
|
-
export declare class PlayoutHandle {
|
|
15
|
-
#private;
|
|
16
|
-
playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
|
|
17
|
-
totalPlayedTime?: number;
|
|
18
|
-
synchronizer: TextAudioSynchronizer;
|
|
19
|
-
pushedDuration: number;
|
|
20
|
-
intFut: Future;
|
|
21
|
-
doneFut: Future;
|
|
22
|
-
constructor(speechId: string, audioSource: AudioSource, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer);
|
|
23
|
-
get speechId(): string;
|
|
24
|
-
get interrupted(): boolean;
|
|
25
|
-
get timePlayed(): number;
|
|
26
|
-
get done(): boolean;
|
|
27
|
-
interrupt(): void;
|
|
28
|
-
join(): Future;
|
|
29
|
-
}
|
|
30
|
-
declare const AgentPlayout_base: new () => TypedEmitter<AgentPlayoutCallbacks>;
|
|
31
|
-
export declare class AgentPlayout extends AgentPlayout_base {
|
|
32
|
-
#private;
|
|
33
|
-
constructor(audioSource: AudioSource);
|
|
34
|
-
get targetVolume(): number;
|
|
35
|
-
set targetVolume(vol: number);
|
|
36
|
-
play(speechId: string, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer): PlayoutHandle;
|
|
37
|
-
close(): Promise<void>;
|
|
38
|
-
}
|
|
39
|
-
export {};
|
|
40
|
-
//# sourceMappingURL=agent_playout.d.ts.map
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import type { AudioFrame, AudioSource } from '@livekit/rtc-node';
|
|
2
|
-
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
3
|
-
import type { TextAudioSynchronizer } from '../transcription.js';
|
|
4
|
-
import { Future } from '../utils.js';
|
|
5
|
-
import { SynthesisHandle } from './agent_output.js';
|
|
6
|
-
export declare enum AgentPlayoutEvent {
|
|
7
|
-
PLAYOUT_STARTED = 0,
|
|
8
|
-
PLAYOUT_STOPPED = 1
|
|
9
|
-
}
|
|
10
|
-
export type AgentPlayoutCallbacks = {
|
|
11
|
-
[AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;
|
|
12
|
-
[AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;
|
|
13
|
-
};
|
|
14
|
-
export declare class PlayoutHandle {
|
|
15
|
-
#private;
|
|
16
|
-
playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
|
|
17
|
-
totalPlayedTime?: number;
|
|
18
|
-
synchronizer: TextAudioSynchronizer;
|
|
19
|
-
pushedDuration: number;
|
|
20
|
-
intFut: Future;
|
|
21
|
-
doneFut: Future;
|
|
22
|
-
constructor(speechId: string, audioSource: AudioSource, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer);
|
|
23
|
-
get speechId(): string;
|
|
24
|
-
get interrupted(): boolean;
|
|
25
|
-
get timePlayed(): number;
|
|
26
|
-
get done(): boolean;
|
|
27
|
-
interrupt(): void;
|
|
28
|
-
join(): Future;
|
|
29
|
-
}
|
|
30
|
-
declare const AgentPlayout_base: new () => TypedEmitter<AgentPlayoutCallbacks>;
|
|
31
|
-
export declare class AgentPlayout extends AgentPlayout_base {
|
|
32
|
-
#private;
|
|
33
|
-
constructor(audioSource: AudioSource);
|
|
34
|
-
get targetVolume(): number;
|
|
35
|
-
set targetVolume(vol: number);
|
|
36
|
-
play(speechId: string, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer): PlayoutHandle;
|
|
37
|
-
close(): Promise<void>;
|
|
38
|
-
}
|
|
39
|
-
export {};
|
|
40
|
-
//# sourceMappingURL=agent_playout.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,oBAAY,iBAAiB;IAC3B,eAAe,IAAA;IACf,eAAe,IAAA;CAChB;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAChD,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE,CAAC;AAEF,qBAAa,aAAa;;IAGxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC;IACjF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,qBAAqB,CAAC;IAEpC,cAAc,SAAK;IACnB,MAAM,SAAgB;IACtB,OAAO,SAAgB;gBAGrB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB;IAQrC,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;IAST,IAAI,IAAI,MAAM;CAGf;2CAE4D,aAAa,qBAAqB,CAAC;AAAhG,qBAAa,YAAa,SAAQ,iBAA+D;;gBAOnF,WAAW,EAAE,WAAW;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,YAAY,CAAC,GAAG,EAAE,MAAM,EAE3B;IAED,IAAI,CACF,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB,GAClC,aAAa;IAyFV,KAAK;CAIZ"}
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
import EventEmitter from "node:events";
|
|
2
|
-
import { log } from "../log.js";
|
|
3
|
-
import { CancellablePromise, Future, gracefullyCancel } from "../utils.js";
|
|
4
|
-
import { SynthesisHandle } from "./agent_output.js";
|
|
5
|
-
var AgentPlayoutEvent = /* @__PURE__ */ ((AgentPlayoutEvent2) => {
|
|
6
|
-
AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STARTED"] = 0] = "PLAYOUT_STARTED";
|
|
7
|
-
AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STOPPED"] = 1] = "PLAYOUT_STOPPED";
|
|
8
|
-
return AgentPlayoutEvent2;
|
|
9
|
-
})(AgentPlayoutEvent || {});
|
|
10
|
-
class PlayoutHandle {
|
|
11
|
-
#speechId;
|
|
12
|
-
#audioSource;
|
|
13
|
-
playoutSource;
|
|
14
|
-
totalPlayedTime;
|
|
15
|
-
synchronizer;
|
|
16
|
-
#interrupted = false;
|
|
17
|
-
pushedDuration = 0;
|
|
18
|
-
intFut = new Future();
|
|
19
|
-
doneFut = new Future();
|
|
20
|
-
constructor(speechId, audioSource, playoutSource, synchronizer) {
|
|
21
|
-
this.#speechId = speechId;
|
|
22
|
-
this.#audioSource = audioSource;
|
|
23
|
-
this.playoutSource = playoutSource;
|
|
24
|
-
this.synchronizer = synchronizer;
|
|
25
|
-
}
|
|
26
|
-
get speechId() {
|
|
27
|
-
return this.#speechId;
|
|
28
|
-
}
|
|
29
|
-
get interrupted() {
|
|
30
|
-
return this.#interrupted;
|
|
31
|
-
}
|
|
32
|
-
get timePlayed() {
|
|
33
|
-
return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;
|
|
34
|
-
}
|
|
35
|
-
get done() {
|
|
36
|
-
return this.doneFut.done || this.#interrupted;
|
|
37
|
-
}
|
|
38
|
-
interrupt() {
|
|
39
|
-
if (this.done) {
|
|
40
|
-
return;
|
|
41
|
-
}
|
|
42
|
-
this.intFut.resolve();
|
|
43
|
-
this.#interrupted = true;
|
|
44
|
-
}
|
|
45
|
-
join() {
|
|
46
|
-
return this.doneFut;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
class AgentPlayout extends EventEmitter {
|
|
50
|
-
#closed = false;
|
|
51
|
-
#audioSource;
|
|
52
|
-
#targetVolume = 1;
|
|
53
|
-
#playoutTask;
|
|
54
|
-
#logger = log();
|
|
55
|
-
constructor(audioSource) {
|
|
56
|
-
super();
|
|
57
|
-
this.#audioSource = audioSource;
|
|
58
|
-
}
|
|
59
|
-
get targetVolume() {
|
|
60
|
-
return this.#targetVolume;
|
|
61
|
-
}
|
|
62
|
-
set targetVolume(vol) {
|
|
63
|
-
this.#targetVolume = vol;
|
|
64
|
-
}
|
|
65
|
-
play(speechId, playoutSource, synchronizer) {
|
|
66
|
-
if (this.#closed) {
|
|
67
|
-
throw new Error("source closed");
|
|
68
|
-
}
|
|
69
|
-
const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);
|
|
70
|
-
this.#playoutTask = this.#playout(handle, this.#playoutTask);
|
|
71
|
-
return handle;
|
|
72
|
-
}
|
|
73
|
-
#playout(handle, oldTask) {
|
|
74
|
-
return new CancellablePromise(async (resolve, _, onCancel) => {
|
|
75
|
-
const cancel = () => {
|
|
76
|
-
captureTask.cancel();
|
|
77
|
-
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
|
|
78
|
-
if (handle.interrupted || captureTask.error) {
|
|
79
|
-
handle.synchronizer.close(true);
|
|
80
|
-
this.#audioSource.clearQueue();
|
|
81
|
-
}
|
|
82
|
-
if (!firstFrame) {
|
|
83
|
-
this.emit(1 /* PLAYOUT_STOPPED */, handle.interrupted);
|
|
84
|
-
}
|
|
85
|
-
handle.doneFut.resolve();
|
|
86
|
-
this.#logger.child({ speechId: handle.speechId, interrupted: handle.interrupted }).debug("playout finished");
|
|
87
|
-
};
|
|
88
|
-
onCancel(() => {
|
|
89
|
-
cancel();
|
|
90
|
-
});
|
|
91
|
-
if (oldTask) {
|
|
92
|
-
await gracefullyCancel(oldTask);
|
|
93
|
-
}
|
|
94
|
-
if (this.#audioSource.queuedDuration > 0) {
|
|
95
|
-
this.#logger.child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration }).warn("new playout while the source is still playing");
|
|
96
|
-
}
|
|
97
|
-
let firstFrame = true;
|
|
98
|
-
const captureTask = new CancellablePromise(async (resolve2, _2, onCancel2) => {
|
|
99
|
-
let cancelled = false;
|
|
100
|
-
onCancel2(() => {
|
|
101
|
-
cancelled = true;
|
|
102
|
-
});
|
|
103
|
-
for await (const frame of handle.playoutSource) {
|
|
104
|
-
if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
|
|
105
|
-
break;
|
|
106
|
-
}
|
|
107
|
-
if (firstFrame) {
|
|
108
|
-
this.#logger.child({ speechId: handle.speechId }).debug("started playing the first time");
|
|
109
|
-
this.emit(0 /* PLAYOUT_STARTED */);
|
|
110
|
-
handle.synchronizer.segmentPlayoutStarted();
|
|
111
|
-
firstFrame = false;
|
|
112
|
-
}
|
|
113
|
-
handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
|
|
114
|
-
handle.synchronizer.pushAudio(frame);
|
|
115
|
-
await this.#audioSource.captureFrame(frame);
|
|
116
|
-
}
|
|
117
|
-
await this.#audioSource.waitForPlayout();
|
|
118
|
-
handle.synchronizer.close(false);
|
|
119
|
-
resolve2();
|
|
120
|
-
});
|
|
121
|
-
try {
|
|
122
|
-
await Promise.any([captureTask, handle.intFut.await]);
|
|
123
|
-
} finally {
|
|
124
|
-
cancel();
|
|
125
|
-
resolve();
|
|
126
|
-
}
|
|
127
|
-
});
|
|
128
|
-
}
|
|
129
|
-
async close() {
|
|
130
|
-
this.#closed = true;
|
|
131
|
-
await this.#playoutTask;
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
export {
|
|
135
|
-
AgentPlayout,
|
|
136
|
-
AgentPlayoutEvent,
|
|
137
|
-
PlayoutHandle
|
|
138
|
-
};
|
|
139
|
-
//# sourceMappingURL=agent_playout.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n }\n\n await this.#audioSource.waitForPlayout();\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AAEpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAAA,QAC5C;AAEA,cAAM,KAAK,aAAa,eAAe;AAEvC,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
|
|
@@ -1,171 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
-
var __export = (target, all) => {
|
|
7
|
-
for (var name in all)
|
|
8
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
-
};
|
|
10
|
-
var __copyProps = (to, from, except, desc) => {
|
|
11
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
-
for (let key of __getOwnPropNames(from))
|
|
13
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
-
}
|
|
16
|
-
return to;
|
|
17
|
-
};
|
|
18
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
-
var human_input_exports = {};
|
|
20
|
-
__export(human_input_exports, {
|
|
21
|
-
HumanInput: () => HumanInput,
|
|
22
|
-
HumanInputEvent: () => HumanInputEvent
|
|
23
|
-
});
|
|
24
|
-
module.exports = __toCommonJS(human_input_exports);
|
|
25
|
-
var import_rtc_node = require("@livekit/rtc-node");
|
|
26
|
-
var import_node_events = require("node:events");
|
|
27
|
-
var import_log = require("../log.cjs");
|
|
28
|
-
var import_stt = require("../stt/stt.cjs");
|
|
29
|
-
var import_utils = require("../utils.cjs");
|
|
30
|
-
var import_vad = require("../vad.cjs");
|
|
31
|
-
var HumanInputEvent = /* @__PURE__ */ ((HumanInputEvent2) => {
|
|
32
|
-
HumanInputEvent2[HumanInputEvent2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
|
|
33
|
-
HumanInputEvent2[HumanInputEvent2["VAD_INFERENCE_DONE"] = 1] = "VAD_INFERENCE_DONE";
|
|
34
|
-
HumanInputEvent2[HumanInputEvent2["END_OF_SPEECH"] = 2] = "END_OF_SPEECH";
|
|
35
|
-
HumanInputEvent2[HumanInputEvent2["FINAL_TRANSCRIPT"] = 3] = "FINAL_TRANSCRIPT";
|
|
36
|
-
HumanInputEvent2[HumanInputEvent2["INTERIM_TRANSCRIPT"] = 4] = "INTERIM_TRANSCRIPT";
|
|
37
|
-
return HumanInputEvent2;
|
|
38
|
-
})(HumanInputEvent || {});
|
|
39
|
-
class HumanInput extends import_node_events.EventEmitter {
|
|
40
|
-
#closed = false;
|
|
41
|
-
#room;
|
|
42
|
-
#vad;
|
|
43
|
-
#stt;
|
|
44
|
-
#participant;
|
|
45
|
-
#subscribedTrack;
|
|
46
|
-
#recognizeTask;
|
|
47
|
-
#speaking = false;
|
|
48
|
-
#speechProbability = 0;
|
|
49
|
-
#logger = (0, import_log.log)();
|
|
50
|
-
#noiseCancellation;
|
|
51
|
-
constructor(room, vad, stt, participant, noiseCancellation) {
|
|
52
|
-
super();
|
|
53
|
-
this.#room = room;
|
|
54
|
-
this.#vad = vad;
|
|
55
|
-
this.#stt = stt;
|
|
56
|
-
this.#participant = participant;
|
|
57
|
-
this.#noiseCancellation = noiseCancellation;
|
|
58
|
-
this.#room.on(import_rtc_node.RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));
|
|
59
|
-
this.#room.on(import_rtc_node.RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
|
|
60
|
-
this.#subscribeToMicrophone();
|
|
61
|
-
}
|
|
62
|
-
get participant() {
|
|
63
|
-
return this.#participant;
|
|
64
|
-
}
|
|
65
|
-
get subscribedTrack() {
|
|
66
|
-
return this.#subscribedTrack;
|
|
67
|
-
}
|
|
68
|
-
#subscribeToMicrophone() {
|
|
69
|
-
if (!this.#participant) {
|
|
70
|
-
this.#logger.error("Participant is not set");
|
|
71
|
-
return;
|
|
72
|
-
}
|
|
73
|
-
let microphonePublication = void 0;
|
|
74
|
-
for (const publication of this.#participant.trackPublications.values()) {
|
|
75
|
-
if (publication.source === import_rtc_node.TrackSource.SOURCE_MICROPHONE) {
|
|
76
|
-
microphonePublication = publication;
|
|
77
|
-
break;
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
if (!microphonePublication) {
|
|
81
|
-
return;
|
|
82
|
-
}
|
|
83
|
-
if (!microphonePublication.subscribed) {
|
|
84
|
-
microphonePublication.setSubscribed(true);
|
|
85
|
-
}
|
|
86
|
-
const track = microphonePublication.track;
|
|
87
|
-
if (track && track !== this.#subscribedTrack) {
|
|
88
|
-
this.#subscribedTrack = track;
|
|
89
|
-
if (this.#recognizeTask) {
|
|
90
|
-
this.#recognizeTask.cancel();
|
|
91
|
-
}
|
|
92
|
-
const audioStreamOptions = {
|
|
93
|
-
sampleRate: 16e3,
|
|
94
|
-
numChannels: 1,
|
|
95
|
-
...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
|
|
96
|
-
};
|
|
97
|
-
const audioStream = new import_rtc_node.AudioStream(track, audioStreamOptions);
|
|
98
|
-
this.#recognizeTask = new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
|
|
99
|
-
let cancelled = false;
|
|
100
|
-
onCancel(() => {
|
|
101
|
-
cancelled = true;
|
|
102
|
-
});
|
|
103
|
-
const sttStream = this.#stt.stream();
|
|
104
|
-
const vadStream = this.#vad.stream();
|
|
105
|
-
const audioStreamCo = async () => {
|
|
106
|
-
for await (const ev of audioStream) {
|
|
107
|
-
if (cancelled) return;
|
|
108
|
-
sttStream.pushFrame(ev);
|
|
109
|
-
vadStream.pushFrame(ev);
|
|
110
|
-
}
|
|
111
|
-
};
|
|
112
|
-
const vadStreamCo = async () => {
|
|
113
|
-
for await (const ev of vadStream) {
|
|
114
|
-
if (cancelled) return;
|
|
115
|
-
switch (ev.type) {
|
|
116
|
-
case import_vad.VADEventType.START_OF_SPEECH:
|
|
117
|
-
this.#speaking = true;
|
|
118
|
-
this.emit(0 /* START_OF_SPEECH */, ev);
|
|
119
|
-
break;
|
|
120
|
-
case import_vad.VADEventType.INFERENCE_DONE:
|
|
121
|
-
this.#speechProbability = ev.probability;
|
|
122
|
-
this.emit(1 /* VAD_INFERENCE_DONE */, ev);
|
|
123
|
-
break;
|
|
124
|
-
case import_vad.VADEventType.END_OF_SPEECH:
|
|
125
|
-
this.#speaking = false;
|
|
126
|
-
this.emit(2 /* END_OF_SPEECH */, ev);
|
|
127
|
-
break;
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
};
|
|
131
|
-
const sttStreamCo = async () => {
|
|
132
|
-
for await (const ev of sttStream) {
|
|
133
|
-
if (cancelled) return;
|
|
134
|
-
if (ev.type === import_stt.SpeechEventType.FINAL_TRANSCRIPT) {
|
|
135
|
-
this.emit(3 /* FINAL_TRANSCRIPT */, ev);
|
|
136
|
-
} else if (ev.type == import_stt.SpeechEventType.INTERIM_TRANSCRIPT) {
|
|
137
|
-
this.emit(4 /* INTERIM_TRANSCRIPT */, ev);
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
};
|
|
141
|
-
await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);
|
|
142
|
-
sttStream.close();
|
|
143
|
-
vadStream.close();
|
|
144
|
-
resolve();
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
get speaking() {
|
|
149
|
-
return this.#speaking;
|
|
150
|
-
}
|
|
151
|
-
get speakingProbability() {
|
|
152
|
-
return this.#speechProbability;
|
|
153
|
-
}
|
|
154
|
-
async close() {
|
|
155
|
-
if (this.#closed) {
|
|
156
|
-
throw new Error("HumanInput already closed");
|
|
157
|
-
}
|
|
158
|
-
this.#closed = true;
|
|
159
|
-
this.#room.removeAllListeners();
|
|
160
|
-
this.#speaking = false;
|
|
161
|
-
if (this.#recognizeTask) {
|
|
162
|
-
await (0, import_utils.gracefullyCancel)(this.#recognizeTask);
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
// Annotate the CommonJS export names for ESM import in node:
|
|
167
|
-
0 && (module.exports = {
|
|
168
|
-
HumanInput,
|
|
169
|
-
HumanInputEvent
|
|
170
|
-
});
|
|
171
|
-
//# sourceMappingURL=human_input.cjs.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/pipeline/human_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n NoiseCancellationOptions,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport { AudioStream, RoomEvent, TrackSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { log } from '../log.js';\nimport type { STT, SpeechEvent } from '../stt/stt.js';\nimport { SpeechEventType } from '../stt/stt.js';\nimport { CancellablePromise, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport { VADEventType } from '../vad.js';\n\nexport enum HumanInputEvent {\n START_OF_SPEECH,\n VAD_INFERENCE_DONE,\n END_OF_SPEECH,\n FINAL_TRANSCRIPT,\n INTERIM_TRANSCRIPT,\n}\n\nexport type HumanInputCallbacks = {\n [HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;\n [HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;\n [HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;\n};\n\nexport class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInputCallbacks>) {\n #closed = false;\n #room: Room;\n #vad: VAD;\n #stt: STT;\n #participant: RemoteParticipant;\n #subscribedTrack?: RemoteAudioTrack;\n #recognizeTask?: CancellablePromise<void>;\n #speaking = false;\n #speechProbability = 0;\n #logger = log();\n #noiseCancellation?: NoiseCancellationOptions;\n\n constructor(\n room: Room,\n vad: VAD,\n stt: STT,\n participant: RemoteParticipant,\n noiseCancellation?: NoiseCancellationOptions,\n ) {\n super();\n this.#room = room;\n this.#vad = vad;\n this.#stt = stt;\n this.#participant = participant;\n this.#noiseCancellation = noiseCancellation;\n\n this.#room.on(RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));\n this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));\n this.#subscribeToMicrophone();\n }\n\n get participant(): RemoteParticipant {\n return this.#participant;\n }\n\n get subscribedTrack(): RemoteAudioTrack | undefined {\n return this.#subscribedTrack;\n }\n\n #subscribeToMicrophone(): void {\n if (!this.#participant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.#participant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n\n const track = microphonePublication.track;\n if (track && track !== this.#subscribedTrack) {\n this.#subscribedTrack = track;\n if (this.#recognizeTask) {\n this.#recognizeTask.cancel();\n }\n\n const audioStreamOptions = {\n sampleRate: 16000,\n numChannels: 1,\n ...(this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}),\n };\n const audioStream = new AudioStream(track, audioStreamOptions);\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const sttStream = this.#stt.stream();\n const vadStream = this.#vad.stream();\n\n const audioStreamCo = async () => {\n for await (const ev of audioStream) {\n if (cancelled) return;\n sttStream.pushFrame(ev);\n vadStream.pushFrame(ev);\n }\n };\n\n const vadStreamCo = async () => {\n for await (const ev of vadStream) {\n if (cancelled) return;\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.#speaking = true;\n this.emit(HumanInputEvent.START_OF_SPEECH, ev);\n break;\n case VADEventType.INFERENCE_DONE:\n this.#speechProbability = ev.probability;\n this.emit(HumanInputEvent.VAD_INFERENCE_DONE, ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.#speaking = false;\n this.emit(HumanInputEvent.END_OF_SPEECH, ev);\n break;\n }\n }\n };\n\n const sttStreamCo = async () => {\n for await (const ev of sttStream) {\n if (cancelled) return;\n if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {\n this.emit(HumanInputEvent.FINAL_TRANSCRIPT, ev);\n } else if (ev.type == SpeechEventType.INTERIM_TRANSCRIPT) {\n this.emit(HumanInputEvent.INTERIM_TRANSCRIPT, ev);\n }\n }\n };\n\n await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);\n sttStream.close();\n vadStream.close();\n resolve();\n });\n }\n }\n\n get speaking(): boolean {\n return this.#speaking;\n }\n\n get speakingProbability(): number {\n return this.#speechProbability;\n }\n\n async close() {\n if (this.#closed) {\n throw new Error('HumanInput already closed');\n }\n this.#closed = true;\n this.#room.removeAllListeners();\n this.#speaking = false;\n if (this.#recognizeTask) {\n await gracefullyCancel(this.#recognizeTask);\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAUA,sBAAoD;AAEpD,yBAA6B;AAC7B,iBAAoB;AAEpB,iBAAgC;AAChC,mBAAqD;AAErD,iBAA6B;AAEtB,IAAK,kBAAL,kBAAKA,qBAAL;AACL,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AALU,SAAAA;AAAA,GAAA;AAgBL,MAAM,mBAAoB,gCAA6D;AAAA,EAC5F,UAAU;AAAA,EACV;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,qBAAqB;AAAA,EACrB,cAAU,gBAAI;AAAA,EACd;AAAA,EAEA,YACE,MACA,KACA,KACA,aACA,mBACA;AACA,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,qBAAqB;AAE1B,SAAK,MAAM,GAAG,0BAAU,gBAAgB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC9E,SAAK,MAAM,GAAG,0BAAU,iBAAiB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC/E,SAAK,uBAAuB;AAAA,EAC9B;AAAA,EAEA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,kBAAgD;AAClD,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,aAAa,kBAAkB,OAAO,GAAG;AACtE,UAAI,YAAY,WAAW,4BAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAEA,UAAM,QAAQ,sBAAsB;AACpC,QAAI,SAAS,UAAU,KAAK,kBAAkB;AAC5C,WAAK,mBAAmB;AACxB,UAAI,KAAK,gBAAgB;AACvB,aAAK,eAAe,OAAO;AAAA,MAC7B;AAEA,YAAM,qBAAqB;AAAA,QACzB,YAAY;AAAA,QACZ,aAAa;AAAA,QACb,GAAI,KAAK,qBAAqB,EAAE,mBAAmB,KAAK,mBAAmB,IAAI,CAAC;AAAA,MAClF;AACA,YAAM,cAAc,IAAI,4BAAY,OAAO,kBAAkB;AAG7D,WAAK,iBAAiB,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC3E,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,cAAM,YAAY,KAAK,KAAK,OAAO;AACnC,cAAM,YAAY,KAAK,KAAK,OAAO;AAEnC,cAAM,gBAAgB,YAAY;AAChC,2BAAiB,MAAM,aAAa;AAClC,gBAAI,UAAW;AACf,sBAAU,UAAU,EAAE;AACtB,sBAAU,UAAU,EAAE;AAAA,UACxB;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,oBAAQ,GAAG,MAAM;AAAA,cACf,KAAK,wBAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,yBAAiC,EAAE;AAC7C;AAAA,cACF,KAAK,wBAAa;AAChB,qBAAK,qBAAqB,GAAG;AAC7B,qBAAK,KAAK,4BAAoC,EAAE;AAChD;AAAA,cACF,KAAK,wBAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,uBAA+B,EAAE;AAC3C;AAAA,YACJ;AAAA,UACF;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,gBAAI,GAAG,SAAS,2BAAgB,kBAAkB;AAChD,mBAAK,KAAK,0BAAkC,EAAE;AAAA,YAChD,WAAW,GAAG,QAAQ,2BAAgB,oBAAoB;AACxD,mBAAK,KAAK,4BAAoC,EAAE;AAAA,YAClD;AAAA,UACF;AAAA,QACF;AAEA,cAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,YAAY,GAAG,YAAY,CAAC,CAAC;AACjE,kBAAU,MAAM;AAChB,kBAAU,MAAM;AAChB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAA8B;AAChC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AACA,SAAK,UAAU;AACf,SAAK,MAAM,mBAAmB;AAC9B,SAAK,YAAY;AACjB,QAAI,KAAK,gBAAgB;AACvB,gBAAM,+BAAiB,KAAK,cAAc;AAAA,IAC5C;AAAA,EACF;AACF;","names":["HumanInputEvent"]}
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import type { NoiseCancellationOptions, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
|
|
2
|
-
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
3
|
-
import type { STT, SpeechEvent } from '../stt/stt.js';
|
|
4
|
-
import type { VAD, VADEvent } from '../vad.js';
|
|
5
|
-
export declare enum HumanInputEvent {
|
|
6
|
-
START_OF_SPEECH = 0,
|
|
7
|
-
VAD_INFERENCE_DONE = 1,
|
|
8
|
-
END_OF_SPEECH = 2,
|
|
9
|
-
FINAL_TRANSCRIPT = 3,
|
|
10
|
-
INTERIM_TRANSCRIPT = 4
|
|
11
|
-
}
|
|
12
|
-
export type HumanInputCallbacks = {
|
|
13
|
-
[HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;
|
|
14
|
-
[HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;
|
|
15
|
-
[HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;
|
|
16
|
-
[HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;
|
|
17
|
-
[HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;
|
|
18
|
-
};
|
|
19
|
-
declare const HumanInput_base: new () => TypedEmitter<HumanInputCallbacks>;
|
|
20
|
-
export declare class HumanInput extends HumanInput_base {
|
|
21
|
-
#private;
|
|
22
|
-
constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant, noiseCancellation?: NoiseCancellationOptions);
|
|
23
|
-
get participant(): RemoteParticipant;
|
|
24
|
-
get subscribedTrack(): RemoteAudioTrack | undefined;
|
|
25
|
-
get speaking(): boolean;
|
|
26
|
-
get speakingProbability(): number;
|
|
27
|
-
close(): Promise<void>;
|
|
28
|
-
}
|
|
29
|
-
export {};
|
|
30
|
-
//# sourceMappingURL=human_input.d.ts.map
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import type { NoiseCancellationOptions, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
|
|
2
|
-
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
3
|
-
import type { STT, SpeechEvent } from '../stt/stt.js';
|
|
4
|
-
import type { VAD, VADEvent } from '../vad.js';
|
|
5
|
-
export declare enum HumanInputEvent {
|
|
6
|
-
START_OF_SPEECH = 0,
|
|
7
|
-
VAD_INFERENCE_DONE = 1,
|
|
8
|
-
END_OF_SPEECH = 2,
|
|
9
|
-
FINAL_TRANSCRIPT = 3,
|
|
10
|
-
INTERIM_TRANSCRIPT = 4
|
|
11
|
-
}
|
|
12
|
-
export type HumanInputCallbacks = {
|
|
13
|
-
[HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;
|
|
14
|
-
[HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;
|
|
15
|
-
[HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;
|
|
16
|
-
[HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;
|
|
17
|
-
[HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;
|
|
18
|
-
};
|
|
19
|
-
declare const HumanInput_base: new () => TypedEmitter<HumanInputCallbacks>;
|
|
20
|
-
export declare class HumanInput extends HumanInput_base {
|
|
21
|
-
#private;
|
|
22
|
-
constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant, noiseCancellation?: NoiseCancellationOptions);
|
|
23
|
-
get participant(): RemoteParticipant;
|
|
24
|
-
get subscribedTrack(): RemoteAudioTrack | undefined;
|
|
25
|
-
get speaking(): boolean;
|
|
26
|
-
get speakingProbability(): number;
|
|
27
|
-
close(): Promise<void>;
|
|
28
|
-
}
|
|
29
|
-
export {};
|
|
30
|
-
//# sourceMappingURL=human_input.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"human_input.d.ts","sourceRoot":"","sources":["../../src/pipeline/human_input.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,wBAAwB,EACxB,gBAAgB,EAChB,iBAAiB,EAEjB,IAAI,EACL,MAAM,mBAAmB,CAAC;AAE3B,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAGtD,OAAO,KAAK,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAG/C,oBAAY,eAAe;IACzB,eAAe,IAAA;IACf,kBAAkB,IAAA;IAClB,aAAa,IAAA;IACb,gBAAgB,IAAA;IAChB,kBAAkB,IAAA;CACnB;AAED,MAAM,MAAM,mBAAmB,GAAG;IAChC,CAAC,eAAe,CAAC,eAAe,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC7D,CAAC,eAAe,CAAC,kBAAkB,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAChE,CAAC,eAAe,CAAC,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3D,CAAC,eAAe,CAAC,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;IACjE,CAAC,eAAe,CAAC,kBAAkB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;CACpE,CAAC;yCAEyD,aAAa,mBAAmB,CAAC;AAA5F,qBAAa,UAAW,SAAQ,eAA6D;;gBAczF,IAAI,EAAE,IAAI,EACV,GAAG,EAAE,GAAG,EACR,GAAG,EAAE,GAAG,EACR,WAAW,EAAE,iBAAiB,EAC9B,iBAAiB,CAAC,EAAE,wBAAwB;IAc9C,IAAI,WAAW,IAAI,iBAAiB,CAEnC;IAED,IAAI,eAAe,IAAI,gBAAgB,GAAG,SAAS,CAElD;IA8FD,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,IAAI,mBAAmB,IAAI,MAAM,CAEhC;IAEK,KAAK;CAWZ"}
|
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
import { AudioStream, RoomEvent, TrackSource } from "@livekit/rtc-node";
|
|
2
|
-
import { EventEmitter } from "node:events";
|
|
3
|
-
import { log } from "../log.js";
|
|
4
|
-
import { SpeechEventType } from "../stt/stt.js";
|
|
5
|
-
import { CancellablePromise, gracefullyCancel } from "../utils.js";
|
|
6
|
-
import { VADEventType } from "../vad.js";
|
|
7
|
-
var HumanInputEvent = /* @__PURE__ */ ((HumanInputEvent2) => {
|
|
8
|
-
HumanInputEvent2[HumanInputEvent2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
|
|
9
|
-
HumanInputEvent2[HumanInputEvent2["VAD_INFERENCE_DONE"] = 1] = "VAD_INFERENCE_DONE";
|
|
10
|
-
HumanInputEvent2[HumanInputEvent2["END_OF_SPEECH"] = 2] = "END_OF_SPEECH";
|
|
11
|
-
HumanInputEvent2[HumanInputEvent2["FINAL_TRANSCRIPT"] = 3] = "FINAL_TRANSCRIPT";
|
|
12
|
-
HumanInputEvent2[HumanInputEvent2["INTERIM_TRANSCRIPT"] = 4] = "INTERIM_TRANSCRIPT";
|
|
13
|
-
return HumanInputEvent2;
|
|
14
|
-
})(HumanInputEvent || {});
|
|
15
|
-
class HumanInput extends EventEmitter {
|
|
16
|
-
#closed = false;
|
|
17
|
-
#room;
|
|
18
|
-
#vad;
|
|
19
|
-
#stt;
|
|
20
|
-
#participant;
|
|
21
|
-
#subscribedTrack;
|
|
22
|
-
#recognizeTask;
|
|
23
|
-
#speaking = false;
|
|
24
|
-
#speechProbability = 0;
|
|
25
|
-
#logger = log();
|
|
26
|
-
#noiseCancellation;
|
|
27
|
-
constructor(room, vad, stt, participant, noiseCancellation) {
|
|
28
|
-
super();
|
|
29
|
-
this.#room = room;
|
|
30
|
-
this.#vad = vad;
|
|
31
|
-
this.#stt = stt;
|
|
32
|
-
this.#participant = participant;
|
|
33
|
-
this.#noiseCancellation = noiseCancellation;
|
|
34
|
-
this.#room.on(RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));
|
|
35
|
-
this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
|
|
36
|
-
this.#subscribeToMicrophone();
|
|
37
|
-
}
|
|
38
|
-
get participant() {
|
|
39
|
-
return this.#participant;
|
|
40
|
-
}
|
|
41
|
-
get subscribedTrack() {
|
|
42
|
-
return this.#subscribedTrack;
|
|
43
|
-
}
|
|
44
|
-
#subscribeToMicrophone() {
|
|
45
|
-
if (!this.#participant) {
|
|
46
|
-
this.#logger.error("Participant is not set");
|
|
47
|
-
return;
|
|
48
|
-
}
|
|
49
|
-
let microphonePublication = void 0;
|
|
50
|
-
for (const publication of this.#participant.trackPublications.values()) {
|
|
51
|
-
if (publication.source === TrackSource.SOURCE_MICROPHONE) {
|
|
52
|
-
microphonePublication = publication;
|
|
53
|
-
break;
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
if (!microphonePublication) {
|
|
57
|
-
return;
|
|
58
|
-
}
|
|
59
|
-
if (!microphonePublication.subscribed) {
|
|
60
|
-
microphonePublication.setSubscribed(true);
|
|
61
|
-
}
|
|
62
|
-
const track = microphonePublication.track;
|
|
63
|
-
if (track && track !== this.#subscribedTrack) {
|
|
64
|
-
this.#subscribedTrack = track;
|
|
65
|
-
if (this.#recognizeTask) {
|
|
66
|
-
this.#recognizeTask.cancel();
|
|
67
|
-
}
|
|
68
|
-
const audioStreamOptions = {
|
|
69
|
-
sampleRate: 16e3,
|
|
70
|
-
numChannels: 1,
|
|
71
|
-
...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
|
|
72
|
-
};
|
|
73
|
-
const audioStream = new AudioStream(track, audioStreamOptions);
|
|
74
|
-
this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {
|
|
75
|
-
let cancelled = false;
|
|
76
|
-
onCancel(() => {
|
|
77
|
-
cancelled = true;
|
|
78
|
-
});
|
|
79
|
-
const sttStream = this.#stt.stream();
|
|
80
|
-
const vadStream = this.#vad.stream();
|
|
81
|
-
const audioStreamCo = async () => {
|
|
82
|
-
for await (const ev of audioStream) {
|
|
83
|
-
if (cancelled) return;
|
|
84
|
-
sttStream.pushFrame(ev);
|
|
85
|
-
vadStream.pushFrame(ev);
|
|
86
|
-
}
|
|
87
|
-
};
|
|
88
|
-
const vadStreamCo = async () => {
|
|
89
|
-
for await (const ev of vadStream) {
|
|
90
|
-
if (cancelled) return;
|
|
91
|
-
switch (ev.type) {
|
|
92
|
-
case VADEventType.START_OF_SPEECH:
|
|
93
|
-
this.#speaking = true;
|
|
94
|
-
this.emit(0 /* START_OF_SPEECH */, ev);
|
|
95
|
-
break;
|
|
96
|
-
case VADEventType.INFERENCE_DONE:
|
|
97
|
-
this.#speechProbability = ev.probability;
|
|
98
|
-
this.emit(1 /* VAD_INFERENCE_DONE */, ev);
|
|
99
|
-
break;
|
|
100
|
-
case VADEventType.END_OF_SPEECH:
|
|
101
|
-
this.#speaking = false;
|
|
102
|
-
this.emit(2 /* END_OF_SPEECH */, ev);
|
|
103
|
-
break;
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
};
|
|
107
|
-
const sttStreamCo = async () => {
|
|
108
|
-
for await (const ev of sttStream) {
|
|
109
|
-
if (cancelled) return;
|
|
110
|
-
if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {
|
|
111
|
-
this.emit(3 /* FINAL_TRANSCRIPT */, ev);
|
|
112
|
-
} else if (ev.type == SpeechEventType.INTERIM_TRANSCRIPT) {
|
|
113
|
-
this.emit(4 /* INTERIM_TRANSCRIPT */, ev);
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
};
|
|
117
|
-
await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);
|
|
118
|
-
sttStream.close();
|
|
119
|
-
vadStream.close();
|
|
120
|
-
resolve();
|
|
121
|
-
});
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
get speaking() {
|
|
125
|
-
return this.#speaking;
|
|
126
|
-
}
|
|
127
|
-
get speakingProbability() {
|
|
128
|
-
return this.#speechProbability;
|
|
129
|
-
}
|
|
130
|
-
async close() {
|
|
131
|
-
if (this.#closed) {
|
|
132
|
-
throw new Error("HumanInput already closed");
|
|
133
|
-
}
|
|
134
|
-
this.#closed = true;
|
|
135
|
-
this.#room.removeAllListeners();
|
|
136
|
-
this.#speaking = false;
|
|
137
|
-
if (this.#recognizeTask) {
|
|
138
|
-
await gracefullyCancel(this.#recognizeTask);
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
export {
|
|
143
|
-
HumanInput,
|
|
144
|
-
HumanInputEvent
|
|
145
|
-
};
|
|
146
|
-
//# sourceMappingURL=human_input.js.map
|