@livekit/agents 0.7.9 → 1.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_exceptions.cjs +109 -0
- package/dist/_exceptions.cjs.map +1 -0
- package/dist/_exceptions.d.cts +64 -0
- package/dist/_exceptions.d.ts +64 -0
- package/dist/_exceptions.d.ts.map +1 -0
- package/dist/_exceptions.js +80 -0
- package/dist/_exceptions.js.map +1 -0
- package/dist/audio.cjs +10 -3
- package/dist/audio.cjs.map +1 -1
- package/dist/audio.d.cts +2 -0
- package/dist/audio.d.ts +2 -0
- package/dist/audio.d.ts.map +1 -1
- package/dist/audio.js +8 -2
- package/dist/audio.js.map +1 -1
- package/dist/cli.cjs +25 -0
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +25 -0
- package/dist/cli.js.map +1 -1
- package/dist/constants.cjs +6 -3
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +2 -1
- package/dist/constants.d.ts +2 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +4 -2
- package/dist/constants.js.map +1 -1
- package/dist/http_server.cjs.map +1 -1
- package/dist/http_server.d.cts +1 -0
- package/dist/http_server.d.ts +1 -0
- package/dist/http_server.d.ts.map +1 -1
- package/dist/http_server.js.map +1 -1
- package/dist/index.cjs +27 -20
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -10
- package/dist/index.d.ts +13 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +15 -11
- package/dist/index.js.map +1 -1
- package/dist/inference_runner.cjs +0 -1
- package/dist/inference_runner.cjs.map +1 -1
- package/dist/inference_runner.d.cts +2 -3
- package/dist/inference_runner.d.ts +2 -3
- package/dist/inference_runner.d.ts.map +1 -1
- package/dist/inference_runner.js +0 -1
- package/dist/inference_runner.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs +2 -2
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/inference_proc_executor.js +2 -2
- package/dist/ipc/inference_proc_executor.js.map +1 -1
- package/dist/ipc/job_executor.cjs.map +1 -1
- package/dist/ipc/job_executor.js.map +1 -1
- package/dist/ipc/job_proc_executor.cjs +1 -0
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.js +1 -0
- package/dist/ipc/job_proc_executor.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +1 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.d.cts +1 -1
- package/dist/ipc/supervised_proc.d.ts +1 -1
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/job.cjs +14 -2
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +8 -0
- package/dist/job.d.ts +8 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +12 -1
- package/dist/job.js.map +1 -1
- package/dist/llm/chat_context.cjs +332 -82
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +152 -48
- package/dist/llm/chat_context.d.ts +152 -48
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +327 -81
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/chat_context.test.cjs +380 -0
- package/dist/llm/chat_context.test.cjs.map +1 -0
- package/dist/llm/chat_context.test.js +385 -0
- package/dist/llm/chat_context.test.js.map +1 -0
- package/dist/llm/index.cjs +37 -8
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +7 -3
- package/dist/llm/index.d.ts +7 -3
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +39 -9
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +97 -33
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +50 -24
- package/dist/llm/llm.d.ts +50 -24
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +98 -33
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/google.cjs +128 -0
- package/dist/llm/provider_format/google.cjs.map +1 -0
- package/dist/llm/provider_format/google.d.cts +6 -0
- package/dist/llm/provider_format/google.d.ts +6 -0
- package/dist/llm/provider_format/google.d.ts.map +1 -0
- package/dist/llm/provider_format/google.js +104 -0
- package/dist/llm/provider_format/google.js.map +1 -0
- package/dist/llm/provider_format/google.test.cjs +676 -0
- package/dist/llm/provider_format/google.test.cjs.map +1 -0
- package/dist/llm/provider_format/google.test.js +675 -0
- package/dist/llm/provider_format/google.test.js.map +1 -0
- package/dist/llm/provider_format/index.cjs +40 -0
- package/dist/llm/provider_format/index.cjs.map +1 -0
- package/dist/llm/provider_format/index.d.cts +4 -0
- package/dist/llm/provider_format/index.d.ts +4 -0
- package/dist/llm/provider_format/index.d.ts.map +1 -0
- package/dist/llm/provider_format/index.js +16 -0
- package/dist/llm/provider_format/index.js.map +1 -0
- package/dist/llm/provider_format/openai.cjs +116 -0
- package/dist/llm/provider_format/openai.cjs.map +1 -0
- package/dist/llm/provider_format/openai.d.cts +3 -0
- package/dist/llm/provider_format/openai.d.ts +3 -0
- package/dist/llm/provider_format/openai.d.ts.map +1 -0
- package/dist/llm/provider_format/openai.js +92 -0
- package/dist/llm/provider_format/openai.js.map +1 -0
- package/dist/llm/provider_format/openai.test.cjs +490 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -0
- package/dist/llm/provider_format/openai.test.js +489 -0
- package/dist/llm/provider_format/openai.test.js.map +1 -0
- package/dist/llm/provider_format/utils.cjs +146 -0
- package/dist/llm/provider_format/utils.cjs.map +1 -0
- package/dist/llm/provider_format/utils.d.cts +38 -0
- package/dist/llm/provider_format/utils.d.ts +38 -0
- package/dist/llm/provider_format/utils.d.ts.map +1 -0
- package/dist/llm/provider_format/utils.js +122 -0
- package/dist/llm/provider_format/utils.js.map +1 -0
- package/dist/llm/realtime.cjs +77 -0
- package/dist/llm/realtime.cjs.map +1 -0
- package/dist/llm/realtime.d.cts +98 -0
- package/dist/llm/realtime.d.ts +98 -0
- package/dist/llm/realtime.d.ts.map +1 -0
- package/dist/llm/realtime.js +52 -0
- package/dist/llm/realtime.js.map +1 -0
- package/dist/llm/remote_chat_context.cjs +112 -0
- package/dist/llm/remote_chat_context.cjs.map +1 -0
- package/dist/llm/remote_chat_context.d.cts +23 -0
- package/dist/llm/remote_chat_context.d.ts +23 -0
- package/dist/llm/remote_chat_context.d.ts.map +1 -0
- package/dist/llm/remote_chat_context.js +88 -0
- package/dist/llm/remote_chat_context.js.map +1 -0
- package/dist/llm/remote_chat_context.test.cjs +225 -0
- package/dist/llm/remote_chat_context.test.cjs.map +1 -0
- package/dist/llm/remote_chat_context.test.js +224 -0
- package/dist/llm/remote_chat_context.test.js.map +1 -0
- package/dist/llm/tool_context.cjs +111 -0
- package/dist/llm/tool_context.cjs.map +1 -0
- package/dist/llm/tool_context.d.cts +125 -0
- package/dist/llm/tool_context.d.ts +125 -0
- package/dist/llm/tool_context.d.ts.map +1 -0
- package/dist/llm/tool_context.js +80 -0
- package/dist/llm/tool_context.js.map +1 -0
- package/dist/llm/tool_context.test.cjs +162 -0
- package/dist/llm/tool_context.test.cjs.map +1 -0
- package/dist/llm/tool_context.test.js +161 -0
- package/dist/llm/tool_context.test.js.map +1 -0
- package/dist/llm/tool_context.type.test.cjs +92 -0
- package/dist/llm/tool_context.type.test.cjs.map +1 -0
- package/dist/llm/tool_context.type.test.js +91 -0
- package/dist/llm/tool_context.type.test.js.map +1 -0
- package/dist/llm/utils.cjs +260 -0
- package/dist/llm/utils.cjs.map +1 -0
- package/dist/llm/utils.d.cts +42 -0
- package/dist/llm/utils.d.ts +42 -0
- package/dist/llm/utils.d.ts.map +1 -0
- package/dist/llm/utils.js +223 -0
- package/dist/llm/utils.js.map +1 -0
- package/dist/llm/utils.test.cjs +513 -0
- package/dist/llm/utils.test.cjs.map +1 -0
- package/dist/llm/utils.test.js +490 -0
- package/dist/llm/utils.test.js.map +1 -0
- package/dist/metrics/base.cjs +0 -27
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +105 -63
- package/dist/metrics/base.d.ts +105 -63
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/base.js +0 -19
- package/dist/metrics/base.js.map +1 -1
- package/dist/metrics/index.cjs +0 -3
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -3
- package/dist/metrics/index.d.ts +2 -3
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +0 -2
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/usage_collector.cjs +17 -12
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +3 -2
- package/dist/metrics/usage_collector.d.ts +3 -2
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +17 -12
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +22 -59
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.cts +1 -8
- package/dist/metrics/utils.d.ts +1 -8
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +22 -52
- package/dist/metrics/utils.js.map +1 -1
- package/dist/multimodal/index.cjs +0 -2
- package/dist/multimodal/index.cjs.map +1 -1
- package/dist/multimodal/index.d.cts +0 -1
- package/dist/multimodal/index.d.ts +0 -1
- package/dist/multimodal/index.d.ts.map +1 -1
- package/dist/multimodal/index.js +0 -1
- package/dist/multimodal/index.js.map +1 -1
- package/dist/plugin.cjs +24 -8
- package/dist/plugin.cjs.map +1 -1
- package/dist/plugin.d.cts +18 -4
- package/dist/plugin.d.ts +18 -4
- package/dist/plugin.d.ts.map +1 -1
- package/dist/plugin.js +22 -7
- package/dist/plugin.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +98 -0
- package/dist/stream/deferred_stream.cjs.map +1 -0
- package/dist/stream/deferred_stream.d.cts +27 -0
- package/dist/stream/deferred_stream.d.ts +27 -0
- package/dist/stream/deferred_stream.d.ts.map +1 -0
- package/dist/stream/deferred_stream.js +73 -0
- package/dist/stream/deferred_stream.js.map +1 -0
- package/dist/stream/deferred_stream.test.cjs +527 -0
- package/dist/stream/deferred_stream.test.cjs.map +1 -0
- package/dist/stream/deferred_stream.test.js +526 -0
- package/dist/stream/deferred_stream.test.js.map +1 -0
- package/dist/stream/identity_transform.cjs +42 -0
- package/dist/stream/identity_transform.cjs.map +1 -0
- package/dist/stream/identity_transform.d.cts +6 -0
- package/dist/stream/identity_transform.d.ts +6 -0
- package/dist/stream/identity_transform.d.ts.map +1 -0
- package/dist/stream/identity_transform.js +18 -0
- package/dist/stream/identity_transform.js.map +1 -0
- package/dist/stream/identity_transform.test.cjs +125 -0
- package/dist/stream/identity_transform.test.cjs.map +1 -0
- package/dist/stream/identity_transform.test.js +124 -0
- package/dist/stream/identity_transform.test.js.map +1 -0
- package/dist/stream/index.cjs +38 -0
- package/dist/stream/index.cjs.map +1 -0
- package/dist/stream/index.d.cts +5 -0
- package/dist/stream/index.d.ts +5 -0
- package/dist/stream/index.d.ts.map +1 -0
- package/dist/stream/index.js +11 -0
- package/dist/stream/index.js.map +1 -0
- package/dist/stream/merge_readable_streams.cjs +59 -0
- package/dist/stream/merge_readable_streams.cjs.map +1 -0
- package/dist/stream/merge_readable_streams.d.cts +4 -0
- package/dist/stream/merge_readable_streams.d.ts +4 -0
- package/dist/stream/merge_readable_streams.d.ts.map +1 -0
- package/dist/stream/merge_readable_streams.js +35 -0
- package/dist/stream/merge_readable_streams.js.map +1 -0
- package/dist/stream/stream_channel.cjs +47 -0
- package/dist/stream/stream_channel.cjs.map +1 -0
- package/dist/stream/stream_channel.d.cts +9 -0
- package/dist/stream/stream_channel.d.ts +9 -0
- package/dist/stream/stream_channel.d.ts.map +1 -0
- package/dist/stream/stream_channel.js +23 -0
- package/dist/stream/stream_channel.js.map +1 -0
- package/dist/stream/stream_channel.test.cjs +97 -0
- package/dist/stream/stream_channel.test.cjs.map +1 -0
- package/dist/stream/stream_channel.test.js +96 -0
- package/dist/stream/stream_channel.test.js.map +1 -0
- package/dist/stt/stream_adapter.cjs +3 -4
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.cts +1 -0
- package/dist/stt/stream_adapter.d.ts +1 -0
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +3 -4
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/stt/stt.cjs +100 -10
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +26 -5
- package/dist/stt/stt.d.ts +26 -5
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +101 -11
- package/dist/stt/stt.js.map +1 -1
- package/dist/tokenize/basic/basic.cjs +10 -5
- package/dist/tokenize/basic/basic.cjs.map +1 -1
- package/dist/tokenize/basic/basic.d.cts +7 -1
- package/dist/tokenize/basic/basic.d.ts +7 -1
- package/dist/tokenize/basic/basic.d.ts.map +1 -1
- package/dist/tokenize/basic/basic.js +10 -5
- package/dist/tokenize/basic/basic.js.map +1 -1
- package/dist/tokenize/basic/sentence.cjs +14 -6
- package/dist/tokenize/basic/sentence.cjs.map +1 -1
- package/dist/tokenize/basic/sentence.d.cts +1 -1
- package/dist/tokenize/basic/sentence.d.ts +1 -1
- package/dist/tokenize/basic/sentence.d.ts.map +1 -1
- package/dist/tokenize/basic/sentence.js +14 -6
- package/dist/tokenize/basic/sentence.js.map +1 -1
- package/dist/tokenize/token_stream.cjs +5 -3
- package/dist/tokenize/token_stream.cjs.map +1 -1
- package/dist/tokenize/token_stream.d.cts +1 -0
- package/dist/tokenize/token_stream.d.ts +1 -0
- package/dist/tokenize/token_stream.d.ts.map +1 -1
- package/dist/tokenize/token_stream.js +6 -4
- package/dist/tokenize/token_stream.js.map +1 -1
- package/dist/transcription.cjs +1 -2
- package/dist/transcription.cjs.map +1 -1
- package/dist/transcription.d.ts.map +1 -1
- package/dist/transcription.js +2 -3
- package/dist/transcription.js.map +1 -1
- package/dist/tts/index.cjs +2 -4
- package/dist/tts/index.cjs.map +1 -1
- package/dist/tts/index.d.cts +1 -1
- package/dist/tts/index.d.ts +1 -1
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +1 -3
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +26 -13
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.cts +1 -1
- package/dist/tts/stream_adapter.d.ts +1 -1
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +27 -14
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +156 -25
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +29 -5
- package/dist/tts/tts.d.ts +29 -5
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +156 -24
- package/dist/tts/tts.js.map +1 -1
- package/dist/types.cjs +60 -0
- package/dist/types.cjs.map +1 -0
- package/dist/types.d.cts +13 -0
- package/dist/types.d.ts +13 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +35 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.cjs +298 -27
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +145 -9
- package/dist/utils.d.ts +145 -9
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +281 -26
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +491 -0
- package/dist/utils.test.cjs.map +1 -0
- package/dist/utils.test.js +498 -0
- package/dist/utils.test.js.map +1 -0
- package/dist/vad.cjs +76 -20
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +25 -5
- package/dist/vad.d.ts +25 -5
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +76 -20
- package/dist/vad.js.map +1 -1
- package/dist/voice/agent.cjs +245 -0
- package/dist/voice/agent.cjs.map +1 -0
- package/dist/voice/agent.d.cts +78 -0
- package/dist/voice/agent.d.ts +78 -0
- package/dist/voice/agent.d.ts.map +1 -0
- package/dist/voice/agent.js +220 -0
- package/dist/voice/agent.js.map +1 -0
- package/dist/voice/agent.test.cjs +61 -0
- package/dist/voice/agent.test.cjs.map +1 -0
- package/dist/voice/agent.test.js +60 -0
- package/dist/voice/agent.test.js.map +1 -0
- package/dist/voice/agent_activity.cjs +1453 -0
- package/dist/voice/agent_activity.cjs.map +1 -0
- package/dist/voice/agent_activity.d.cts +94 -0
- package/dist/voice/agent_activity.d.ts +94 -0
- package/dist/voice/agent_activity.d.ts.map +1 -0
- package/dist/voice/agent_activity.js +1449 -0
- package/dist/voice/agent_activity.js.map +1 -0
- package/dist/voice/agent_session.cjs +312 -0
- package/dist/voice/agent_session.cjs.map +1 -0
- package/dist/voice/agent_session.d.cts +121 -0
- package/dist/voice/agent_session.d.ts +121 -0
- package/dist/voice/agent_session.d.ts.map +1 -0
- package/dist/voice/agent_session.js +295 -0
- package/dist/voice/agent_session.js.map +1 -0
- package/dist/voice/audio_recognition.cjs +374 -0
- package/dist/voice/audio_recognition.cjs.map +1 -0
- package/dist/voice/audio_recognition.d.cts +80 -0
- package/dist/voice/audio_recognition.d.ts +80 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -0
- package/dist/voice/audio_recognition.js +350 -0
- package/dist/voice/audio_recognition.js.map +1 -0
- package/dist/voice/events.cjs +145 -0
- package/dist/voice/events.cjs.map +1 -0
- package/dist/voice/events.d.cts +124 -0
- package/dist/voice/events.d.ts +124 -0
- package/dist/voice/events.d.ts.map +1 -0
- package/dist/voice/events.js +110 -0
- package/dist/voice/events.js.map +1 -0
- package/dist/voice/generation.cjs +700 -0
- package/dist/voice/generation.cjs.map +1 -0
- package/dist/voice/generation.d.cts +115 -0
- package/dist/voice/generation.d.ts +115 -0
- package/dist/voice/generation.d.ts.map +1 -0
- package/dist/voice/generation.js +672 -0
- package/dist/voice/generation.js.map +1 -0
- package/dist/voice/index.cjs +40 -0
- package/dist/voice/index.cjs.map +1 -0
- package/dist/voice/index.d.cts +5 -0
- package/dist/voice/index.d.ts +5 -0
- package/dist/voice/index.d.ts.map +1 -0
- package/dist/voice/index.js +11 -0
- package/dist/voice/index.js.map +1 -0
- package/dist/voice/io.cjs +245 -0
- package/dist/voice/io.cjs.map +1 -0
- package/dist/voice/io.d.cts +101 -0
- package/dist/voice/io.d.ts +101 -0
- package/dist/voice/io.d.ts.map +1 -0
- package/dist/voice/io.js +217 -0
- package/dist/voice/io.js.map +1 -0
- package/dist/voice/room_io/_input.cjs +121 -0
- package/dist/voice/room_io/_input.cjs.map +1 -0
- package/dist/voice/room_io/_input.d.cts +24 -0
- package/dist/voice/room_io/_input.d.ts +24 -0
- package/dist/voice/room_io/_input.d.ts.map +1 -0
- package/dist/voice/room_io/_input.js +102 -0
- package/dist/voice/room_io/_input.js.map +1 -0
- package/dist/voice/room_io/_output.cjs +358 -0
- package/dist/voice/room_io/_output.cjs.map +1 -0
- package/dist/voice/room_io/_output.d.cts +75 -0
- package/dist/voice/room_io/_output.d.ts +75 -0
- package/dist/voice/room_io/_output.d.ts.map +1 -0
- package/dist/voice/room_io/_output.js +342 -0
- package/dist/voice/room_io/_output.js.map +1 -0
- package/dist/voice/room_io/index.cjs +25 -0
- package/dist/voice/room_io/index.cjs.map +1 -0
- package/dist/voice/room_io/index.d.cts +3 -0
- package/dist/voice/room_io/index.d.ts +3 -0
- package/dist/voice/room_io/index.d.ts.map +1 -0
- package/dist/voice/room_io/index.js +3 -0
- package/dist/voice/room_io/index.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +370 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -0
- package/dist/voice/room_io/room_io.d.cts +73 -0
- package/dist/voice/room_io/room_io.d.ts +73 -0
- package/dist/voice/room_io/room_io.d.ts.map +1 -0
- package/dist/voice/room_io/room_io.js +361 -0
- package/dist/voice/room_io/room_io.js.map +1 -0
- package/dist/{pipeline/index.cjs → voice/run_context.cjs} +16 -11
- package/dist/voice/run_context.cjs.map +1 -0
- package/dist/voice/run_context.d.cts +12 -0
- package/dist/voice/run_context.d.ts +12 -0
- package/dist/voice/run_context.d.ts.map +1 -0
- package/dist/voice/run_context.js +14 -0
- package/dist/voice/run_context.js.map +1 -0
- package/dist/voice/speech_handle.cjs +105 -0
- package/dist/voice/speech_handle.cjs.map +1 -0
- package/dist/voice/speech_handle.d.cts +46 -0
- package/dist/voice/speech_handle.d.ts +46 -0
- package/dist/voice/speech_handle.d.ts.map +1 -0
- package/dist/voice/speech_handle.js +81 -0
- package/dist/voice/speech_handle.js.map +1 -0
- package/dist/voice/transcription/_utils.cjs +45 -0
- package/dist/voice/transcription/_utils.cjs.map +1 -0
- package/dist/voice/transcription/_utils.d.cts +3 -0
- package/dist/voice/transcription/_utils.d.ts +3 -0
- package/dist/voice/transcription/_utils.d.ts.map +1 -0
- package/dist/voice/transcription/_utils.js +21 -0
- package/dist/voice/transcription/_utils.js.map +1 -0
- package/dist/voice/transcription/index.cjs +23 -0
- package/dist/voice/transcription/index.cjs.map +1 -0
- package/dist/voice/transcription/index.d.cts +2 -0
- package/dist/voice/transcription/index.d.ts +2 -0
- package/dist/voice/transcription/index.d.ts.map +1 -0
- package/dist/voice/transcription/index.js +2 -0
- package/dist/voice/transcription/index.js.map +1 -0
- package/dist/voice/transcription/synchronizer.cjs +379 -0
- package/dist/voice/transcription/synchronizer.cjs.map +1 -0
- package/dist/voice/transcription/synchronizer.d.cts +86 -0
- package/dist/voice/transcription/synchronizer.d.ts +86 -0
- package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
- package/dist/voice/transcription/synchronizer.js +354 -0
- package/dist/voice/transcription/synchronizer.js.map +1 -0
- package/dist/worker.cjs +22 -4
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.cts +1 -1
- package/dist/worker.d.ts +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +22 -4
- package/dist/worker.js.map +1 -1
- package/package.json +8 -2
- package/src/_exceptions.ts +137 -0
- package/src/audio.ts +12 -1
- package/src/cli.ts +37 -0
- package/src/constants.ts +2 -1
- package/src/http_server.ts +1 -0
- package/src/index.ts +13 -10
- package/src/inference_runner.ts +2 -3
- package/src/ipc/inference_proc_executor.ts +2 -2
- package/src/ipc/job_executor.ts +1 -1
- package/src/ipc/job_proc_executor.ts +1 -1
- package/src/ipc/job_proc_lazy_main.ts +1 -1
- package/src/job.ts +18 -0
- package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
- package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
- package/src/llm/__snapshots__/utils.test.ts.snap +65 -0
- package/src/llm/chat_context.test.ts +450 -0
- package/src/llm/chat_context.ts +501 -103
- package/src/llm/index.ts +53 -18
- package/src/llm/llm.ts +148 -50
- package/src/llm/provider_format/google.test.ts +772 -0
- package/src/llm/provider_format/google.ts +130 -0
- package/src/llm/provider_format/index.ts +23 -0
- package/src/llm/provider_format/openai.test.ts +581 -0
- package/src/llm/provider_format/openai.ts +118 -0
- package/src/llm/provider_format/utils.ts +183 -0
- package/src/llm/realtime.ts +151 -0
- package/src/llm/remote_chat_context.test.ts +290 -0
- package/src/llm/remote_chat_context.ts +114 -0
- package/src/llm/tool_context.test.ts +198 -0
- package/src/llm/tool_context.ts +259 -0
- package/src/llm/tool_context.type.test.ts +115 -0
- package/src/llm/utils.test.ts +670 -0
- package/src/llm/utils.ts +324 -0
- package/src/metrics/base.ts +110 -78
- package/src/metrics/index.ts +3 -9
- package/src/metrics/usage_collector.ts +19 -13
- package/src/metrics/utils.ts +24 -69
- package/src/multimodal/index.ts +0 -1
- package/src/plugin.ts +26 -8
- package/src/stream/deferred_stream.test.ts +755 -0
- package/src/stream/deferred_stream.ts +110 -0
- package/src/stream/identity_transform.test.ts +179 -0
- package/src/stream/identity_transform.ts +18 -0
- package/src/stream/index.ts +7 -0
- package/src/stream/merge_readable_streams.ts +40 -0
- package/src/stream/stream_channel.test.ts +129 -0
- package/src/stream/stream_channel.ts +32 -0
- package/src/stt/stream_adapter.ts +3 -5
- package/src/stt/stt.ts +134 -17
- package/src/tokenize/basic/basic.ts +13 -5
- package/src/tokenize/basic/sentence.ts +20 -6
- package/src/tokenize/token_stream.ts +7 -4
- package/src/transcription.ts +2 -3
- package/src/tts/index.ts +0 -1
- package/src/tts/stream_adapter.ts +42 -16
- package/src/tts/tts.ts +202 -21
- package/src/types.ts +42 -0
- package/src/utils.test.ts +658 -0
- package/src/utils.ts +402 -44
- package/src/vad.ts +90 -22
- package/src/voice/agent.test.ts +80 -0
- package/src/voice/agent.ts +332 -0
- package/src/voice/agent_activity.ts +1913 -0
- package/src/voice/agent_session.ts +460 -0
- package/src/voice/audio_recognition.ts +473 -0
- package/src/voice/events.ts +252 -0
- package/src/voice/generation.ts +881 -0
- package/src/voice/index.ts +7 -0
- package/src/voice/io.ts +304 -0
- package/src/voice/room_io/_input.ts +144 -0
- package/src/voice/room_io/_output.ts +436 -0
- package/src/voice/room_io/index.ts +5 -0
- package/src/voice/room_io/room_io.ts +495 -0
- package/src/voice/run_context.ts +20 -0
- package/src/voice/speech_handle.ts +104 -0
- package/src/voice/transcription/_utils.ts +25 -0
- package/src/voice/transcription/index.ts +4 -0
- package/src/voice/transcription/synchronizer.ts +477 -0
- package/src/worker.ts +22 -2
- package/dist/llm/function_context.cjs +0 -103
- package/dist/llm/function_context.cjs.map +0 -1
- package/dist/llm/function_context.d.cts +0 -47
- package/dist/llm/function_context.d.ts +0 -47
- package/dist/llm/function_context.d.ts.map +0 -1
- package/dist/llm/function_context.js +0 -78
- package/dist/llm/function_context.js.map +0 -1
- package/dist/llm/function_context.test.cjs +0 -218
- package/dist/llm/function_context.test.cjs.map +0 -1
- package/dist/llm/function_context.test.js +0 -217
- package/dist/llm/function_context.test.js.map +0 -1
- package/dist/multimodal/multimodal_agent.cjs +0 -486
- package/dist/multimodal/multimodal_agent.cjs.map +0 -1
- package/dist/multimodal/multimodal_agent.d.cts +0 -48
- package/dist/multimodal/multimodal_agent.d.ts +0 -48
- package/dist/multimodal/multimodal_agent.d.ts.map +0 -1
- package/dist/multimodal/multimodal_agent.js +0 -461
- package/dist/multimodal/multimodal_agent.js.map +0 -1
- package/dist/pipeline/agent_output.cjs +0 -197
- package/dist/pipeline/agent_output.cjs.map +0 -1
- package/dist/pipeline/agent_output.d.cts +0 -33
- package/dist/pipeline/agent_output.d.ts +0 -33
- package/dist/pipeline/agent_output.d.ts.map +0 -1
- package/dist/pipeline/agent_output.js +0 -172
- package/dist/pipeline/agent_output.js.map +0 -1
- package/dist/pipeline/agent_playout.cjs +0 -175
- package/dist/pipeline/agent_playout.cjs.map +0 -1
- package/dist/pipeline/agent_playout.d.cts +0 -40
- package/dist/pipeline/agent_playout.d.ts +0 -40
- package/dist/pipeline/agent_playout.d.ts.map +0 -1
- package/dist/pipeline/agent_playout.js +0 -139
- package/dist/pipeline/agent_playout.js.map +0 -1
- package/dist/pipeline/human_input.cjs +0 -171
- package/dist/pipeline/human_input.cjs.map +0 -1
- package/dist/pipeline/human_input.d.cts +0 -30
- package/dist/pipeline/human_input.d.ts +0 -30
- package/dist/pipeline/human_input.d.ts.map +0 -1
- package/dist/pipeline/human_input.js +0 -146
- package/dist/pipeline/human_input.js.map +0 -1
- package/dist/pipeline/index.cjs.map +0 -1
- package/dist/pipeline/index.d.cts +0 -2
- package/dist/pipeline/index.d.ts +0 -2
- package/dist/pipeline/index.d.ts.map +0 -1
- package/dist/pipeline/index.js +0 -11
- package/dist/pipeline/index.js.map +0 -1
- package/dist/pipeline/pipeline_agent.cjs +0 -859
- package/dist/pipeline/pipeline_agent.cjs.map +0 -1
- package/dist/pipeline/pipeline_agent.d.cts +0 -150
- package/dist/pipeline/pipeline_agent.d.ts +0 -150
- package/dist/pipeline/pipeline_agent.d.ts.map +0 -1
- package/dist/pipeline/pipeline_agent.js +0 -837
- package/dist/pipeline/pipeline_agent.js.map +0 -1
- package/dist/pipeline/speech_handle.cjs +0 -176
- package/dist/pipeline/speech_handle.cjs.map +0 -1
- package/dist/pipeline/speech_handle.d.cts +0 -37
- package/dist/pipeline/speech_handle.d.ts +0 -37
- package/dist/pipeline/speech_handle.d.ts.map +0 -1
- package/dist/pipeline/speech_handle.js +0 -152
- package/dist/pipeline/speech_handle.js.map +0 -1
- package/src/llm/function_context.test.ts +0 -248
- package/src/llm/function_context.ts +0 -142
- package/src/multimodal/multimodal_agent.ts +0 -592
- package/src/pipeline/agent_output.ts +0 -219
- package/src/pipeline/agent_playout.ts +0 -192
- package/src/pipeline/human_input.ts +0 -188
- package/src/pipeline/index.ts +0 -15
- package/src/pipeline/pipeline_agent.ts +0 -1197
- package/src/pipeline/speech_handle.ts +0 -201
package/src/stt/stt.ts
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import type
|
|
4
|
+
import { type AudioFrame, AudioResampler } from '@livekit/rtc-node';
|
|
5
5
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
6
6
|
import { EventEmitter } from 'node:events';
|
|
7
|
+
import type { ReadableStream } from 'node:stream/web';
|
|
8
|
+
import { APIConnectionError, APIError } from '../_exceptions.js';
|
|
9
|
+
import { calculateAudioDuration } from '../audio.js';
|
|
10
|
+
import { log } from '../log.js';
|
|
7
11
|
import type { STTMetrics } from '../metrics/base.js';
|
|
12
|
+
import { DeferredReadableStream } from '../stream/deferred_stream.js';
|
|
13
|
+
import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
8
14
|
import type { AudioBuffer } from '../utils.js';
|
|
9
|
-
import { AsyncIterableQueue } from '../utils.js';
|
|
15
|
+
import { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';
|
|
10
16
|
|
|
11
17
|
/** Indicates start/middle/end of speech */
|
|
12
18
|
export enum SpeechEventType {
|
|
@@ -32,7 +38,6 @@ export enum SpeechEventType {
|
|
|
32
38
|
END_OF_SPEECH = 3,
|
|
33
39
|
/** Usage event, emitted periodically to indicate usage metrics. */
|
|
34
40
|
RECOGNITION_USAGE = 4,
|
|
35
|
-
METRICS_COLLECTED = 5,
|
|
36
41
|
}
|
|
37
42
|
|
|
38
43
|
/** SpeechData contains metadata about this {@link SpeechEvent}. */
|
|
@@ -67,8 +72,17 @@ export interface STTCapabilities {
|
|
|
67
72
|
interimResults: boolean;
|
|
68
73
|
}
|
|
69
74
|
|
|
75
|
+
export interface STTError {
|
|
76
|
+
type: 'stt_error';
|
|
77
|
+
timestamp: number;
|
|
78
|
+
label: string;
|
|
79
|
+
error: Error;
|
|
80
|
+
recoverable: boolean;
|
|
81
|
+
}
|
|
82
|
+
|
|
70
83
|
export type STTCallbacks = {
|
|
71
|
-
[
|
|
84
|
+
['metrics_collected']: (metrics: STTMetrics) => void;
|
|
85
|
+
['error']: (error: STTError) => void;
|
|
72
86
|
};
|
|
73
87
|
|
|
74
88
|
/**
|
|
@@ -97,19 +111,17 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCal
|
|
|
97
111
|
const startTime = process.hrtime.bigint();
|
|
98
112
|
const event = await this._recognize(frame);
|
|
99
113
|
const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));
|
|
100
|
-
this.emit(
|
|
114
|
+
this.emit('metrics_collected', {
|
|
115
|
+
type: 'stt_metrics',
|
|
101
116
|
requestId: event.requestId ?? '',
|
|
102
117
|
timestamp: Date.now(),
|
|
103
118
|
duration,
|
|
104
119
|
label: this.label,
|
|
105
|
-
audioDuration:
|
|
106
|
-
? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)
|
|
107
|
-
: frame.samplesPerChannel / frame.sampleRate,
|
|
120
|
+
audioDuration: calculateAudioDuration(frame),
|
|
108
121
|
streamed: false,
|
|
109
122
|
});
|
|
110
123
|
return event;
|
|
111
124
|
}
|
|
112
|
-
|
|
113
125
|
protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;
|
|
114
126
|
|
|
115
127
|
/**
|
|
@@ -140,35 +152,126 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
|
|
|
140
152
|
protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();
|
|
141
153
|
protected output = new AsyncIterableQueue<SpeechEvent>();
|
|
142
154
|
protected queue = new AsyncIterableQueue<SpeechEvent>();
|
|
155
|
+
protected neededSampleRate?: number;
|
|
156
|
+
protected resampler?: AudioResampler;
|
|
143
157
|
abstract label: string;
|
|
144
158
|
protected closed = false;
|
|
145
159
|
#stt: STT;
|
|
160
|
+
private deferredInputStream: DeferredReadableStream<AudioFrame>;
|
|
161
|
+
private logger = log();
|
|
162
|
+
private _connOptions: APIConnectOptions;
|
|
146
163
|
|
|
147
|
-
constructor(
|
|
164
|
+
constructor(
|
|
165
|
+
stt: STT,
|
|
166
|
+
sampleRate?: number,
|
|
167
|
+
connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
|
|
168
|
+
) {
|
|
148
169
|
this.#stt = stt;
|
|
170
|
+
this._connOptions = connectionOptions;
|
|
171
|
+
this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
|
|
172
|
+
this.neededSampleRate = sampleRate;
|
|
149
173
|
this.monitorMetrics();
|
|
174
|
+
this.pumpInput();
|
|
175
|
+
|
|
176
|
+
// this is a hack to imitate asyncio.create_task so that mainTask
|
|
177
|
+
// is run **after** the constructor has finished. Otherwise we get
|
|
178
|
+
// runtime error when trying to access class variables in the
|
|
179
|
+
// `run` method.
|
|
180
|
+
startSoon(() => this.mainTask().then(() => this.queue.close()));
|
|
150
181
|
}
|
|
151
182
|
|
|
152
|
-
|
|
153
|
-
|
|
183
|
+
private async mainTask() {
|
|
184
|
+
for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {
|
|
185
|
+
try {
|
|
186
|
+
return await this.run();
|
|
187
|
+
} catch (error) {
|
|
188
|
+
if (error instanceof APIError) {
|
|
189
|
+
const retryInterval = this._connOptions._intervalForRetry(i);
|
|
190
|
+
|
|
191
|
+
if (this._connOptions.maxRetry === 0 || !error.retryable) {
|
|
192
|
+
this.emitError({ error, recoverable: false });
|
|
193
|
+
throw error;
|
|
194
|
+
} else if (i === this._connOptions.maxRetry) {
|
|
195
|
+
this.emitError({ error, recoverable: false });
|
|
196
|
+
throw new APIConnectionError({
|
|
197
|
+
message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,
|
|
198
|
+
options: { retryable: false },
|
|
199
|
+
});
|
|
200
|
+
} else {
|
|
201
|
+
this.emitError({ error, recoverable: true });
|
|
202
|
+
this.logger.warn(
|
|
203
|
+
{ tts: this.#stt.label, attempt: i + 1, error },
|
|
204
|
+
`failed to recognize speech, retrying in ${retryInterval}s`,
|
|
205
|
+
);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
if (retryInterval > 0) {
|
|
209
|
+
await delay(retryInterval);
|
|
210
|
+
}
|
|
211
|
+
} else {
|
|
212
|
+
this.emitError({ error: toError(error), recoverable: false });
|
|
213
|
+
throw error;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {
|
|
220
|
+
this.#stt.emit('error', {
|
|
221
|
+
type: 'stt_error',
|
|
222
|
+
timestamp: Date.now(),
|
|
223
|
+
label: this.#stt.label,
|
|
224
|
+
error,
|
|
225
|
+
recoverable,
|
|
226
|
+
});
|
|
227
|
+
}
|
|
154
228
|
|
|
229
|
+
protected async pumpInput() {
|
|
230
|
+
// TODO(AJS-35): Implement STT with webstreams API
|
|
231
|
+
const inputStream = this.deferredInputStream.stream;
|
|
232
|
+
const reader = inputStream.getReader();
|
|
233
|
+
|
|
234
|
+
try {
|
|
235
|
+
while (true) {
|
|
236
|
+
const { done, value } = await reader.read();
|
|
237
|
+
if (done) break;
|
|
238
|
+
this.pushFrame(value);
|
|
239
|
+
}
|
|
240
|
+
} catch (error) {
|
|
241
|
+
this.logger.error('Error in STTStream mainTask:', error);
|
|
242
|
+
} finally {
|
|
243
|
+
reader.releaseLock();
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
protected async monitorMetrics() {
|
|
155
248
|
for await (const event of this.queue) {
|
|
156
249
|
this.output.put(event);
|
|
157
250
|
if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;
|
|
158
|
-
const duration = process.hrtime.bigint() - startTime;
|
|
159
251
|
const metrics: STTMetrics = {
|
|
252
|
+
type: 'stt_metrics',
|
|
160
253
|
timestamp: Date.now(),
|
|
161
254
|
requestId: event.requestId!,
|
|
162
|
-
duration:
|
|
163
|
-
label: this.label,
|
|
255
|
+
duration: 0,
|
|
256
|
+
label: this.#stt.label,
|
|
164
257
|
audioDuration: event.recognitionUsage!.audioDuration,
|
|
165
258
|
streamed: true,
|
|
166
259
|
};
|
|
167
|
-
this.#stt.emit(
|
|
260
|
+
this.#stt.emit('metrics_collected', metrics);
|
|
168
261
|
}
|
|
169
262
|
this.output.close();
|
|
170
263
|
}
|
|
171
264
|
|
|
265
|
+
protected abstract run(): Promise<void>;
|
|
266
|
+
|
|
267
|
+
updateInputStream(audioStream: ReadableStream<AudioFrame>) {
|
|
268
|
+
this.deferredInputStream.setSource(audioStream);
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
detachInputStream() {
|
|
272
|
+
this.deferredInputStream.detachSource();
|
|
273
|
+
}
|
|
274
|
+
|
|
172
275
|
/** Push an audio frame to the STT */
|
|
173
276
|
pushFrame(frame: AudioFrame) {
|
|
174
277
|
if (this.input.closed) {
|
|
@@ -177,7 +280,21 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
|
|
|
177
280
|
if (this.closed) {
|
|
178
281
|
throw new Error('Stream is closed');
|
|
179
282
|
}
|
|
180
|
-
|
|
283
|
+
|
|
284
|
+
if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {
|
|
285
|
+
if (!this.resampler) {
|
|
286
|
+
this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
if (this.resampler) {
|
|
291
|
+
const frames = this.resampler.push(frame);
|
|
292
|
+
for (const frame of frames) {
|
|
293
|
+
this.input.put(frame);
|
|
294
|
+
}
|
|
295
|
+
} else {
|
|
296
|
+
this.input.put(frame);
|
|
297
|
+
}
|
|
181
298
|
}
|
|
182
299
|
|
|
183
300
|
/** Flush the STT, causing it to process all pending text */
|
|
@@ -12,17 +12,24 @@ interface TokenizerOptions {
|
|
|
12
12
|
language: string;
|
|
13
13
|
minSentenceLength: number;
|
|
14
14
|
streamContextLength: number;
|
|
15
|
+
retainFormat: boolean;
|
|
15
16
|
}
|
|
16
17
|
|
|
18
|
+
const defaultTokenizerOptions: TokenizerOptions = {
|
|
19
|
+
language: 'en-US',
|
|
20
|
+
minSentenceLength: 20,
|
|
21
|
+
streamContextLength: 10,
|
|
22
|
+
retainFormat: false,
|
|
23
|
+
};
|
|
24
|
+
|
|
17
25
|
export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
|
|
18
26
|
#config: TokenizerOptions;
|
|
19
27
|
|
|
20
|
-
constructor(
|
|
28
|
+
constructor(options?: Partial<TokenizerOptions>) {
|
|
21
29
|
super();
|
|
22
30
|
this.#config = {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
streamContextLength,
|
|
31
|
+
...defaultTokenizerOptions,
|
|
32
|
+
...options,
|
|
26
33
|
};
|
|
27
34
|
}
|
|
28
35
|
|
|
@@ -34,7 +41,8 @@ export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
|
|
|
34
41
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
35
42
|
stream(language?: string): tokenizer.SentenceStream {
|
|
36
43
|
return new BufferedSentenceStream(
|
|
37
|
-
(text: string) =>
|
|
44
|
+
(text: string) =>
|
|
45
|
+
splitSentences(text, this.#config.minSentenceLength, this.#config.retainFormat),
|
|
38
46
|
this.#config.minSentenceLength,
|
|
39
47
|
this.#config.streamContextLength,
|
|
40
48
|
);
|
|
@@ -5,7 +5,11 @@
|
|
|
5
5
|
/**
|
|
6
6
|
* Split the text into sentences.
|
|
7
7
|
*/
|
|
8
|
-
export const splitSentences = (
|
|
8
|
+
export const splitSentences = (
|
|
9
|
+
text: string,
|
|
10
|
+
minLength = 20,
|
|
11
|
+
retainFormat: boolean = false,
|
|
12
|
+
): [string, number, number][] => {
|
|
9
13
|
const alphabets = /([A-Za-z])/g;
|
|
10
14
|
const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;
|
|
11
15
|
const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;
|
|
@@ -16,7 +20,12 @@ export const splitSentences = (text: string, minLength = 20): [string, number, n
|
|
|
16
20
|
const digits = /([0-9])/g;
|
|
17
21
|
const dots = /\.{2,}/g;
|
|
18
22
|
|
|
19
|
-
|
|
23
|
+
if (retainFormat) {
|
|
24
|
+
text = text.replaceAll('\n', '<nel><stop>');
|
|
25
|
+
} else {
|
|
26
|
+
text = text.replaceAll('\n', ' ');
|
|
27
|
+
}
|
|
28
|
+
|
|
20
29
|
text = text.replaceAll(prefixes, '$1<prd>');
|
|
21
30
|
text = text.replaceAll(websites, '<prd>$2');
|
|
22
31
|
text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');
|
|
@@ -47,6 +56,10 @@ export const splitSentences = (text: string, minLength = 20): [string, number, n
|
|
|
47
56
|
text = text.replaceAll('!', '!<stop>');
|
|
48
57
|
text = text.replaceAll('<prd>', '.');
|
|
49
58
|
|
|
59
|
+
if (retainFormat) {
|
|
60
|
+
text = text.replaceAll('<nel>', '\n');
|
|
61
|
+
}
|
|
62
|
+
|
|
50
63
|
const split = text.split('<stop>');
|
|
51
64
|
text = text.replaceAll('<stop>', '');
|
|
52
65
|
|
|
@@ -54,21 +67,22 @@ export const splitSentences = (text: string, minLength = 20): [string, number, n
|
|
|
54
67
|
let buf = '';
|
|
55
68
|
let start = 0;
|
|
56
69
|
let end = 0;
|
|
70
|
+
const prePad = retainFormat ? '' : ' ';
|
|
57
71
|
for (const match of split) {
|
|
58
|
-
const sentence = match.trim();
|
|
72
|
+
const sentence = retainFormat ? match : match.trim();
|
|
59
73
|
if (!sentence) continue;
|
|
60
74
|
|
|
61
|
-
buf +=
|
|
75
|
+
buf += prePad + sentence;
|
|
62
76
|
end += match.length;
|
|
63
77
|
if (buf.length > minLength) {
|
|
64
|
-
sentences.push([buf.slice(
|
|
78
|
+
sentences.push([buf.slice(prePad.length), start, end]);
|
|
65
79
|
start = end;
|
|
66
80
|
buf = '';
|
|
67
81
|
}
|
|
68
82
|
}
|
|
69
83
|
|
|
70
84
|
if (buf) {
|
|
71
|
-
sentences.push([buf.slice(
|
|
85
|
+
sentences.push([buf.slice(prePad.length), start, text.length - 1]);
|
|
72
86
|
}
|
|
73
87
|
|
|
74
88
|
return sentences;
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import {
|
|
5
|
-
import { AsyncIterableQueue } from '../utils.js';
|
|
4
|
+
import { AsyncIterableQueue, shortuuid } from '../utils.js';
|
|
6
5
|
import type { TokenData } from './tokenizer.js';
|
|
7
6
|
import { SentenceStream, WordStream } from './tokenizer.js';
|
|
8
7
|
|
|
@@ -25,7 +24,7 @@ export class BufferedTokenStream implements AsyncIterableIterator<TokenData> {
|
|
|
25
24
|
this.#minTokenLength = minTokenLength;
|
|
26
25
|
this.#minContextLength = minContextLength;
|
|
27
26
|
|
|
28
|
-
this.#currentSegmentId =
|
|
27
|
+
this.#currentSegmentId = shortuuid();
|
|
29
28
|
}
|
|
30
29
|
|
|
31
30
|
/** Push a string of text into the token stream */
|
|
@@ -90,7 +89,7 @@ export class BufferedTokenStream implements AsyncIterableIterator<TokenData> {
|
|
|
90
89
|
this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });
|
|
91
90
|
}
|
|
92
91
|
|
|
93
|
-
this.#currentSegmentId =
|
|
92
|
+
this.#currentSegmentId = shortuuid();
|
|
94
93
|
}
|
|
95
94
|
|
|
96
95
|
this.#inBuf = '';
|
|
@@ -142,6 +141,10 @@ export class BufferedSentenceStream extends SentenceStream {
|
|
|
142
141
|
this.#stream.close();
|
|
143
142
|
}
|
|
144
143
|
|
|
144
|
+
endInput() {
|
|
145
|
+
this.#stream.endInput();
|
|
146
|
+
}
|
|
147
|
+
|
|
145
148
|
next(): Promise<IteratorResult<TokenData>> {
|
|
146
149
|
return this.#stream.next();
|
|
147
150
|
}
|
package/src/transcription.ts
CHANGED
|
@@ -4,11 +4,10 @@
|
|
|
4
4
|
import { TranscriptionSegment } from '@livekit/protocol';
|
|
5
5
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
6
6
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
7
|
-
import { randomUUID } from 'node:crypto';
|
|
8
7
|
import { EventEmitter } from 'node:events';
|
|
9
8
|
import { basic } from './tokenize/index.js';
|
|
10
9
|
import type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';
|
|
11
|
-
import { AsyncIterableQueue, Future } from './utils.js';
|
|
10
|
+
import { AsyncIterableQueue, Future, shortuuid } from './utils.js';
|
|
12
11
|
|
|
13
12
|
// standard speech rate in hyphens/ms
|
|
14
13
|
const STANDARD_SPEECH_RATE = 3830;
|
|
@@ -215,7 +214,7 @@ export class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitt
|
|
|
215
214
|
realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;
|
|
216
215
|
}
|
|
217
216
|
|
|
218
|
-
const segId = 'SG_'
|
|
217
|
+
const segId = shortuuid('SG_');
|
|
219
218
|
const words = this.#opts.splitWords(sentence);
|
|
220
219
|
const processedWords: string[] = [];
|
|
221
220
|
|
package/src/tts/index.ts
CHANGED
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';
|
|
5
|
+
import { Task } from '../utils.js';
|
|
5
6
|
import type { ChunkedStream } from './tts.js';
|
|
6
|
-
import { SynthesizeStream, TTS
|
|
7
|
+
import { SynthesizeStream, TTS } from './tts.js';
|
|
7
8
|
|
|
8
9
|
export class StreamAdapter extends TTS {
|
|
9
10
|
#tts: TTS;
|
|
@@ -17,8 +18,8 @@ export class StreamAdapter extends TTS {
|
|
|
17
18
|
this.label = this.#tts.label;
|
|
18
19
|
this.label = `tts.StreamAdapter<${this.#tts.label}>`;
|
|
19
20
|
|
|
20
|
-
this.#tts.on(
|
|
21
|
-
this.emit(
|
|
21
|
+
this.#tts.on('metrics_collected', (metrics) => {
|
|
22
|
+
this.emit('metrics_collected', metrics);
|
|
22
23
|
});
|
|
23
24
|
}
|
|
24
25
|
|
|
@@ -41,17 +42,13 @@ export class StreamAdapterWrapper extends SynthesizeStream {
|
|
|
41
42
|
this.#tts = tts;
|
|
42
43
|
this.#sentenceStream = sentenceTokenizer.stream();
|
|
43
44
|
this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;
|
|
44
|
-
|
|
45
|
-
this.#run();
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
async monitorMetrics() {
|
|
49
|
-
return; // do nothing
|
|
50
45
|
}
|
|
51
46
|
|
|
52
|
-
async
|
|
47
|
+
protected async run() {
|
|
53
48
|
const forwardInput = async () => {
|
|
54
49
|
for await (const input of this.input) {
|
|
50
|
+
if (this.abortController.signal.aborted) break;
|
|
51
|
+
|
|
55
52
|
if (input === SynthesizeStream.FLUSH_SENTINEL) {
|
|
56
53
|
this.#sentenceStream.flush();
|
|
57
54
|
} else {
|
|
@@ -62,15 +59,44 @@ export class StreamAdapterWrapper extends SynthesizeStream {
|
|
|
62
59
|
this.#sentenceStream.close();
|
|
63
60
|
};
|
|
64
61
|
|
|
65
|
-
const
|
|
62
|
+
const synthesizeSentenceStream = async () => {
|
|
63
|
+
let task: Task<void> | undefined;
|
|
64
|
+
const tokenCompletionTasks: Task<void>[] = [];
|
|
65
|
+
|
|
66
66
|
for await (const ev of this.#sentenceStream) {
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
67
|
+
if (this.abortController.signal.aborted) break;
|
|
68
|
+
|
|
69
|
+
// this will enable non-blocking synthesis of the stream of tokens
|
|
70
|
+
task = Task.from(
|
|
71
|
+
(controller) => synthesize(ev.token, task, controller),
|
|
72
|
+
this.abortController,
|
|
73
|
+
);
|
|
74
|
+
|
|
75
|
+
tokenCompletionTasks.push(task);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
await Promise.all(tokenCompletionTasks.map((t) => t.result));
|
|
79
|
+
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
const synthesize = async (
|
|
83
|
+
token: string,
|
|
84
|
+
prevTask: Task<void> | undefined,
|
|
85
|
+
controller: AbortController,
|
|
86
|
+
) => {
|
|
87
|
+
const audioStream = this.#tts.synthesize(token);
|
|
88
|
+
|
|
89
|
+
// wait for previous audio transcription to complete before starting
|
|
90
|
+
// to queue audio frames of the current token
|
|
91
|
+
await prevTask?.result;
|
|
92
|
+
if (controller.signal.aborted) return;
|
|
93
|
+
|
|
94
|
+
for await (const audio of audioStream) {
|
|
95
|
+
if (controller.signal.aborted) break;
|
|
96
|
+
this.queue.put(audio);
|
|
70
97
|
}
|
|
71
|
-
this.output.put(SynthesizeStream.END_OF_STREAM);
|
|
72
98
|
};
|
|
73
99
|
|
|
74
|
-
Promise.all([forwardInput(),
|
|
100
|
+
await Promise.all([forwardInput(), synthesizeSentenceStream()]);
|
|
75
101
|
}
|
|
76
102
|
}
|