@livekit/agents 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_exceptions.cjs.map +1 -1
- package/dist/_exceptions.d.ts.map +1 -1
- package/dist/_exceptions.js.map +1 -1
- package/dist/audio.cjs +89 -3
- package/dist/audio.cjs.map +1 -1
- package/dist/audio.d.cts +36 -1
- package/dist/audio.d.ts +36 -1
- package/dist/audio.d.ts.map +1 -1
- package/dist/audio.js +76 -2
- package/dist/audio.js.map +1 -1
- package/dist/beta/index.cjs +29 -0
- package/dist/beta/index.cjs.map +1 -0
- package/dist/beta/index.d.cts +2 -0
- package/dist/beta/index.d.ts +2 -0
- package/dist/beta/index.d.ts.map +1 -0
- package/dist/beta/index.js +7 -0
- package/dist/beta/index.js.map +1 -0
- package/dist/beta/workflows/index.cjs +29 -0
- package/dist/beta/workflows/index.cjs.map +1 -0
- package/dist/beta/workflows/index.d.cts +2 -0
- package/dist/beta/workflows/index.d.ts +2 -0
- package/dist/beta/workflows/index.d.ts.map +1 -0
- package/dist/beta/workflows/index.js +7 -0
- package/dist/beta/workflows/index.js.map +1 -0
- package/dist/beta/workflows/task_group.cjs +165 -0
- package/dist/beta/workflows/task_group.cjs.map +1 -0
- package/dist/beta/workflows/task_group.d.cts +32 -0
- package/dist/beta/workflows/task_group.d.ts +32 -0
- package/dist/beta/workflows/task_group.d.ts.map +1 -0
- package/dist/beta/workflows/task_group.js +141 -0
- package/dist/beta/workflows/task_group.js.map +1 -0
- package/dist/cli.cjs +44 -46
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.cts +3 -3
- package/dist/cli.d.ts +3 -3
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +45 -47
- package/dist/cli.js.map +1 -1
- package/dist/connection_pool.cjs +242 -0
- package/dist/connection_pool.cjs.map +1 -0
- package/dist/connection_pool.d.cts +123 -0
- package/dist/connection_pool.d.ts +123 -0
- package/dist/connection_pool.d.ts.map +1 -0
- package/dist/connection_pool.js +218 -0
- package/dist/connection_pool.js.map +1 -0
- package/dist/connection_pool.test.cjs +256 -0
- package/dist/connection_pool.test.cjs.map +1 -0
- package/dist/connection_pool.test.js +255 -0
- package/dist/connection_pool.test.js.map +1 -0
- package/dist/constants.cjs +30 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +10 -0
- package/dist/constants.d.ts +10 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +20 -0
- package/dist/constants.js.map +1 -1
- package/dist/cpu.cjs +189 -0
- package/dist/cpu.cjs.map +1 -0
- package/dist/cpu.d.cts +24 -0
- package/dist/cpu.d.ts +24 -0
- package/dist/cpu.d.ts.map +1 -0
- package/dist/cpu.js +152 -0
- package/dist/cpu.js.map +1 -0
- package/dist/cpu.test.cjs +227 -0
- package/dist/cpu.test.cjs.map +1 -0
- package/dist/cpu.test.js +204 -0
- package/dist/cpu.test.js.map +1 -0
- package/dist/http_server.cjs +9 -6
- package/dist/http_server.cjs.map +1 -1
- package/dist/http_server.d.cts +5 -1
- package/dist/http_server.d.ts +5 -1
- package/dist/http_server.d.ts.map +1 -1
- package/dist/http_server.js +9 -6
- package/dist/http_server.js.map +1 -1
- package/dist/index.cjs +24 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +15 -11
- package/dist/index.d.ts +15 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +18 -9
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.cjs +70 -2
- package/dist/inference/api_protos.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +373 -32
- package/dist/inference/api_protos.d.ts +373 -32
- package/dist/inference/api_protos.d.ts.map +1 -1
- package/dist/inference/api_protos.js +62 -2
- package/dist/inference/api_protos.js.map +1 -1
- package/dist/inference/index.cjs +8 -0
- package/dist/inference/index.cjs.map +1 -1
- package/dist/inference/index.d.cts +3 -4
- package/dist/inference/index.d.ts +3 -4
- package/dist/inference/index.d.ts.map +1 -1
- package/dist/inference/index.js +18 -3
- package/dist/inference/index.js.map +1 -1
- package/dist/inference/interruption/defaults.cjs +81 -0
- package/dist/inference/interruption/defaults.cjs.map +1 -0
- package/dist/inference/interruption/defaults.d.cts +19 -0
- package/dist/inference/interruption/defaults.d.ts +19 -0
- package/dist/inference/interruption/defaults.d.ts.map +1 -0
- package/dist/inference/interruption/defaults.js +46 -0
- package/dist/inference/interruption/defaults.js.map +1 -0
- package/dist/inference/interruption/errors.cjs +44 -0
- package/dist/inference/interruption/errors.cjs.map +1 -0
- package/dist/inference/interruption/errors.d.cts +12 -0
- package/dist/inference/interruption/errors.d.ts +12 -0
- package/dist/inference/interruption/errors.d.ts.map +1 -0
- package/dist/inference/interruption/errors.js +20 -0
- package/dist/inference/interruption/errors.js.map +1 -0
- package/dist/inference/interruption/http_transport.cjs +163 -0
- package/dist/inference/interruption/http_transport.cjs.map +1 -0
- package/dist/inference/interruption/http_transport.d.cts +65 -0
- package/dist/inference/interruption/http_transport.d.ts +65 -0
- package/dist/inference/interruption/http_transport.d.ts.map +1 -0
- package/dist/inference/interruption/http_transport.js +137 -0
- package/dist/inference/interruption/http_transport.js.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
- package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
- package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_cache_entry.js +34 -0
- package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
- package/dist/inference/interruption/interruption_detector.cjs +198 -0
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
- package/dist/inference/interruption/interruption_detector.d.cts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts +59 -0
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_detector.js +164 -0
- package/dist/inference/interruption/interruption_detector.js.map +1 -0
- package/dist/inference/interruption/interruption_stream.cjs +368 -0
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
- package/dist/inference/interruption/interruption_stream.d.cts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts +46 -0
- package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
- package/dist/inference/interruption/interruption_stream.js +344 -0
- package/dist/inference/interruption/interruption_stream.js.map +1 -0
- package/dist/inference/interruption/types.cjs +17 -0
- package/dist/inference/interruption/types.cjs.map +1 -0
- package/dist/inference/interruption/types.d.cts +66 -0
- package/dist/inference/interruption/types.d.ts +66 -0
- package/dist/inference/interruption/types.d.ts.map +1 -0
- package/dist/inference/interruption/types.js +1 -0
- package/dist/inference/interruption/types.js.map +1 -0
- package/dist/inference/interruption/utils.cjs +130 -0
- package/dist/inference/interruption/utils.cjs.map +1 -0
- package/dist/inference/interruption/utils.d.cts +41 -0
- package/dist/inference/interruption/utils.d.ts +41 -0
- package/dist/inference/interruption/utils.d.ts.map +1 -0
- package/dist/inference/interruption/utils.js +105 -0
- package/dist/inference/interruption/utils.js.map +1 -0
- package/dist/inference/interruption/utils.test.cjs +105 -0
- package/dist/inference/interruption/utils.test.cjs.map +1 -0
- package/dist/inference/interruption/utils.test.js +104 -0
- package/dist/inference/interruption/utils.test.js.map +1 -0
- package/dist/inference/interruption/ws_transport.cjs +347 -0
- package/dist/inference/interruption/ws_transport.cjs.map +1 -0
- package/dist/inference/interruption/ws_transport.d.cts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts +33 -0
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
- package/dist/inference/interruption/ws_transport.js +313 -0
- package/dist/inference/interruption/ws_transport.js.map +1 -0
- package/dist/inference/llm.cjs +106 -66
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +65 -43
- package/dist/inference/llm.d.ts +65 -43
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +100 -66
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +319 -170
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +64 -15
- package/dist/inference/stt.d.ts +64 -15
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +319 -170
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +218 -0
- package/dist/inference/stt.test.cjs.map +1 -0
- package/dist/inference/stt.test.js +217 -0
- package/dist/inference/stt.test.js.map +1 -0
- package/dist/inference/tts.cjs +249 -71
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +94 -17
- package/dist/inference/tts.d.ts +94 -17
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +249 -77
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +305 -0
- package/dist/inference/tts.test.cjs.map +1 -0
- package/dist/inference/tts.test.js +304 -0
- package/dist/inference/tts.test.js.map +1 -0
- package/dist/inference/utils.cjs +26 -7
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +14 -1
- package/dist/inference/utils.d.ts +14 -1
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +18 -2
- package/dist/inference/utils.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs +6 -3
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/inference_proc_executor.d.ts.map +1 -1
- package/dist/ipc/inference_proc_executor.js +6 -3
- package/dist/ipc/inference_proc_executor.js.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.cjs +13 -1
- package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.js +13 -1
- package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
- package/dist/ipc/job_proc_executor.cjs +6 -1
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.d.ts.map +1 -1
- package/dist/ipc/job_proc_executor.js +6 -1
- package/dist/ipc/job_proc_executor.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +89 -17
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +68 -18
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.cjs +34 -8
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.d.cts +8 -0
- package/dist/ipc/supervised_proc.d.ts +8 -0
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/ipc/supervised_proc.js +34 -8
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/ipc/supervised_proc.test.cjs +145 -0
- package/dist/ipc/supervised_proc.test.cjs.map +1 -0
- package/dist/ipc/supervised_proc.test.js +122 -0
- package/dist/ipc/supervised_proc.test.js.map +1 -0
- package/dist/job.cjs +109 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +14 -0
- package/dist/job.d.ts +14 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +99 -1
- package/dist/job.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/llm/chat_context.cjs +345 -3
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +86 -2
- package/dist/llm/chat_context.d.ts +86 -2
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +344 -3
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/chat_context.test.cjs +692 -0
- package/dist/llm/chat_context.test.cjs.map +1 -1
- package/dist/llm/chat_context.test.js +692 -0
- package/dist/llm/chat_context.test.js.map +1 -1
- package/dist/llm/fallback_adapter.cjs +280 -0
- package/dist/llm/fallback_adapter.cjs.map +1 -0
- package/dist/llm/fallback_adapter.d.cts +73 -0
- package/dist/llm/fallback_adapter.d.ts +73 -0
- package/dist/llm/fallback_adapter.d.ts.map +1 -0
- package/dist/llm/fallback_adapter.js +256 -0
- package/dist/llm/fallback_adapter.js.map +1 -0
- package/dist/llm/fallback_adapter.test.cjs +176 -0
- package/dist/llm/fallback_adapter.test.cjs.map +1 -0
- package/dist/llm/fallback_adapter.test.js +175 -0
- package/dist/llm/fallback_adapter.test.js.map +1 -0
- package/dist/llm/index.cjs +11 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +4 -3
- package/dist/llm/index.d.ts +4 -3
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +13 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +65 -11
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +13 -2
- package/dist/llm/llm.d.ts +13 -2
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +65 -11
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/google.cjs +6 -2
- package/dist/llm/provider_format/google.cjs.map +1 -1
- package/dist/llm/provider_format/google.d.cts +1 -1
- package/dist/llm/provider_format/google.d.ts +1 -1
- package/dist/llm/provider_format/google.d.ts.map +1 -1
- package/dist/llm/provider_format/google.js +6 -2
- package/dist/llm/provider_format/google.js.map +1 -1
- package/dist/llm/provider_format/google.test.cjs +48 -0
- package/dist/llm/provider_format/google.test.cjs.map +1 -1
- package/dist/llm/provider_format/google.test.js +54 -1
- package/dist/llm/provider_format/google.test.js.map +1 -1
- package/dist/llm/provider_format/index.cjs +2 -0
- package/dist/llm/provider_format/index.cjs.map +1 -1
- package/dist/llm/provider_format/index.d.cts +2 -2
- package/dist/llm/provider_format/index.d.ts +2 -2
- package/dist/llm/provider_format/index.d.ts.map +1 -1
- package/dist/llm/provider_format/index.js +6 -1
- package/dist/llm/provider_format/index.js.map +1 -1
- package/dist/llm/provider_format/openai.cjs +126 -24
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.d.cts +1 -0
- package/dist/llm/provider_format/openai.d.ts +1 -0
- package/dist/llm/provider_format/openai.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.js +124 -23
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +393 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +400 -2
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/provider_format/utils.cjs +5 -4
- package/dist/llm/provider_format/utils.cjs.map +1 -1
- package/dist/llm/provider_format/utils.d.ts.map +1 -1
- package/dist/llm/provider_format/utils.js +5 -4
- package/dist/llm/provider_format/utils.js.map +1 -1
- package/dist/llm/realtime.cjs +3 -0
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +15 -1
- package/dist/llm/realtime.d.ts +15 -1
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js +3 -0
- package/dist/llm/realtime.js.map +1 -1
- package/dist/llm/remote_chat_context.cjs.map +1 -1
- package/dist/llm/remote_chat_context.d.cts +2 -0
- package/dist/llm/remote_chat_context.d.ts +2 -0
- package/dist/llm/remote_chat_context.d.ts.map +1 -1
- package/dist/llm/remote_chat_context.js.map +1 -1
- package/dist/llm/tool_context.cjs +50 -2
- package/dist/llm/tool_context.cjs.map +1 -1
- package/dist/llm/tool_context.d.cts +47 -11
- package/dist/llm/tool_context.d.ts +47 -11
- package/dist/llm/tool_context.d.ts.map +1 -1
- package/dist/llm/tool_context.js +48 -3
- package/dist/llm/tool_context.js.map +1 -1
- package/dist/llm/tool_context.test.cjs +197 -0
- package/dist/llm/tool_context.test.cjs.map +1 -1
- package/dist/llm/tool_context.test.js +175 -0
- package/dist/llm/tool_context.test.js.map +1 -1
- package/dist/llm/utils.cjs +107 -12
- package/dist/llm/utils.cjs.map +1 -1
- package/dist/llm/utils.d.cts +10 -3
- package/dist/llm/utils.d.ts +10 -3
- package/dist/llm/utils.d.ts.map +1 -1
- package/dist/llm/utils.js +106 -12
- package/dist/llm/utils.js.map +1 -1
- package/dist/llm/utils.test.cjs +90 -0
- package/dist/llm/utils.test.cjs.map +1 -1
- package/dist/llm/utils.test.js +98 -2
- package/dist/llm/utils.test.js.map +1 -1
- package/dist/llm/zod-utils.cjs +102 -0
- package/dist/llm/zod-utils.cjs.map +1 -0
- package/dist/llm/zod-utils.d.cts +65 -0
- package/dist/llm/zod-utils.d.ts +65 -0
- package/dist/llm/zod-utils.d.ts.map +1 -0
- package/dist/llm/zod-utils.js +64 -0
- package/dist/llm/zod-utils.js.map +1 -0
- package/dist/llm/zod-utils.test.cjs +472 -0
- package/dist/llm/zod-utils.test.cjs.map +1 -0
- package/dist/llm/zod-utils.test.js +455 -0
- package/dist/llm/zod-utils.test.js.map +1 -0
- package/dist/log.cjs +45 -14
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +8 -1
- package/dist/log.d.ts +8 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +45 -15
- package/dist/log.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +75 -19
- package/dist/metrics/base.d.ts +75 -19
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/index.cjs +5 -0
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +2 -1
- package/dist/metrics/index.d.ts +2 -1
- package/dist/metrics/index.d.ts.map +1 -1
- package/dist/metrics/index.js +6 -0
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/model_usage.cjs +189 -0
- package/dist/metrics/model_usage.cjs.map +1 -0
- package/dist/metrics/model_usage.d.cts +92 -0
- package/dist/metrics/model_usage.d.ts +92 -0
- package/dist/metrics/model_usage.d.ts.map +1 -0
- package/dist/metrics/model_usage.js +164 -0
- package/dist/metrics/model_usage.js.map +1 -0
- package/dist/metrics/model_usage.test.cjs +474 -0
- package/dist/metrics/model_usage.test.cjs.map +1 -0
- package/dist/metrics/model_usage.test.js +476 -0
- package/dist/metrics/model_usage.test.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +5 -2
- package/dist/metrics/usage_collector.cjs.map +1 -1
- package/dist/metrics/usage_collector.d.cts +10 -1
- package/dist/metrics/usage_collector.d.ts +10 -1
- package/dist/metrics/usage_collector.d.ts.map +1 -1
- package/dist/metrics/usage_collector.js +5 -2
- package/dist/metrics/usage_collector.js.map +1 -1
- package/dist/metrics/utils.cjs +23 -7
- package/dist/metrics/utils.cjs.map +1 -1
- package/dist/metrics/utils.d.ts.map +1 -1
- package/dist/metrics/utils.js +23 -7
- package/dist/metrics/utils.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +31 -10
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.cts +6 -1
- package/dist/stream/deferred_stream.d.ts +6 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +31 -10
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stream/deferred_stream.test.cjs +2 -2
- package/dist/stream/deferred_stream.test.cjs.map +1 -1
- package/dist/stream/deferred_stream.test.js +2 -2
- package/dist/stream/deferred_stream.test.js.map +1 -1
- package/dist/stream/index.cjs +3 -0
- package/dist/stream/index.cjs.map +1 -1
- package/dist/stream/index.d.cts +1 -0
- package/dist/stream/index.d.ts +1 -0
- package/dist/stream/index.d.ts.map +1 -1
- package/dist/stream/index.js +2 -0
- package/dist/stream/index.js.map +1 -1
- package/dist/stream/multi_input_stream.cjs +139 -0
- package/dist/stream/multi_input_stream.cjs.map +1 -0
- package/dist/stream/multi_input_stream.d.cts +55 -0
- package/dist/stream/multi_input_stream.d.ts +55 -0
- package/dist/stream/multi_input_stream.d.ts.map +1 -0
- package/dist/stream/multi_input_stream.js +115 -0
- package/dist/stream/multi_input_stream.js.map +1 -0
- package/dist/stream/multi_input_stream.test.cjs +344 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -0
- package/dist/stream/multi_input_stream.test.js +343 -0
- package/dist/stream/multi_input_stream.test.js.map +1 -0
- package/dist/stream/stream_channel.cjs +39 -1
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +5 -2
- package/dist/stream/stream_channel.d.ts +5 -2
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +39 -1
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/stream/stream_channel.test.cjs +27 -0
- package/dist/stream/stream_channel.test.cjs.map +1 -1
- package/dist/stream/stream_channel.test.js +27 -0
- package/dist/stream/stream_channel.test.js.map +1 -1
- package/dist/stt/stream_adapter.cjs +24 -9
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.cts +7 -3
- package/dist/stt/stream_adapter.d.ts +7 -3
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +24 -9
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/stt/stt.cjs +94 -19
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +68 -5
- package/dist/stt/stt.d.ts +68 -5
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +96 -21
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/index.cjs +72 -0
- package/dist/telemetry/index.cjs.map +1 -0
- package/dist/telemetry/index.d.cts +7 -0
- package/dist/telemetry/index.d.ts +7 -0
- package/dist/telemetry/index.d.ts.map +1 -0
- package/dist/telemetry/index.js +37 -0
- package/dist/telemetry/index.js.map +1 -0
- package/dist/telemetry/logging.cjs +65 -0
- package/dist/telemetry/logging.cjs.map +1 -0
- package/dist/telemetry/logging.d.cts +21 -0
- package/dist/telemetry/logging.d.ts +21 -0
- package/dist/telemetry/logging.d.ts.map +1 -0
- package/dist/telemetry/logging.js +40 -0
- package/dist/telemetry/logging.js.map +1 -0
- package/dist/telemetry/otel_http_exporter.cjs +166 -0
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
- package/dist/telemetry/otel_http_exporter.d.cts +63 -0
- package/dist/telemetry/otel_http_exporter.d.ts +63 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
- package/dist/telemetry/otel_http_exporter.js +142 -0
- package/dist/telemetry/otel_http_exporter.js.map +1 -0
- package/dist/telemetry/pino_otel_transport.cjs +217 -0
- package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
- package/dist/telemetry/pino_otel_transport.d.cts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
- package/dist/telemetry/pino_otel_transport.js +189 -0
- package/dist/telemetry/pino_otel_transport.js.map +1 -0
- package/dist/telemetry/trace_types.cjs +233 -0
- package/dist/telemetry/trace_types.cjs.map +1 -0
- package/dist/telemetry/trace_types.d.cts +74 -0
- package/dist/telemetry/trace_types.d.ts +74 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -0
- package/dist/telemetry/trace_types.js +141 -0
- package/dist/telemetry/trace_types.js.map +1 -0
- package/dist/telemetry/traces.cjs +484 -0
- package/dist/telemetry/traces.cjs.map +1 -0
- package/dist/telemetry/traces.d.cts +116 -0
- package/dist/telemetry/traces.d.ts +116 -0
- package/dist/telemetry/traces.d.ts.map +1 -0
- package/dist/telemetry/traces.js +449 -0
- package/dist/telemetry/traces.js.map +1 -0
- package/dist/telemetry/utils.cjs +86 -0
- package/dist/telemetry/utils.cjs.map +1 -0
- package/dist/telemetry/utils.d.cts +5 -0
- package/dist/telemetry/utils.d.ts +5 -0
- package/dist/telemetry/utils.d.ts.map +1 -0
- package/dist/telemetry/utils.js +51 -0
- package/dist/telemetry/utils.js.map +1 -0
- package/dist/tokenize/basic/sentence.cjs +3 -3
- package/dist/tokenize/basic/sentence.cjs.map +1 -1
- package/dist/tokenize/basic/sentence.js +3 -3
- package/dist/tokenize/basic/sentence.js.map +1 -1
- package/dist/tokenize/tokenizer.test.cjs +3 -1
- package/dist/tokenize/tokenizer.test.cjs.map +1 -1
- package/dist/tokenize/tokenizer.test.js +3 -1
- package/dist/tokenize/tokenizer.test.js.map +1 -1
- package/dist/transcription.cjs.map +1 -1
- package/dist/transcription.d.cts +6 -0
- package/dist/transcription.d.ts +6 -0
- package/dist/transcription.d.ts.map +1 -1
- package/dist/transcription.js.map +1 -1
- package/dist/tts/fallback_adapter.cjs +472 -0
- package/dist/tts/fallback_adapter.cjs.map +1 -0
- package/dist/tts/fallback_adapter.d.cts +110 -0
- package/dist/tts/fallback_adapter.d.ts +110 -0
- package/dist/tts/fallback_adapter.d.ts.map +1 -0
- package/dist/tts/fallback_adapter.js +448 -0
- package/dist/tts/fallback_adapter.js.map +1 -0
- package/dist/tts/index.cjs +3 -0
- package/dist/tts/index.cjs.map +1 -1
- package/dist/tts/index.d.cts +1 -0
- package/dist/tts/index.d.ts +1 -0
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +2 -0
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +25 -8
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.cts +6 -3
- package/dist/tts/stream_adapter.d.ts +6 -3
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +25 -8
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +189 -57
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +58 -6
- package/dist/tts/tts.d.ts +58 -6
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +191 -59
- package/dist/tts/tts.js.map +1 -1
- package/dist/typed_promise.cjs +48 -0
- package/dist/typed_promise.cjs.map +1 -0
- package/dist/typed_promise.d.cts +24 -0
- package/dist/typed_promise.d.ts +24 -0
- package/dist/typed_promise.d.ts.map +1 -0
- package/dist/typed_promise.js +28 -0
- package/dist/typed_promise.js.map +1 -0
- package/dist/types.cjs +24 -32
- package/dist/types.cjs.map +1 -1
- package/dist/types.d.cts +45 -10
- package/dist/types.d.ts +45 -10
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +20 -30
- package/dist/types.js.map +1 -1
- package/dist/utils.cjs +124 -28
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +41 -1
- package/dist/utils.d.ts +41 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +119 -27
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +73 -1
- package/dist/utils.test.cjs.map +1 -1
- package/dist/utils.test.js +74 -10
- package/dist/utils.test.js.map +1 -1
- package/dist/vad.cjs +35 -15
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +15 -5
- package/dist/vad.d.ts +15 -5
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +35 -15
- package/dist/vad.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.cjs.map +1 -1
- package/dist/version.d.cts +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.d.ts.map +1 -1
- package/dist/version.js +1 -1
- package/dist/version.js.map +1 -1
- package/dist/voice/agent.cjs +258 -35
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +54 -13
- package/dist/voice/agent.d.ts +54 -13
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +254 -34
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +314 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +316 -2
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +1116 -385
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +72 -11
- package/dist/voice/agent_activity.d.ts +72 -11
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +1119 -383
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/agent_session.cjs +550 -90
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +185 -25
- package/dist/voice/agent_session.d.ts +185 -25
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +556 -91
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +605 -46
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +96 -4
- package/dist/voice/audio_recognition.d.ts +96 -4
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +611 -47
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +295 -0
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
- package/dist/voice/audio_recognition_span.test.js +299 -0
- package/dist/voice/audio_recognition_span.test.js.map +1 -0
- package/dist/voice/avatar/datastream_io.cjs +7 -1
- package/dist/voice/avatar/datastream_io.cjs.map +1 -1
- package/dist/voice/avatar/datastream_io.d.cts +1 -0
- package/dist/voice/avatar/datastream_io.d.ts +1 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -1
- package/dist/voice/avatar/datastream_io.js +7 -1
- package/dist/voice/avatar/datastream_io.js.map +1 -1
- package/dist/voice/background_audio.cjs +367 -0
- package/dist/voice/background_audio.cjs.map +1 -0
- package/dist/voice/background_audio.d.cts +123 -0
- package/dist/voice/background_audio.d.ts +123 -0
- package/dist/voice/background_audio.d.ts.map +1 -0
- package/dist/voice/background_audio.js +343 -0
- package/dist/voice/background_audio.js.map +1 -0
- package/dist/voice/events.cjs +3 -0
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +16 -9
- package/dist/voice/events.d.ts +16 -9
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +3 -0
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +205 -41
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +21 -5
- package/dist/voice/generation.d.ts +21 -5
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +215 -43
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/generation_tools.test.cjs +236 -0
- package/dist/voice/generation_tools.test.cjs.map +1 -0
- package/dist/voice/generation_tools.test.js +235 -0
- package/dist/voice/generation_tools.test.js.map +1 -0
- package/dist/voice/index.cjs +33 -2
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +8 -2
- package/dist/voice/index.d.ts +8 -2
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +19 -2
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/interruption_detection.test.cjs +114 -0
- package/dist/voice/interruption_detection.test.cjs.map +1 -0
- package/dist/voice/interruption_detection.test.js +113 -0
- package/dist/voice/interruption_detection.test.js.map +1 -0
- package/dist/voice/io.cjs +66 -6
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +67 -7
- package/dist/voice/io.d.ts +67 -7
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +62 -5
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/index.cjs +23 -0
- package/dist/voice/recorder_io/index.cjs.map +1 -0
- package/dist/voice/recorder_io/index.d.cts +2 -0
- package/dist/voice/recorder_io/index.d.ts +2 -0
- package/dist/voice/recorder_io/index.d.ts.map +1 -0
- package/dist/voice/recorder_io/index.js +2 -0
- package/dist/voice/recorder_io/index.js.map +1 -0
- package/dist/voice/recorder_io/recorder_io.cjs +607 -0
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
- package/dist/voice/recorder_io/recorder_io.d.cts +106 -0
- package/dist/voice/recorder_io/recorder_io.d.ts +106 -0
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
- package/dist/voice/recorder_io/recorder_io.js +573 -0
- package/dist/voice/recorder_io/recorder_io.js.map +1 -0
- package/dist/voice/remote_session.cjs +922 -0
- package/dist/voice/remote_session.cjs.map +1 -0
- package/dist/voice/remote_session.d.cts +108 -0
- package/dist/voice/remote_session.d.ts +108 -0
- package/dist/voice/remote_session.d.ts.map +1 -0
- package/dist/voice/remote_session.js +887 -0
- package/dist/voice/remote_session.js.map +1 -0
- package/dist/voice/report.cjs +88 -0
- package/dist/voice/report.cjs.map +1 -0
- package/dist/voice/report.d.cts +49 -0
- package/dist/voice/report.d.ts +49 -0
- package/dist/voice/report.d.ts.map +1 -0
- package/dist/voice/report.js +63 -0
- package/dist/voice/report.js.map +1 -0
- package/dist/voice/report.test.cjs +121 -0
- package/dist/voice/report.test.cjs.map +1 -0
- package/dist/voice/report.test.js +120 -0
- package/dist/voice/report.test.js.map +1 -0
- package/dist/voice/room_io/_input.cjs +40 -7
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.cts +5 -2
- package/dist/voice/room_io/_input.d.ts +5 -2
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +41 -8
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/_output.cjs +19 -11
- package/dist/voice/room_io/_output.cjs.map +1 -1
- package/dist/voice/room_io/_output.d.cts +7 -4
- package/dist/voice/room_io/_output.d.ts +7 -4
- package/dist/voice/room_io/_output.d.ts.map +1 -1
- package/dist/voice/room_io/_output.js +20 -12
- package/dist/voice/room_io/_output.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +33 -6
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +29 -9
- package/dist/voice/room_io/room_io.d.ts +29 -9
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +33 -7
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/speech_handle.cjs +22 -4
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +17 -2
- package/dist/voice/speech_handle.d.ts +17 -2
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +21 -4
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/fake_llm.cjs +127 -0
- package/dist/voice/testing/fake_llm.cjs.map +1 -0
- package/dist/voice/testing/fake_llm.d.cts +30 -0
- package/dist/voice/testing/fake_llm.d.ts +30 -0
- package/dist/voice/testing/fake_llm.d.ts.map +1 -0
- package/dist/voice/testing/fake_llm.js +103 -0
- package/dist/voice/testing/fake_llm.js.map +1 -0
- package/dist/voice/testing/index.cjs +57 -0
- package/dist/voice/testing/index.cjs.map +1 -0
- package/dist/voice/testing/index.d.cts +21 -0
- package/dist/voice/testing/index.d.ts +21 -0
- package/dist/voice/testing/index.d.ts.map +1 -0
- package/dist/voice/testing/index.js +35 -0
- package/dist/voice/testing/index.js.map +1 -0
- package/dist/voice/testing/run_result.cjs +817 -0
- package/dist/voice/testing/run_result.cjs.map +1 -0
- package/dist/voice/testing/run_result.d.cts +385 -0
- package/dist/voice/testing/run_result.d.ts +385 -0
- package/dist/voice/testing/run_result.d.ts.map +1 -0
- package/dist/voice/testing/run_result.js +790 -0
- package/dist/voice/testing/run_result.js.map +1 -0
- package/dist/voice/testing/types.cjs +46 -0
- package/dist/voice/testing/types.cjs.map +1 -0
- package/dist/voice/testing/types.d.cts +83 -0
- package/dist/voice/testing/types.d.ts +83 -0
- package/dist/voice/testing/types.d.ts.map +1 -0
- package/dist/voice/testing/types.js +19 -0
- package/dist/voice/testing/types.js.map +1 -0
- package/dist/voice/transcription/synchronizer.cjs +139 -15
- package/dist/voice/transcription/synchronizer.cjs.map +1 -1
- package/dist/voice/transcription/synchronizer.d.cts +35 -4
- package/dist/voice/transcription/synchronizer.d.ts +35 -4
- package/dist/voice/transcription/synchronizer.d.ts.map +1 -1
- package/dist/voice/transcription/synchronizer.js +143 -16
- package/dist/voice/transcription/synchronizer.js.map +1 -1
- package/dist/voice/transcription/synchronizer.test.cjs +151 -0
- package/dist/voice/transcription/synchronizer.test.cjs.map +1 -0
- package/dist/voice/transcription/synchronizer.test.js +150 -0
- package/dist/voice/transcription/synchronizer.test.js.map +1 -0
- package/dist/voice/turn_config/endpointing.cjs +33 -0
- package/dist/voice/turn_config/endpointing.cjs.map +1 -0
- package/dist/voice/turn_config/endpointing.d.cts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts +30 -0
- package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
- package/dist/voice/turn_config/endpointing.js +9 -0
- package/dist/voice/turn_config/endpointing.js.map +1 -0
- package/dist/voice/turn_config/interruption.cjs +37 -0
- package/dist/voice/turn_config/interruption.cjs.map +1 -0
- package/dist/voice/turn_config/interruption.d.cts +53 -0
- package/dist/voice/turn_config/interruption.d.ts +53 -0
- package/dist/voice/turn_config/interruption.d.ts.map +1 -0
- package/dist/voice/turn_config/interruption.js +13 -0
- package/dist/voice/turn_config/interruption.js.map +1 -0
- package/dist/voice/turn_config/turn_handling.cjs +35 -0
- package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
- package/dist/voice/turn_config/turn_handling.d.cts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts +36 -0
- package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
- package/dist/voice/turn_config/turn_handling.js +11 -0
- package/dist/voice/turn_config/turn_handling.js.map +1 -0
- package/dist/voice/turn_config/utils.cjs +157 -0
- package/dist/voice/turn_config/utils.cjs.map +1 -0
- package/dist/voice/turn_config/utils.d.cts +37 -0
- package/dist/voice/turn_config/utils.d.ts +37 -0
- package/dist/voice/turn_config/utils.d.ts.map +1 -0
- package/dist/voice/turn_config/utils.js +131 -0
- package/dist/voice/turn_config/utils.js.map +1 -0
- package/dist/voice/turn_config/utils.test.cjs +128 -0
- package/dist/voice/turn_config/utils.test.cjs.map +1 -0
- package/dist/voice/turn_config/utils.test.js +127 -0
- package/dist/voice/turn_config/utils.test.js.map +1 -0
- package/dist/voice/utils.cjs +47 -0
- package/dist/voice/utils.cjs.map +1 -0
- package/dist/voice/utils.d.cts +4 -0
- package/dist/voice/utils.d.ts +4 -0
- package/dist/voice/utils.d.ts.map +1 -0
- package/dist/voice/utils.js +23 -0
- package/dist/voice/utils.js.map +1 -0
- package/dist/worker.cjs +44 -52
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.cts +18 -8
- package/dist/worker.d.ts +18 -8
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +43 -43
- package/dist/worker.js.map +1 -1
- package/package.json +35 -13
- package/resources/NOTICE +2 -0
- package/resources/keyboard-typing.ogg +0 -0
- package/resources/keyboard-typing2.ogg +0 -0
- package/resources/office-ambience.ogg +0 -0
- package/src/_exceptions.ts +5 -0
- package/src/audio.ts +132 -1
- package/src/beta/index.ts +9 -0
- package/src/beta/workflows/index.ts +9 -0
- package/src/beta/workflows/task_group.ts +203 -0
- package/src/cli.ts +57 -66
- package/src/connection_pool.test.ts +346 -0
- package/src/connection_pool.ts +307 -0
- package/src/constants.ts +14 -0
- package/src/cpu.test.ts +239 -0
- package/src/cpu.ts +173 -0
- package/src/http_server.ts +18 -6
- package/src/index.ts +15 -13
- package/src/inference/api_protos.ts +85 -2
- package/src/inference/index.ts +32 -4
- package/src/inference/interruption/defaults.ts +51 -0
- package/src/inference/interruption/errors.ts +25 -0
- package/src/inference/interruption/http_transport.ts +207 -0
- package/src/inference/interruption/interruption_cache_entry.ts +50 -0
- package/src/inference/interruption/interruption_detector.ts +204 -0
- package/src/inference/interruption/interruption_stream.ts +467 -0
- package/src/inference/interruption/types.ts +84 -0
- package/src/inference/interruption/utils.test.ts +132 -0
- package/src/inference/interruption/utils.ts +137 -0
- package/src/inference/interruption/ws_transport.ts +416 -0
- package/src/inference/llm.ts +214 -163
- package/src/inference/stt.test.ts +253 -0
- package/src/inference/stt.ts +449 -208
- package/src/inference/tts.test.ts +354 -0
- package/src/inference/tts.ts +417 -115
- package/src/inference/utils.ts +30 -2
- package/src/ipc/inference_proc_executor.ts +11 -3
- package/src/ipc/inference_proc_lazy_main.ts +13 -1
- package/src/ipc/job_proc_executor.ts +11 -1
- package/src/ipc/job_proc_lazy_main.ts +86 -20
- package/src/ipc/supervised_proc.test.ts +153 -0
- package/src/ipc/supervised_proc.ts +39 -10
- package/src/job.ts +120 -1
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/llm/__snapshots__/zod-utils.test.ts.snap +559 -0
- package/src/llm/chat_context.test.ts +787 -0
- package/src/llm/chat_context.ts +493 -2
- package/src/llm/fallback_adapter.test.ts +238 -0
- package/src/llm/fallback_adapter.ts +394 -0
- package/src/llm/index.ts +13 -0
- package/src/llm/llm.ts +77 -12
- package/src/llm/provider_format/google.test.ts +72 -1
- package/src/llm/provider_format/google.ts +10 -6
- package/src/llm/provider_format/index.ts +7 -2
- package/src/llm/provider_format/openai.test.ts +480 -2
- package/src/llm/provider_format/openai.ts +152 -21
- package/src/llm/provider_format/utils.ts +11 -5
- package/src/llm/realtime.ts +23 -2
- package/src/llm/remote_chat_context.ts +2 -2
- package/src/llm/tool_context.test.ts +210 -1
- package/src/llm/tool_context.ts +115 -17
- package/src/llm/utils.test.ts +103 -2
- package/src/llm/utils.ts +152 -16
- package/src/llm/zod-utils.test.ts +577 -0
- package/src/llm/zod-utils.ts +153 -0
- package/src/log.ts +71 -19
- package/src/metrics/base.ts +78 -19
- package/src/metrics/index.ts +12 -0
- package/src/metrics/model_usage.test.ts +545 -0
- package/src/metrics/model_usage.ts +262 -0
- package/src/metrics/usage_collector.ts +14 -3
- package/src/metrics/utils.ts +27 -7
- package/src/stream/deferred_stream.test.ts +3 -3
- package/src/stream/deferred_stream.ts +43 -11
- package/src/stream/index.ts +1 -0
- package/src/stream/multi_input_stream.test.ts +545 -0
- package/src/stream/multi_input_stream.ts +172 -0
- package/src/stream/stream_channel.test.ts +37 -0
- package/src/stream/stream_channel.ts +43 -3
- package/src/stt/stream_adapter.ts +30 -9
- package/src/stt/stt.ts +140 -23
- package/src/telemetry/index.ts +28 -0
- package/src/telemetry/logging.ts +55 -0
- package/src/telemetry/otel_http_exporter.ts +218 -0
- package/src/telemetry/pino_otel_transport.ts +265 -0
- package/src/telemetry/trace_types.ts +109 -0
- package/src/telemetry/traces.ts +673 -0
- package/src/telemetry/utils.ts +61 -0
- package/src/tokenize/basic/sentence.ts +3 -3
- package/src/tokenize/tokenizer.test.ts +4 -0
- package/src/transcription.ts +6 -0
- package/src/tts/fallback_adapter.ts +586 -0
- package/src/tts/index.ts +1 -0
- package/src/tts/stream_adapter.ts +38 -8
- package/src/tts/tts.ts +245 -62
- package/src/typed_promise.ts +67 -0
- package/src/types.ts +62 -33
- package/src/utils.test.ts +90 -10
- package/src/utils.ts +178 -33
- package/src/vad.ts +42 -18
- package/src/version.ts +1 -1
- package/src/voice/agent.test.ts +347 -2
- package/src/voice/agent.ts +346 -44
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +1457 -388
- package/src/voice/agent_session.ts +817 -112
- package/src/voice/audio_recognition.ts +845 -70
- package/src/voice/audio_recognition_span.test.ts +341 -0
- package/src/voice/avatar/datastream_io.ts +9 -1
- package/src/voice/background_audio.ts +494 -0
- package/src/voice/events.ts +27 -7
- package/src/voice/generation.ts +310 -56
- package/src/voice/generation_tools.test.ts +268 -0
- package/src/voice/index.ts +17 -3
- package/src/voice/interruption_detection.test.ts +151 -0
- package/src/voice/io.ts +115 -12
- package/src/voice/recorder_io/index.ts +4 -0
- package/src/voice/recorder_io/recorder_io.ts +783 -0
- package/src/voice/remote_session.ts +1083 -0
- package/src/voice/report.test.ts +136 -0
- package/src/voice/report.ts +140 -0
- package/src/voice/room_io/_input.ts +45 -10
- package/src/voice/room_io/_output.ts +26 -14
- package/src/voice/room_io/room_io.ts +67 -22
- package/src/voice/speech_handle.ts +38 -6
- package/src/voice/testing/fake_llm.ts +138 -0
- package/src/voice/testing/index.ts +52 -0
- package/src/voice/testing/run_result.ts +995 -0
- package/src/voice/testing/types.ts +118 -0
- package/src/voice/transcription/synchronizer.test.ts +206 -0
- package/src/voice/transcription/synchronizer.ts +204 -19
- package/src/voice/turn_config/endpointing.ts +33 -0
- package/src/voice/turn_config/interruption.ts +56 -0
- package/src/voice/turn_config/turn_handling.ts +45 -0
- package/src/voice/turn_config/utils.test.ts +148 -0
- package/src/voice/turn_config/utils.ts +167 -0
- package/src/voice/utils.ts +29 -0
- package/src/worker.ts +92 -78
- package/src/llm/__snapshots__/utils.test.ts.snap +0 -65
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { SpanStatusCode, context as otelContext, trace } from "@opentelemetry/api";
|
|
2
|
+
import * as traceTypes from "./trace_types.js";
|
|
3
|
+
import { tracer } from "./traces.js";
|
|
4
|
+
function recordException(span, error) {
|
|
5
|
+
span.recordException(error);
|
|
6
|
+
span.setStatus({
|
|
7
|
+
code: SpanStatusCode.ERROR,
|
|
8
|
+
message: error.message
|
|
9
|
+
});
|
|
10
|
+
span.setAttributes({
|
|
11
|
+
[traceTypes.ATTR_EXCEPTION_TYPE]: error.constructor.name,
|
|
12
|
+
[traceTypes.ATTR_EXCEPTION_MESSAGE]: error.message,
|
|
13
|
+
[traceTypes.ATTR_EXCEPTION_TRACE]: error.stack || ""
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
function recordRealtimeMetrics(span, metrics) {
|
|
17
|
+
const attrs = {
|
|
18
|
+
[traceTypes.ATTR_GEN_AI_REQUEST_MODEL]: metrics.label || "unknown",
|
|
19
|
+
[traceTypes.ATTR_REALTIME_MODEL_METRICS]: JSON.stringify(metrics),
|
|
20
|
+
[traceTypes.ATTR_GEN_AI_USAGE_INPUT_TOKENS]: metrics.inputTokens,
|
|
21
|
+
[traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: metrics.outputTokens,
|
|
22
|
+
[traceTypes.ATTR_GEN_AI_USAGE_INPUT_TEXT_TOKENS]: metrics.inputTokenDetails.textTokens,
|
|
23
|
+
[traceTypes.ATTR_GEN_AI_USAGE_INPUT_AUDIO_TOKENS]: metrics.inputTokenDetails.audioTokens,
|
|
24
|
+
[traceTypes.ATTR_GEN_AI_USAGE_INPUT_CACHED_TOKENS]: metrics.inputTokenDetails.cachedTokens,
|
|
25
|
+
[traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_TEXT_TOKENS]: metrics.outputTokenDetails.textTokens,
|
|
26
|
+
[traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_AUDIO_TOKENS]: metrics.outputTokenDetails.audioTokens
|
|
27
|
+
};
|
|
28
|
+
if (metrics.ttftMs !== void 0 && metrics.ttftMs !== -1) {
|
|
29
|
+
const completionStartTime = metrics.timestamp + metrics.ttftMs;
|
|
30
|
+
const completionStartTimeUtc = new Date(completionStartTime).toISOString();
|
|
31
|
+
attrs[traceTypes.ATTR_LANGFUSE_COMPLETION_START_TIME] = completionStartTimeUtc;
|
|
32
|
+
}
|
|
33
|
+
if (span.isRecording()) {
|
|
34
|
+
span.setAttributes(attrs);
|
|
35
|
+
} else {
|
|
36
|
+
const currentContext = otelContext.active();
|
|
37
|
+
const spanContext = trace.setSpan(currentContext, span);
|
|
38
|
+
tracer.getTracer().startActiveSpan("realtime_metrics", {}, spanContext, (child) => {
|
|
39
|
+
try {
|
|
40
|
+
child.setAttributes(attrs);
|
|
41
|
+
} finally {
|
|
42
|
+
child.end();
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
export {
|
|
48
|
+
recordException,
|
|
49
|
+
recordRealtimeMetrics
|
|
50
|
+
};
|
|
51
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/telemetry/utils.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type Span, SpanStatusCode, context as otelContext, trace } from '@opentelemetry/api';\nimport type { RealtimeModelMetrics } from '../metrics/base.js';\nimport * as traceTypes from './trace_types.js';\nimport { tracer } from './traces.js';\n\nexport function recordException(span: Span, error: Error): void {\n span.recordException(error);\n span.setStatus({\n code: SpanStatusCode.ERROR,\n message: error.message,\n });\n\n // Set exception attributes for better visibility\n // (in case the exception event is not rendered by the backend)\n span.setAttributes({\n [traceTypes.ATTR_EXCEPTION_TYPE]: error.constructor.name,\n [traceTypes.ATTR_EXCEPTION_MESSAGE]: error.message,\n [traceTypes.ATTR_EXCEPTION_TRACE]: error.stack || '',\n });\n}\n\nexport function recordRealtimeMetrics(span: Span, metrics: RealtimeModelMetrics): void {\n const attrs: Record<string, string | number> = {\n [traceTypes.ATTR_GEN_AI_REQUEST_MODEL]: metrics.label || 'unknown',\n [traceTypes.ATTR_REALTIME_MODEL_METRICS]: JSON.stringify(metrics),\n [traceTypes.ATTR_GEN_AI_USAGE_INPUT_TOKENS]: metrics.inputTokens,\n [traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: metrics.outputTokens,\n [traceTypes.ATTR_GEN_AI_USAGE_INPUT_TEXT_TOKENS]: metrics.inputTokenDetails.textTokens,\n [traceTypes.ATTR_GEN_AI_USAGE_INPUT_AUDIO_TOKENS]: metrics.inputTokenDetails.audioTokens,\n [traceTypes.ATTR_GEN_AI_USAGE_INPUT_CACHED_TOKENS]: metrics.inputTokenDetails.cachedTokens,\n [traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_TEXT_TOKENS]: metrics.outputTokenDetails.textTokens,\n [traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_AUDIO_TOKENS]: metrics.outputTokenDetails.audioTokens,\n };\n\n // Add LangFuse-specific completion start time if TTFT is available\n if (metrics.ttftMs !== undefined && metrics.ttftMs !== -1) {\n const completionStartTime = metrics.timestamp + metrics.ttftMs;\n // Convert to UTC ISO string for LangFuse compatibility\n const completionStartTimeUtc = new Date(completionStartTime).toISOString();\n attrs[traceTypes.ATTR_LANGFUSE_COMPLETION_START_TIME] = completionStartTimeUtc;\n }\n\n if (span.isRecording()) {\n span.setAttributes(attrs);\n } else {\n const currentContext = otelContext.active();\n const spanContext = trace.setSpan(currentContext, span);\n\n // Create a dedicated child span for orphaned metrics\n tracer.getTracer().startActiveSpan('realtime_metrics', {}, spanContext, (child) => {\n try {\n child.setAttributes(attrs);\n } finally {\n child.end();\n }\n });\n }\n}\n"],"mappings":"AAGA,SAAoB,gBAAgB,WAAW,aAAa,aAAa;AAEzE,YAAY,gBAAgB;AAC5B,SAAS,cAAc;AAEhB,SAAS,gBAAgB,MAAY,OAAoB;AAC9D,OAAK,gBAAgB,KAAK;AAC1B,OAAK,UAAU;AAAA,IACb,MAAM,eAAe;AAAA,IACrB,SAAS,MAAM;AAAA,EACjB,CAAC;AAID,OAAK,cAAc;AAAA,IACjB,CAAC,WAAW,mBAAmB,GAAG,MAAM,YAAY;AAAA,IACpD,CAAC,WAAW,sBAAsB,GAAG,MAAM;AAAA,IAC3C,CAAC,WAAW,oBAAoB,GAAG,MAAM,SAAS;AAAA,EACpD,CAAC;AACH;AAEO,SAAS,sBAAsB,MAAY,SAAqC;AACrF,QAAM,QAAyC;AAAA,IAC7C,CAAC,WAAW,yBAAyB,GAAG,QAAQ,SAAS;AAAA,IACzD,CAAC,WAAW,2BAA2B,GAAG,KAAK,UAAU,OAAO;AAAA,IAChE,CAAC,WAAW,8BAA8B,GAAG,QAAQ;AAAA,IACrD,CAAC,WAAW,+BAA+B,GAAG,QAAQ;AAAA,IACtD,CAAC,WAAW,mCAAmC,GAAG,QAAQ,kBAAkB;AAAA,IAC5E,CAAC,WAAW,oCAAoC,GAAG,QAAQ,kBAAkB;AAAA,IAC7E,CAAC,WAAW,qCAAqC,GAAG,QAAQ,kBAAkB;AAAA,IAC9E,CAAC,WAAW,oCAAoC,GAAG,QAAQ,mBAAmB;AAAA,IAC9E,CAAC,WAAW,qCAAqC,GAAG,QAAQ,mBAAmB;AAAA,EACjF;AAGA,MAAI,QAAQ,WAAW,UAAa,QAAQ,WAAW,IAAI;AACzD,UAAM,sBAAsB,QAAQ,YAAY,QAAQ;AAExD,UAAM,yBAAyB,IAAI,KAAK,mBAAmB,EAAE,YAAY;AACzE,UAAM,WAAW,mCAAmC,IAAI;AAAA,EAC1D;AAEA,MAAI,KAAK,YAAY,GAAG;AACtB,SAAK,cAAc,KAAK;AAAA,EAC1B,OAAO;AACL,UAAM,iBAAiB,YAAY,OAAO;AAC1C,UAAM,cAAc,MAAM,QAAQ,gBAAgB,IAAI;AAGtD,WAAO,UAAU,EAAE,gBAAgB,oBAAoB,CAAC,GAAG,aAAa,CAAC,UAAU;AACjF,UAAI;AACF,cAAM,cAAc,KAAK;AAAA,MAC3B,UAAE;AACA,cAAM,IAAI;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH;AACF;","names":[]}
|
|
@@ -27,7 +27,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
|
|
|
27
27
|
const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;
|
|
28
28
|
const starters = /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)/g;
|
|
29
29
|
const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;
|
|
30
|
-
const websites = /
|
|
30
|
+
const websites = /(\w+\.)+(com|net|org|io|gov|edu|me)/g;
|
|
31
31
|
const digits = /([0-9])/g;
|
|
32
32
|
const dots = /\.{2,}/g;
|
|
33
33
|
if (retainFormat) {
|
|
@@ -36,7 +36,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
|
|
|
36
36
|
text = text.replaceAll("\n", " ");
|
|
37
37
|
}
|
|
38
38
|
text = text.replaceAll(prefixes, "$1<prd>");
|
|
39
|
-
text = text.
|
|
39
|
+
text = text.replace(websites, (match) => match.replaceAll(".", "<prd>"));
|
|
40
40
|
text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, "g"), "$1<prd>$2");
|
|
41
41
|
text = text.replaceAll(dots, (match) => "<prd>".repeat(match.length));
|
|
42
42
|
text = text.replaceAll("Ph.D.", "Ph<prd>D<prd>");
|
|
@@ -60,7 +60,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
|
|
|
60
60
|
text = text.replaceAll('."', '".');
|
|
61
61
|
text = text.replaceAll('!"', '"!');
|
|
62
62
|
text = text.replaceAll('?"', '"?');
|
|
63
|
-
text = text.
|
|
63
|
+
text = text.replace(/\.(?=\s|$)/g, ".<stop>");
|
|
64
64
|
text = text.replaceAll("?", "?<stop>");
|
|
65
65
|
text = text.replaceAll("!", "!<stop>");
|
|
66
66
|
text = text.replaceAll("<prd>", ".");
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (\n text: string,\n minLength = 20,\n retainFormat: boolean = false,\n): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /
|
|
1
|
+
{"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (\n text: string,\n minLength = 20,\n retainFormat: boolean = false,\n): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /(\\w+\\.)+(com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n if (retainFormat) {\n text = text.replaceAll('\\n', '<nel><stop>');\n } else {\n text = text.replaceAll('\\n', ' ');\n }\n\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replace(websites, (match) => match.replaceAll('.', '<prd>'));\n text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(`\\\\s${alphabets.source}[.] `, 'g'), ' $1<prd> ');\n text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, 'g'), '$1<stop> $2');\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>$3<prd>',\n );\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>',\n );\n text = text.replaceAll(\n new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),\n '$1<stop> $2',\n );\n text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll('.”', '”.');\n text = text.replaceAll('.\"', '\".');\n text = text.replaceAll('!\"', '\"!');\n text = text.replaceAll('?\"', '\"?');\n text = text.replace(/\\.(?=\\s|$)/g, '.<stop>');\n text = text.replaceAll('?', '?<stop>');\n text = text.replaceAll('!', '!<stop>');\n text = text.replaceAll('<prd>', '.');\n\n if (retainFormat) {\n text = text.replaceAll('<nel>', '\\n');\n }\n\n const split = text.split('<stop>');\n text = text.replaceAll('<stop>', '');\n\n const sentences: [string, number, number][] = [];\n let buf = '';\n let start = 0;\n let end = 0;\n const prePad = retainFormat ? '' : ' ';\n for (const match of split) {\n const sentence = retainFormat ? match : match.trim();\n if (!sentence) continue;\n\n buf += prePad + sentence;\n end += match.length;\n if (buf.length > minLength) {\n sentences.push([buf.slice(prePad.length), start, end]);\n start = end;\n buf = '';\n }\n }\n\n if (buf) {\n sentences.push([buf.slice(prePad.length), start, text.length - 1]);\n }\n\n return sentences;\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAOO,MAAM,iBAAiB,CAC5B,MACA,YAAY,IACZ,eAAwB,UACO;AAC/B,QAAM,YAAY;AAClB,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,WACJ;AACF,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,SAAS;AACf,QAAM,OAAO;AAEb,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,MAAM,aAAa;AAAA,EAC5C,OAAO;AACL,WAAO,KAAK,WAAW,MAAM,GAAG;AAAA,EAClC;AAEA,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,QAAQ,UAAU,CAAC,UAAU,MAAM,WAAW,KAAK,OAAO,CAAC;AACvE,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,OAAO,MAAM,MAAM,OAAO,MAAM,IAAI,GAAG,GAAG,WAAW;AAC1F,SAAO,KAAK,WAAW,MAAM,CAAC,UAAU,QAAQ,OAAO,MAAM,MAAM,CAAC;AACpE,SAAO,KAAK,WAAW,SAAS,eAAe;AAC/C,SAAO,KAAK,WAAW,IAAI,OAAO,MAAM,UAAU,MAAM,QAAQ,GAAG,GAAG,WAAW;AACjF,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,SAAS,MAAM,IAAI,SAAS,MAAM,IAAI,GAAG,GAAG,aAAa;AAC9F,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IACpF;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IAC9D;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,SAAS,MAAM,IAAI,GAAG;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,GAAG,GAAG,SAAS;AAC3E,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,UAAU,MAAM,OAAO,GAAG,GAAG,SAAS;AAC5E,SAAO,KAAK,WAAW,WAAM,SAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,QAAQ,eAAe,SAAS;AAC5C,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,SAAS,GAAG;AAEnC,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,SAAS,IAAI;AAAA,EACtC;AAEA,QAAM,QAAQ,KAAK,MAAM,QAAQ;AACjC,SAAO,KAAK,WAAW,UAAU,EAAE;AAEnC,QAAM,YAAwC,CAAC;AAC/C,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,MAAM;AACV,QAAM,SAAS,eAAe,KAAK;AACnC,aAAW,SAAS,OAAO;AACzB,UAAM,WAAW,eAAe,QAAQ,MAAM,KAAK;AACnD,QAAI,CAAC,SAAU;AAEf,WAAO,SAAS;AAChB,WAAO,MAAM;AACb,QAAI,IAAI,SAAS,WAAW;AAC1B,gBAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,GAAG,CAAC;AACrD,cAAQ;AACR,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI,KAAK;AACP,cAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC;AAAA,EACnE;AAEA,SAAO;AACT;","names":[]}
|
|
@@ -4,7 +4,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
|
|
|
4
4
|
const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;
|
|
5
5
|
const starters = /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)/g;
|
|
6
6
|
const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;
|
|
7
|
-
const websites = /
|
|
7
|
+
const websites = /(\w+\.)+(com|net|org|io|gov|edu|me)/g;
|
|
8
8
|
const digits = /([0-9])/g;
|
|
9
9
|
const dots = /\.{2,}/g;
|
|
10
10
|
if (retainFormat) {
|
|
@@ -13,7 +13,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
|
|
|
13
13
|
text = text.replaceAll("\n", " ");
|
|
14
14
|
}
|
|
15
15
|
text = text.replaceAll(prefixes, "$1<prd>");
|
|
16
|
-
text = text.
|
|
16
|
+
text = text.replace(websites, (match) => match.replaceAll(".", "<prd>"));
|
|
17
17
|
text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, "g"), "$1<prd>$2");
|
|
18
18
|
text = text.replaceAll(dots, (match) => "<prd>".repeat(match.length));
|
|
19
19
|
text = text.replaceAll("Ph.D.", "Ph<prd>D<prd>");
|
|
@@ -37,7 +37,7 @@ const splitSentences = (text, minLength = 20, retainFormat = false) => {
|
|
|
37
37
|
text = text.replaceAll('."', '".');
|
|
38
38
|
text = text.replaceAll('!"', '"!');
|
|
39
39
|
text = text.replaceAll('?"', '"?');
|
|
40
|
-
text = text.
|
|
40
|
+
text = text.replace(/\.(?=\s|$)/g, ".<stop>");
|
|
41
41
|
text = text.replaceAll("?", "?<stop>");
|
|
42
42
|
text = text.replaceAll("!", "!<stop>");
|
|
43
43
|
text = text.replaceAll("<prd>", ".");
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (\n text: string,\n minLength = 20,\n retainFormat: boolean = false,\n): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /
|
|
1
|
+
{"version":3,"sources":["../../../src/tokenize/basic/sentence.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\n\n/**\n * Split the text into sentences.\n */\nexport const splitSentences = (\n text: string,\n minLength = 20,\n retainFormat: boolean = false,\n): [string, number, number][] => {\n const alphabets = /([A-Za-z])/g;\n const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;\n const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;\n const starters =\n /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)/g;\n const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;\n const websites = /(\\w+\\.)+(com|net|org|io|gov|edu|me)/g;\n const digits = /([0-9])/g;\n const dots = /\\.{2,}/g;\n\n if (retainFormat) {\n text = text.replaceAll('\\n', '<nel><stop>');\n } else {\n text = text.replaceAll('\\n', ' ');\n }\n\n text = text.replaceAll(prefixes, '$1<prd>');\n text = text.replace(websites, (match) => match.replaceAll('.', '<prd>'));\n text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');\n text = text.replaceAll(dots, (match) => '<prd>'.repeat(match.length));\n text = text.replaceAll('Ph.D.', 'Ph<prd>D<prd>');\n text = text.replaceAll(new RegExp(`\\\\s${alphabets.source}[.] `, 'g'), ' $1<prd> ');\n text = text.replaceAll(new RegExp(`${acronyms.source} ${starters.source}`, 'g'), '$1<stop> $2');\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>$3<prd>',\n );\n text = text.replaceAll(\n new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),\n '$1<prd>$2<prd>',\n );\n text = text.replaceAll(\n new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),\n '$1<stop> $2',\n );\n text = text.replaceAll(new RegExp(` ${suffixes.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll(new RegExp(` ${alphabets.source}[.]`, 'g'), '$1<prd>');\n text = text.replaceAll('.”', '”.');\n text = text.replaceAll('.\"', '\".');\n text = text.replaceAll('!\"', '\"!');\n text = text.replaceAll('?\"', '\"?');\n text = text.replace(/\\.(?=\\s|$)/g, '.<stop>');\n text = text.replaceAll('?', '?<stop>');\n text = text.replaceAll('!', '!<stop>');\n text = text.replaceAll('<prd>', '.');\n\n if (retainFormat) {\n text = text.replaceAll('<nel>', '\\n');\n }\n\n const split = text.split('<stop>');\n text = text.replaceAll('<stop>', '');\n\n const sentences: [string, number, number][] = [];\n let buf = '';\n let start = 0;\n let end = 0;\n const prePad = retainFormat ? '' : ' ';\n for (const match of split) {\n const sentence = retainFormat ? match : match.trim();\n if (!sentence) continue;\n\n buf += prePad + sentence;\n end += match.length;\n if (buf.length > minLength) {\n sentences.push([buf.slice(prePad.length), start, end]);\n start = end;\n buf = '';\n }\n }\n\n if (buf) {\n sentences.push([buf.slice(prePad.length), start, text.length - 1]);\n }\n\n return sentences;\n};\n"],"mappings":"AAOO,MAAM,iBAAiB,CAC5B,MACA,YAAY,IACZ,eAAwB,UACO;AAC/B,QAAM,YAAY;AAClB,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,WACJ;AACF,QAAM,WAAW;AACjB,QAAM,WAAW;AACjB,QAAM,SAAS;AACf,QAAM,OAAO;AAEb,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,MAAM,aAAa;AAAA,EAC5C,OAAO;AACL,WAAO,KAAK,WAAW,MAAM,GAAG;AAAA,EAClC;AAEA,SAAO,KAAK,WAAW,UAAU,SAAS;AAC1C,SAAO,KAAK,QAAQ,UAAU,CAAC,UAAU,MAAM,WAAW,KAAK,OAAO,CAAC;AACvE,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,OAAO,MAAM,MAAM,OAAO,MAAM,IAAI,GAAG,GAAG,WAAW;AAC1F,SAAO,KAAK,WAAW,MAAM,CAAC,UAAU,QAAQ,OAAO,MAAM,MAAM,CAAC;AACpE,SAAO,KAAK,WAAW,SAAS,eAAe;AAC/C,SAAO,KAAK,WAAW,IAAI,OAAO,MAAM,UAAU,MAAM,QAAQ,GAAG,GAAG,WAAW;AACjF,SAAO,KAAK,WAAW,IAAI,OAAO,GAAG,SAAS,MAAM,IAAI,SAAS,MAAM,IAAI,GAAG,GAAG,aAAa;AAC9F,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IACpF;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,GAAG,UAAU,MAAM,MAAM,UAAU,MAAM,OAAO,GAAG;AAAA,IAC9D;AAAA,EACF;AACA,SAAO,KAAK;AAAA,IACV,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,SAAS,MAAM,IAAI,GAAG;AAAA,IAC3D;AAAA,EACF;AACA,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,SAAS,MAAM,OAAO,GAAG,GAAG,SAAS;AAC3E,SAAO,KAAK,WAAW,IAAI,OAAO,IAAI,UAAU,MAAM,OAAO,GAAG,GAAG,SAAS;AAC5E,SAAO,KAAK,WAAW,WAAM,SAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,WAAW,MAAM,IAAI;AACjC,SAAO,KAAK,QAAQ,eAAe,SAAS;AAC5C,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,KAAK,SAAS;AACrC,SAAO,KAAK,WAAW,SAAS,GAAG;AAEnC,MAAI,cAAc;AAChB,WAAO,KAAK,WAAW,SAAS,IAAI;AAAA,EACtC;AAEA,QAAM,QAAQ,KAAK,MAAM,QAAQ;AACjC,SAAO,KAAK,WAAW,UAAU,EAAE;AAEnC,QAAM,YAAwC,CAAC;AAC/C,MAAI,MAAM;AACV,MAAI,QAAQ;AACZ,MAAI,MAAM;AACV,QAAM,SAAS,eAAe,KAAK;AACnC,aAAW,SAAS,OAAO;AACzB,UAAM,WAAW,eAAe,QAAQ,MAAM,KAAK;AACnD,QAAI,CAAC,SAAU;AAEf,WAAO,SAAS;AAChB,WAAO,MAAM;AACb,QAAI,IAAI,SAAS,WAAW;AAC1B,gBAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,GAAG,CAAC;AACrD,cAAQ;AACR,YAAM;AAAA,IACR;AAAA,EACF;AAEA,MAAI,KAAK;AACP,cAAU,KAAK,CAAC,IAAI,MAAM,OAAO,MAAM,GAAG,OAAO,KAAK,SAAS,CAAC,CAAC;AAAA,EACnE;AAEA,SAAO;AACT;","names":[]}
|
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
var import_vitest = require("vitest");
|
|
3
3
|
var import_basic = require("./basic/index.cjs");
|
|
4
4
|
var import_paragraph = require("./basic/paragraph.cjs");
|
|
5
|
-
const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
|
|
5
|
+
const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. Find additional resources on livekit.com. Find additional resources on docs.livekit.com. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
|
|
6
6
|
const EXPECTED_MIN_20 = [
|
|
7
7
|
"Hi! LiveKit is a platform for live audio and video applications and services.",
|
|
8
8
|
"R.T.C stands for Real-Time Communication... again R.T.C.",
|
|
9
9
|
"Mr. Theo is testing the sentence tokenizer.",
|
|
10
10
|
"This is a test. Another test.",
|
|
11
11
|
"A short sentence. A longer sentence that is longer than the previous sentence.",
|
|
12
|
+
"Find additional resources on livekit.com.",
|
|
13
|
+
"Find additional resources on docs.livekit.com.",
|
|
12
14
|
"f(x) = x * 2.54 + 42.",
|
|
13
15
|
"Hey! Hi! Hello!"
|
|
14
16
|
];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/tokenize/tokenizer.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { SentenceTokenizer, WordTokenizer, hyphenateWord } from './basic/index.js';\nimport { splitParagraphs } from './basic/paragraph.js';\n\nconst TEXT =\n 'Hi! ' +\n 'LiveKit is a platform for live audio and video applications and services. ' +\n 'R.T.C stands for Real-Time Communication... again R.T.C. ' +\n 'Mr. Theo is testing the sentence tokenizer. ' +\n 'This is a test. Another test. ' +\n 'A short sentence. ' +\n 'A longer sentence that is longer than the previous sentence. ' +\n 'f(x) = x * 2.54 + 42. ' +\n 'Hey! Hi! Hello! ';\n\nconst EXPECTED_MIN_20 = [\n 'Hi! LiveKit is a platform for live audio and video applications and services.',\n 'R.T.C stands for Real-Time Communication... again R.T.C.',\n 'Mr. Theo is testing the sentence tokenizer.',\n 'This is a test. Another test.',\n 'A short sentence. A longer sentence that is longer than the previous sentence.',\n 'f(x) = x * 2.54 + 42.',\n 'Hey! Hi! Hello!',\n];\n\nconst WORDS_TEXT = 'This is a test. Blabla another test! multiple consecutive spaces: done';\nconst WORDS_EXPECTED = [\n 'This',\n 'is',\n 'a',\n 'test',\n 'Blabla',\n 'another',\n 'test',\n 'multiple',\n 'consecutive',\n 'spaces',\n 'done',\n];\n\nconst WORDS_PUNCT_TEXT =\n 'This is <phoneme alphabet=\"cmu-arpabet\" ph=\"AE K CH UW AH L IY\">actually</phoneme> tricky to handle.';\nconst WORDS_PUNCT_EXPECTED = [\n 'This',\n 'is',\n '<phoneme',\n 'alphabet=\"cmu-arpabet\"',\n 'ph=\"AE',\n 'K',\n 'CH',\n 'UW',\n 'AH',\n 'L',\n 'IY\">actually</phoneme>',\n 'tricky',\n 'to',\n 'handle.',\n];\n\nconst HYPHENATOR_TEXT = ['Segment', 'expected', 'communication', 'window', 'welcome', 'bedroom'];\nconst HYPHENATOR_EXPECTED = [\n ['Seg', 'ment'],\n ['ex', 'pect', 'ed'],\n ['com', 'mu', 'ni', 'ca', 'tion'],\n ['win', 'dow'],\n ['wel', 'come'],\n ['bed', 'room'],\n];\n\nconst PARAGRAPH_TEST_CASES: [string, [string, number, number][]][] = [\n ['Single paragraph.', [['Single paragraph.', 0, 17]]],\n [\n 'Paragraph 1.\\n\\nParagraph 2.',\n [\n ['Paragraph 1.', 0, 12],\n ['Paragraph 2.', 14, 26],\n ],\n ],\n [\n 'Para 1.\\n\\nPara 2.\\n\\nPara 3.',\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 9, 16],\n ['Para 3.', 18, 25],\n ],\n ],\n ['\\n\\nParagraph with leading newlines.', [['Paragraph with leading newlines.', 2, 34]]],\n ['Paragraph with trailing newlines.\\n\\n', [['Paragraph with trailing newlines.', 0, 33]]],\n [\n '\\n\\n Paragraph with leading and trailing spaces. \\n\\n',\n [['Paragraph with leading and trailing spaces.', 4, 47]],\n ],\n [\n 'Para 1.\\n\\n\\n\\nPara 2.', // Multiple newlines between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 11, 18],\n ],\n ],\n [\n 'Para 1.\\n \\n \\nPara 2.', // Newlines with spaces between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 12, 19],\n ],\n ],\n [\n '', // Empty string\n [],\n ],\n [\n '\\n\\n\\n', // Only newlines\n [],\n ],\n [\n 'Line 1\\nLine 2\\nLine 3', // Single paragraph with newlines\n [['Line 1\\nLine 2\\nLine 3', 0, 20]],\n ],\n];\n\ndescribe('tokenizer', () => {\n describe('SentenceTokenizer', () => {\n const tokenizer = new SentenceTokenizer();\n\n it('should tokenize sentences correctly', () => {\n expect(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize sentences correctly', async () => {\n const pattern = [1, 2, 4];\n let text = TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of EXPECTED_MIN_20) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n describe('WordTokenizer', () => {\n const tokenizer = new WordTokenizer();\n\n it('should tokenize words correctly', () => {\n expect(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n\n describe('punctuation handling', () => {\n const tokenizerPunct = new WordTokenizer(false);\n\n it('should tokenize words correctly', () => {\n expect(\n tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x),\n ).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_PUNCT_TEXT;\n const chunks = [];\n const patternIter = Array(\n Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)),\n )\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizerPunct.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_PUNCT_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n });\n describe('hyphenateWord', () => {\n it('should hyphenate correctly', () => {\n HYPHENATOR_TEXT.forEach((x, i) => {\n expect(hyphenateWord(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);\n });\n });\n });\n describe('splitParagraphs', () => {\n it('should tokenize paragraphs correctly', () => {\n PARAGRAPH_TEST_CASES.forEach(([a, b]) => {\n expect(splitParagraphs(a)).toStrictEqual(b);\n });\n });\n });\n});\n"],"mappings":";AAGA,oBAAqC;AACrC,mBAAgE;AAChE,uBAAgC;AAEhC,MAAM,OACJ;AAUF,MAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,aAAa;AACnB,MAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,mBACJ;AACF,MAAM,uBAAuB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,kBAAkB,CAAC,WAAW,YAAY,iBAAiB,UAAU,WAAW,SAAS;AAC/F,MAAM,sBAAsB;AAAA,EAC1B,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,MAAM,QAAQ,IAAI;AAAA,EACnB,CAAC,OAAO,MAAM,MAAM,MAAM,MAAM;AAAA,EAChC,CAAC,OAAO,KAAK;AAAA,EACb,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,OAAO,MAAM;AAChB;AAEA,MAAM,uBAA+D;AAAA,EACnE,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;AAAA,EACpD;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,gBAAgB,GAAG,EAAE;AAAA,MACtB,CAAC,gBAAgB,IAAI,EAAE;AAAA,IACzB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,GAAG,EAAE;AAAA,MACjB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA,CAAC,wCAAwC,CAAC,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;AAAA,EACtF,CAAC,yCAAyC,CAAC,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;AAAA,EACxF;AAAA,IACE;AAAA,IACA,CAAC,CAAC,+CAA+C,GAAG,EAAE,CAAC;AAAA,EACzD;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC,CAAC,0BAA0B,GAAG,EAAE,CAAC;AAAA,EACpC;AACF;AAAA,IAEA,wBAAS,aAAa,MAAM;AAC1B,8BAAS,qBAAqB,MAAM;AAClC,UAAM,YAAY,IAAI,+BAAkB;AAExC,0BAAG,uCAAuC,MAAM;AAC9C,gCAAO,UAAU,SAAS,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IACxF,CAAC;AAED,0BAAG,8CAA8C,YAAY;AAC3D,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,iBAAiB;AAC/B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,sCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,iBAAiB,MAAM;AAC9B,UAAM,YAAY,IAAI,2BAAc;AAEpC,0BAAG,mCAAmC,MAAM;AAC1C,gCAAO,UAAU,SAAS,UAAU,EAAE,MAAM,CAAC,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IAC7F,CAAC;AAED,0BAAG,0CAA0C,YAAY;AACvD,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,gBAAgB;AAC9B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,sCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAED,gCAAS,wBAAwB,MAAM;AACrC,YAAM,iBAAiB,IAAI,2BAAc,KAAK;AAE9C,4BAAG,mCAAmC,MAAM;AAC1C;AAAA,UACE,eAAe,SAAS,gBAAgB,EAAE,MAAM,CAAC,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC;AAAA,QACzF,EAAE,WAAW;AAAA,MACf,CAAC;AAED,4BAAG,0CAA0C,YAAY;AACvD,cAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,YAAI,OAAO;AACX,cAAM,SAAS,CAAC;AAChB,cAAM,cAAc;AAAA,UAClB,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC;AAAA,QACpE,EACG,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,mBAAW,QAAQ,aAAa;AAC9B,cAAI,CAAC,KAAM;AACX,iBAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB;AACA,cAAM,SAAS,eAAe,OAAO;AACrC,mBAAW,SAAS,QAAQ;AAC1B,iBAAO,SAAS,KAAK;AAAA,QACvB;AACA,eAAO,SAAS;AAChB,eAAO,MAAM;AAEb,mBAAW,KAAK,sBAAsB;AACpC,gBAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,gBAAI,MAAM,OAAO;AACf,wCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,YAC3C;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,iBAAiB,MAAM;AAC9B,0BAAG,8BAA8B,MAAM;AACrC,sBAAgB,QAAQ,CAAC,GAAG,MAAM;AAChC,sCAAO,4BAAc,CAAC,CAAC,EAAE,cAAc,oBAAoB,CAAC,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,mBAAmB,MAAM;AAChC,0BAAG,wCAAwC,MAAM;AAC/C,2BAAqB,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACvC,sCAAO,kCAAgB,CAAC,CAAC,EAAE,cAAc,CAAC;AAAA,MAC5C,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACH,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/tokenize/tokenizer.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { SentenceTokenizer, WordTokenizer, hyphenateWord } from './basic/index.js';\nimport { splitParagraphs } from './basic/paragraph.js';\n\nconst TEXT =\n 'Hi! ' +\n 'LiveKit is a platform for live audio and video applications and services. ' +\n 'R.T.C stands for Real-Time Communication... again R.T.C. ' +\n 'Mr. Theo is testing the sentence tokenizer. ' +\n 'This is a test. Another test. ' +\n 'A short sentence. ' +\n 'A longer sentence that is longer than the previous sentence. ' +\n 'Find additional resources on livekit.com. ' +\n 'Find additional resources on docs.livekit.com. ' +\n 'f(x) = x * 2.54 + 42. ' +\n 'Hey! Hi! Hello! ';\n\nconst EXPECTED_MIN_20 = [\n 'Hi! LiveKit is a platform for live audio and video applications and services.',\n 'R.T.C stands for Real-Time Communication... again R.T.C.',\n 'Mr. Theo is testing the sentence tokenizer.',\n 'This is a test. Another test.',\n 'A short sentence. A longer sentence that is longer than the previous sentence.',\n 'Find additional resources on livekit.com.',\n 'Find additional resources on docs.livekit.com.',\n 'f(x) = x * 2.54 + 42.',\n 'Hey! Hi! Hello!',\n];\n\nconst WORDS_TEXT = 'This is a test. Blabla another test! multiple consecutive spaces: done';\nconst WORDS_EXPECTED = [\n 'This',\n 'is',\n 'a',\n 'test',\n 'Blabla',\n 'another',\n 'test',\n 'multiple',\n 'consecutive',\n 'spaces',\n 'done',\n];\n\nconst WORDS_PUNCT_TEXT =\n 'This is <phoneme alphabet=\"cmu-arpabet\" ph=\"AE K CH UW AH L IY\">actually</phoneme> tricky to handle.';\nconst WORDS_PUNCT_EXPECTED = [\n 'This',\n 'is',\n '<phoneme',\n 'alphabet=\"cmu-arpabet\"',\n 'ph=\"AE',\n 'K',\n 'CH',\n 'UW',\n 'AH',\n 'L',\n 'IY\">actually</phoneme>',\n 'tricky',\n 'to',\n 'handle.',\n];\n\nconst HYPHENATOR_TEXT = ['Segment', 'expected', 'communication', 'window', 'welcome', 'bedroom'];\nconst HYPHENATOR_EXPECTED = [\n ['Seg', 'ment'],\n ['ex', 'pect', 'ed'],\n ['com', 'mu', 'ni', 'ca', 'tion'],\n ['win', 'dow'],\n ['wel', 'come'],\n ['bed', 'room'],\n];\n\nconst PARAGRAPH_TEST_CASES: [string, [string, number, number][]][] = [\n ['Single paragraph.', [['Single paragraph.', 0, 17]]],\n [\n 'Paragraph 1.\\n\\nParagraph 2.',\n [\n ['Paragraph 1.', 0, 12],\n ['Paragraph 2.', 14, 26],\n ],\n ],\n [\n 'Para 1.\\n\\nPara 2.\\n\\nPara 3.',\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 9, 16],\n ['Para 3.', 18, 25],\n ],\n ],\n ['\\n\\nParagraph with leading newlines.', [['Paragraph with leading newlines.', 2, 34]]],\n ['Paragraph with trailing newlines.\\n\\n', [['Paragraph with trailing newlines.', 0, 33]]],\n [\n '\\n\\n Paragraph with leading and trailing spaces. \\n\\n',\n [['Paragraph with leading and trailing spaces.', 4, 47]],\n ],\n [\n 'Para 1.\\n\\n\\n\\nPara 2.', // Multiple newlines between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 11, 18],\n ],\n ],\n [\n 'Para 1.\\n \\n \\nPara 2.', // Newlines with spaces between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 12, 19],\n ],\n ],\n [\n '', // Empty string\n [],\n ],\n [\n '\\n\\n\\n', // Only newlines\n [],\n ],\n [\n 'Line 1\\nLine 2\\nLine 3', // Single paragraph with newlines\n [['Line 1\\nLine 2\\nLine 3', 0, 20]],\n ],\n];\n\ndescribe('tokenizer', () => {\n describe('SentenceTokenizer', () => {\n const tokenizer = new SentenceTokenizer();\n\n it('should tokenize sentences correctly', () => {\n expect(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize sentences correctly', async () => {\n const pattern = [1, 2, 4];\n let text = TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of EXPECTED_MIN_20) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n describe('WordTokenizer', () => {\n const tokenizer = new WordTokenizer();\n\n it('should tokenize words correctly', () => {\n expect(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n\n describe('punctuation handling', () => {\n const tokenizerPunct = new WordTokenizer(false);\n\n it('should tokenize words correctly', () => {\n expect(\n tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x),\n ).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_PUNCT_TEXT;\n const chunks = [];\n const patternIter = Array(\n Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)),\n )\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizerPunct.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_PUNCT_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n });\n describe('hyphenateWord', () => {\n it('should hyphenate correctly', () => {\n HYPHENATOR_TEXT.forEach((x, i) => {\n expect(hyphenateWord(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);\n });\n });\n });\n describe('splitParagraphs', () => {\n it('should tokenize paragraphs correctly', () => {\n PARAGRAPH_TEST_CASES.forEach(([a, b]) => {\n expect(splitParagraphs(a)).toStrictEqual(b);\n });\n });\n });\n});\n"],"mappings":";AAGA,oBAAqC;AACrC,mBAAgE;AAChE,uBAAgC;AAEhC,MAAM,OACJ;AAYF,MAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,aAAa;AACnB,MAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,mBACJ;AACF,MAAM,uBAAuB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,kBAAkB,CAAC,WAAW,YAAY,iBAAiB,UAAU,WAAW,SAAS;AAC/F,MAAM,sBAAsB;AAAA,EAC1B,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,MAAM,QAAQ,IAAI;AAAA,EACnB,CAAC,OAAO,MAAM,MAAM,MAAM,MAAM;AAAA,EAChC,CAAC,OAAO,KAAK;AAAA,EACb,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,OAAO,MAAM;AAChB;AAEA,MAAM,uBAA+D;AAAA,EACnE,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;AAAA,EACpD;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,gBAAgB,GAAG,EAAE;AAAA,MACtB,CAAC,gBAAgB,IAAI,EAAE;AAAA,IACzB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,GAAG,EAAE;AAAA,MACjB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA,CAAC,wCAAwC,CAAC,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;AAAA,EACtF,CAAC,yCAAyC,CAAC,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;AAAA,EACxF;AAAA,IACE;AAAA,IACA,CAAC,CAAC,+CAA+C,GAAG,EAAE,CAAC;AAAA,EACzD;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC,CAAC,0BAA0B,GAAG,EAAE,CAAC;AAAA,EACpC;AACF;AAAA,IAEA,wBAAS,aAAa,MAAM;AAC1B,8BAAS,qBAAqB,MAAM;AAClC,UAAM,YAAY,IAAI,+BAAkB;AAExC,0BAAG,uCAAuC,MAAM;AAC9C,gCAAO,UAAU,SAAS,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IACxF,CAAC;AAED,0BAAG,8CAA8C,YAAY;AAC3D,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,iBAAiB;AAC/B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,sCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,iBAAiB,MAAM;AAC9B,UAAM,YAAY,IAAI,2BAAc;AAEpC,0BAAG,mCAAmC,MAAM;AAC1C,gCAAO,UAAU,SAAS,UAAU,EAAE,MAAM,CAAC,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IAC7F,CAAC;AAED,0BAAG,0CAA0C,YAAY;AACvD,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,gBAAgB;AAC9B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,sCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAED,gCAAS,wBAAwB,MAAM;AACrC,YAAM,iBAAiB,IAAI,2BAAc,KAAK;AAE9C,4BAAG,mCAAmC,MAAM;AAC1C;AAAA,UACE,eAAe,SAAS,gBAAgB,EAAE,MAAM,CAAC,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC;AAAA,QACzF,EAAE,WAAW;AAAA,MACf,CAAC;AAED,4BAAG,0CAA0C,YAAY;AACvD,cAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,YAAI,OAAO;AACX,cAAM,SAAS,CAAC;AAChB,cAAM,cAAc;AAAA,UAClB,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC;AAAA,QACpE,EACG,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,mBAAW,QAAQ,aAAa;AAC9B,cAAI,CAAC,KAAM;AACX,iBAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB;AACA,cAAM,SAAS,eAAe,OAAO;AACrC,mBAAW,SAAS,QAAQ;AAC1B,iBAAO,SAAS,KAAK;AAAA,QACvB;AACA,eAAO,SAAS;AAChB,eAAO,MAAM;AAEb,mBAAW,KAAK,sBAAsB;AACpC,gBAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,gBAAI,MAAM,OAAO;AACf,wCAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,YAC3C;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,iBAAiB,MAAM;AAC9B,0BAAG,8BAA8B,MAAM;AACrC,sBAAgB,QAAQ,CAAC,GAAG,MAAM;AAChC,sCAAO,4BAAc,CAAC,CAAC,EAAE,cAAc,oBAAoB,CAAC,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,8BAAS,mBAAmB,MAAM;AAChC,0BAAG,wCAAwC,MAAM;AAC/C,2BAAqB,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACvC,sCAAO,kCAAgB,CAAC,CAAC,EAAE,cAAc,CAAC;AAAA,MAC5C,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACH,CAAC;","names":[]}
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
2
|
import { SentenceTokenizer, WordTokenizer, hyphenateWord } from "./basic/index.js";
|
|
3
3
|
import { splitParagraphs } from "./basic/paragraph.js";
|
|
4
|
-
const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
|
|
4
|
+
const TEXT = "Hi! LiveKit is a platform for live audio and video applications and services. R.T.C stands for Real-Time Communication... again R.T.C. Mr. Theo is testing the sentence tokenizer. This is a test. Another test. A short sentence. A longer sentence that is longer than the previous sentence. Find additional resources on livekit.com. Find additional resources on docs.livekit.com. f(x) = x * 2.54 + 42. Hey! Hi! Hello! ";
|
|
5
5
|
const EXPECTED_MIN_20 = [
|
|
6
6
|
"Hi! LiveKit is a platform for live audio and video applications and services.",
|
|
7
7
|
"R.T.C stands for Real-Time Communication... again R.T.C.",
|
|
8
8
|
"Mr. Theo is testing the sentence tokenizer.",
|
|
9
9
|
"This is a test. Another test.",
|
|
10
10
|
"A short sentence. A longer sentence that is longer than the previous sentence.",
|
|
11
|
+
"Find additional resources on livekit.com.",
|
|
12
|
+
"Find additional resources on docs.livekit.com.",
|
|
11
13
|
"f(x) = x * 2.54 + 42.",
|
|
12
14
|
"Hey! Hi! Hello!"
|
|
13
15
|
];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/tokenize/tokenizer.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { SentenceTokenizer, WordTokenizer, hyphenateWord } from './basic/index.js';\nimport { splitParagraphs } from './basic/paragraph.js';\n\nconst TEXT =\n 'Hi! ' +\n 'LiveKit is a platform for live audio and video applications and services. ' +\n 'R.T.C stands for Real-Time Communication... again R.T.C. ' +\n 'Mr. Theo is testing the sentence tokenizer. ' +\n 'This is a test. Another test. ' +\n 'A short sentence. ' +\n 'A longer sentence that is longer than the previous sentence. ' +\n 'f(x) = x * 2.54 + 42. ' +\n 'Hey! Hi! Hello! ';\n\nconst EXPECTED_MIN_20 = [\n 'Hi! LiveKit is a platform for live audio and video applications and services.',\n 'R.T.C stands for Real-Time Communication... again R.T.C.',\n 'Mr. Theo is testing the sentence tokenizer.',\n 'This is a test. Another test.',\n 'A short sentence. A longer sentence that is longer than the previous sentence.',\n 'f(x) = x * 2.54 + 42.',\n 'Hey! Hi! Hello!',\n];\n\nconst WORDS_TEXT = 'This is a test. Blabla another test! multiple consecutive spaces: done';\nconst WORDS_EXPECTED = [\n 'This',\n 'is',\n 'a',\n 'test',\n 'Blabla',\n 'another',\n 'test',\n 'multiple',\n 'consecutive',\n 'spaces',\n 'done',\n];\n\nconst WORDS_PUNCT_TEXT =\n 'This is <phoneme alphabet=\"cmu-arpabet\" ph=\"AE K CH UW AH L IY\">actually</phoneme> tricky to handle.';\nconst WORDS_PUNCT_EXPECTED = [\n 'This',\n 'is',\n '<phoneme',\n 'alphabet=\"cmu-arpabet\"',\n 'ph=\"AE',\n 'K',\n 'CH',\n 'UW',\n 'AH',\n 'L',\n 'IY\">actually</phoneme>',\n 'tricky',\n 'to',\n 'handle.',\n];\n\nconst HYPHENATOR_TEXT = ['Segment', 'expected', 'communication', 'window', 'welcome', 'bedroom'];\nconst HYPHENATOR_EXPECTED = [\n ['Seg', 'ment'],\n ['ex', 'pect', 'ed'],\n ['com', 'mu', 'ni', 'ca', 'tion'],\n ['win', 'dow'],\n ['wel', 'come'],\n ['bed', 'room'],\n];\n\nconst PARAGRAPH_TEST_CASES: [string, [string, number, number][]][] = [\n ['Single paragraph.', [['Single paragraph.', 0, 17]]],\n [\n 'Paragraph 1.\\n\\nParagraph 2.',\n [\n ['Paragraph 1.', 0, 12],\n ['Paragraph 2.', 14, 26],\n ],\n ],\n [\n 'Para 1.\\n\\nPara 2.\\n\\nPara 3.',\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 9, 16],\n ['Para 3.', 18, 25],\n ],\n ],\n ['\\n\\nParagraph with leading newlines.', [['Paragraph with leading newlines.', 2, 34]]],\n ['Paragraph with trailing newlines.\\n\\n', [['Paragraph with trailing newlines.', 0, 33]]],\n [\n '\\n\\n Paragraph with leading and trailing spaces. \\n\\n',\n [['Paragraph with leading and trailing spaces.', 4, 47]],\n ],\n [\n 'Para 1.\\n\\n\\n\\nPara 2.', // Multiple newlines between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 11, 18],\n ],\n ],\n [\n 'Para 1.\\n \\n \\nPara 2.', // Newlines with spaces between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 12, 19],\n ],\n ],\n [\n '', // Empty string\n [],\n ],\n [\n '\\n\\n\\n', // Only newlines\n [],\n ],\n [\n 'Line 1\\nLine 2\\nLine 3', // Single paragraph with newlines\n [['Line 1\\nLine 2\\nLine 3', 0, 20]],\n ],\n];\n\ndescribe('tokenizer', () => {\n describe('SentenceTokenizer', () => {\n const tokenizer = new SentenceTokenizer();\n\n it('should tokenize sentences correctly', () => {\n expect(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize sentences correctly', async () => {\n const pattern = [1, 2, 4];\n let text = TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of EXPECTED_MIN_20) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n describe('WordTokenizer', () => {\n const tokenizer = new WordTokenizer();\n\n it('should tokenize words correctly', () => {\n expect(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n\n describe('punctuation handling', () => {\n const tokenizerPunct = new WordTokenizer(false);\n\n it('should tokenize words correctly', () => {\n expect(\n tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x),\n ).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_PUNCT_TEXT;\n const chunks = [];\n const patternIter = Array(\n Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)),\n )\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizerPunct.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_PUNCT_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n });\n describe('hyphenateWord', () => {\n it('should hyphenate correctly', () => {\n HYPHENATOR_TEXT.forEach((x, i) => {\n expect(hyphenateWord(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);\n });\n });\n });\n describe('splitParagraphs', () => {\n it('should tokenize paragraphs correctly', () => {\n PARAGRAPH_TEST_CASES.forEach(([a, b]) => {\n expect(splitParagraphs(a)).toStrictEqual(b);\n });\n });\n });\n});\n"],"mappings":"AAGA,SAAS,UAAU,QAAQ,UAAU;AACrC,SAAS,mBAAmB,eAAe,qBAAqB;AAChE,SAAS,uBAAuB;AAEhC,MAAM,OACJ;AAUF,MAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,aAAa;AACnB,MAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,mBACJ;AACF,MAAM,uBAAuB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,kBAAkB,CAAC,WAAW,YAAY,iBAAiB,UAAU,WAAW,SAAS;AAC/F,MAAM,sBAAsB;AAAA,EAC1B,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,MAAM,QAAQ,IAAI;AAAA,EACnB,CAAC,OAAO,MAAM,MAAM,MAAM,MAAM;AAAA,EAChC,CAAC,OAAO,KAAK;AAAA,EACb,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,OAAO,MAAM;AAChB;AAEA,MAAM,uBAA+D;AAAA,EACnE,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;AAAA,EACpD;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,gBAAgB,GAAG,EAAE;AAAA,MACtB,CAAC,gBAAgB,IAAI,EAAE;AAAA,IACzB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,GAAG,EAAE;AAAA,MACjB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA,CAAC,wCAAwC,CAAC,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;AAAA,EACtF,CAAC,yCAAyC,CAAC,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;AAAA,EACxF;AAAA,IACE;AAAA,IACA,CAAC,CAAC,+CAA+C,GAAG,EAAE,CAAC;AAAA,EACzD;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC,CAAC,0BAA0B,GAAG,EAAE,CAAC;AAAA,EACpC;AACF;AAEA,SAAS,aAAa,MAAM;AAC1B,WAAS,qBAAqB,MAAM;AAClC,UAAM,YAAY,IAAI,kBAAkB;AAExC,OAAG,uCAAuC,MAAM;AAC9C,aAAO,UAAU,SAAS,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IACxF,CAAC;AAED,OAAG,8CAA8C,YAAY;AAC3D,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,iBAAiB;AAC/B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,mBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACD,WAAS,iBAAiB,MAAM;AAC9B,UAAM,YAAY,IAAI,cAAc;AAEpC,OAAG,mCAAmC,MAAM;AAC1C,aAAO,UAAU,SAAS,UAAU,EAAE,MAAM,CAAC,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IAC7F,CAAC;AAED,OAAG,0CAA0C,YAAY;AACvD,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,gBAAgB;AAC9B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,mBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAED,aAAS,wBAAwB,MAAM;AACrC,YAAM,iBAAiB,IAAI,cAAc,KAAK;AAE9C,SAAG,mCAAmC,MAAM;AAC1C;AAAA,UACE,eAAe,SAAS,gBAAgB,EAAE,MAAM,CAAC,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC;AAAA,QACzF,EAAE,WAAW;AAAA,MACf,CAAC;AAED,SAAG,0CAA0C,YAAY;AACvD,cAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,YAAI,OAAO;AACX,cAAM,SAAS,CAAC;AAChB,cAAM,cAAc;AAAA,UAClB,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC;AAAA,QACpE,EACG,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,mBAAW,QAAQ,aAAa;AAC9B,cAAI,CAAC,KAAM;AACX,iBAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB;AACA,cAAM,SAAS,eAAe,OAAO;AACrC,mBAAW,SAAS,QAAQ;AAC1B,iBAAO,SAAS,KAAK;AAAA,QACvB;AACA,eAAO,SAAS;AAChB,eAAO,MAAM;AAEb,mBAAW,KAAK,sBAAsB;AACpC,gBAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,gBAAI,MAAM,OAAO;AACf,qBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,YAC3C;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,WAAS,iBAAiB,MAAM;AAC9B,OAAG,8BAA8B,MAAM;AACrC,sBAAgB,QAAQ,CAAC,GAAG,MAAM;AAChC,eAAO,cAAc,CAAC,CAAC,EAAE,cAAc,oBAAoB,CAAC,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,WAAS,mBAAmB,MAAM;AAChC,OAAG,wCAAwC,MAAM;AAC/C,2BAAqB,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACvC,eAAO,gBAAgB,CAAC,CAAC,EAAE,cAAc,CAAC;AAAA,MAC5C,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACH,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/tokenize/tokenizer.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { SentenceTokenizer, WordTokenizer, hyphenateWord } from './basic/index.js';\nimport { splitParagraphs } from './basic/paragraph.js';\n\nconst TEXT =\n 'Hi! ' +\n 'LiveKit is a platform for live audio and video applications and services. ' +\n 'R.T.C stands for Real-Time Communication... again R.T.C. ' +\n 'Mr. Theo is testing the sentence tokenizer. ' +\n 'This is a test. Another test. ' +\n 'A short sentence. ' +\n 'A longer sentence that is longer than the previous sentence. ' +\n 'Find additional resources on livekit.com. ' +\n 'Find additional resources on docs.livekit.com. ' +\n 'f(x) = x * 2.54 + 42. ' +\n 'Hey! Hi! Hello! ';\n\nconst EXPECTED_MIN_20 = [\n 'Hi! LiveKit is a platform for live audio and video applications and services.',\n 'R.T.C stands for Real-Time Communication... again R.T.C.',\n 'Mr. Theo is testing the sentence tokenizer.',\n 'This is a test. Another test.',\n 'A short sentence. A longer sentence that is longer than the previous sentence.',\n 'Find additional resources on livekit.com.',\n 'Find additional resources on docs.livekit.com.',\n 'f(x) = x * 2.54 + 42.',\n 'Hey! Hi! Hello!',\n];\n\nconst WORDS_TEXT = 'This is a test. Blabla another test! multiple consecutive spaces: done';\nconst WORDS_EXPECTED = [\n 'This',\n 'is',\n 'a',\n 'test',\n 'Blabla',\n 'another',\n 'test',\n 'multiple',\n 'consecutive',\n 'spaces',\n 'done',\n];\n\nconst WORDS_PUNCT_TEXT =\n 'This is <phoneme alphabet=\"cmu-arpabet\" ph=\"AE K CH UW AH L IY\">actually</phoneme> tricky to handle.';\nconst WORDS_PUNCT_EXPECTED = [\n 'This',\n 'is',\n '<phoneme',\n 'alphabet=\"cmu-arpabet\"',\n 'ph=\"AE',\n 'K',\n 'CH',\n 'UW',\n 'AH',\n 'L',\n 'IY\">actually</phoneme>',\n 'tricky',\n 'to',\n 'handle.',\n];\n\nconst HYPHENATOR_TEXT = ['Segment', 'expected', 'communication', 'window', 'welcome', 'bedroom'];\nconst HYPHENATOR_EXPECTED = [\n ['Seg', 'ment'],\n ['ex', 'pect', 'ed'],\n ['com', 'mu', 'ni', 'ca', 'tion'],\n ['win', 'dow'],\n ['wel', 'come'],\n ['bed', 'room'],\n];\n\nconst PARAGRAPH_TEST_CASES: [string, [string, number, number][]][] = [\n ['Single paragraph.', [['Single paragraph.', 0, 17]]],\n [\n 'Paragraph 1.\\n\\nParagraph 2.',\n [\n ['Paragraph 1.', 0, 12],\n ['Paragraph 2.', 14, 26],\n ],\n ],\n [\n 'Para 1.\\n\\nPara 2.\\n\\nPara 3.',\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 9, 16],\n ['Para 3.', 18, 25],\n ],\n ],\n ['\\n\\nParagraph with leading newlines.', [['Paragraph with leading newlines.', 2, 34]]],\n ['Paragraph with trailing newlines.\\n\\n', [['Paragraph with trailing newlines.', 0, 33]]],\n [\n '\\n\\n Paragraph with leading and trailing spaces. \\n\\n',\n [['Paragraph with leading and trailing spaces.', 4, 47]],\n ],\n [\n 'Para 1.\\n\\n\\n\\nPara 2.', // Multiple newlines between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 11, 18],\n ],\n ],\n [\n 'Para 1.\\n \\n \\nPara 2.', // Newlines with spaces between paragraphs\n [\n ['Para 1.', 0, 7],\n ['Para 2.', 12, 19],\n ],\n ],\n [\n '', // Empty string\n [],\n ],\n [\n '\\n\\n\\n', // Only newlines\n [],\n ],\n [\n 'Line 1\\nLine 2\\nLine 3', // Single paragraph with newlines\n [['Line 1\\nLine 2\\nLine 3', 0, 20]],\n ],\n];\n\ndescribe('tokenizer', () => {\n describe('SentenceTokenizer', () => {\n const tokenizer = new SentenceTokenizer();\n\n it('should tokenize sentences correctly', () => {\n expect(tokenizer.tokenize(TEXT).every((x, i) => EXPECTED_MIN_20[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize sentences correctly', async () => {\n const pattern = [1, 2, 4];\n let text = TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of EXPECTED_MIN_20) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n describe('WordTokenizer', () => {\n const tokenizer = new WordTokenizer();\n\n it('should tokenize words correctly', () => {\n expect(tokenizer.tokenize(WORDS_TEXT).every((x, i) => WORDS_EXPECTED[i] === x)).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_TEXT;\n const chunks = [];\n const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)))\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizer.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n\n describe('punctuation handling', () => {\n const tokenizerPunct = new WordTokenizer(false);\n\n it('should tokenize words correctly', () => {\n expect(\n tokenizerPunct.tokenize(WORDS_PUNCT_TEXT).every((x, i) => WORDS_PUNCT_EXPECTED[i] === x),\n ).toBeTruthy();\n });\n\n it('should stream tokenize words correctly', async () => {\n const pattern = [1, 2, 4];\n let text = WORDS_PUNCT_TEXT;\n const chunks = [];\n const patternIter = Array(\n Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0)),\n )\n .fill(pattern)\n .flat()\n [Symbol.iterator]();\n\n for (const size of patternIter) {\n if (!text) break;\n chunks.push(text.slice(undefined, size));\n text = text.slice(size);\n }\n const stream = tokenizerPunct.stream();\n for (const chunk of chunks) {\n stream.pushText(chunk);\n }\n stream.endInput();\n stream.close();\n\n for (const x of WORDS_PUNCT_EXPECTED) {\n await stream.next().then((value) => {\n if (value.value) {\n expect(value.value.token).toStrictEqual(x);\n }\n });\n }\n });\n });\n });\n describe('hyphenateWord', () => {\n it('should hyphenate correctly', () => {\n HYPHENATOR_TEXT.forEach((x, i) => {\n expect(hyphenateWord(x)).toStrictEqual(HYPHENATOR_EXPECTED[i]);\n });\n });\n });\n describe('splitParagraphs', () => {\n it('should tokenize paragraphs correctly', () => {\n PARAGRAPH_TEST_CASES.forEach(([a, b]) => {\n expect(splitParagraphs(a)).toStrictEqual(b);\n });\n });\n });\n});\n"],"mappings":"AAGA,SAAS,UAAU,QAAQ,UAAU;AACrC,SAAS,mBAAmB,eAAe,qBAAqB;AAChE,SAAS,uBAAuB;AAEhC,MAAM,OACJ;AAYF,MAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,aAAa;AACnB,MAAM,iBAAiB;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,mBACJ;AACF,MAAM,uBAAuB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,MAAM,kBAAkB,CAAC,WAAW,YAAY,iBAAiB,UAAU,WAAW,SAAS;AAC/F,MAAM,sBAAsB;AAAA,EAC1B,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,MAAM,QAAQ,IAAI;AAAA,EACnB,CAAC,OAAO,MAAM,MAAM,MAAM,MAAM;AAAA,EAChC,CAAC,OAAO,KAAK;AAAA,EACb,CAAC,OAAO,MAAM;AAAA,EACd,CAAC,OAAO,MAAM;AAChB;AAEA,MAAM,uBAA+D;AAAA,EACnE,CAAC,qBAAqB,CAAC,CAAC,qBAAqB,GAAG,EAAE,CAAC,CAAC;AAAA,EACpD;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,gBAAgB,GAAG,EAAE;AAAA,MACtB,CAAC,gBAAgB,IAAI,EAAE;AAAA,IACzB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,GAAG,EAAE;AAAA,MACjB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA,CAAC,wCAAwC,CAAC,CAAC,oCAAoC,GAAG,EAAE,CAAC,CAAC;AAAA,EACtF,CAAC,yCAAyC,CAAC,CAAC,qCAAqC,GAAG,EAAE,CAAC,CAAC;AAAA,EACxF;AAAA,IACE;AAAA,IACA,CAAC,CAAC,+CAA+C,GAAG,EAAE,CAAC;AAAA,EACzD;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA;AAAA,MACE,CAAC,WAAW,GAAG,CAAC;AAAA,MAChB,CAAC,WAAW,IAAI,EAAE;AAAA,IACpB;AAAA,EACF;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC;AAAA,EACH;AAAA,EACA;AAAA,IACE;AAAA;AAAA,IACA,CAAC,CAAC,0BAA0B,GAAG,EAAE,CAAC;AAAA,EACpC;AACF;AAEA,SAAS,aAAa,MAAM;AAC1B,WAAS,qBAAqB,MAAM;AAClC,UAAM,YAAY,IAAI,kBAAkB;AAExC,OAAG,uCAAuC,MAAM;AAC9C,aAAO,UAAU,SAAS,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IACxF,CAAC;AAED,OAAG,8CAA8C,YAAY;AAC3D,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,iBAAiB;AAC/B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,mBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACD,WAAS,iBAAiB,MAAM;AAC9B,UAAM,YAAY,IAAI,cAAc;AAEpC,OAAG,mCAAmC,MAAM;AAC1C,aAAO,UAAU,SAAS,UAAU,EAAE,MAAM,CAAC,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW;AAAA,IAC7F,CAAC;AAED,OAAG,0CAA0C,YAAY;AACvD,YAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,UAAI,OAAO;AACX,YAAM,SAAS,CAAC;AAChB,YAAM,cAAc,MAAM,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAC1F,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,iBAAW,QAAQ,aAAa;AAC9B,YAAI,CAAC,KAAM;AACX,eAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,eAAO,KAAK,MAAM,IAAI;AAAA,MACxB;AACA,YAAM,SAAS,UAAU,OAAO;AAChC,iBAAW,SAAS,QAAQ;AAC1B,eAAO,SAAS,KAAK;AAAA,MACvB;AACA,aAAO,SAAS;AAChB,aAAO,MAAM;AAEb,iBAAW,KAAK,gBAAgB;AAC9B,cAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,cAAI,MAAM,OAAO;AACf,mBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,UAC3C;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,CAAC;AAED,aAAS,wBAAwB,MAAM;AACrC,YAAM,iBAAiB,IAAI,cAAc,KAAK;AAE9C,SAAG,mCAAmC,MAAM;AAC1C;AAAA,UACE,eAAe,SAAS,gBAAgB,EAAE,MAAM,CAAC,GAAG,MAAM,qBAAqB,CAAC,MAAM,CAAC;AAAA,QACzF,EAAE,WAAW;AAAA,MACf,CAAC;AAED,SAAG,0CAA0C,YAAY;AACvD,cAAM,UAAU,CAAC,GAAG,GAAG,CAAC;AACxB,YAAI,OAAO;AACX,cAAM,SAAS,CAAC;AAChB,cAAM,cAAc;AAAA,UAClB,KAAK,KAAK,KAAK,SAAS,QAAQ,OAAO,CAAC,KAAK,QAAQ,MAAM,KAAK,CAAC,CAAC;AAAA,QACpE,EACG,KAAK,OAAO,EACZ,KAAK,EACL,OAAO,QAAQ,EAAE;AAEpB,mBAAW,QAAQ,aAAa;AAC9B,cAAI,CAAC,KAAM;AACX,iBAAO,KAAK,KAAK,MAAM,QAAW,IAAI,CAAC;AACvC,iBAAO,KAAK,MAAM,IAAI;AAAA,QACxB;AACA,cAAM,SAAS,eAAe,OAAO;AACrC,mBAAW,SAAS,QAAQ;AAC1B,iBAAO,SAAS,KAAK;AAAA,QACvB;AACA,eAAO,SAAS;AAChB,eAAO,MAAM;AAEb,mBAAW,KAAK,sBAAsB;AACpC,gBAAM,OAAO,KAAK,EAAE,KAAK,CAAC,UAAU;AAClC,gBAAI,MAAM,OAAO;AACf,qBAAO,MAAM,MAAM,KAAK,EAAE,cAAc,CAAC;AAAA,YAC3C;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,WAAS,iBAAiB,MAAM;AAC9B,OAAG,8BAA8B,MAAM;AACrC,sBAAgB,QAAQ,CAAC,GAAG,MAAM;AAChC,eAAO,cAAc,CAAC,CAAC,EAAE,cAAc,oBAAoB,CAAC,CAAC;AAAA,MAC/D,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACD,WAAS,mBAAmB,MAAM;AAChC,OAAG,wCAAwC,MAAM;AAC/C,2BAAqB,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACvC,eAAO,gBAAgB,CAAC,CAAC,EAAE,cAAc,CAAC;AAAA,MAC5C,CAAC;AAAA,IACH,CAAC;AAAA,EACH,CAAC;AACH,CAAC;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future, shortuuid } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n language: string;\n speed: number;\n newSentenceDelay: number;\n sentenceTokenizer: SentenceTokenizer;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n #task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = shortuuid('SG_');\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAqC;AACrC,sBAA2B;AAE3B,yBAA6B;AAC7B,sBAAsB;AAEtB,mBAAsD;AAGtD,MAAM,uBAAuB;AAWtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,sBAAM,kBAAkB;AAAA,EAC/C,eAAe,sBAAM;AAAA,EACrB,YAAY,sBAAM;AACpB;AAmBO,MAAM,8BAA+B,gCAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,oBAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,gCAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,gCAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,2BAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AAtHvB;AAuHI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,YAAQ,wBAAU,KAAK;AAC7B,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qCAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qCAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future, shortuuid } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n /** Language code for transcription. */\n language: string;\n /** Speech speed multiplier. */\n speed: number;\n /** Delay between sentences in milliseconds. */\n newSentenceDelay: number;\n /** Tokenizer for splitting text into sentences. */\n sentenceTokenizer: SentenceTokenizer;\n /** Function to hyphenate words. */\n hyphenateWord: (word: string) => string[];\n /** Function to split text into words with positions. */\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n #task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = shortuuid('SG_');\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAqC;AACrC,sBAA2B;AAE3B,yBAA6B;AAC7B,sBAAsB;AAEtB,mBAAsD;AAGtD,MAAM,uBAAuB;AAiBtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,sBAAM,kBAAkB;AAAA,EAC/C,eAAe,sBAAM;AAAA,EACrB,YAAY,sBAAM;AACpB;AAmBO,MAAM,8BAA+B,gCAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,oBAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,gCAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,gCAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,2BAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AA5HvB;AA6HI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,YAAQ,wBAAU,KAAK;AAC7B,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qCAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qCAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
|
package/dist/transcription.d.cts
CHANGED
|
@@ -3,11 +3,17 @@ import { AudioFrame } from '@livekit/rtc-node';
|
|
|
3
3
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
4
4
|
import type { SentenceTokenizer } from './tokenize/tokenizer.js';
|
|
5
5
|
export interface TextSyncOptions {
|
|
6
|
+
/** Language code for transcription. */
|
|
6
7
|
language: string;
|
|
8
|
+
/** Speech speed multiplier. */
|
|
7
9
|
speed: number;
|
|
10
|
+
/** Delay between sentences in milliseconds. */
|
|
8
11
|
newSentenceDelay: number;
|
|
12
|
+
/** Tokenizer for splitting text into sentences. */
|
|
9
13
|
sentenceTokenizer: SentenceTokenizer;
|
|
14
|
+
/** Function to hyphenate words. */
|
|
10
15
|
hyphenateWord: (word: string) => string[];
|
|
16
|
+
/** Function to split text into words with positions. */
|
|
11
17
|
splitWords: (words: string) => [string, number, number][];
|
|
12
18
|
}
|
|
13
19
|
export declare const defaultTextSyncOptions: TextSyncOptions;
|
package/dist/transcription.d.ts
CHANGED
|
@@ -3,11 +3,17 @@ import { AudioFrame } from '@livekit/rtc-node';
|
|
|
3
3
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
4
4
|
import type { SentenceTokenizer } from './tokenize/tokenizer.js';
|
|
5
5
|
export interface TextSyncOptions {
|
|
6
|
+
/** Language code for transcription. */
|
|
6
7
|
language: string;
|
|
8
|
+
/** Speech speed multiplier. */
|
|
7
9
|
speed: number;
|
|
10
|
+
/** Delay between sentences in milliseconds. */
|
|
8
11
|
newSentenceDelay: number;
|
|
12
|
+
/** Tokenizer for splitting text into sentences. */
|
|
9
13
|
sentenceTokenizer: SentenceTokenizer;
|
|
14
|
+
/** Function to hyphenate words. */
|
|
10
15
|
hyphenateWord: (word: string) => string[];
|
|
16
|
+
/** Function to split text into words with positions. */
|
|
11
17
|
splitWords: (words: string) => [string, number, number][];
|
|
12
18
|
}
|
|
13
19
|
export declare const defaultTextSyncOptions: TextSyncOptions;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transcription.d.ts","sourceRoot":"","sources":["../src/transcription.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAMjF,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;IAC1C,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;CAC3D;AAED,eAAO,MAAM,sBAAsB,EAAE,eAOpC,CAAC;AAeF,KAAK,aAAa,GAAG;IACnB,WAAW,EAAE,CAAC,IAAI,EAAE,oBAAoB,KAAK,IAAI,CAAC;CACnD,CAAC;oDAEoE,aAAa,aAAa,CAAC;AAAjG,qBAAa,qBAAsB,SAAQ,0BAAuD;;gBAsBpF,IAAI,EAAE,eAAe;IAOjC,SAAS,CAAC,KAAK,EAAE,UAAU;IAU3B,QAAQ,CAAC,IAAI,EAAE,MAAM;IAkBrB,mBAAmB;IAYnB,kBAAkB;IAalB,qBAAqB;IASrB,sBAAsB;IAKtB,IAAI,UAAU,IAAI,MAAM,CAEvB;IAEK,KAAK,CAAC,SAAS,EAAE,OAAO;CA6J/B"}
|
|
1
|
+
{"version":3,"file":"transcription.d.ts","sourceRoot":"","sources":["../src/transcription.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAMjF,MAAM,WAAW,eAAe;IAC9B,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,+CAA+C;IAC/C,gBAAgB,EAAE,MAAM,CAAC;IACzB,mDAAmD;IACnD,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,mCAAmC;IACnC,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;IAC1C,wDAAwD;IACxD,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;CAC3D;AAED,eAAO,MAAM,sBAAsB,EAAE,eAOpC,CAAC;AAeF,KAAK,aAAa,GAAG;IACnB,WAAW,EAAE,CAAC,IAAI,EAAE,oBAAoB,KAAK,IAAI,CAAC;CACnD,CAAC;oDAEoE,aAAa,aAAa,CAAC;AAAjG,qBAAa,qBAAsB,SAAQ,0BAAuD;;gBAsBpF,IAAI,EAAE,eAAe;IAOjC,SAAS,CAAC,KAAK,EAAE,UAAU;IAU3B,QAAQ,CAAC,IAAI,EAAE,MAAM;IAkBrB,mBAAmB;IAYnB,kBAAkB;IAalB,qBAAqB;IASrB,sBAAsB;IAKtB,IAAI,UAAU,IAAI,MAAM,CAEvB;IAEK,KAAK,CAAC,SAAS,EAAE,OAAO;CA6J/B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future, shortuuid } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n language: string;\n speed: number;\n newSentenceDelay: number;\n sentenceTokenizer: SentenceTokenizer;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n #task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = shortuuid('SG_');\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n }\n}\n"],"mappings":"AAGA,SAAS,4BAA4B;AACrC,SAAS,kBAAkB;AAE3B,SAAS,oBAAoB;AAC7B,SAAS,aAAa;AAEtB,SAAS,oBAAoB,QAAQ,iBAAiB;AAGtD,MAAM,uBAAuB;AAWtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,MAAM,kBAAkB;AAAA,EAC/C,eAAe,MAAM;AAAA,EACrB,YAAY,MAAM;AACpB;AAmBO,MAAM,8BAA+B,aAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,OAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,mBAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,mBAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,WAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AAtHvB;AAuHI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,QAAQ,UAAU,KAAK;AAC7B,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qBAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qBAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future, shortuuid } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n /** Language code for transcription. */\n language: string;\n /** Speech speed multiplier. */\n speed: number;\n /** Delay between sentences in milliseconds. */\n newSentenceDelay: number;\n /** Tokenizer for splitting text into sentences. */\n sentenceTokenizer: SentenceTokenizer;\n /** Function to hyphenate words. */\n hyphenateWord: (word: string) => string[];\n /** Function to split text into words with positions. */\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n #task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = shortuuid('SG_');\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n }\n}\n"],"mappings":"AAGA,SAAS,4BAA4B;AACrC,SAAS,kBAAkB;AAE3B,SAAS,oBAAoB;AAC7B,SAAS,aAAa;AAEtB,SAAS,oBAAoB,QAAQ,iBAAiB;AAGtD,MAAM,uBAAuB;AAiBtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,MAAM,kBAAkB;AAAA,EAC/C,eAAe,MAAM;AAAA,EACrB,YAAY,MAAM;AACpB;AAmBO,MAAM,8BAA+B,aAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,OAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,mBAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,mBAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,WAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AA5HvB;AA6HI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,QAAQ,UAAU,KAAK;AAC7B,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qBAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qBAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
|